John Amanatides Original version of mpz/pprime_p.c
-Paul Zimmermann mpn/generic/mul_fft.c, dc_divrem_n.c, rootrem.c,
- old mpz/powm.c, old toom3 code.
+Paul Zimmermann mpn/generic/mul_fft.c, now defunct dc_divrem_n.c,
+ rootrem.c, old mpz/powm.c, old toom3 code.
-Ken Weber mpn/generic/bdivmod.c, old mpn/generic/gcd.c
+Ken Weber Now defunct mpn/generic/bdivmod.c, old mpn/generic/gcd.c
-Bennet Yee mpz/jacobi.c mpz/legendre.c
+Bennet Yee Previous versions of mpz/jacobi.c mpz/legendre.c
Andreas Schwab mpn/m68k/lshift.asm, mpn/m68k/rshift.asm
-Robert Harley Old mpn/generic/mul_n.c, many files in mpn/arm
+Robert Harley Old mpn/generic/mul_n.c, previous versions of files in
+ mpn/arm
Linus Nordberg Random number framework, original autoconfery
-Kent Boortz MacOS 9 port
+Kent Boortz MacOS 9 port, now defunct.
Kevin Ryde Most x86 assembly, new autoconfery, and countless other
things (please see the GMP manual for complete list)
Pedro Gimeno Mersenne Twister random generator, other random number
revisions
-Jason Moxham mpz/fac_ui.c and gen-fac_ui.c
+Jason Moxham Previous versions of mpz/fac_ui.c and gen-fac_ui.c
-Niels Möller mpn/generic/hgcd2.c, gcd.c, gcdext.c, matrix22_mul.c,
- hgcd.c, gcdext_1.c, gcd_subdiv_step.c, gcd_lehmer.c,
+Niels Möller gen-jacobitab.c,
+ mpn/generic/hgcd2.c, hgcd.c, hgcd_step.c,
+ hgcd_appr.c, hgcd_matrix.c, hgcd_reduce.c,
+ gcd.c, gcdext.c, matrix22_mul.c,
+ gcdext_1.c, gcd_subdiv_step.c, gcd_lehmer.c,
gcdext_subdiv_step.c, gcdext_lehmer.c,
+ jacobi_2.c, jacbase.c, hgcd_jacobi.c, hgcd2_jacobi.c,
+ matrix22_mul1_inverse_vector.c,
toom_interpolate_7pts.c, mulmod_bnm1.c, dcpi1_bdiv_qr.c,
dcpi1_bdiv_q.c, sbpi1_bdiv_qr.c, sbpi1_bdiv_q.c,
toom_eval_dgr3_pm1.c, toom_eval_dgr3_pm2.c,
toom_eval_pm1.c, toom_eval_pm2.c, toom_eval_pm2exp.c,
- divexact.c, mpn/x86/invert_limb.asm,
- mpn/x86_64/invert_limb.asm, mpz/nextprime.c,
- mpz/divexact.c.
+ divexact.c, mod_1_1.c, div_qr_2.c,
+ div_qr_2n_pi1.c, div_qr_2u_pi1.c, broot.c,
+ brootinv.c,
+ mpn/x86/k7/invert_limb.asm, mod_1_1.asm,
+ mpn/x86_64/invert_limb.asm,
+ invert_limb_table.asm, mod_1_1.asm,
+ div_qr_2n_pi1.asm, div_qr_2u_pi1.asm,
+ mpn/x86_64/core2/aorsmul_1.asm,
+ mpz/nextprime.c, divexact.c, gcd.c, gcdext.c,
+ jacobi.c, combit.c, mini-gmp/mini-gmp.c.
Marco Bodrato mpn/generic/toom44_mul.c, toom4_sqr.c, toom53_mul.c,
- toom62_mul.c, toom43_mul.c, toom52_mul.c,
+ toom62_mul.c, toom43_mul.c, toom52_mul.c, toom54_mul.c,
toom_interpolate_6pts.c, toom_couple_handling.c,
toom63_mul.c, toom_interpolate_8pts.c,
toom6h_mul.c, toom6_sqr.c, toom_interpolate_12pts.c,
toom8h_mul.c, toom8_sqr.c, toom_interpolate_16pts.c,
mulmod_bnm1.c, sqrmod_bnm1.c, nussbaumer_mul.c,
toom_eval_pm2.c, toom_eval_pm2rexp.c,
- mullo_n.c, invert.c, invertappr.c.
-
-David Harvey mpn/x86_64/mul_basecase.asm
+ mullo_n.c, invert.c, invertappr.c;
+ mpz/fac_ui.c, 2fac_ui.c, mfac_uiui.c, oddfac_1.c,
+ primorial_ui.c, prodlimbs.c, goetgheluck_bin_uiui.c.
+
+David Harvey mpn/generic/add_err1_n.c, add_err2_n.c,
+ add_err3_n.c, sub_err1_n.c, sub_err2_n.c,
+ sub_err3_n.c, mulmid_basecase.c, mulmid_n.c,
+ toom42_mulmid.c,
+ mpn/x86_64/mul_basecase.asm, aors_err1_n.asm,
+ aors_err2_n.asm, aors_err3_n.asm,
+ mulmid_basecase.asm,
+ mpn/x86_64/core2/aors_err1_n.asm.
Martin Boij mpn/generic/perfpow.c
+
+Marc Glisse gmpxx.h improvements
-2012-05-06 Torbjorn Granlund <tege@gmplib.org>
+2013-09-29 Torbjorn Granlund <tege@gmplib.org>
- * Version 5.0.5 released.
+ * Version 5.1.3 released.
- * mpn/Makefile.am (TARG_DIST): Remove thumb, since directory now empty.
+ * Makefile.am (LIBGMP_LT_*, LIBGMPXX_LT_*): Bump version info.
+ * gmp-h.in: Bump version.
-2012-04-28 Torbjorn Granlund <tege@gmplib.org>
+2013-09-27 Niels Möller <nisse@lysator.liu.se>
- * mpn/thumb/add_n.s: Remove broken code.
- * mpn/thumb/sub_n.s: Likewise.
+ * NEWS: Mention the ia64 mpn_divrem_2 bugfix.
-2012-04-02 Torbjorn Granlund <tege@gmplib.org>
+2013-07-16 Torbjorn Granlund <tege@gmplib.org>
- * gmp-h.in (__GNU_MP_VERSION_PATCHLEVEL): Bump.
+ * doc/gmp.texi: Declare countless function arguments as 'const'.
- * Makefile.am (LIBGMP_LT_*, LIBGMPXX_LT_*, LIBMP_LT_*):
- Bump version info.
+2013-07-15 Torbjorn Granlund <tege@gmplib.org>
- * gmp-h.in (_GMP_H_HAVE_FILE): Test also __STDIO_LOADED (for VMS).
+ * mpn/generic/sb_div_sec.c: Compute inverse as floor(B^2/(dh+1)), per
+ Niels' suggestion.
+ * mpn/generic/sbpi1_div_sec.c: Remove inverse rounding-up code.
-2012-03-27 Torbjorn Granlund <tege@gmplib.org>
+2013-07-12 Torbjorn Granlund <tege@gmplib.org>
- * config.guess: Fix typo in coreisbr recognition.
+ * mpn/generic/sbpi1_div_sec.c: Partial rewrite.
-2012-03-07 Torbjorn Granlund <tege@gmplib.org>
+2013-06-19 Torbjorn Granlund <tege@gmplib.org>
- * config.guess: Handle AMD 11h correctly.
+ * mpn/powerpc64/p6/lshift.asm: Rewrite switching-into-loop code.
+ * mpn/powerpc64/p6/rshift.asm: Likewise.
+ * mpn/powerpc64/p6/lshiftc.asm: Likewise.
-2012-03-04 Marco Bodrato <bodrato@mail.dm.unipi.it>
+2013-06-17 Torbjorn Granlund <tege@gmplib.org>
- * tests/mpz/t-invert.c: Avoid testing mod 0.
- * doc/gmp.texi (mpz_invert): Specify mod 0 is not handled.
+ * mpn/powerpc64/p6/lshift.asm: Fix typo in label reference.
+ For 32-bit mode, zero extend `n' argument and split retval.
+ * mpn/powerpc64/p6/rshift.asm: Likewise.
+ * mpn/powerpc64/p6/lshiftc.asm: Likewise.
-2012-02-24 Torbjorn Granlund <tege@gmplib.org>
+2013-06-09 Marc Glisse <marc.glisse@inria.fr>
- * tests/mpn/logic.c: New file.
- * tests/mpn/Makefile.am (check_PROGRAMS): Add logic.
+ * mpn/generic/get_d.c (mpn_get_d): Avoid signed overflow.
+ * mpz/kronzs.c (mpz_kronecker_si): Use ABS_CAST.
- * tests/mpz/t-invert.c: New file.
- * tests/mpz/Makefile.am (check_PROGRAMS): Add t-invert.
+2013-05-22 Torbjorn Granlund <tege@gmplib.org>
-2012-02-11 Marco Bodrato <bodrato@mail.dm.unipi.it>
+ * doc/gmp.texi (Reporting Bugs): Ask for configure's output.
- * doc/gmp.texi (Multiplication Algorithms): Add Toom[68]'n'half.
+ * mpn/ia64/divrem_2.asm: Don't clobber f16-f18.
-2012-02-10 Torbjorn Granlund <tege@gmplib.org>
+2013-05-20 Torbjorn Granlund <tege@gmplib.org>
- * Version 5.0.4 released.
+ * Version 5.1.2 released.
-2012-02-09 Marco Bodrato <bodrato@mail.dm.unipi.it>
+ * mpn/arm/udiv.asm: Change spacing to work around binutils bug.
- * gmp-impl.h (mpn_toom3*_itch): Support any recursion depth.
- * tests/refmpn.c (refmpn_mul): Restore tight allocations.
+2013-05-16 Torbjorn Granlund <tege@gmplib.org>
-2012-02-09 Marc Glisse <marc.glisse@inria.fr>
+ * Makefile.am (LIBGMP_LT_*, LIBGMPXX_LT_*): Bump version info.
+ * gmp-h.in: Bump version.
- * gmp-impl.h (ABS_CAST): New macro.
- * mpf/cmp_si.c: Use ABS_CAST.
- * mpf/get_si.c: Use ABS_CAST.
- * mpf/iset_si.c: Use ABS_CAST.
- * mpf/set_si.c: Use ABS_CAST.
- * mpq/set_si.c: Use ABS_CAST.
- * mpz/cmp_si.c: Use ABS_CAST.
- * mpz/get_si.c: Use ABS_CAST.
- * mpz/iset_si.c: Use ABS_CAST.
- * mpz/mul_i.h: Use ABS_CAST.
- * mpz/set_si.c: Use ABS_CAST.
+ * tests/misc.c (tests_hardware_getround, tests_hardware_setround):
+ Avoid assembly dependency unless WANT_ASSEMBLY.
-2012-02-09 Torbjorn Granlund <tege@gmplib.org>
+ * configure.ac (WANT_ASSEMBLY): Conditionally define.
- * gmp-h.in (__GNU_MP_VERSION_PATCHLEVEL): Bump.
+2013-05-14 Torbjorn Granlund <tege@gmplib.org>
- * Makefile.am (LIBGMP_LT_*, LIBGMPXX_LT_*, LIBMP_LT_*):
- Bump version info.
+ * configure.ac (arm1156): Don't fall back to plain v6 compiler option.
-2012-02-08 Torbjorn Granlund <tege@gmplib.org>
+2013-05-10 Torbjorn Granlund <tege@gmplib.org>
- * mpn/powerpc32/divrem_2.asm: Fix off-by-one condition in invert_limb
- code.
+ * mpn/x86/p6/mmx/gmp-mparam.h: Set down SQR_TOOM2_THRESHOLD to parent
+ directory value.
-2012-02-08 Niels Möller <nisse@lysator.liu.se>
+2013-05-09 Torbjorn Granlund <tege@gmplib.org>
- * doc/gmp.texi (mpz_gcdext): Describe cofactor canonicalization.
- (mpn_gcdext): Copied doc updates from main repo.
+ * mpn/x86_64/bd1/mul_1.asm: Fix typo.
-2012-02-07 Niels Möller <nisse@lysator.liu.se>
+2013-04-29 Torbjorn Granlund <tege@gmplib.org>
- * mpn/generic/gcdext.c (mpn_gcdext): Fixed assert, related to the
- special case A = (2k+1) G, B = 2 G.
+ * configure.ac (sparc-*-*): Recognise t5 along with t3 and t4.
+ Remove sparc64/ultrasparct1 from path_64 for T3, T4, and T5.
-2012-02-06 Niels Möller <nisse@lysator.liu.se>
+2013-04-27 Mike Frysinger <vapier@gentoo.org>
- * mpn/generic/hgcd.c (hgcd_matrix_update_q): Fixed carry handling
- bug.
+ * configure.ac (arm*-*-*): Set up path also for plainest CPU variants.
- * tests/mpz/t-gcd.c (main): Omit tests with urandomb operands.
- * tests/mpn/t-hgcd.c (main): Likewise.
+2013-03-19 Torbjorn Granlund <tege@gmplib.org>
-2012-02-05 Niels Möller <nisse@lysator.liu.se>
+ * tests/arm32check.c: Get printing of clobbered register right.
- * tests/mpz/t-gcd.c (main): Add tests with rrandomb operands.
- * tests/mpn/t-hgcd.c (main): Likewise.
+ * tests/Makefile.am (EXTRA_libtests_la_SOURCES): Add arm32call.asm and
+ arm32check.c.
- * mpn/generic/gcdext_subdiv_step.c (mpn_gcdext_subdiv_step):
- Bugfix, in u1 += q * u0, handle carry in all cases. Also normalize
- the product q * u0.
+2013-03-18 Torbjorn Granlund <tege@gmplib.org>
-2012-02-04 Marco Bodrato <bodrato@mail.dm.unipi.it>
+ * configure.ac (arm*-*-*): Define CALLING_CONVENTIONS_OBJS.
- * tests/refmpn.c (refmpn_mul): More conservative allocations.
+ * tests/arm32call.asm: New file.
+ * tests/arm32check.c: New file.
-2012-02-03 Torbjorn Granlund <tege@gmplib.org>
+ * mpn/arm/arm-defs.m4 (LEA): Rewrite to properly handle repeated use.
+ (EPILOGUE_cpu): Define.
- * mpn/x86_64/bd1/gmp-mparam.h: New file.
+ * mpn/x86/darwin.m4 (m4append): Move definition from here...
+ * mpn/asm-defs.m4: ...to here.
- * longlong.h (udiv_qrnnd from sdiv_qrnnd): Declare udiv_w_sdiv.
+2013-03-17 Marc Glisse <marc.glisse@inria.fr>
- * mpn/generic/udiv_w_sdiv.c: Use c89 function header.
+ * tests/cxx/t-do-exceptions-work-at-all-with-this-compiler.cc: New file.
+ * tests/cxx/Makefile.am: Add new file. Reorder the tests.
-2012-02-02 Marco Bodrato <bodrato@mail.dm.unipi.it>
+2013-03-11 Torbjorn Granlund <tege@gmplib.org>
- * mpn/generic/toom_interpolate_16pts.c: Correct an unlikely 32-bit bug.
+ * tests/mpz/t-powm_ui.c: Test larger arguments.
-2012-02-02 Torbjorn Granlund <tege@gmplib.org>
+ * mpz/powm_ui.c (mod): Adhere to mpn_mu_div_qr's overlap requirements.
- * mpn/generic/toom63_mul.c: Allow s+t==n by adjusting an ASSERT.
- * mpn/generic/toom_interpolate_8pts.c: Perform final incr iff s+t!=n.
+2013-02-25 Niels Möller <nisse@lysator.liu.se>
- * tests/mpn/t-toom6h.c (MIN_BN): Make more consistent with ASSERT in
- tested function.
+ * mini-gmp/tests/t-double.c (testmain): Declare double variables
+ as volatile, to drop extended precision.
-2012-02-01 Torbjorn Granlund <tege@gmplib.org>
+ * mini-gmp/tests/testutils.c (testfree): New function. Use it
+ everywhere where test programs deallocate storage allocated via
+ the mini-gmp allocation functions, including uses of mpz_get_str
+ for various test failure messages.
- * tests/mpn/t-mul.c: New file.
- * tests/mpn/Makefile.am: Compile it.
+2013-02-20 Niels Möller <nisse@lysator.liu.se>
-2012-01-31 Torbjorn Granlund <tege@gmplib.org>
+ * tests/mpq/t-get_d.c (check_random): Rewrote to make test less
+ dependent on float operations. Fixes problem with m68k-linux and
+ extended float precision.
- * mpn/generic/powm_sec.c (SQR_BASECASE_LIM): New name for
- SQR_BASECASE_MAX.
- (SQR_BASECASE_LIM, fat variant): Define to read __gmpn_cpuvec.
- (SQR_BASECASE_LIM, native variant): Define to SQR_TOOM2_THRESHOLD
- straight, without arithmetic.
- (mpn_local_sqr): Use BELOW_THRESHOLD as per Marco's suggestion.
+2013-02-19 Marco Bodrato <bodrato@mail.dm.unipi.it>
-2012-01-30 Torbjorn Granlund <tege@gmplib.org>
+ * mini-gmp/mini-gmp.c: Move asserts to work-around a compiler bug.
- * tests/mpz/t-powm.c: Ensure all sizes are seen.
+ * mini-gmp/tests/t-reuse.c: Fix typo causing the same negation
+ condition to be applied to all operands. (See 2013-02-03, Torbjorn)
-2012-01-27 Torbjorn Granlund <tege@gmplib.org>
+2013-02-17 Marc Glisse <marc.glisse@inria.fr>
- * Version 5.0.3 released.
+ * cxx/osdoprnti.cc: Use <stdarg.h> and <string.h> rather than <cstdarg>
+ and <cstring> (revert 2002-12-21).
- * Upgrade to libtool 2.4.2.
+ * tests/cxx/Makefile.am: Link with libm.
+ * tests/cxx/t-ops2.cc: Comment about more tests. Use <math.h> rather
+ than <cmath> and using namespace. Don't include <iostream>.
-2012-01-25 Torbjorn Granlund <tege@gmplib.org>
+2013-02-16 Marc Glisse <marc.glisse@inria.fr>
- * tune/tuneup.c: Remove unused tuneup variables.
+ * gmpxx.h: Include <algorithm>.
-2012-01-23 Torbjorn Granlund <tege@gmplib.org>
+2013-02-16 Torbjorn Granlund <tege@gmplib.org>
- * mpn/powerpc64/mode64/p6/gmp-mparam.h: New file.
- * mpn/powerpc64/mode64/p7/gmp-mparam.h: New file.
- * mpn/x86_64/bobcat/gmp-mparam.h: New file.
+ * mpn/x86_64/x86_64-defs.m4 (PROTECT): Emit '.hidden' instead of
+ '.protected' to please Sun's assembler, but also for semantic reasons.
-2012-01-18 Marc Glisse <marc.glisse@inria.fr>
+2013-02-10 Torbjorn Granlund <tege@gmplib.org>
- * doc/gmp.texi (mpf_class::mpf_class): Use mp_bitcnt_t.
+ * Version 5.1.1 released.
-2012-01-17 Torbjorn Granlund <tege@gmplib.org>
+2013-02-07 Marco Bodrato <bodrato@mail.dm.unipi.it>
- * gmp-h.in (__GNU_MP_VERSION_PATCHLEVEL): Bump.
+ * tune/speed.h (SPEED_ROUTINE_MPN_MUL): Use operands from struct s.
+ * tune/README: Document new parameter syntax mpn_mul.<#> .
- * Makefile.am (LIBGMP_LT_*, LIBGMPXX_LT_*, LIBMP_LT_*):
- Bump version info.
+2013-02-06 Niels Möller <nisse@lysator.liu.se>
- * configure.in: Add ultrasparc T4 support.
+ * tests/mpz/t-jac.c (check_large_quotients): Rewrote. Now uses a
+ more efficient method for generating the test inputs.
- * demos/isprime.c (main): Run 25 millerrabin tests.
+2013-02-05 Torbjorn Granlund <tege@gmplib.org>
-2012-01-15 Niels Möller <nisse@lysator.liu.se>
+ * tests/mpn/t-div.c: Limit random dbits to avoid an infinite loop.
- * mpz/scan0.c (mpz_scan0): Use ~(mp_bitcnt_t) 0, rather than
- ULONG_MAX, when returning "infinity".
- * mpz/scan1.c (mpz_scan1): Likewise.
+2013-02-03 Torbjorn Granlund <tege@gmplib.org>
-2011-12-30 Torbjorn Granlund <tege@gmplib.org>
+ * tests/mpz/reuse.c: Fix typo causing the same negation condition to be
+ applied to all operands. Fix condition for when to invoke mpz_remove.
+ Make different-size random operands.
- * mpz/hamdist.c: Fix typo in a return statement.
+2013-02-02 Marco Bodrato <bodrato@mail.dm.unipi.it>
-2011-12-08 Torbjorn Granlund <tege@gmplib.org>
+ * mpz/remove.c: Correct the sign in case of reuse.
- * mpn/generic/powm_sec.c: Handle fat binaries better.
+2013-02-01 Torbjorn Granlund <tege@gmplib.org>
-2011-12-07 Torbjorn Granlund <tege@gmplib.org>
+ * gmp-impl.h (DIGITS_IN_BASE_PER_LIMB): Add a cast.
+ (LIMBS_PER_DIGIT_IN_BASE): Likewise.
- * configure.in: Fix typo making HAVE_NATIVE_mpn_X fail for fat
- functions.
+ * tests/refmpn.c (refmpn_mul): Use toom6h instead of toom44 for the
+ largest operands.
- * mpn/x86_64/fat/fat.c (__gmpn_cpuvec_init): Add a missing break.
+2013-01-31 Torbjorn Granlund <tege@gmplib.org>
-2011-12-01 Torbjorn Granlund <tege@gmplib.org>
+ * mpn/generic/toom44_mul.c: Revert last change in favour of a simple
+ change (thanks Marco!).
+ * mpn/generic/toom4_sqr.c: Likewise.
- * mpn/x86_64/fat/fat.c: Copy fake cpuid code from x86/fat/fat.c.
+2013-01-30 Torbjorn Granlund <tege@gmplib.org>
- * gmp-impl.h (DECL_divexact_1): Fix typo in return type.
+ * mpn/generic/toom44_mul.c (MAYBE_mul_toom44): Take toom6h and toom8h
+ into account, using new macro MUL_NEXTALG_THRESHOLD.
+ * mpn/generic/toom4_sqr.c (MAYBE_sqr_toom4): Likewise.
-2011-11-28 Torbjorn Granlund <tege@gmplib.org>
+2013-01-26 Marco Bodrato <bodrato@mail.dm.unipi.it>
- * mpn/generic/udiv_w_sdiv.c: Use CNST_LIMB for some constants.
+ * mpz/remove.c: init+set=init_set, cast before shifting.
-2011-11-25 Torbjorn Granlund <tege@gmplib.org>
+ * mpz/cmp_si.c: Use ABS_CAST.
- * configure.in: Overhaul x86/x86_64 support, merging three case
- statements into one.
+2013-01-26 Torbjorn Granlund <tege@gmplib.org>
-2011-11-24 Torbjorn Granlund <tege@gmplib.org>
+ * tests/mpn/logic.c: Set things up to always test library logops, not
+ gmp-impl.h's inlined variants. Test also mpn_com.
- * doc/gmp.texi (Formatted Output Strings): Clarify rules for mpf_t
- precision.
+ * tests/mpn/t-mod_1.c: Test also mpn_mod_1s_3p.
-2011-11-21 Torbjorn Granlund <tege@gmplib.org>
+ * mpn/generic/mod_1_3.c: Swap some lines to make it similar to mod_1_4.c.
- * gmp-h.in (__GNU_MP_RELEASE): Renamed from typo name.
+ * tests/mpz/reuse.c: Fix typo in last change.
-2011-11-20 Torbjorn Granlund <tege@gmplib.org>
+2013-01-23 Marco Bodrato <bodrato@mail.dm.unipi.it>
- * configure.in: Split x86 CPUs into more subtypes for more accurate
- passing of gcc flags.
+ * mini-gmp/mini-gmp.c (mpz_cmpabs_d, mpz_cmp_d): Simplify.
+ (mpz_set_str): Behaviour more adherent to the real GMP.
- * configure.in: Pass -m32 for powerpc64 with abi=32, using via _maybe
- mechanism.
+ * mini-gmp/tests/t-str.c: Cast size_t to unsigned long, for printf.
+ * mini-gmp/tests/t-import.c: Likewise.
+ * mini-gmp/tests/t-comb.c: Remove an unused var.
+ * mini-gmp/tests/t-div.c: Remove unused args passed to fprintf.
+ * mini-gmp/tests/t-double.c: Use float immediates with float vars.
-2011-11-15 Torbjorn Granlund <tege@gmplib.org>
+2013-01-22 Torbjorn Granlund <tege@gmplib.org>
- * mpn/generic/powm_sec.c (mpn_local_sqr): Remove forgotten TMP_* calls.
- (redcify): Likewise.
- (mpn_powm_sec): Likewise.
+ * Makefile.am (LIBGMP_LT_*, LIBGMPXX_LT_*): Bump version info.
+ * gmp-h.in: Bump version.
- * mpn/generic/powm_sec.c (mpn_powm_sec): Use mpn_tabselect also in
- initialisation.
+ * tests/mpz/reuse.c: Delete always zero 'failures' and code depending
+ on it. Replace rotating progress with real measure.
-2011-10-15 Torbjorn Granlund <tege@gmplib.org>
+ * Makefile.am (check-mini-gmp): Fix typo in last change.
- * configure.in (s390): Rewrite support to handle known CPUs.
- * config.guess: Recognise s390 CPUs.
- * config.sub: Match s390 CPUs.
- * acinclude.m4 (S390_PATTERN, S390X_PATTERN): New defines.
+2013-01-22 Niels Möller <nisse@lysator.liu.se>
-2011-10-14 Torbjorn Granlund <tege@gmplib.org>
+ * mini-gmp/mini-gmp.c (mpz_cmp_d): Simplified, just sort out
+ signs, then call mpz_cmpabs_d.
- From Per Olofsson:
- * mpn/generic/popham.c: Add __GMP_NOTHROW to make it match gmp.h.
+ * mini-gmp/tests/testutils.h: Include stdio.h and stdlib.h.
+ (numberof): New define.
- * configure.in: AC_DEFINE HAVE_HOST_CPU_s390_zarch.
- * longlong.h (s390): Use it.
- (s390 umul_ppmm): Fix typo in pure C variant.
+ * mini-gmp/tests/t-cmp_d.c: New file, copied from
+ tests/mpz/t-cmp_d.c with minor changes.
+ * mini-gmp/tests/Makefile (CHECK_PROGRAMS): Added t-cmp_d.
-2011-10-13 Torbjorn Granlund <tege@gmplib.org>
+ * mini-gmp/mini-gmp.c (mpz_cmpabs_d): New function.
+ * mini-gmp/mini-gmp.h: Declare it.
- * longlong.h (s390): Put back an accidentally deleted #else.
+2013-01-21 Niels Möller <nisse@lysator.liu.se>
- * configure.in (s390): Unset extra_functions for s390x.
+ * mini-gmp/tests/t-str.c (testmain): Test mpz_out_str, using
+ the tmpfile function for i/o.
-2011-10-12 Torbjorn Granlund <tege@gmplib.org>
+2013-01-20 Torbjorn Granlund <tege@gmplib.org>
- * longlong.h (s390 umul_ppmm): With new-enough gcc, avoid asm.
+ * Makefile.am (check-mini-gmp): Set also DYLD_LIBRARY_PATH for the
+ benefit of Darwin.
- From Andreas Krebbel:
- * longlong.h (s390 umul_ppmm): Support 32-bit limbs with gcc using
- 64-bit registers.
- (s390 udiv_qrnnd): Likewise.
+ * tests/mpn/t-div.c: Test mpn_sb_div_qr_sec and mpn_sb_div_r_sec.
+ (main): Separate divisor into normalised (dnp) and unnormalised (dup),
+ pass appropriate variant to each function.
+ (main): Make negative `test' index value mean divisor bits, for better
+ small operands coverage.
+ (main): Put random junk at qp[] instead of zeroing.
-2011-10-11 Torbjorn Granlund <tege@gmplib.org>
+ * tests/mpz/t-remove.c: Back out last change which left `divisor_size'
+ uninitialised; achieve change's aim with a parameter tweak.
- * configure.in (s390x): Pass -mzarch to gcc in 32-bit mode.
+2013-01-20 Marco Bodrato <bodrato@mail.dm.unipi.it>
- * longlong.h (s390x): Add __CLOBBER_CC for relevant asm patterns.
+ * mini-gmp/tests/testutils.c (testhalves): New function, test default
+ memory functions.
+ * mini-gmp/tests/testutils.h (testhalves): Declare it.
+ * mini-gmp/tests/t-logops.c: Use testhalves.
-2011-10-10 Torbjorn Granlund <tege@gmplib.org>
+ * mini-gmp/mini-gmp.c (mpz_init_set_str): New function.
+ * mini-gmp/mini-gmp.h (mpz_init_set_str): Declare it.
+ * mini-gmp/tests/t-str.c: Test mpz_init_set_str.
- From Marco Trudel:
- * tests/mpz/t-scan.c (check_ref): Fix loop end bound.
+2013-01-20 Torbjorn Granlund <tege@gmplib.org>
-2011-10-09 Torbjorn Granlund <tege@gmplib.org>
+ * tests/memory.c (PTRLIMB): New macro, used for conformant casting.
- * longlong.h (s390x): Put back UDItype casts to make gcc reloading use
- right more for constants.
- (s390x count_leading_zeros): Disable until we support z10 specifically.
- (s390x add_ssaaaa): Remove algsi/slgsi until we support z10.
+2013-01-19 Marco Bodrato <bodrato@mail.dm.unipi.it>
-2011-10-07 Torbjorn Granlund <tege@gmplib.org>
+ * mini-gmp/tests/t-double.c (testmain): Get the current free
+ function using mp_get_memory_functions.
+ * mini-gmp/tests/t-str.c (testmain): Likewise.
- * longlong.h (s390): Add 32-bit zarch umul_ppmm and udiv_qrnnd.
- (s390): Overhaul 32-bit and 64-bit code.
+ * mini-gmp/tests/testutils.h (tu_free): Remove declaration.
-2011-10-04 Torbjorn Granlund <tege@gmplib.org>
+ * mini-gmp/tests/testutils.c (block_check, tu_free): Mark static.
- * mpn/Makefile.am (TARG_DIST): Add s390_32 and s390_64, remove s390.
+ * tests/mpz/t-set_str.c: Check also failing conditions.
- * doc/gmp.texi (Custom Allocation): Rephrase a paragraph.
+ * tests/mpz/t-remove.c: Test removal of 1.
- * demos/factorize.c: Run 25 Miller-Rabin tests.
+2013-01-18 Niels Möller <nisse@lysator.liu.se>
- * mpz/nextprime.c: Run 25 mpz_millerrabin tests (was 10).
+ * mini-gmp/tests/t-str.c (test_small): New function, exercising
+ parsing of whitespace and base prefixes.
+ (testmain): Call it.
-2011-10-03 Torbjorn Granlund <tege@gmplib.org>
+ * mini-gmp/tests/t-gcd.c (gcdext_valid_p): Fixed memory leak.
- * configure.in: Support s390x.
+ * mini-gmp/tests/t-double.c (testmain): Call tu_free rather than
+ free, for storage allocated by mpz_get_str.
+ * mini-gmp/tests/t-str.c (testmain): Likewise.
- * longlong.h: Add support for 64-bit s390x.
+ * mini-gmp/tests/testutils.c (block_init, block_check): New
+ functions.
+ (tu_alloc, tu_realloc, tu_free): New functions.
+ (main): Use mp_set_memory_functions.
+ * mini-gmp/tests/testutils.h (tu_free): Declare.
- * mpn/s390_64: New directory.
- * mpn/s390_32: Directory renamed from mpn/s390.
+ * mini-gmp/tests/testutils.h: New file, declarations for test
+ programs.
-2011-09-26 Torbjorn Granlund <tege@gmplib.org>
+ * mini-gmp/tests/testutils.c (main): New file, with shared main
+ function for all the test programs. Also includes mini-gmp.c.
+ Calls testmain after initialization. All other test programs
+ updated to define testmain rather than main.
- * mpn/sh/sh2/submul_1.s: Make this old submul_1 implementation
- actually compute intended function.
+2013-01-18 Marco Bodrato <bodrato@mail.dm.unipi.it>
-2011-09-25 Torbjorn Granlund <tege@gmplib.org>
+ * mini-gmp/tests/t-signed.c: Slightly larger coverage.
+ * mini-gmp/tests/t-double.c: Test also mpz_init_set_d.
- * mpn/sh: Migrate files to '.asm'.
- * configure.in: Recognise sh3 and sh4.
+2013-01-18 Torbjorn Granlund <tege@gmplib.org>
-2011-08-18 Torbjorn Granlund <tege@gmplib.org>
+ * mpn/generic/set_str.c (normalization_steps): Eliminate set-but-unused
+ variable.
- * printf/doprntf.c (__gmp_doprnt_mpf): For DOPRNT_CONV_FIXED, ask for
- one more digit.
+ * mini-gmp/tests/t-div.c: Test mpz_divisible_p and mpz_divisible_ui_p.
-2011-08-17 Torbjorn Granlund <tege@gmplib.org>
+ * tests/tests.h (TESTS_REPS): Fix printf argument type clashes.
- * mpf/sub.c: Fix typo in copy condition. Delay an allocation.
+ * mini-gmp/tests/t-div.c: Test also mpz_mod, mpz_mod_ui. Compare
+ mpz_divisible_p just to ceil, to save time.
-2011-08-10 Torbjorn Granlund <tege@gmplib.org>
+ * mini-gmp/mini-gmp.c: Prefix some names with GMP_.
- * tests/rand/t-lc2exp.c (check_bigc): Call abort after reporting error.
+2013-01-16 Marco Bodrato <bodrato@mail.dm.unipi.it>
-2011-07-15 Torbjorn Granlund <tege@gmplib.org>
+ * mini-gmp/tests/t-double.c: Test mpz_cmp_d.
+ * mini-gmp/mini-gmp.c (mpz_cmp_d): Correct multiword comparison.
- * mpn/arm/invert_limb.asm: Swap around some registers to silence 'as'
- warnings.
+ * mini-gmp/mini-gmp.c (mpz_set_str): Handle the empty string.
+ * mini-gmp/tests/t-str.c: Test base <= 0.
-2011-07-14 Torbjorn Granlund <tege@gmplib.org>
+2013-01-15 Niels Möller <nisse@lysator.liu.se>
- * mpn/generic/dcpi1_bdiv_q.c (mpn_dcpi1_bdiv_q): Get mpn_sub_1 size
- argument right.
+ * mini-gmp/tests/t-str.c (main): Use x->_mp_d rather than x[0]._mp_d.
+ * mini-gmp/tests/t-invert.c (main): Likewise.
-2011-07-04 Torbjorn Granlund <tege@gmplib.org>
+ * mini-gmp/tests/t-mul.c (main): Test mpn_mul_n and mpn_sqr.
- * tests/misc/t-locale.c: Disable test for mingw.
+ * mini-gmp/tests/hex-random.h (enum hex_random_op): New value
+ OP_SQR.
- * configure.in (x86_64 *-*-mingw*): Handle also cygwin here; clear out
- extra_functions_64.
+ * mini-gmp/tests/mini-random.c (mini_random_op3): Renamed, from...
+ (mini_random_op): ... old name. Updated callers.
+ (mini_random_op2): New function.
-2011-07-02 Torbjorn Granlund <tege@gmplib.org>
+ * mini-gmp/tests/hex-random.c (hex_random_op3): Renamed, from...
+ (hex_random_op): ... old name. Updated callers.
+ (hex_random_op2): New function.
- * config.guess: Don't print newline in x86 cpuid function.
- Rewrite x86-64 cpu recognition asm code to work under Windoze.
+2013-01-15 Marco Bodrato <bodrato@mail.dm.unipi.it>
-2011-06-16 Torbjorn Granlund <tege@gmplib.org>
+ * mini-gmp/tests/t-logops.c: Improve popcount/hamdist testing.
+ * mini-gmp/tests/t-signed.c: Test more cases.
- * acinclude.m4 (GMP_ASM_RODATA): Fix typo in 2011-04-10 change.
+2013-01-15 Torbjorn Granlund <tege@gmplib.org>
- * configure.in: Surround tr ranges with [] for portability.
+ From Mike Frysinger:
+ * configure.ac: Add x32 ABI for x86_64.
-2011-05-08 Marc Glisse <marc.glisse@inria.fr>
+2013-01-14 Niels Möller <nisse@lysator.liu.se>
- * doc/gmp.texi (gmp_randclass::get_f): Replace unsigned long
- with mp_bitcnt_t.
+ * mini-gmp/tests/t-str.c (main): Added tests for mpn_get_str and
+ mpn_set_str.
-2011-05-07 Torbjorn Granlund <tege@gmplib.org>
+2013-01-14 Marco Bodrato <bodrato@mail.dm.unipi.it>
- * Version 5.0.2 released.
+ * doc/gmp.texi (gmp_version): Remove "was used" repetition.
+ (Upward compatibility): Mention mpn_bdivmod, GMP 4 -> GMP 5.
- * gmp-h.in (__GNU_MP_VERSION_PATCHLEVEL): Bump.
+2013-01-13 Marc Glisse <marc.glisse@inria.fr>
- * Makefile.am (LIBGMP_LT_*, LIBGMPXX_LT_*, LIBMP_LT_*):
- Bump version info.
+ * doc/gmp.texi: Let mpn_sqrtrem reference mpn_perfect_square_p instead
+ of mpz_perfect_square_p.
-2011-05-05 Marc Glisse <marc.glisse@inria.fr>
+2013-01-11 Marco Bodrato <bodrato@mail.dm.unipi.it>
- [These changes were made after the 5.0.2 release, but inserted here to
- match the change chronology of the main repository.]
+ * mini-gmp/tests/t-comb.c: New test program, testing both
+ mpz_fac_ui and mpz_bin_uiui.
+ * mini-gmp/tests/Makefile (CHECK_PROGRAMS): Added t-comb.
- * mpn/x86_64/fat/fat.c: Update for Sandy Bridge.
- * config.guess: warning to keep it in sync with fat.c.
+ * mini-gmp/mini-gmp.c (mpz_mul_si): Simplify.
+ (mpz_mul_ui, mpz_mul, mpz_div_qr): Replace init+REALLOC with init2.
-2011-05-05 Torbjorn Granlund <tege@gmplib.org>
+ * mini-gmp/mini-gmp.c (NEG_CAST): New macro.
+ (mpz_mul_si, mpz_set_si, mpz_cmp_si): Use NEG_CAST.
- * mpn/x86_64/fat/fat_entry.asm: (PIC_OR_DARWIN): New symbol. Use it to
- work around Darwin problems.
+ * mini-gmp/mini-gmp.c (mpz_set_si, mpz_cmp_si): Simplify by using
+ the _ui variant.
-2011-05-02 Marc Glisse <marc.glisse@inria.fr>
+ * mini-gmp/tests/t-root.c: Use mpz_ui_pow_ui, when base fits an ui.
- * configfsf.guess: Update to version of 2011-02-02.
- * configfsf.sub: Update to version of 2011-03-23.
+ * mini-gmp/tests/t-mul.c: Test also mpz_mul_si.
+ * mini-gmp/tests/t-sub.c: Test also mpz_ui_sub.
-2011-04-30 Marc Glisse <marc.glisse@inria.fr>
+ * mini-gmp/mini-gmp.c (mpz_fits_slong_p): Correct range.
+ * mini-gmp/tests/t-signed.c: New test program, for get/set/cmp_si.
+ * mini-gmp/tests/Makefile (CHECK_PROGRAMS): Added t-signed.
- * gmp-h.in (mpz_cdiv_q_2exp): Use mp_bitcnt_t to match the definition
- and the documentation.
- (mpz_remove): Likewise.
- (mpf_eq): Likewise.
+ * mini-gmp/mini-gmp.c (mpz_hamdist): Handle different sizes.
+ * mini-gmp/tests/t-logops.c: Test also popcount and hamdist.
- * ltmain.sh: Remove.
- * .bootstrap: Let libtoolize generate ltmain.sh.
+2013-01-10 Marco Bodrato <bodrato@mail.dm.unipi.it>
- * doc/gmp.texi (mpf_urandomb): Explicit the fact that it does not
- change the precision.
+ * mpz/export.c: Less restrictive ASSERTs.
+ * mini-gmp/mini-gmp.c (mpz_export, mpz_import): Likewise.
+ * mini-gmp/tests/t-import.c: Test also size=0 or count=0.
-2011-04-28 Torbjorn Granlund <tege@gmplib.org>
+2013-01-10 Torbjorn Granlund <tege@gmplib.org>
- [This change was made after the 5.0.2 release, but inserted here to
- match the change chronology of the main repository.]
+ * mini-gmp/tests/t-import.c (main): Don't drop off function end.
- * configure.in (x86_64): Support bobcat specifically.
- (x86): Match bobcat and bulldozer, handle like k10.
+ * Makefile.am (check-mini-gmp): Set LD_LIBRARY_PATH to allow testing
+ with dynamic main GMP build.
-2011-04-27 Torbjorn Granlund <tege@gmplib.org>
+2013-01-09 Marco Bodrato <bodrato@mail.dm.unipi.it>
- * tune/speed.h (speed_cyclecounter): Always use PIC variant when
- compiled with Apple's GCC.
+ * mini-gmp/mini-gmp.c (mpz_export): Support op=0 countp=NULL.
-2011-04-26 Torbjorn Granlund <tege@gmplib.org>
+2013-01-08 Niels Möller <nisse@lysator.liu.se>
- * mpn/sparc32/sparc-defs.m4 (changecom): Don't redefine '!' as it
- interferes with expressions.
+ * mini-gmp/tests/t-import.c: New test program, testing both
+ mpz_import and mpz_export.
+ * mini-gmp/tests/Makefile (CHECK_PROGRAMS): Added t-import.
-2011-04-10 Niels Möller <nisse@lysator.liu.se>
+ * mini-gmp/tests/mini-random.c (mini_rrandomb_export): New
+ function.
+ * mini-gmp/tests/mini-random.h: Declare it.
+ * mini-gmp/tests/hex-random.c (hex_rrandomb_export): New function.
+ * mini-gmp/tests/hex-random.h: Declare it.
- [This change was made after the 5.0.2 release, but inserted here to
- match the change chronology of the main repository.]
+ * mini-gmp/mini-gmp.c (mpz_export): Compute accurate word count up
+ front, to avoid generating any high zero words.
- * configure.in: Add invert_limb_table to extra_functions_64 on
- x86_64.
+2013-01-07 Marco Bodrato <bodrato@mail.dm.unipi.it>
-2011-04-10 Torbjorn Granlund <tege@gmplib.org>
+ * mini-gmp/README: Document base limitation for conversions.
+ * mini-gmp/mini-gmp.c (mpz_set_str): Remove goto.
+ (mpz_import, mpz_export): Correctly use order/endianness.
- * acinclude.m4 (GMP_ASM_RODATA): Make 'foo' larger to avoid clang
- problems.
+2013-01-05 Torbjorn Granlund <tege@gmplib.org>
-2011-03-28 Torbjorn Granlund <tege@gmplib.org>
+ * longlong.h (aarch64): Make add_ssaaaa and sub_ddmmss actually work.
- * mpn/x86/invert_limb.asm: Protect movzwl register parameters from
- being interpreted as m4 macro parameters.
+2013-01-04 Marco Bodrato <bodrato@mail.dm.unipi.it>
-2011-03-21 Torbjorn Granlund <tege@gmplib.org>
+ From shuax:
+ * mini-gmp/mini-gmp.c (mpz_import): Reset limb after storing it.
- * configure.in (hppa): Under linux, treat 64-bit processors as if they
- were 32-bit processors.
+2013-01-04 Torbjorn Granlund <tege@gmplib.org>
-2011-03-15 Marco Bodrato <bodrato@mail.dm.unipi.it>
+ From Marko Lindqvist:
+ * configure.ac: Use AC_CONFIG_HEADERS instead of the obsolete
+ AM_CONFIG_HEADER.
- * mpn/generic/toom_interpolate_16pts.c: Remove ambiguity.
+2013-01-02 Marco Bodrato <bodrato@mail.dm.unipi.it>
-2011-03-12 Torbjorn Granlund <tege@gmplib.org>
+ * tests/mpz/bit.c: Wider testing for mpz_combit.
+ * tests/mpz/logic.c: Check the -2^n case.
- * tune/powerpc.asm: Use powerpc syntax, not power syntax.
+ * mpz/ior.c: Fixed an allocation bug in the -2^n case.
-2011-03-09 Marc Glisse <marc.glisse@inria.fr>
+2012-12-31 Torbjorn Granlund <tege@gmplib.org>
- * doc/gmp.texi: Remove void return type from constructors. Document
- explicit constructors. Document mpf_class::mpf_class(mpf_t).
+ * mpn/generic/get_d.c: Minor reorg, add vax D code.
-2011-02-24 Torbjorn Granlund <tege@gmplib.org>
+ * gmp-impl.h (double_extract): New union type for vax D floats.
- * mpn/x86/p6/sse2/mod_1_4.asm: Fix typo in MULFUNC_PROLOGUE.
+ * tests/mpq/t-get_d.c (check_random): Limit exponents on vax.
-2011-02-04 Torbjorn Granlund <tege@gmplib.org>
+2012-12-30 Marco Bodrato <bodrato@mail.dm.unipi.it>
- * mpn/x86_64/core2/popcount.asm: Add a MULFUNC_PROLOGUE.
- * mpn/x86_64/pentium4/popcount.asm: Likewise.
+ * tests/mpz/bit.c (check_clr_extend): Check _set shrink.
-2011-01-31 Torbjorn Granlund <tege@gmplib.org>
+2012-12-29 Torbjorn Granlund <tege@gmplib.org>
- [These changes were made after the 5.0.2 release, but inserted here to
- match the change chronology of the main repository.]
+ * demos/calc/calc.c: Remove generated file from repo.
+ * demos/calc/calc.h: Likewise.
+ * demos/calc/calclex.c: Likewise.
- * config.guess: Recognise new Intel processors.
+2012-12-27 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/generic/get_d.c: Complete rewrite of non-IEEE code.
+
+ * tests/mpq/t-get_d.c (main): Suppress check_random for vax.
+
+2012-12-25 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/x86_64/bdiv_q_1.asm: Use LEA for binvert_limb_table.
+
+2012-12-23 Torbjorn Granlund <tege@gmplib.org>
+
+ * tests/mpz/t-get_d.c (check_onebit): Decrease vax limit to avoid
+ overflow in last, unused 'want' value.
+
+ * config.guess: Recognise AMD family 22 as a future bobcat.
+
+2012-12-21 Torbjorn Granlund <tege@gmplib.org>
+
+ * configure.ac: Rename configure.in.
+
+2012-12-17 Torbjorn Granlund <tege@gmplib.org>
+
+ * Version 5.1.0 released.
+
+ * configure.in (none-*-*): Allow this again, but print a warning.
+
+2012-12-17 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+ * mpz/n_pow_ui.c: Fix typos in an ASSERT.
+
+2012-12-16 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/generic/mu_div_qr.c (mpn_preinv_mu_div_qr): Explicitly use
+ MPN_COPY_INCR for slightly overlapping copy.
+
+2012-12-15 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+ * tests/mpn/toom-sqr-shared.h: Skip ALLOCs if the test is skipped.
+
+2012-12-13 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/x86_64/dos64.m4 (PIC): Move definition early.
+ (JMPENT): Remove PIC variant.
+
+ * mpn/x86_64/darwin.m4 (JUMPTABSECT): Define to .text, instead of
+ something sensible.
+
+2012-12-12 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/x86_64/x86_64-defs.m4 (JMPENT): New macro.
+ * mpn/x86_64/dos64.m4: Likewise.
+ * mpn/x86_64/darwin.m4: Likewise.
+ * mpn/x86_64/mod_34lsub1.asm: Use JMPENT to properly support PIC.
+ * mpn/x86_64/mullo_basecase.asm: Likewise.
+ * mpn/x86_64/sqr_basecase.asm: Likewise.
+
+2012-12-11 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/x86_64/mod_34lsub1.asm: Try different jump table for the benefit
+ of broken Apple linkers.
+
+2012-12-09 Torbjorn Granlund <tege@gmplib.org>
+
+ * configure.in: Make GMP_NONSTD_ABI ABI specific.
+
+2012-12-08 Torbjorn Granlund <tege@gmplib.org>
+
+ * Makefile.am (LIBGMP_LT_*, LIBGMPXX_LT_*): Bump version info.
+ * gmp-h.in: Bump version.
+
+2012-12-06 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+ * tests/mpq/reuse.c: New test (adapted from mpf/reuse.c).
+ * tests/mpq/Makefile.am (check_PROGRAMS): Add reuse.
+
+ * mpz/abs.c: Use NEWALLOC.
+ * mpz/neg.c: Likewise.
+ * mpz/com.c: Reduce branches.
+
+2012-12-05 Niels Möller <nisse@lysator.liu.se>
+
+ * mpn/generic/brootinv.c (mpn_brootinv): Make valgrind happier, at
+ the cost of a redundant MPN_ZERO.
+
+ * mpz/jacobi.c (mpz_jacobi): Check for asize == 0 or bsize == 0
+ before using the low limbs.
+
+2012-12-05 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/generic/set_str.c (mpn_dc_set_str): Work around a valgrind issue.
+
+ * mpz/powm_ui.c: Don't assume >= 2 limbs in mod argument.
+
+ * tests/tests.h (TESTS_REPS): Handle float GMP_CHECK_REPFACTOR.
+
+ * longlong.h: Refine cpp test for vax.
+ * tests/mpn/t-get_d.c: Likewise.
+ * tests/mpz/t-get_d.c: Likewise.
+ * tests/mpz/t-cmp_d.c: Likewise.
+ * tests/mpz/t-get_d.c: Likewise.
+ * tests/mpq/t-get_d.c: Likewise.
+ * tests/mpf/t-get_d.c: Likewise.
+
+2012-11-30 Torbjorn Granlund <tege@gmplib.org>
+
+ * gen-fac.c (gen_consts): Correct printf types.
+
+ * mpn/arm/v7a/cora15/gmp-mparam.h: New file.
+
+ * configure.in (arm*-*-*): New compiler optional "tune". Pass value for
+ selected processors. Add more specific path components.
+
+2012-11-29 Torbjorn Granlund <tege@gmplib.org>
+
+ From Andoni Morales Alastruey:
+ * longlong.h: Conditionalise ARM asm on !__thumb__.
+
+2012-11-28 Torbjorn Granlund <tege@gmplib.org>
+
+ * config.guess (arm*-*-*): Support specific ARM processors.
+ * config.sub: Match arm CPUs.
+ * configure.in (arm*-*-*): Likewise.
+
+ * mpz/powm.c: Move new_b out since it lives on through b.
+
+ * configure.in (arm*-*-*): Pass -marm to deal with compilers defaulting
+ to thumb code.
+
+2012-11-26 Torbjorn Granlund <tege@gmplib.org>
+
+ * tests/cxx/t-ops2.cc (checkz): Reduce huge numbers to avoid vax
+ overflow.
+
+2012-11-25 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/generic/get_d.c: Reinsert non-IEEE code.
+
+ * mpn/vax/add_n.asm: New file.
+ * mpn/vax/add_n.s: Remove.
+ * mpn/vax/addmul_1.asm: New file.
+ * mpn/vax/addmul_1.s: Remove.
+ * mpn/vax/lshift.asm: New file.
+ * mpn/vax/lshift.s: Remove.
+ * mpn/vax/mul_1.asm: New file.
+ * mpn/vax/mul_1.s: Remove.
+ * mpn/vax/rshift.asm: New file.
+ * mpn/vax/rshift.s: Remove.
+ * mpn/vax/sub_n.asm: New file.
+ * mpn/vax/sub_n.s: Remove.
+ * mpn/vax/submul_1.asm: New file.
+ * mpn/vax/submul_1.s: Remove.
+
+ * mpn/vax/elf.m4: New file.
+ * configure.in (vax*-*-*elf*): New case, grabbing vax/elf.m4.
+
+ * tests/mpn/t-get_d.c (check_onebit): Get vax bounds right.
+ (main): Switch off check_rand for vax.
+
+2012-11-22 Niels Möller <nisse@lysator.liu.se>
+
+ * mini-gmp/tests/run-tests: Copied latest version from GNU Nettle.
+ Minor fix to the use of $EMULATOR, and proper copyright notice.
+
+2012-11-16 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/generic/powm_sec.c (redcify): Use mpn_sb_div_r_sec.
+
+ * mpn/generic/sb_div_sec.c: New file.
+ * mpn/generic/sbpi1_div_sec.c: New file.
+ * configure.in (gmp_mpn_functions): Add new files.
+ * gmp-impl.h: Declare new functions.
+
+2012-11-12 Torbjorn Granlund <tege@gmplib.org>
+
+ * longlong.h: Add ARM64 support.
+ * longlong.h: Add AVR support.
+
+ * mpn/powerpc64/mode64/divrem_1.asm: Tune, simplify.
+
+ * mpq/md_2exp.c: Use MPN_COPY_INCR, not MPN_COPY_DECR.
+ * tests/mpq/t-md_2exp.c (check_random): New function.
+
+2012-11-10 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/generic/remove.c (mpn_bdiv_qr_wrap): Make static.
+
+2012-11-04 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpz/powm_ui.c: Rewrite.
+
+2012-11-01 Niels Möller <nisse@lysator.liu.se>
+
+ * mpn/generic/brootinv.c (mpn_brootinv): Input size in limbs
+ rather than bits. Use single-precision iterations for the first
+ limb.
+ * mpn/generic/perfpow.c (is_kth_power): Update mpn_brootinv call.
+ * tests/mpn/t-brootinv.c (main): Likewise.
+ * tune/speed.h (SPEED_ROUTINE_MPN_BROOTINV): Likewise.
+ * gmp-impl.h (mpn_brootinv): Updated prototype.
+
+ * mpn/generic/hgcd2.c (mpn_hgcd2): Removed redundant loop exit
+ tests in the single-precision loop.
+
+ * mpz/combit.c (mpz_combit): Rewrite, optimizing for the common
+ case.
+
+2012-10-31 Niels Möller <nisse@lysator.liu.se>
+
+ * tests/mpn/Makefile.am (check_PROGRAMS): Added t-brootinv.
+ * tests/mpn/t-brootinv.c: New file.
+
+ * mpn/generic/broot.c (mpn_broot_invm1): Avoid a mullo_n in the
+ loop, and do powering as a plain mpn_sqr followed by mpn_powlo.
+
+ * tune/speed.c (routine): Added mpn_broot, mpn_broot_invm1,
+ mpn_brootinv.
+
+ * tune/common.c (speed_mpn_broot, speed_mpn_broot_invm1)
+ (speed_mpn_brootinv): New functions.
+ * tune/speed.h (SPEED_ROUTINE_MPN_BROOT)
+ (SPEED_ROUTINE_MPN_BROOTINV): New macros.
+
+ * mpn/generic/broot.c (mpn_broot_invm1): Made non-static (mainly
+ for benchmarking).
+ * gmp-impl.h (mpn_broot_invm1): Declare it.
+
+2012-10-28 Torbjorn Granlund <tege@gmplib.org>
+
+ * configure.in (gmp_mpn_functions): Add new files.
+ * gmp-impl.h: Declare new functions.
+ * mpn/generic/perfpow.c: Overhaul.
+ (binv_root, binv_sqroot): Remove.
+ * mpn/generic/brootinv.c: New file, code from overhauled binv_root.
+ * mpn/generic/bsqrtinv.c: New file, code from overhauled binv_sqroot.
+ * mpn/generic/bsqrt.c: New file.
+
+ * tests/mpn/t-broot.c: Add a forgotten TMP_MARK.
+
+2012-10-28 Niels Möller <nisse@lysator.liu.se>
+
+ * mpn/generic/broot.c (mpn_broot): New file and function.
+ * configure.in (gmp_mpn_functions): Add broot.
+ * gmp-impl.h (mpn_broot): Declare.
+ * tests/mpn/t-broot.c: New testcase.
+ * tests/mpn/Makefile.am (check_PROGRAMS): Added t-broot.
+
+2012-10-27 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/generic/remove.c: Get remainder allocation right.
+
+2012-10-25 Torbjorn Granlund <tege@gmplib.org>
+
+ * longlong.h: De-support old POWER asm syntax.
+
+ * tests/mpz/t-remove.c: Run more tests, but use a tad smaller operands.
+
+ * mpn/generic/remove.c (mpn_bdiv_qr_wrap): New function.
+ (mpn_remove): Call mpn_bdiv_qr_wrap.
+ * mpz/remove.c: Enable suppressed mpn_remove call.
+
+2012-10-17 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpz/powm_ui.c (mpz_powm_ui): Deflect to mpz_powm for large exponent.
+
+2012-09-10 Torbjorn Granlund <tege@gmplib.org>
+
+ * demos/factorize.c: Rewrite to more current form. Implement Lucas
+ prime proving, and make its use the default.
+ * demos/primes.h: New file.
+
+2012-08-24 Torbjorn Granlund <tege@gmplib.org>
+
+ * demos/factorize.c: Overhaul.
+
+2012-08-06 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+ * doc/gmp.texi (mpn_neg): Correctly document returned type.
+
+ * gmp-impl.h (_mpz_newalloc, log_n_max): mark with inline (spotted by Niels).
+
+2012-07-28 Marc Glisse <marc.glisse@inria.fr>
+
+ * gmpxx.h (std::common_type): New partial specializations with builtin
+ types.
+ * tests/cxx/t-cxx11.cc: Test it.
+
+2012-07-21 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/powerpc32/vmx/mod_34lsub1.asm: Fix r0 clobbering issue with
+ "large" code affecting elf+darwin PIC.
+
+2012-07-21 Marc Glisse <marc.glisse@inria.fr>
+
+ * gmpxx.h (__GMPXX_CONSTANT): Disable for g++-3.4.
+
+2012-06-26 Torbjorn Granlund <tege@gmplib.org>
+
+ * Makefile.am (LIBMP_LT_*): Remove these.
+
+2012-06-26 Marc Glisse <marc.glisse@inria.fr>
+
+ * Makefile.am (LIBGMP_LT_*, LIBGMPXX_LT_*): Update comment for 5.1.0.
+
+2012-06-24 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+ * configure.in (CALLING_CONVENTIONS_OBJS): Disable any use of
+ assembly code with the --disable-assembly option.
+ * mpz/oddfac_1.c: Use the ASSERT_CODE macro.
+ * gen-trialdivtab.c (mpz_log2): Use mpz_sizeinbase (., 2).
+
+ * gmp-impl.h (MPN_SIZEINBASE_16): Replace with MPN_SIZEINBASE_2EXP
+ from mpz/export.c .
+ * mpz/export.c (MPN_SIZEINBASE_2EXP): Removed.
+ * mpn/generic/sizeinbase.c: Use MPN_SIZEINBASE.
+
+ * mpz/nextprime.c: Use MPN_SIZEINBASE_2EXP to count bits.
+ * mpn/generic/perfpow.c: Likewise.
+ * mpn/generic/rootrem.c: Likewise.
+ * mpz/get_d_2exp.c: Likewise.
+ * mpn/generic/powm_sec.c: Likewise, nailify.
+ * mpn/generic/powlo.c: Likewise.
+ * mpn/generic/powm.c: Likewise.
+
+ * mini-gmp/mini-gmp.c (mpz_div_r_2exp, mpz_div_q_2exp): Improve
+ adjustment condition.
+
+2012-06-23 Marc Glisse <marc.glisse@inria.fr>
+
+ * gmpxx.h (numeric_limits): Make content public.
+ * cxx/limits.cc: New file, proper declarations.
+ * Makefile.am: List new file.
+ * cxx/Makefile.am: Likewise.
+ * cxx/t-misc.cc: Add minimal test for numeric_limits.
+
+2012-06-09 Marc Glisse <marc.glisse@inria.fr>
+
+ * gmpxx.h (__gmp_resolve_expr::srcptr_type): New typedef.
+ (__gmp_temp): Wrapper for mp*_class, the constructor copies the
+ precision of its second argument for mpf_t.
+ (__gmp_expr::eval(p, prec)): Remove.
+ (__gmp_expr::eval(p)): Use __gmp_temp.
+ (__gmp_set_expr): Never pass prec to eval().
+
+2012-06-08 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+ * gmp-impl.h (__GMP_WITHIN_CONFIGURE): Use the same #if as in gmp-h.in.
+ (MPN_NORMALIZE_NOT_ZERO): Tighter ASSERT.
+ (MPZ_NEWALLOC): New macro.
+ * mpq: Use the new macro when possible.
+ * mpz/bin_uiui.c: Likewise.
+ * mpz/oddfac_1.c: Likewise.
+ * mpz/prodlimbs.c: Likewise.
+
+ * mini-gmp/mini-gmp.c (mpz_realloc): remove a branch.
+
+2012-06-04 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/powerpc64/aix.m4 (ASM_START): Claim machine type "any".
+
+2012-06-03 Niels Möller <nisse@lysator.liu.se>
+
+ * mpn/generic/gcdext.c (mpn_gcdext): Deleted code for handling
+ impossible case u1 == 0. Simplified test for unlikely case u0 == 0.
+
+2012-06-02 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/arm/lshiftc.asm: New file.
+
+2012-06-01 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/arm/aorslsh1_n.asm: Use cmp/cmn instead of subs/adds in more
+ places.
+
+ * mpz/get_str.c: Don't strip leading zeros since current mpn_get_str
+ won't generate any. Misc streamlining.
+ * mpz/out_str.c: Analogous changes.
+
+ * tests/mpz/io.c: Use a wider range of bases.
+
+ * tests/mpz/t-cong.c (check_random): Rewrite random generation for
+ exponentially distributed operand sizes.
+
+2012-06-01 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+ * mpq: Use more macros and MPZ_REALLOC return value when possible.
+
+ * gmp-impl.h (LIMBS): Removed, was an alias for PTR.
+ * mpz/combit.c: Use PTR and CNST_LIMB.
+
+ * tests/mpn/t-bdiv.c: Test also mpn_bdiv_qr.
+ * mpn/generic/bdiv_qr.c: Add an ASSERT.
+
+ * mpn/generic/remove.c: Add a zero limb to use bdiv_qr...
+
+2012-05-31 Marc Glisse <marc.glisse@inria.fr>
+
+ * gmpxx.h (mpq_class::mpq_class): Handle mpq_class(0,1).
+ * tests/cxx/t-constr.cc: Test it.
+
+2012-05-30 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/x86_64 (FUNC_ENTRY): New name for DOS64_ENTRY.
+ * mpn/x86_64 (FUNC_EXIT): New name for DOS64_EXIT.
+
+2012-05-29 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+ * mpz/remove.c: Optimise branches.
+
+ * mpn/generic/toom6h_mul.c: Fewer branches in the LIKELY balanced path.
+ * mpn/generic/toom8h_mul.c: Likewise.
+
+2012-05-29 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/arm/v5/mod_1_1.asm: New file.
+
+2012-05-28 Niels Möller <nisse@lysator.liu.se>
+
+ * mpn/generic/gcdext.c (compute_v): Simplified carry handling a
+ bit, reduced stated scratch need from 2n+1 to 2n. Also comment and
+ ASSERT improvements.
+
+2012-05-27 Torbjorn Granlund <tege@gmplib.org>
+
+ * config.guess: Add new x86 CPUs.
+ * mpn/x86/fat/fat.c: Likewise.
+ * mpn/x86_64/fat/fat.c: Likewise.
+
+2012-05-27 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+ * mpn/x86_64/fat/fat.c: abort iff longmode-capable-bit is turned off.
+
+ * mpn/generic/toom8h_mul.c: mark UNLIKELY branches.
+
+2012-05-26 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpz: Use MPZ_REALLOC return value when possible.
+
+2012-05-25 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+ * mini-gmp/tests/t-div.c: Test all _qr, _q, _r variants.
+ * mini-gmp/tests/t-lcm.c: Test the _ui variant.
+
+ * mini-gmp/mini-gmp.c (mpz_mod, mpz_mod_ui): New functions.
+ * mini-gmp/mini-gmp.h (mpz_mod, mpz_mod_ui): Prototypes.
+
+ * mpz/scan1.c: Simplify, and add a shortcut for scan1(z, 0).
+
+2012-05-24 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpz/n_pow_ui.c: Cast non-limb count_leading_zeros argument.
+
+2012-05-24 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+ * mpz/remove.c: Support negative divisor.
+ * tests/mpz/t-remove.c: Test negative divisor.
+
+2012-05-23 Torbjorn Granlund <tege@gmplib.org>
+
+ * tests/mpz/reuse.c: Major rewrite.
+
+2012-05-23 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+ * mpz/sqrt.c: Further simplify.
+ * mpz/sqrtrem.c: Likewise.
+
+ * Mark failing branches with UNLIKELY. Many files affected.
+
+2012-05-22 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpz/sqrt.c: Allocate less for overlapping operands, simplify.
+ * mpz/sqrtrem.c: Likewise.
+
+2012-05-21 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+ * mpn/generic/toom8_sqr.c: Reduce branches for recursion.
+ * mpn/generic/toom8h_mul.c: Likewise.
+
+ * tests/mpn/t-toom8h.c: Don't use GMP_NUMB_BITS when not yet defined.
+
+2012-05-20 Torbjorn Granlund <tege@gmplib.org>
+
+ * tests/mpz/t-gcd.c: Rewrite.
+
+2012-05-19 Torbjorn Granlund <tege@gmplib.org>
+
+ * tests/mpz/t-gcd.c: Generate larger operands for better gcd code
+ coverage; distribute size exponentially.
+
+2012-05-17 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+ * mpf/pow_ui.c: Simplify.
+ * tests/mpf/reuse.c (dsi_func): Exercise pow_ui.
+
+ * tests/mpf/t-set_ui.c (check_data): LONG_HIGHBIT -> ULONG_HIGHBIT.
+ * tests/mpf/t-set.c (check_random): New check, both set and init_set.
+
+ * tests/cxx/t-ops.cc (check_mpq): Check squaring.
+ * tests/mpq/t-equal.c (check_various): Check different den-size.
+
+ * mpn/generic/mullo_n.c: Disable MAYBE_ if WANT_FAT_BINARY.
+ * mpz/cmpabs_d.c: Remove an unused branch.
+
+ * tests/mpz/t-get_d_2exp.c (check_zero): New check.
+ * tests/mpz/t-inp_str.c: A few more cases.
+ * tests/mpz/t-cmp_d.c: More bases and symbols, a few cases.
+
+ * mpz/rootrem.c: Correctly handle odd roots of negatives.
+ * tests/mpz/t-root.c: Test it.
+
+2012-05-16 Torbjorn Granlund <tege@gmplib.org>
+
+ * tests/mpf/t-eq.c (check_random): New function, meat from old main().
+ (check_data): New function.
+
+2012-05-13 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/arm/rsh1aors_n.asm: New file.
+ * mpn/arm/v5/mod_1_2.asm: New file.
+
+2012-05-11 Marc Glisse <marc.glisse@inria.fr>
+
+ * gmpxx.h (explicit operator bool): New functions.
+ * tests/cxx/t-cxx11.cc: Test the above.
+
+2012-05-10 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+ * gmp-impl.h (__gmpn_cpuvec_initialized): Was __gmpn_cpuvec.initialized
+ * mpn/x86/fat/fat.c: Use separated _initialized variable.
+ * mpn/x86_64/fat/fat.c: Likewise.
+ * tests/mpn/t-fat.c: Likewise.
+
+ * mpn/generic/toom2_sqr.c: Override global __gmpn_cpuvec_initialized.
+ * mpn/generic/toom22_mul.c: Likewise.
+ * mpn/generic/toom3_sqr.c: Likewise.
+ * mpn/generic/toom33_mul.c: Likewise.
+
+2012-05-09 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+ * mini-gmp/mini-gmp.c: merge mpz_rootrem and mpz_sqrtrem.
+
+ * mpn/generic/sqrtrem.c (invsqrttab): Reduce size removing common byte.
+
+ * mpz/bin_uiui.c (mul3, mul4, mul8): Remove unneeded shifts.
+ (MAXFACS): Redefine, using the shared (safer) log_n_max.
+
+2012-05-08 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/minithres/gmp-mparam.h (REDC_1_TO_REDC_N_THRESHOLD): Up to 9, for
+ coherency with ASSERT in mpn/generic/redc_n.c.
+
+2012-05-07 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+ * mpn/minithres/gmp-mparam.h: Updated TOOM6 and FAC_DSC.
+ * tests/mpn/toom-sqr-shared.h: Don't test if no range.
+
+ * mpz/oddfac_1.c: Add ASSERTs to warn about small threshold.
+ * tune/tuneup.c: Update minimal threshold for FAC_DSC.
+
+2012-05-06 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/arm/v6/sqr_basecase.asm: Simplify n=4 code.
+
+2012-05-05 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+ * mpn/generic/invert.c: Mark a branch UNLIKELY.
+ * tune/tuneup.c (tune_fac_u): Update DSC_THRESHOLD minimum.
+ * gmp-impl.h (FAC_???_THRESHOLD): Update default values.
+ (ABOVE_THRESHOLD): New definition with __builtin_constant_p.
+
+ * mpn/generic/toom22_mul.c: Disable MAYBE_ if WANT_FAT_BINARY.
+ * mpn/generic/toom33_mul.c: Likewise.
+ * mpn/generic/toom2_sqr.c: Likewise.
+ * mpn/generic/toom3_sqr.c: Likewise.
+
+2012-05-04 Torbjorn Granlund <tege@gmplib.org>
+
+ * tune/tuneup.c: Measure POWM_SEC_TABLE after the REDC thresholds.
+
+2012-05-03 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/generic/powm_sec.c: Use redc_2.
+ (INNERLOOP): Use this mechanism, like plain powm.c.
+ (WANT_CACHE_SECURITY): Remove, feature now unconditional.
+
+2012-05-02 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpz/bin_uiui.c: Make use of CNST_LIMB.
+
+2012-05-02 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+ * mpz/mfac_uiui.c: Support limb != ui.
+
+2012-05-02 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/arm/logops_n.asm: Work around register clobbering issue.
+
+ * mpn/arm/aorscnd_n.asm: New file.
+
+2012-05-01 Torbjorn Granlund <tege@gmplib.org>
+
+ * configure.in: Put arm dirs in path in proper prio order.
+
+ * mpn/arm/logops_n.asm: New file.
+
+ * mpz/2fac_ui.c: Fix assumed typo.
+
+ * mpn/arm/v6/gmp-mparam.h: New file.
+
+ * mpn/arm/v5/gcd_1.asm: Hack for undefined BMOD_1_TO_MOD_1_THRESHOLD.
+ * mpn/arm/v6t2/gcd_1.asm: Likewise.
+
+2012-04-30 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/arm/v6/sqr_basecase.asm: New file.
+
+2012-04-30 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+ * mpn/generic/comb_tables.c: New file.
+ * configure.in: Add it.
+ * gen-fac.c: Define table limits.
+ * gmp-impl.h: Declare tables.
+ (log_n_max): New static function.
+ * mpz/2fac_ui.c: Use shared tables.
+ * mpz/bin_uiui.c: Likewise.
+ * mpz/oddfac_1.c: Likewise.
+ * mpz/primorial_ui.c: Likewise.
+
+ * mpz/mfac_uiui.c: New file.
+ * Makefile.am: Compile it.
+ * mpz/Makefile.am (libmpz_la_SOURCES): Add mpz_mfac_uiui.c
+ * gmp-h.in (mpz_mfac_uiui): Declare.
+
+ * tests/mpz/t-mfac_uiui.c: New file.
+ * tests/mpz/Makefile.am: Run it.
+
+ * doc/gmp.texi: Document mpz_mfac_uiui, collapsing with other factorial functions.
+
+ * tests/mpz/t-lcm.c: Test zero too.
+
+ * mpz/prodlimbs.c: Simplify threshold (should be tuned, not guessed).
+
+2012-04-29 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/arm/aors_n.asm: Tune for more stable performance.
+
+ * mpn/arm/aorslsh1_n.asm: New file.
+
+ * mpn/arm/mod_34lsub1.asm: New file.
+
+ * mpn/arm/v6t2/divrem_1.asm: New file.
+
+2012-04-28 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/thumb/add_n.asm: New file.
+ * mpn/thumb/sub_n.asm: New file.
+ * mpn/thumb/add_n.s: Remove broken code.
+ * mpn/thumb/sub_n.s: Likewise.
+
+ * mpn/arm/v6/addmul_1.asm: Rewrite for stable speed, smaller size.
+ * mpn/arm/v6/mul_1.asm: Likewise.
+
+2012-04-27 Torbjorn Granlund <tege@gmplib.org>
+
+ * configure.in: Search arm/v6t2 for arm7.
+
+ * mpn/arm/v5/gcd_1.asm: New file.
+ * mpn/arm/v6t2/gcd_1.asm: New file.
+
+ * mpn/arm/mode1o.asm: New file.
+ * mpn/arm/v6t2/mode1o.asm: New file.
+
+ * mpn/arm/arm-defs.m4 (LEA): New define.
+ * mpn/arm/invert_limb.asm: Use LEA.
+
+2012-04-26 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+ * mpz/bin_uiui.c (bc_bin_uiui): Nail support.
+ * tests/cxx/t-ops2.cc: Test 0/3.
+ * oddfac_1.c: assume n > 26.
+ * tests/mpz/t-jac.c (mpn_jacobi_n): Enlarge tested sizes.
+
+2012-04-24 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/arm/v6/addmul_2.asm: New file.
+ * mpn/arm/v6/mul_2.asm: New file.
+
+2012-04-23 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/arm/aorsmul_1.asm: Tweak loop control for a 6% speed increase.
+
+2012-04-22 Torbjorn Granlund <tege@gmplib.org>
+
+ * configure.in: Recognise ARM sub-architectures.
+
+ * configfsf.guess: Update to current FSF version.
+ * configfsf.sub: Likewise.
+
+ * mpn/arm/bdiv_dbm1c.asm: New file.
+
+ * mpn/arm/v6/mul_1.asm: New file.
+ * mpn/arm/v6/addmul_1.asm: New file.
+
+2012-04-22 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+ * gen-fac.c: Renamed, was gen-fac_ui.c .
+ * Makefile.am: Renamed gen-fac.c and fac_table.h .
+ * gmp-impl.h: #include "fac_table.h".
+ * mpz/oddfac_1.c: Use generated constant.
+ * mpz/bin_ui.c: Small optimisations.
+
+ * tune/common.c (speed_mpz_bin_ui): New function.
+ * tune/speed.h: Declare it.
+ * tune/speed.c: Use it.
+
+2012-04-21 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/arm/mul_1.asm: Cleanup.
+ * mpn/arm/copyi.asm: Cleanup, assume allocate-on-write cache.
+ * mpn/arm/copyd.asm: Likewise.
+
+ * mpn/arm/add_n.asm: Delete.
+ * mpn/arm/sub_n.asm: Delete.
+ * mpn/arm/aors_n.asm: New file, made from old files.
+
+ * mpn/arm/addmul_1.asm: Delete.
+ * mpn/arm/submul_1.asm: Delete.
+ * mpn/arm/aorsmul_1.asm: New file, made from old files.
+
+ * mpn/arm/com.asm: New file.
+ * mpn/arm/lshift.asm: New file.
+ * mpn/arm/rshift.asm: New file.
+
+2012-04-20 Torbjorn Granlund <tege@gmplib.org>
+
+ * tests/mpq/io.c: New file.
+ * tests/mpq/Makefile.am: Run it.
+
+ * mpz/clrbit.c: Simplify along the lines of setbit.c.
+
+2012-04-20 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+ * mpz/setbit.c: Simplify.
+
+ * gmp-impl.h (LOG2C): Define.
+ * mpz/fac_ui.c (LOG2C): Remove.
+ * mpz/2fac_ui.c (LOG2C): Remove.
+ * mpz/oddfac_1.c (LOG2C): Remove.
+ * mpn/generic/binvert.c (LOG2C): Remove.
+ * mpn/generic/invertappr.c (LOG2C): Remove.
+
+ * mpz/bin_uiui.c (mpz_goetgheluck_bin_uiui): Move declarations,
+ and assume that n and k are not small.
+
+2012-04-19 Torbjorn Granlund <tege@gmplib.org>
+
+ * tests/mpz/Makefile.am (check_PROGRAMS): Add t-remove.
+
+ * tests/mpz/t-remove.c: Clear out mpz variables.
+
+ * tests/mpz/t-cong.c (check_random): Use much larger numbers.
+ (check_data): Check congruences mod 0.
+
+ * tests/mpz/t-divis.c: Test divisibility by zero.
+
+ * tests/mpz/reuse.c: Test mpz_mod.
+
+ * mpz/setbit.c: Remove dead code. Use CNST_LIMB.
+ * mpz/clrbit.c: Use CNST_LIMB.
+
+2012-04-19 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+ * primesieve.c: New file, with functions from mpz/oddfac_1.c .
+ * mpz/oddfac_1.c (bitwise_primesieve): Re-moved.
+ * Makefile.am (libgmp_la_SOURCES): Add primesieve.c .
+ * gmp-impl.h (gmp_primesieve): Declare.
+
+ * mpz/bin_uiui.c (mpz_goetgheluck_bin_uiui): New, factor-based
+ implementation.
+ * tests/mpz/t-bin.c: Extend tests, to cover _goetgheluck.
+
+ * mpz/primorial_ui.c: New file.
+ * mpz/Makefile.am (libmpz_la_SOURCES): Add mpz/primorial_ui.c
+ * Makefile.am (MPZ_OBJECTS): Add mpz/primorial_ui$U.lo
+ * gmp-h.in (mpz_primorial_ui): Declare.
+ * tests/mpz/t-primorial_ui.c: New test for the new function.
+ * tests/mpz/Makefile.am (check_PROGRAMS): Add t-primorial_ui.
+ * doc/gmp.texi: Short documentation for the new function.
+
+2012-04-17 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/x86_64/coreisbr/aorsmul_1.asm: Fix some DOS64 issues.
+ * mpn/x86_64/coreisbr/mul_1.asm: Likewise.
+
+ * mpn/x86_64/fastsse/lshiftc-movdqu2.asm: Adhere to DOS64 register
+ partitioning rules.
+
+ * mpn/x86_64/fastsse/copyi-palignr.asm: Implement temporary workaround
+ to overlap issue.
+
+2012-04-17 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+ * mpz/bin_uiui.c: Support small limbs (fallback on bin_ui).
+
+ * tests/mpn/toom-sqr-shared.h: Use a restricted range.
+ * tests/mpn/t-toom2-sqr.c: Specify correct range.
+ * tests/mpn/t-toom3-sqr.c: Likewise.
+ * tests/mpn/t-toom4-sqr.c: Likewise.
+ * tests/mpn/t-toom6-sqr.c: Likewise.
+ * tests/mpn/t-toom8-sqr.c: Likewise, but extended.
+ * tests/mpn/Makefile.am (check_PROGRAMS): Add t-toom?-sqr tests.
+
+ * mpn/generic/sbpi1_bdiv_q.c: Move ASSERTs, to support qp = np.
+
+2012-04-17 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/x86_64/copyd.asm: Rewrite.
+ * mpn/x86_64/copyi.asm: Rewrite.
+
+2012-04-16 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/x86_64/fastsse/lshift-movdqu2.asm: Add DOS entry/exit sequences.
+ * mpn/x86_64/fastsse/rshift-movdqu2.asm: Likewise.
+ * mpn/x86_64/fastsse/lshiftc-movdqu2.asm: Likewise.
+
+ * mpn/x86_64/x86_64-defs.m4 (palignr): New macro.
+ (x86_opcode_regxmm, x86_opcode_regxmm_list): New, made from x86 mmx
+ counterparts.
+ (x86_lookup): Copy from x86/x86-defs.m4.
+ * mpn/x86_64/fastsse/copyd-palignr.asm: Use palignr macro.
+ * mpn/x86_64/fastsse/copyi-palignr.asm: Likewise.
+
+2012-04-15 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+ * tests/mpz/t-bin.c: Add more tests on small values.
+ * mpz/bin_uiui.c (mpz_bdiv_bin_uiui): Smaller temporary areas.
+
+2012-04-15 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/x86_64/fastsse/copyd-palignr.asm: New file.
+ * mpn/x86_64/fastsse/copyi-palignr.asm: New file.
+ * mpn/x86_64/core2/copyd.asm: New file.
+ * mpn/x86_64/core2/copyi.asm: New file.
+ * mpn/x86_64/nano/copyd.asm: New file.
+ * mpn/x86_64/nano/copyi.asm: New file.
+ * mpn/x86_64/atom/copyd.asm: New file.
+ * mpn/x86_64/atom/copyi.asm: New file.
+
+2012-04-13 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+ * mpz/bin_uiui.c: Rewrite (some parts are Torbjorn's).
+ * gen-fac_ui.c: Generate new constants for bin_uiui.
+
+ * mini-gmp/mini-gmp.h (mpz_fac_ui, mpz_bin_uiui): New definitions.
+ * mini-gmp/mini-gmp.c (mpz_fac_ui, mpz_bin_uiui): Trivial
+ implementation.
+
+ * tests/mpz/t-fac_ui.c: Check Wilson's theorem on a big value.
+
+ * mpn/generic/invert.c: Remove support for scratch == NULL.
+ * tune/speed.h (SPEED_ROUTINE_MPN_MUPI_DIV_QR): Allocate scratch
+ space for mpn_invert.
+
+ * mpz/mul_i.h: Small clean-up.
+
+ * tests/mpn/toom-sqr-shared.h: New file.
+ * tests/mpn/t-toom2-sqr.c: New file.
+ * tests/mpn/t-toom3-sqr.c: New file.
+ * tests/mpn/t-toom4-sqr.c: New file.
+ * tests/mpn/t-toom6-sqr.c: New file.
+ * tests/mpn/t-toom8-sqr.c: New file.
+ * tests/mpn/Makefile.am (EXTRA_DIST): Add toom-sqr-shared.h .
+
+ * mpn/generic/toom62_mul.c: Use add_n, sub_n, when possible.
+
+2012-04-12 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/x86_64/fastsse/lshift-movdqu2.asm: New file.
+ * mpn/x86_64/fastsse/rshift-movdqu2.asm: New file.
+ * mpn/x86_64/fastsse/lshiftc-movdqu2.asm: New file.
+ * mpn/x86_64/coreisbr/lshift.asm: New file.
+ * mpn/x86_64/coreisbr/rshift.asm: New file.
+ * mpn/x86_64/coreisbr/lshiftc.asm: New file.
+ * mpn/x86_64/k10/lshift.asm: New file.
+ * mpn/x86_64/k10/rshift.asm: New file.
+ * mpn/x86_64/k10/lshiftc.asm: New file.
+
+ * mpn/x86_64/fastsse/lshift.asm: Simplify to very basic form.
+
+2012-04-11 Niels Möller <nisse@lysator.liu.se>
+
+ * Makefile.am (check-mini-gmp): Pass -I../.. in EXTRA_CFLAGS, to
+ locate gmp.h.
+
+2012-04-11 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+ * mini-gmp/mini-gmp.h (mpz_root, mpz_rootrem): define (correctly).
+ * mini-gmp/mini-gmp.c (mpz_rootrem): Extended code from _root.
+ (mpz_root): Use mpz_rootrem.
+ (mpz_mul_ui): Correctly handle negative operands.
+
+ * mini-gmp/tests/Makefile (CHECK_PROGRAMS): add t-root.
+ * mini-gmp/tests/t-root.c: New file.
+ * mini-gmp/tests/t-reuse.c: Enable root{,rem} tests.
+
+2012-04-10 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+ * gen-fac_ui.c (mpz_root): Remove.
+ * mini-gmp/mini-gmp.c (mpz_root): New, support negative operands.
+ * mini-gmp/mini-gmp.h (mpz_root): define.
+ (mpz_out_str): Test also __STDIO_LOADED (for VMS).
+ * mpz/2fac_ui.c: Cosmetic change.
+
+2012-04-07 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/ia64/gcd_1.asm: Rewrite inner loop to use ctz table.
+
+2012-04-05 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/powerpc64/p7/popcount.asm: Properly extend arg n for mode32.
+ * mpn/powerpc64/p7/hamdist.asm: Likewise.
+
+2012-04-04 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/powerpc64/p7/popcount.asm: New file.
+ * mpn/powerpc64/p7/hamdist.asm: New file.
+
+ * longlong.h (ARM count_leading_zeros): Enable for more arch versions.
+
+ * mpn/x86_64/gcd_1.asm: Make room for DOS64 regparm shadow area.
+ * mpn/x86_64/core2/gcd_1.asm: Likewise.
+
+2012-04-03 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/x86_64/coreisbr/aorrlsh_n.asm: Make it actually work for DOS64.
+
+2012-04-02 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+ * mpz/oddfac_1.c: Initialize size for ASSERT.
+
+2012-04-02 Torbjorn Granlund <tege@gmplib.org>
+
+ * gmp-h.in (_GMP_H_HAVE_FILE): Test also __STDIO_LOADED (for VMS).
+
+ * gmp-impl.h (doprnt_format_t, etc): Remove bogus __GMP_DECLSPECs.
+
+2012-03-30 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+ * mpn/x86_64/sqr_basecase.asm: Speed-up for small cases.
+
+2012-03-29 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/sparc64/gcd_1.asm: New file.
+
+2012-03-27 Torbjorn Granlund <tege@gmplib.org>
+
+ * config.guess: Fix typo in coreisbr recognition.
+
+2012-03-26 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+ * mpn/x86_64/gcd_1.asm: Reduce latency.
+ * mpn/x86_64/mul_basecase.asm: Save one jump.
+
+ * mpz/iset_ui.c: Don't realloc.
+
+2012-03-20 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+ * mp_clz_tab.c: Add __clz_tab[128].
+ * longlong.h (count_trailing_zeros): Use it in pure C variant.
+
+2012-03-20 Torbjorn Granlund <tege@gmplib.org>
+
+ * configure.in (x86 fat_path): Add many missing directories.
+ * mpn/x86/fat/fat.c (__gmpn_cpuvec_init): Rewrite.
+ (fake_cpuid_table): Add many more CPUs.
+
+ * mpn/x86_64/fat/fat.c (__gmpn_cpuvec_init): Minor spacing cleanup.
+
+2012-03-19 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/x86/x86-defs.m4 (CALL, PIC_WITH_EBX): New macros.
+ * mpn/x86/darwin.m4: Likewise.
+ * mpn/x86/k7/gcd_1.asm: Use new macros to support PIC.
+ * mpn/x86/p6/gcd_1.asm: Likewise.
+
+2012-03-19 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+ * gen-fac_ui.c: Generate more constants (possible mini-mpz_root).
+ * mpz/oddfac_1.c: Improve ASSERTs.
+ (log_n_max): Use precomputed table.
+
+ * longlong.h (_PROTO): Remove.
+
+2012-03-18 Torbjorn Granlund <tege@gmplib.org>
+
+ * longlong.h (count_trailing_zeros): Write better pure C default
+ variant.
+
+ * mpn/x86/p6/gcd_1.asm: Remove forgotten x86_64 reference.
+
+ * mpn/x86/p6/gmp-mparam.h: Update, to get BMOD_1_TO_MOD_1_THRESHOLD
+ defined for fat binaries.
+
+2012-03-17 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/x86/k7/gcd_1.asm: Rewrite.
+ * mpn/x86/p6/gcd_1.asm: New file.
+
+ * mpn/x86_64/core2/gcd_1.asm: Conditionally suppress reduction calls.
+ * mpn/x86_64/gcd_1.asm: Rewrite.
+
+2012-03-15 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/generic/gcd_1.c: Parameterise zerotab code.
+
+ * mpn/x86_64/nano/gcd_1.asm: New file, grabbing core2 asm file.
+
+ * mpn/x86_64/core2/gcd_1.asm: Speed up loop code, simplify non-loop
+ code.
+
+2012-03-13 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/x86_64/core2/gcd_1.asm: Add hack to support fat builds.
+
+ * mpn/x86_64/core2/gcd_1.asm: Shorten critical path.
+
+2012-03-12 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/x86_64/core2/gcd_1.asm: New file.
+ * mpn/x86_64/k10/gcd_1.asm: New file, grabbing core2 asm file.
+ * mpn/x86_64/bd1/gcd_1.asm: Likewise.
+
+ * mpn/x86_64/bobcat/sqr_basecase.asm: New file.
+ * mpn/x86_64/bobcat/mul_basecase.asm: Minor tuning.
+
+2012-03-10 Torbjorn Granlund <tege@gmplib.org>
+
+ * configure.in (fat_functions): Add addlsh1_n, addlsh2_n, addmul_2,
+ mullo_basecase, redc_1, redc_2, sublsh1_n.
+
+ * gmp-impl.h (struct cpuvec_t): Add fields for new fat functions.
+ * gmp-impl.h: Adjust corresponding declarations.
+
+ * mpn/generic/redc_2.c (mpn_addmul_2): Make static.
+
+ * mpn/x86_64/fat/fat_entry.asm (FAT_INIT): Expand before fat_init to
+ reduce branch offsets. Pass plain 0,1,3... in %al since we'd else run
+ out of 8-bit range.
+
+ * mpn/x86_64/fat/fat_entry.asm (fat_init): Scale passed index value.
+ * mpn/x86/fat/fat_entry.asm (fat_init): Use movzbl for expanding index
+ value.
+
+ * mpn/x86_64/x86_64-defs.m4 (CPUVEC_FUNCS_LIST): Add new fat functions.
+ * mpn/x86/x86-defs.m4 (CPUVEC_FUNCS_LIST): Likewise.
+ * mpn/x86_64/fat/fat.c (__gmpn_cpuvec): Likewise.
+ * mpn/x86/fat/fat.c (__gmpn_cpuvec): Likewise.
+
+ * mpn/x86_64/fat/redc_2.c: New file.
+ * mpn/x86/fat/mullo_basecase.c: New file.
+ * mpn/x86/fat/redc_1.c: New file.
+ * mpn/x86/fat/redc_2.c: New file.
+
+ * tests/mpn/t-fat.c: Test mullo_basecase.
+
+2012-03-08 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/x86_64/coreisbr/addmul_2.asm: Port to DOS64.
+
+2012-02-29 Marc Glisse <marc.glisse@inria.fr>
+
+ * gmpxx.h: Ignore partial C++11 support in g++-4.6.
+ * tests/cxx/t-cxx11.cc: Likewise.
+
+ * gmpxx.h (operator""): New functions.
+ * tests/cxx/t-cxx11.cc: Test the above.
+ * doc/gmp.texi: Document the above.
+
+2012-03-08 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+ * acinclude.m4 (GMP_H_ANSI): Remove.
+ * configure.in: Don't use GMP_H_ANSI.
+ * gmp-h.in (__GMP_HAVE_PROTOTYPES): Remove.
+
+2012-03-08 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/x86_64/fat/fat.c (fake_cpuid_table): Recognise "bulldozer".
+ (__gmpn_cpuvec_init): Overhaul to match configure.in.
+
+ * configure.in: Adjust bulldozer path_64.
+
+2012-03-07 Torbjorn Granlund <tege@gmplib.org>
+
+ * configure.in (x86_64 fat_path): List recently added AMD directories.
+
+ * mpn/x86_64/bobcat/copyi.asm: New file.
+ * mpn/x86_64/bobcat/copyd.asm: New file.
+
+ * config.guess: Handle AMD 11h correctly.
+
+ * tune/tuneup.c (tune_redc): Better handle situation where redc_2 is
+ never faster.
+
+2012-03-06 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/x86_64/bobcat/mul_basecase.asm: New file.
+
+2012-03-04 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/x86_64/bobcat/mul_1.asm: New file.
+ * mpn/x86_64/bobcat/aorsmul_1.asm: New file.
+
+2012-03-04 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+ * mpz/invert.c: Remove mod 0 branch.
+ * tests/mpz/t-invert.c: Avoid testing mod 0.
+ * doc/gmp.texi (mpz_invert): Specify mod 0 is not handled.
+
+ * gmp-h.in (__gmp_signed, __gmp_const): Remove.
+ (__GMP_HAVE_TOKEN_PASTE, __GMP_HAVE_CONST): Remove.
+ * gmp-impl.h: Strip __GMP_HAVE_TOKEN_PASTE and __GMP_HAVE_CONST.
+ * demos/expr/: Strip __gmp_const usage from all files.
+
+ * tests/mpz/t-powm.c (allsizes_seen): Require unsigned*.
+
+2012-03-03 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/x86_64/k8/gmp-mparam.h: New file.
+ * mpn/x86_64/k10/gmp-mparam.h: New file.
+
+ * mpn/generic/hgcd_step.c (mpn_hgcd_step): Remove unused variables.
+ * mpn/generic/hgcd_jacobi.c (hgcd_jacobi_step): Likewise.
+ * mpn/generic/hgcd_reduce.c (hgcd_matrix_apply): Likewise.
+ * mpn/generic/mu_bdiv_qr.c: Likewise.
+ * mpz/jacobi.c: Likewise.
+ * mpz/mod.c: Likewise.
+
+ * mpn/generic/toom42_mul.c: Remove unread variable.
+ * mpn/generic/set_str.c (mpn_set_str_compute_powtab): Likewise.
+ * mpn/generic/rootrem.c (mpn_rootrem_internal): Likewise.
+ * tests/refmpn.c (refmpn_mul): Likewise.
+ * mpn/generic/hgcd_appr.c (mpn_hgcd_appr): Propagate mask computation
+ into ASSERT, remove variable.
+
+ * gmp-h.in (__GMP_PROTO): Remove.
+ * Strip __GMP_PROTO usage from all files.
+ * Strip prototype parameter names from all files.
+
+2012-03-01 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+ * doc/gmp.texi (mpz_invert): Correctly document result range.
+ * tests/mpz/t-invert.c: Small range correction.
+
+2012-03-01 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/x86_64/mullo_basecase.asm: New file.
+
+2012-02-29 Marc Glisse <marc.glisse@inria.fr>
+
+ * gmpxx.h (std::numeric_limits): New partial specialization.
+
+2012-02-29 Niels Möller <nisse@lysator.liu.se>
+
+ * mini-gmp/tests/t-reuse.c: New test case, based on
+ tests/mpz/reuse.c.
+
+ * mini-gmp/mini-gmp.c (mpz_cdiv_r_ui): New function.
+ (mpz_fdiv_r_ui): New function.
+ (mpz_tdiv_r_ui): New function.
+ (mpz_powm_ui): New function.
+ (mpz_pow_ui): New function.
+ (mpz_ui_pow_ui): Use mpz_pow_ui.
+ (mpz_gcdext): Fixed input/output overlap, for the case of one
+ input being zero.
+ (mpz_sqrtrem): Fix for the case r NULL, U zero.
+
+ * Makefile.am (check-mini-gmp): Use $(MAKE).
+ (clean-mini-gmp): New target.
+ (clean-local, distclean-local): New automake targets. Depend on
+ clean-mini-gmp.
+
+2012-02-28 Niels Möller <nisse@lysator.liu.se>
+
+ * Makefile.am (check-mini-gmp): New target, for running the
+ mini-gmp testsuite.
+
+ * mini-gmp/tests/Makefile (srcdir, MINI_GMP_DIR): New make
+ variables. These can be overridden when using a separate build
+ directory.
+ (EXTRA_CFLAGS): Renamed, was OPTFLAGS.
+
+ * mini-gmp/mini-gmp.c (mpz_abs_add): Don't cache limb pointers
+ over MPZ_REALLOC, since that breaks in-place operation. Bug
+ spotted by Torbjörn.
+ (mpz_and, mpz_ior, mpz_xor): Likewise.
+ (mpz_cmp): Fixed comparison of negative numbers.
+
+2012-02-27 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/x86_64/fastsse/lshiftc.asm: New file.
+ * mpn/x86_64/fastsse/com.asm: New file.
+
+ * mpn/x86_64/bd1/popcount.asm: New file.
+ * mpn/x86_64/bd1/hamdist.asm: New file.
+
+ * mpn/x86_64/fastsse/copyi.asm: New file.
+ * mpn/x86_64/fastsse/copyd.asm: New file.
+ * mpn/x86_64/fastsse/lshift.asm: New file.
+
+2012-02-26 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/x86_64/coreisbr/addmul_2.asm: New file.
+
+ * tests/devel/try.c (param_init): Don't require addmul_N to handle
+ overlap.
+
+ * mpn/x86_64/bd1/mul_1.asm: New file.
+ * mpn/x86_64/bd1/aorsmul_1.asm: New file.
+
+2012-02-26 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+ * mpz/2fac_ui.c: New file: implements n!!.
+ * Makefile.am (MPZ_OBJECTS): Add mpz/2fac_ui.
+ * gmp-h.in: Declare mpz_2fac_ui.
+ * tests/mpz/t-fac.c: Test mpz_2fac_ui.
+ * doc/gmp.texi: Document mpz_2fac_ui.
+ * mpz/Makefile.am (libmpz_la_SOURCES): Add 2fac_ui.c.
+
+ * mpz/oddfac_1.c (mpz_oddfac_1): Use umul_ppmm when size = 2.
+
+2012-02-26 Niels Möller <nisse@lysator.liu.se>
+
+ * mini-gmp: New subdirectory. For use by GMP bootstrap, and as a
+ fallback for applications needing bignums but not high
+ performance.
+
+ * bootstrap.c: New file, replacing dumbmp.c. Uses mini-gmp for the
+ standard GMP functions, and then defines the few functions
+ particular for the bootstrap.
+ * dumbmp.c: Deleted file. A few functions moved to bootstrap.c.
+
+ * gen-bases.c: Include bootstrap.c, not dumbmp.c.
+ * gen-fac_ui.c: Likewise.
+ * gen-trialdivtab.c: Likewise.
+ * gen-fib.c: Include bootstrap.c, not dumbmp.c. Use assert rather
+ than ASSERT. Deleted casts of xmalloc return value.
+ * gen-psqr.c: Likewise.
+ (COLLAPSE_ELEMENT): Use memmove rather than mem_copyi.
+
+ * Makefile.am: Replaced all uses of dumbmp.c by bootstrap.c.
+ (EXTRA_DIST, dist-hook): Arrange for distribution of the mini-gmp
+ files.
+
+2012-02-24 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+ * mpz/invert.c: Use ABSIZ, MPZ_EQUAL_1_P.
+ * mpz/abs.c: Collapse MPZ_REALLOC(x,.) and PTR(x).
+ * mpz/aors_ui.h: Likewise.
+ * mpz/com.c: Likewise.
+ * mpz/neg.c: Likewise.
+
+ * mpz/invert.c: Reply "no-inverse" when modulus is zero.
+ * tests/mpz/t-invert.c: Add more checks.
+ * doc/gmp.texi (mpz_invert): Inverse can not be zero.
+
+2012-02-24 Torbjorn Granlund <tege@gmplib.org>
+
+ * tests/mpn/logic.c: New file.
+ * tests/mpn/Makefile.am (check_PROGRAMS): Add logic.
+
+ * tests/mpz/t-invert.c: New file.
+ * tests/mpz/Makefile.am (check_PROGRAMS): Add t-invert.
+
+2012-02-24 Marc Glisse <marc.glisse@inria.fr>
+
+ * tests/mpq/t-cmp.c: Move NUM and DEN macros...
+ * tests/mpq/t-cmp_ui.c: Likewise...
+ * gmp-impl.h: ... to here.
+
+ * mpq/abs.c: Use NUM, DEN, SIZ, ALLOC, PTR, MPZ_REALLOC.
+ * mpq/aors.c: Likewise.
+ * mpq/canonicalize.c: Likewise.
+ * mpq/clear.c: Likewise.
+ * mpq/cmp.c: Likewise.
+ * mpq/cmp_si.c: Likewise.
+ * mpq/cmp_ui.c: Likewise.
+ * mpq/div.c: Likewise.
+ * mpq/equal.c: Likewise.
+ * mpq/get_d.c: Likewise.
+ * mpq/get_den.c: Likewise.
+ * mpq/get_num.c: Likewise.
+ * mpq/get_str.c: Likewise.
+ * mpq/init.c: Likewise.
+ * mpq/inp_str.c: Likewise.
+ * mpq/inv.c: Likewise.
+ * mpq/md_2exp.c: Likewise.
+ * mpq/mul.c: Likewise.
+ * mpq/neg.c: Likewise.
+ * mpq/set.c: Likewise.
+ * mpq/set_d.c: Likewise.
+ * mpq/set_den.c: Likewise.
+ * mpq/set_f.c: Likewise.
+ * mpq/set_num.c: Likewise.
+ * mpq/set_si.c: Likewise.
+ * mpq/set_str.c: Likewise.
+ * mpq/set_ui.c: Likewise.
+ * mpq/set_z.c: Likewise.
+ * mpq/swap.c: Likewise.
+
+ * tests/mpq/t-inv.c: New test file.
+ * tests/mpq/Makefile.am: Add the above.
+
+ * gmpxx.h (__gmp_set_expr): Use mpq_set_z.
+
+ * mpq/md_2exp.c: Collapse MPZ_REALLOC(x,.) and PTR(x).
+ * mpq/set_d.c: Likewise.
+ * mpq/set_f.c: Likewise.
+
+2012-02-24 Niels Möller <nisse@lysator.liu.se>
+
+ * mpn/x86_64/core2/aorsmul_1.asm: Added mpn_addmul_1c and
+ mpn_submul_1c entry points.
+
+2012-02-23 Marc Glisse <marc.glisse@inria.fr>
+
+ * mpz/abs.c: Use ALLOC, SIZ, ABSIZ, PTR, MPZ_REALLOC.
+ * mpz/aors_ui.h: Likewise.
+ * mpz/array_init.c: Likewise.
+ * mpz/cdiv_q.c: Likewise.
+ * mpz/cdiv_qr.c: Likewise.
+ * mpz/cdiv_r.c: Likewise.
+ * mpz/clear.c: Likewise.
+ * mpz/clrbit.c: Likewise.
+ * mpz/cmp_si.c: Likewise.
+ * mpz/com.c: Likewise.
+ * mpz/fdiv_q.c: Likewise.
+ * mpz/fdiv_qr.c: Likewise.
+ * mpz/fdiv_r.c: Likewise.
+ * mpz/get_si.c: Likewise.
+ * mpz/get_str.c: Likewise.
+ * mpz/init.c: Likewise.
+ * mpz/inp_str.c: Likewise.
+ * mpz/iset.c: Likewise.
+ * mpz/iset_d.c: Likewise.
+ * mpz/iset_si.c: Likewise.
+ * mpz/iset_str.c: Likewise.
+ * mpz/iset_ui.c: Likewise.
+ * mpz/mod.c: Likewise.
+ * mpz/neg.c: Likewise.
+ * mpz/out_str.c: Likewise.
+ * mpz/random2.c: Likewise.
+ * mpz/set_si.c: Likewise.
+ * mpz/set_str.c: Likewise.
+ * mpz/set_ui.c: Likewise.
+ * mpz/setbit.c: Likewise.
+ * mpz/sqrt.c: Likewise.
+ * mpz/swap.c: Likewise.
+ * mpz/tdiv_r_2exp.c: Likewise.
+
+ * tests/cxx/t-ops.cc: Test mpz_abs reallocation.
+
+2012-02-23 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/x86_64/core2/rsh1aors_n.asm: Complete rewrite.
+ * mpn/x86_64/coreisbr/rsh1aors_n.asm: Move old core2 code here.
+
+ * mpn/x86_64/redc_1.asm: Make it work for DOS64 (broken in last edit).
+
+2012-02-20 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+ * mpn/generic/toom_interpolate_8pts.c: Compute carry iff non-trivial.
+
+ * mpz/gcdext.c: Adapt to relaxed mpn_gcdext's input requirements.
+
+ * mpz/and.c: Use mpn_ logic everywhere. Reduce branches.
+ * mpz/ior.c: Likewise.
+ * mpz/xor.c: Likewise.
+
+2012-02-20 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/x86_64/coreisbr/mul_1.asm: New file.
+
+ * mpn/x86_64/coreisbr/aorsmul_1.asm: New file.
+
+ * mpn/x86_64/mod_34lsub1.asm: Avoid ",pt" branch hint since many
+ assemblers don't support it.
+
+2012-02-19 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/generic/redc_1.c: Put back mpn_add_n call, return its carry.
+ Reintroduce previously removed RP argument.
+ * mpn/x86_64/redc_1.asm: Likewise.
+
+ * mpn/generic/redc_2.c: Remove mpn_sub_n call, return carry from
+ mpn_add_n call.
+
+ * gmp-impl.h (mpn_redc_1, mpn_redc_2): Now return an mp_limb_t.
+
+ * tune/speed.h (SPEED_ROUTINE_REDC_1): Adapt to pass RP argument.
+
+ * tests/refmpn.c (refmpn_redc_1): Adapt to new redc_1 interface.
+
+ * mpn/generic/powm.c (MPN_REDC_1): Pass rp parameter to mpn_redc_1.
+ * mpn/generic/powm_sec.c (MPN_REDC_1_SEC): Likewise.
+ * mpn/generic/powm.c (MPN_REDC_2): New macro, use for mpn_redc_2.
+
+2012-02-18 Marc Glisse <marc.glisse@inria.fr>
+
+ * gmpxx.h (std::common_type): New partial specialization in C++11.
+ * tests/cxx/t-cxx11.cc: Test it.
+
+ * gmpxx.h: Don't declare long double functions that are never defined.
+
+ * gmpxx.h (__gmp_binary_expr): Let things happen in place: q=q*q+z*z
+ becomes tmp=z*z, q=q*q, q+=tmp.
+ * tests/cxx/t-binary.cc: More variable reuse tests.
+
+2012-02-17 Marc Glisse <marc.glisse@inria.fr>
+
+ * gmp-h.in (__GMP_WITHIN_GMP): Test with #ifdef instead of #if, for
+ the benefit of applications using gcc -Wundef.
+ (__GMP_WITHIN_GMPXX): Likewise.
+
+2012-02-16 Marc Glisse <marc.glisse@inria.fr>
+
+ * gmpxx.h (__gmp_binary_expr): Let things happen in place: e=a*b-c*d
+ becomes tmp=c*d, e=a*b, e-=tmp.
+ * tests/cxx/t-binary.cc: More variable reuse tests.
+
+2012-02-15 Niels Möller <nisse@lysator.liu.se>
+
+ * tune/tuneup.c (mul_toom43_to_toom54_threshold): New global.
+ (tune_mul): Added tuning of MUL_TOOM43_TO_TOOM54_THRESHOLD.
+ * tune/speed.h (SPEED_ROUTINE_MPN_TOOM43_FOR_TOOM54_MUL): New macro.
+ (SPEED_ROUTINE_MPN_TOOM54_FOR_TOOM43_MUL): New macro.
+ Prototypes for corresponding functions.
+ * tune/common.c (speed_mpn_toom43_for_toom54_mul): New function.
+ (speed_mpn_toom54_for_toom43_mul): New function.
+
+ * gmp-impl.h (MPN_TOOM43_MUL_MINSIZE): Corrected constant.
+ (MPN_TOOM53_MUL_MINSIZE): Likewise.
+ (MPN_TOOM54_MUL_MINSIZE): New constant.
+ (mpn_toom54_mul): Added prototype.
+ (MUL_TOOM43_TO_TOOM54_THRESHOLD): New threshold. Default value and
+ tuning setup.
+
+2012-02-14 Niels Möller <nisse@lysator.liu.se>
+
+ * mpn/generic/toom54_mul.c: New file, originally contributed by
+ Marco.
+ * gmp-impl.h (mpn_toom54_mul_itch): New function.
+ * configure.in (gmp_mpn_functions): Added toom54_mul.
+ * tests/mpn/t-toom54.c: New file.
+ * tests/mpn/Makefile.am (check_PROGRAMS): Added t-toom54.
+
+2012-02-13 Niels Möller <nisse@lysator.liu.se>
+
+ * configure.in: Display summary of options.
+
+2012-02-11 Torbjorn Granlund <tege@gmplib.org>
+
+ * tests/tests.h (TESTS_REPS): Print any non-standard repetitions.
+
+2012-02-11 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+ * doc/gmp.texi (Factorial): Shortly describe current algorithm.
+ (Multiplication Algorithms): Add Toom[68]'n'half, (too) shortly.
+ * gmp-impl.h (ASSERT_ALWAYS): Consider failures UNLIKELY.
+
+2012-02-10 Niels Möller <nisse@lysator.liu.se>
+
+ * tests/mpz/t-gcd.c (gcdext_valid_p): Enforce slightly stricter
+ bound for cofactors.
+
+ * mpn/generic/gcdext_lehmer.c (mpn_gcdext_hook): Corrected
+ handling of unlikely (maybe impossible?) case u1n < un. Related to
+ the 2012-02-05 bugfix of gcdext_subdiv_step.c in the gmp-5.0 repo.
+
+2012-02-09 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+ * gmp-impl.h (mpn_toom3*_itch): Support any recursion depth.
+ * tests/refmpn.c (refmpn_mul): Restore tight allocations.
+
+ * mpz/oddfac_1.c (mpz_oddfac_1): Get ready for n!!
+ * gmp-impl.h (mpz_oddfac_1): Update signature.
+ * mpz/fac_ui.c (mpz_fac_ui): Update call to mpz_oddfac_1.
+
+2012-02-09 Marc Glisse <marc.glisse@inria.fr>
+
+ * gmp-impl.h (ABS_CAST): New macro.
+ * mpf/cmp_si.c: Use ABS_CAST.
+ * mpf/get_si.c: Use ABS_CAST.
+ * mpf/iset_si.c: Use ABS_CAST.
+ * mpf/set_si.c: Use ABS_CAST.
+ * mpq/set_si.c: Use ABS_CAST.
+ * mpz/cmp_si.c: Use ABS_CAST.
+ * mpz/get_si.c: Use ABS_CAST.
+ * mpz/iset_si.c: Use ABS_CAST.
+ * mpz/mul_i.h: Use ABS_CAST.
+ * mpz/set_si.c: Use ABS_CAST.
+
+2012-02-08 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/powerpc32/divrem_2.asm: Fix off-by-one condition in invert_limb
+ code.
+
+2012-02-08 Niels Möller <nisse@lysator.liu.se>
+
+ * doc/gmp.texi (mpz_gcdext): Clarified corner cases in cofactor
+ canonicalization.
+
+2012-02-07 Niels Möller <nisse@lysator.liu.se>
+
+ * mpn/generic/gcdext.c (mpn_gcdext): Fixed assert, related to the
+ special case A = (2k+1) G, B = 2 G. Fix copied from gmp-5.0 repo.
+
+2012-02-06 Niels Möller <nisse@lysator.liu.se>
+
+ * mpn/generic/hgcd_matrix.c (hgcd_matrix_update_q): Fixed carry
+ handling bug. Fix copied from gmp-5.0 repo, where the function is
+ found in hgcd.c.
+
+ * tests/mpz/t-gcd.c (main): Use mpz_rrandomb for test operands,
+ not mpz_urandomb. Change copied from gmp-5.0 repo.
+ * tests/mpn/t-hgcd.c (main): Likewise.
+
+2012-02-04 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+ * tests/refmpn.c (refmpn_mul): More conservative allocations.
+
+2012-02-03 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/x86_64/bd1/gmp-mparam.h: New file.
+
+ * longlong.h (udiv_qrnnd from sdiv_qrnnd): Declare udiv_w_sdiv.
+
+ * mpn/generic/udiv_w_sdiv.c: Use c89 function header.
+
+2012-02-03 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+ * mpz/fac_ui.c: mpz_oddfac_1 removed, with many related functions.
+ * mpz/oddfac_1.c: New file, mpz_oddfac_1 implementation.
+ * gmp-impl.h: mpz_oddfac_1 declaration.
+ * Makefile.am (MPZ_OBJECTS): add mpz/oddfac_1$U.lo .
+ * mpz/Makefile.am (libmpz_la_SOURCES): add oddfac_1.c .
+ * tune/Makefile.am (fac_ui.c): include mpz/oddfac_1.c .
+
+2012-02-02 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+ * mpn/generic/toom_interpolate_16pts.c: Correct an unlikely 32-bit bug.
+
+2012-02-02 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/generic/toom63_mul.c: Allow s+t==n by adjusting an ASSERT.
+ * mpn/generic/toom_interpolate_8pts.c: Perform final incr iff s+t!=n.
+
+ * tests/mpn/t-toom6h.c (MIN_BN): Make more consistent with ASSERT in
+ tested function.
+
+2012-02-01 Torbjorn Granlund <tege@gmplib.org>
+
+ * tests/mpn/t-mul.c: New file.
+ * tests/mpn/Makefile.am: Compile it.
+
+2012-02-01 Marc Glisse <marc.glisse@inria.fr>
+
+ * gmpxx.h: Remove check for g++ older than 2.91.
+
+2012-02-01 Niels Möller <nisse@lysator.liu.se>
+
+ * mpn/generic/mul.c: Added diagram on where toom functions can be
+ called.
+
+2012-02-01 Marc Glisse <marc.glisse@inria.fr>
+
+ * gmpxx.h (__gmp_unary_expr): Make the constructor explicit.
+ (__gmp_expr(__gmp_expr&&)): New move constructors.
+ (__gmp_expr::operator=(__gmp_expr&&)): New move assignments.
+ (swap): Mark as noexcept.
+ (__GMPXX_USE_CXX11): New macro.
+ (__GMPXX_NOEXCEPT): New macro.
+ * tests/cxx/t-cxx11.cc: New file.
+ * tests/cxx/Makefile.am: Added t-cxx11.
+
+2012-01-31 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/generic/powm_sec.c (SQR_BASECASE_LIM): New name for
+ SQR_BASECASE_MAX.
+ (SQR_BASECASE_LIM, fat variant): Define to read __gmpn_cpuvec.
+ (SQR_BASECASE_LIM, native variant): Define to SQR_TOOM2_THRESHOLD
+ straight, without arithmetic.
+ (mpn_local_sqr): Use BELOW_THRESHOLD as per Marco's suggestion.
+
+2012-01-30 Torbjorn Granlund <tege@gmplib.org>
+
+ * tests/mpz/t-powm.c: Ensure all sizes are seen.
+
+2012-01-30 Marc Glisse <marc.glisse@inria.fr>
+
+ * gmpxx.h (__gmp_binary_expr): Let things happen in place: d=a+b+c
+ when d != c.
+ * tests/cxx/t-binary.cc: Test variable reuse: c=a+b+c.
+
+2012-01-28 Marc Glisse <marc.glisse@inria.fr>
+
+ * gmpxx.h: Don't compute -LONG_MIN.
+
+ * doc/gmp.texi (gmp_randclass::get_z_bits): Use mp_bitcnt_t.
+ * gmpxx.h: Replace unsigned long with mp_bitcnt_t.
+
+2012-01-27 Torbjorn Granlund <tege@gmplib.org>
+
+ * Upgrade to libtool 2.4.2.
+
+2012-01-26 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+ * tests/mpz/t-fac_ui.c: Increase default test cases.
+
+ * mpz/prodlimbs.c: New file, mpz_prodlimbs implementation.
+ * gmp-impl.h: mpz_prodlimbs declaration.
+ * Makefile.am (MPZ_OBJECTS): add mpz/prodlimbs$U.lo .
+ * mpz/Makefile.am (libmpz_la_SOURCES): add prodlimbs.c .
+ (fac_ui.h): remove target (moved up one directory).
+ * mpz/fac_ui.c: mpz_prodlimbs removed, micro-optimisations.
+
+2012-01-25 Torbjorn Granlund <tege@gmplib.org>
+
+ * tune/tuneup.c: Remove unused tuneup variables.
+
+2012-01-20 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+ * mpz/fac_ui.c: Reduce branches in basecases.
+
+2012-01-18 Marc Glisse <marc.glisse@inria.fr>
+
+ * doc/gmp.texi (mpf_class::mpf_class): Use mp_bitcnt_t.
+
+2012-01-17 Torbjorn Granlund <tege@gmplib.org>
+
+ * configure.in: Add ultrasparc T4 support.
+
+ * demos/isprime.c (main): Run 25 millerrabin tests.
+
+2012-01-16 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+ * mpz/fac_ui.c (SIEVE_SEED): Define value for small limb size.
+ (mpz_oddswing_1): Reduce the number of divisions.
+ (mpz_oddfac_1): Reduce memory usage.
+ * mpn/minithres/gmp-mparam.h: Correct minimum for FAC_DSC_.
+ * tune/tuneup.c (tune_fac_ui): Likewise.
+
+2012-01-15 Niels Möller <nisse@lysator.liu.se>
+
+ * mpz/scan0.c (mpz_scan0): Use ~(mp_bitcnt_t) 0, rather than
+ ULONG_MAX, when returning "infinity".
+ * mpz/scan1.c (mpz_scan1): Likewise.
+
+2012-01-12 Torbjorn Granlund <tege@gmplib.org>
+
+ * tests/t-popc.c: Test longer bit strings.
+
+2012-01-12 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+ * mpz/divexact.c: Tight realloc, delayed if variables are reused.
+ * mpz/lcm.c: Smaller temp space, avoid goto.
+ * gmp-impl.h (popc_limb): avoid double & (for 8-bits limb).
+
+2012-01-10 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+ * mpn/minithres/gmp-mparam.h: New FAC_ODD_ and FAC_DSC_ thresholds.
+ * tune/tuneup.c (tune_fac_ui): Correct minimum for FAC_DSC_.
+
+2012-01-07 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpz/mul_2exp.c: Rewrite.
+ * mpz/tdiv_q_2exp.c: Rewrite.
+
+2012-01-05 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+ * gen-fac_ui.c: Remove currently unused constants; add new odd
+ double factorial table.
+ * mpz/fac_ui.c (RECURSIVE_PROD_THRESHOLD): Increase default.
+ (mpz_oddfac_1): New function: a merge of _bc_odd and _dsc_odd.
+ (mpz_prodlimbs): More in-place computations.
+
+ * tune/tuneup.c (tune_fac_ui): min_is_always for FAC_ODD_.
+
+2012-01-02 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+ * tune/tuneup.c (tune_fac_ui): Compute FAC_DSC before FAC_ODD.
+
+2011-12-31 Torbjorn Granlund <tege@gmplib.org>
+
+ * Makefile.am (fac_ui.h): Put file in top-level dir, not in mpz.
+
+2011-12-31 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+ * tune/Makefile.am (fac_ui.c): New target.
+ (nodist_tuneup_SOURCES,CLEANFILES): Add fac_ui.c.
+ * tune/tuneup.c (mpz_fac_ui_tune): Declare prototype.
+ (fac_odd_threshold,fac_dsc_threshold): New global variables.
+ (speed_mpz_fac_ui_tune,tune_fac_ui): New functions.
+ (all): Call tune_fac_ui.
+ * gmp-impl.h (FAC_ODD_THRESHOLD,FAC_DSC_THRESHOLD):
+ New thresholds: default values, and setup for tuning.
+ (FAC_DSC_THRESHOLD_LIMIT): Define (when tuning).
+ * mpz/fac_ui.c (FAC_ODD_THRESHOLD,FAC_DSC_THRESHOLD):
+ Default values removed.
+
+2011-12-30 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpz/hamdist.c: Fix typo in a return statement.
+
+ * mpn/generic/powm_sec.c (SQR_BASECASE_MAX): Set safely from
+ SQR_TOOM2_THRESHOLD.
+
+2011-12-17 Torbjorn Granlund <tege@gmplib.org>
+
+ * tests/mpz/t-perfpow.c: Decrease default # of tests.
+
+2011-12-16 Torbjorn Granlund <tege@gmplib.org>
+
+ * tests/refmpn.c (AORS_1): Fix typo in variable type.
+
+2011-12-10 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/generic/sbpi1_bdiv_q.c: Delay quotient limb stores in order to
+ allow quotient and dividend to completely overlap.
+ * mpn/generic/sbpi1_bdiv_qr.c: Likewise.
+
+2011-12-10 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+ * mpz/fac_ui.c: fac_bc_ui inlined in fac_ui.
+
+2011-12-08 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/generic/powm_sec.c: Handle fat binaries better.
+
+ * mpz/fac_ui.c (mpz_bc_fac_1): Fix typo in allocation size.
+
+ * mpn/x86/fat/com.c: New file.
+
+ * mpn/x86_64/pentium4/aors_n.asm: Make it actually work for DOS64.
+ * mpn/x86_64/pentium4/rsh1aors_n.asm: Conditionalise jump on DOS64
+ to avoid overhead for standard ABIs.
+
+ * mpn/x86_64/gcd_1.asm: Support DOS64.
+
+2011-12-07 Torbjorn Granlund <tege@gmplib.org>
+
+ * configure.in: Fix typo making HAVE_NATIVE_mpn_X fail for fat
+ functions.
+
+ * mpn/x86_64/fat/fat.c (__gmpn_cpuvec_init): Add a missing break.
+
+2011-12-07 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+ * gen-fac_ui.c: Generate two more tables: odd factorial, swing.
+
+ * mpz/fac_ui.c: Rewrite.
+
+2011-12-06 Niels Möller <nisse@lysator.liu.se>
+
+ * mpn/generic/hgcd.c (mpn_hgcd): Use hgcd_reduce for first
+ recursive call.
+
+2011-12-06 Torbjorn Granlund <tege@gmplib.org>
+
+ * tune/mod_1_1-1.c: Redefine the mpn_ functions, not __gmpn_ (for the
+ benefit of fat builds).
+ * tune/mod_1_1-2.c: Likewise.
+
+2011-12-05 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/x86/fat/lshiftc.c: New file.
+ * mpn/x86/fat/mod_1_1.c: New file.
+ * mpn/x86/fat/mod_1_2.c: New file.
+ * mpn/x86/fat/mod_1_4.c: New file.
+
+ * mpn/x86/fat/diveby3.c: Remove no longer fat function.
+ * mpn/x86_64/fat/diveby3.c: Likewise.
+
+ * mpn/x86_64/fat/gcd_1.c: Remove since always provided as asm.
+ * mpn/x86_64/fat/mode1o.c: Likewise.
+
+ * configure.in (fat_functions): Update to more relevant function set.
+ Add special handling for mod_1_N_cps functions.
+ * gmp-impl.h (struct cpuvec_t): Corresponding changes. Also add
+ various declarations for new functions.
+ * mpn/x86/x86-defs.m4 (CPUVEC_FUNCS_LIST): Corresponding changes.
+ * mpn/x86_64/x86_64-defs.m4 (CPUVEC_FUNCS_LIST): Corresponding changes.
+ * mpn/x86/fat/fat.c (__gmpn_cpuvec): Corresponding changes.
+ * mpn/x86_64/fat/fat.c (__gmpn_cpuvec): Corresponding changes.
+
+ * mpn/x86_64: Port most remaining x86_64 files to DOS64.
+
+ * mpn/x86_64/coreisbr/aors_n.asm: Add forgotten DOS64_EXIT.
+
+ * mpn/x86_64/x86_64-defs.m4 (LEA): Handle non-PIC code.
+ * mpn/x86_64/darwin.m4 (LEA): Likewise.
+
+2011-12-04 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/x86_64/fat/fat.c (MAKE_FMS): Rewrite to handle modern CPUs.
+ * mpn/x86/fat/fat.c (MAKE_FMS): Likewise.
+
+ * mpn/x86_64/darwin.m4 (PROTECT): Define to potentially useful value.
+
+2011-12-02 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/x86_64/invert_limb_table.asm: Use PROTECT.
+ * mpn/x86_64/invert_limb.asm: Likewise.
+
+ * mpn/x86_64/darwin.m4 (PROTECT, IFELF): New defines.
+ * mpn/x86_64/dos64.m4 (PROTECT, IFELF): New defines.
+ * mpn/x86_64/x86_64-defs.m4 (PROTECT, IFELF): New defines.
+
+2011-12-01 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/x86_64/fat/fat.c: Copy fake cpuid code from x86/fat/fat.c.
+
+ * mpn/x86_64 (STD64, IFSTD): New names for ELF64, IFELF (since these
+ denote all standard calling conventions).
+
+ * mpn/x86_64: Add DOS64 ABI support to more files.
+
+ * mpn/x86_64/mod_1_1.asm: Finish DOS64 support.
+ * mpn/x86_64/mod_1_2.asm: Likewise.
+ * mpn/x86_64/mod_1_4.asm: Likewise.
+
+ * configure.in: Add GMP_NONSTD_ABI also for fat builds.
+
+ * mpn/x86_64/fat/fat_entry.asm: Rewrite to support DOS64.
+
+ * mpn/x86_64/dos64.m4 (IFDOS, IFSTD): New defines.
+ * mpn/x86_64/x86_64-defs.m4 (IFDOS, IFSTD): New defines.
+
+ * mpn/x86_64/dive_1.asm: Add DOS64 ABI support.
+ * mpn/x86_64/mode1o.asm: Likewise.
+
+ * mpn/x86_64/mod_34lsub1.asm: Enable for DOS64.
+
+ * mpn/x86_64/invert_limb.asm: Wrap .protected decl.
+
+ * gmp-impl.h (DECL_divexact_1): Fix typo in return type.
+
+ * mpn/x86_64/dos64.m4 (LEA): New define.
+ (PIC): Define.
+
+2011-11-29 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/x86_64: Add DOS64 ABI support to most files.
+
+2011-11-28 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/x86_64/mul_basecase.asm: Support ABI DOS64.
+ * mpn/x86_64/sqr_basecase.asm: Support ABI DOS64.
+ * mpn/x86_64/aorsmul_1.asm: Support ABI DOS64.
+ * mpn/x86_64/mul_1.asm: Support ABI DOS64.
+
+ * mpn/x86_64/x86_64-defs.m4 (DOS64_ENTRY, DOS64_EXIT): New, empty defs.
+
+ * mpn/x86_64/dos64.m4: New file.
+
+ * mpn/asm-defs.m4 (ABI_SUPPORT): New dummy macro.
+
+ * configure.in (64-bit mingw/cygwin): Define HOST_DOS64,GMP_NONSTD_ABI.
+ No longer clear out path_64.
+ (mpn code selection loop): Handle GMP_NONSTD_ABI.
+
+ * mpn/generic/udiv_w_sdiv.c: Use CNST_LIMB for some constants.
+
+2011-11-25 Torbjorn Granlund <tege@gmplib.org>
+
+ * x86/*: Many new gmp-mparam.h files for 64-bit CPUs in 32-bit mode.
+
+ * configure.in: Overhaul x86/x86_64 support, merging three case
+ statements into one.
+
+2011-11-24 Torbjorn Granlund <tege@gmplib.org>
+
+ * doc/gmp.texi (Formatted Output Strings): Clarify rules for mpf_t
+ precision.
+
+ * mpn/powerpc32/p7/gmp-mparam.h: New file.
+
+ * tune/tuneup.c (tune_mu_div, tune_mu_bdiv): Up min_size to karatsuba's
+ threshold.
+
+2011-11-22 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/powerpc64/mode64/p6/aorsmul_1.asm: New file.
+
+ * configure.in: Don't fail fat builds under 64-bit DOS.
+
+ * mpn/powerpc64/mode64/aors_n.asm: Align loop for slightly better
+ power5 performance.
+
+2011-11-21 Torbjorn Granlund <tege@gmplib.org>
+
+ * gmp-h.in (__GNU_MP_RELEASE): Renamed from typo name.
+
+2011-11-20 Torbjorn Granlund <tege@gmplib.org>
+
+ * configure.in: Split x86 CPUs into more subtypes for more accurate
+ passing of gcc flags.
+
+ * mpn/powerpc32/p3-p7/aors_n.asm: New file.
+
+ * configure.in: Pass -m32 for powerpc64 with abi=32, via the _maybe
+ mechanism.
+
+ * configure.in: Support powerpc32/p3-p7 directory for affected CPUs.
+
+2011-11-17 Torbjorn Granlund <tege@gmplib.org>
+
+ * tune/speed.c (routine): Add mpn_tabselect.
+ * tune/common.c (speed_mpn_tabselect): New function.
+ * tune/speed.h (SPEED_ROUTINE_MPN_COPY_CALL): New macro, made from
+ old SPEED_ROUTINE_MPN_COPY.
+ (SPEED_ROUTINE_MPN_COPY): Just invoke SPEED_ROUTINE_MPN_COPY_CALL.
+ (SPEED_ROUTINE_MPN_TABSELECT): New macro.
+
+2011-11-17 Niels Möller <nisse@lysator.liu.se>
+
+ * tune/tuneup.c (tune_hgcd_appr): Increase stop_since_change.
+
+2011-11-16 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/powerpc32/tabselect.asm: New file.
+
+ * mpn/powerpc64/mode64/aorscnd_n.asm: New file.
+
+2011-11-15 Niels Möller <nisse@lysator.liu.se>
+
+ * tune/speed.h (speed_mpn_hgcd_appr_lehmer): New prototype.
+ (mpn_hgcd_lehmer_itch): Likewise.
+ (mpn_hgcd_appr_lehmer): Likewise.
+ (mpn_hgcd_appr_lehmer_itch): Likewise.
+ (MPN_HGCD_LEHMER_ITCH): Deleted macro.
+
+ * tune/speed.c (routine): Added mpn_hgcd_appr_lehmer.
+
+ * tune/common.c (speed_mpn_hgcd_lehmer): Use mpn_hgcd_lehmer_itch
+ rather than similarly named macro.
+ (speed_mpn_hgcd_appr_lehmer): New function.
+
+ * tune/Makefile.am (libspeed_la_SOURCES): Added
+ hgcd_appr_lehmer.c.
+
+ * tune/hgcd_appr_lehmer.c: New file.
+
+ * tune/tuneup.c (tune_hgcd_appr): Increased min_size to 50; some
+ machines got small thresholds which appear to be bogus.
+
+2011-11-15 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/generic/powm_sec.c (mpn_local_sqr): Remove forgotten TMP_* calls.
+ (redcify): Likewise.
+ (mpn_powm_sec): Likewise.
+
+ * mpn/generic/powm_sec.c (mpn_powm_sec): Rework scratch usage.
+ (mpn_powm_sec_itch): Rewrite.
+
+ * mpn/generic/powm_sec.c (mpn_powm_sec): Use mpn_tabselect also in
+ initialisation.
+
+ * configure.in: Amend 2011-11-03 gcc_cflags change.
+
+ * mpn/powerpc64/tabselect.asm: New file.
+ * mpn/x86_64/tabselect.asm: New file.
+ * mpn/x86/tabselect.asm: New file.
+ * mpn/ia64/tabselect.asm: New file.
+
+ * mpn/asm-defs.m4 (define_mpn): Add tabselect.
+
+ * configure.in (gmp_mpn_functions): Add tabselect.
+ (HAVE_NATIVE): Add entries for addcnd_n, subcnd_n, tabselect.
+
+ * mpn/generic/powm_sec.c: Remove mpn_tabselect implementation.
+ * mpn/generic/tabselect.c: New file with removed code.
+
+2011-11-13 Torbjorn Granlund <tege@gmplib.org>
+
+ * tune/Makefile.am (TUNE_MPN_SRCS_BASIC): Add powm_sec.c.
+
+ * mpn/generic/powm_sec.c (win_size): Use POWM_SEC_TABLE.
+ (POWM_SEC_TABLE): Define default.
+
+ * tune/tuneup.c (tune_powm_sec): New function computing POWM_SEC_TABLE.
+ (all): Call new function.
+
+ * mpn/generic/powm_sec.c (win_size): Define only when
+ TUNE_PROGRAM_BUILD is not set.
+
+2011-11-13 Niels Möller <nisse@lysator.liu.se>
+
+ * tune/tuneup.c (tune_hgcd_appr): Use default min_size.
+ (tune_hgcd_reduce): Increase max_size and step_factor, to 7000
+ and 0.04, respectively.
+
+2011-11-11 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/powerpc64/mode64/sqr_diag_addlsh1.asm: Remove.
+
+2011-11-11 Niels Möller <nisse@lysator.liu.se>
+
+ * tune/hgcd_reduce_2.c: New file.
+ * tune/hgcd_reduce_1.c: New file.
+
+ * tune/tuneup.c (hgcd_appr_threshold): New threshold variable.
+ (hgcd_reduce_threshold): Likewise.
+ (tune_hgcd_appr): New function.
+ (tune_hgcd_reduce): New function.
+ (all): Call tune_hgcd_appr and tune_hgcd_reduce.
+
+ * tune/speed.h (speed_mpn_hgcd_reduce): Declaration.
+ (speed_mpn_hgcd_reduce_[12]): Likewise.
+ (mpn_hgcd_reduce_[12]): Likewise.
+ (SPEED_ROUTINE_MPN_HGCD_REDUCE_CALL): New macro.
+
+ * tune/speed.c (routine): Added mpn_hgcd_reduce,
+ mpn_hgcd_reduce_1, and mpn_hgcd_reduce_2.
+
+ * tune/common.c (speed_mpn_hgcd_reduce): New function.
+ (speed_mpn_hgcd_reduce_[12]): Likewise.
+
+ * tune/Makefile.am (libspeed_la_SOURCES): Added hgcd_reduce_1.c
+ hgcd_reduce_2.c.
+ (TUNE_MPN_SRCS_BASIC): Added hgcd_appr.c and hgcd_reduce.c.
+
+ * mpn/generic/hgcd_appr.c (submul, hgcd_matrix_apply): Deleted
+ functions, earlier copied to hgcd_reduce.c.
+ (mpn_hgcd_appr): Use hgcd_reduce.
+
+2011-11-09 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/powerpc64/mode64/sqr_basecase.asm: New file.
+
+ * mpn/x86_64/aorscnd_n.asm: New file.
+
+ * tune/speed.c (routine): Add measuring of mpn_addcnd_n, mpn_subcnd_n.
+ * tune/common.c (speed_mpn_addcnd_n,speed_mpn_subcnd_n): New functions.
+ * tune/speed.h: Declare them.
+
+ * tests/devel/try.c: Add tests for mpn_addcnd_n and mpn_subcnd_n.
+ * tests/refmpn.c (refmpn_addcnd_n, refmpn_subcnd_n): New functions.
+ * tests/tests.h: Declare them.
+
+ * configure.in (gmp_mpn_functions): Add addcnd_n and subcnd_n.
+
+2011-11-07 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/generic/redc_1.c: Just reduce U operand using Hensel norm, but
+ not fully canonically; leave add_n and conditional sub_n to caller.
+ Therefore omit R argument.
+
+ * mpn/generic/redc_1_sec.c: Remove.
+
+ * gmp-impl.h (mpn_redc_1): Update declaration.
+ (mpn_redc_1_sec): Remove declaration.
+
+ * configure.in (gmp_mpn_functions): Remove redc_1_sec.
+
+ * mpn/x86_64/redc_1.asm: Adapt to newly defined functionality/interface.
+ * tune/speed.h (SPEED_ROUTINE_REDC_1): Likewise.
+
+ * tests/refmpn.c (refmpn_redc_1): Likewise; also call refmpn_addmul_1
+ instead of mpn_addmul_1.
+
+ * mpn/generic/powm.c (MPN_REDC_1): New macro, use for mpn_redc_1.
+ * mpn/generic/powm_sec.c (MPN_REDC_1_SEC): New macro, use for
+ mpn_redc_1_sec.
+
+2011-11-03 Torbjorn Granlund <tege@gmplib.org>
+
+ * dumbmp.c (mpz_sub): Abort for non-handled case.
+
+ * mpn/powerpc64/mode64/lshiftc.asm: Move file from here...
+ * mpn/powerpc64/lshiftc.asm: ...to here, with trivial modifications.
+
+ * configure.in: Pass -m32 in more cases, using _maybe mechanism.
+ Inherit default gcc_cflags in more places.
+
+ * mpn/powerpc64/mode64/p7/gmp-mparam.h: New file.
+
+2011-11-02 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/s390_64/invert_limb.asm: Slight optimisation.
+
+ * configure.in (s390): Set gcc_32_cflags_maybe.
+
+ * mpn/s390_32/gmp-mparam.h: Put in proper data.
+ * mpn/s390_32/esame/gmp-mparam.h: New file.
+
+ * mpn/x86_64/bobcat/gmp-mparam.h: New file.
+
+ * mpn/s390_32/lshift.asm: New file.
+ * mpn/s390_32/rshift.asm: New file.
+ * mpn/s390_32/lshiftc.asm: New file.
+
+2011-10-31 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/powerpc64/sqr_diagonal.asm: Move from here...
+ * mpn/powerpc64/mode32/sqr_diagonal.asm: ...to here.
+
+ * mpn/powerpc64/mode64/sqr_diag_addlsh1.asm: New file.
+
+ * mpn/s390_64/sqr_basecase.asm: Rewrite sqr_diag_addlsh1 code.
+ * mpn/s390_32/esame/sqr_basecase.asm: Likewise.
+
+2011-10-29 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/s390_64/lshift.asm: Complete rewrite.
+ * mpn/s390_64/rshift.asm: Likewise.
+
+ * mpn/s390_64/lshiftc.asm: New file.
+
+2011-10-28 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/s390_32/esame/aors_n.asm: New file, with rewritten add/sub code.
+
+2011-10-27 Torbjorn Granlund <tege@gmplib.org>
+
+ From Per Olofsson:
+ * gmp-impl.h (BSWAP_LIMB): Rename variable to avoid BSWAP_LIMB_FETCH
+ clash.
+
+ * mpn/s390_32/esame/mul_basecase.asm: New file.
+
+ * mpn/s390_32/esame/sqr_basecase.asm: New file.
+
+ * mpn/s390_32/logops_n.asm: New file.
+
+ * mpn/s390_64/logops_n.asm: Fix rp=up code. Remove a leftover insn.
+
+2011-10-26 Niels Möller <nisse@lysator.liu.se>
+
+ * gmp-impl.h (mpn_hgcd_reduce, mpn_hgcd_reduce_itch): Added
+ prototypes.
+ (HGCD_APPR_THRESHOLD): Set up threshold for tuning.
+ (HGCD_REDUCE_THRESHOLD): Likewise.
+
+ * configure.in (gmp_mpn_functions): Added hgcd_reduce.
+
+ * mpn/generic/hgcd_reduce.c: New file.
+
+2011-10-24 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/x86_64/sqr_basecase.asm: Put intermediate result into R, don't
+ allocate any stack space.
+
+2011-10-23 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/s390_64/logops_n.asm: Use nc, oc, xc when possible.
+
+ * tune/common.c (speed_mpn_and_n, speed_mpn_andn_n, etc):
+ Pass correct input args.
+
+ * mpn/s390_64/mod_34lsub1.asm: Use llgfr for zero extensions.
+
+ * mpn/s390_64/mul_basecase.asm: New file.
+
+ * mpn/s390_64/sqr_basecase.asm: New file.
+ * mpn/s390_64/sqr_diag_addlsh1.asm: Removed, lives on in sqr_basecase.
+
+ * mpn/s390_64/bdiv_dbm1c.asm: Shave off 1 c/l.
+
+ * mpn/s390_64/aorrlsh1_n.asm: New file, developed from aorslsh1_n.asm.
+ * mpn/s390_64/sublsh1_n.asm: New file.
+ * mpn/s390_64/aorslsh1_n.asm: Remove file.
+
+2011-10-22 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/s390_64/logops_n.asm: New file.
+
+ * mpn/s390_64/aors_n.asm: New file, with rewritten add/sub code.
+
+2011-10-20 Torbjorn Granlund <tege@gmplib.org>
+
+ * tune/speed.h (SPEED_ROUTINE_MPN_SQR_DIAG_ADDLSH1_CALL): New macro.
+ * tune/common.c (speed_mpn_sqr_diag_addlsh1): New function.
+ * tune/speed.c (routine): Measure mpn_sqr_diag_addlsh1.
+
+ * mpn/s390_64/sqr_diag_addlsh1.asm: Rewrite like s390_32/esame code.
+
+ * mpn/s390_32/esame/sqr_diag_addlsh1.asm: Save just needed registers.
+
+2011-10-19 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/s390_32/esame/add_n.asm: Rewrite, similar to s390_64 code.
+ * mpn/s390_32/esame/sub_n.asm: Likewise.
+
+2011-10-17 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/s390_32/esame/aorslsh1_n.asm: New file.
+
+2011-10-16 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/s390_32/esame/sqr_diag_addlsh1.asm: New file.
+
+ * mpn/s390_32/copyi.asm: New file.
+ * mpn/s390_32/copyd.asm: New file.
+
+ * mpn/s390_64/copyd.asm: Optimise.
+
+ * mpn/s390_64/copyi.asm: Rewrite along the lines of glibc memcpy.
+
+ * mpn/s390_64/aorslsh1_n.asm: New file.
+
+ * mpn/s390_64/mod_34lsub1.asm: New file.
+
+ * mpn/s390_64/sqr_diag_addlsh1.asm: New file.
+
+2011-10-15 Torbjorn Granlund <tege@gmplib.org>
+
+ * configure.in (s390): Rewrite support to handle known CPUs.
+ * config.guess: Recognise s390 CPUs.
+ * config.sub: Match s390 CPUs.
+ * acinclude.m4 (S390_PATTERN, S390X_PATTERN): New defines.
+
+2011-10-14 Torbjorn Granlund <tege@gmplib.org>
+
+ From Per Olofsson:
+ * mpn/generic/popham.c: Add __GMP_NOTHROW to make it match gmp.h.
+ * mpn/generic/gcd_1.c: Separate declarations and initialisers for the
+ benefit of C++.
+
+ * configure.in: AC_DEFINE HAVE_HOST_CPU_s390_zarch.
+ * longlong.h (s390): Use it.
+ (s390 umul_ppmm): Fix typo in pure C variant.
+
+2011-10-13 Torbjorn Granlund <tege@gmplib.org>
+
+ * longlong.h (s390): Put back an accidentally deleted #else.
+
+ * configure.in (s390): Unset extra_functions for s390x.
+
+2011-10-12 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/s390_64/lshift.asm: Reduce register usage.
+ * mpn/s390_64/rshift.asm: Likewise.
+
+ * longlong.h (s390 umul_ppmm): With new-enough gcc, avoid asm.
+
+ From Andreas Krebbel:
+ * longlong.h (s390 umul_ppmm): Support 32-bit limbs with gcc using
+ 64-bit registers.
+ (s390 udiv_qrnnd): Likewise.
+
+2011-10-11 Torbjorn Granlund <tege@gmplib.org>
+
+ * configure.in (s390x): Pass -mzarch to gcc in 32-bit mode.
+
+ * longlong.h (s390x): Add __CLOBBER_CC for relevant asm patterns.
+ * mpn/generic/mod_1_1.c (s390x add_mssaaaa): Likewise.
+
+ * mpn/s390_64/copyd.asm: New file.
+
+2011-10-10 Niels Möller <nisse@lysator.liu.se>
+
+ * mpn/generic/hgcd_appr.c: Deleted debugging code.
+
+ * tests/mpn/t-hgcd_appr.c (main): Added -v flag.
+ (hgcd_appr_valid_p): Increased margin of non-minimality for
+ divide-and-conquer algorithm. Display bit counts only if
+ -v is used.
+
+ * mpn/generic/hgcd_appr.c (submul): New (static) function.
+ (hgcd_matrix_apply): New function.
+ (mpn_hgcd_appr_itch): Account for divide-and-conquer algorithm.
+ (mpn_hgcd_appr): Implemented divide-and-conquer.
+
+2011-10-10 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/generic/mod_1_1.c (add_mssaaaa): Add s390x variant. Put arm code
+ inside __GNUC__.
+
+ * tune/time.c (STCK): Use proper memory constraint.
+
+ From Marco Trudel:
+ * tests/mpz/t-scan.c (check_ref): Fix loop end bound.
+
+2011-10-10 Niels Möller <nisse@lysator.liu.se>
+
+ * gmp-impl.h (HGCD_APPR_THRESHOLD): New threshold.
+
+ * mpn/generic/hgcd_appr.c (mpn_hgcd_appr): Interface change.
+ Destroy inputs, let caller make working copies if needed.
+ (mpn_hgcd_appr_itch): Reduced scratch need.
+ * gmp-impl.h: Updated mpn_hgcd_appr prototype.
+ * tests/mpn/t-hgcd_appr.c (one_test): Make working copies for
+ hgcd_appr.
+ * tune/common.c (speed_mpn_hgcd_appr): Use SPEED_ROUTINE_MPN_HGCD_CALL.
+ * tune/speed.h (SPEED_ROUTINE_MPN_HGCD_APPR_CALL): Deleted.
+
+2011-10-09 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/s390_64/copyi.asm: New file.
+ * mpn/s390_64/lshift.asm: New file.
+ * mpn/s390_64/rshift.asm: New file.
+
+ * mpn/s390_64/add_n.asm: Rewrite using lmg/stmg.
+ * mpn/s390_64/sub_n.asm: Likewise.
+
+ * mpn/s390_64/invert_limb.asm: Save a callee-saves register less.
+
+ * tune/time.c (getrusage_backwards_p): Properly cast printed values.
+
+ * longlong.h (s390x): Put back UDItype casts to make gcc reloading use
+ right mode for constants.
+ (s390x count_leading_zeros): Disable until we support z10 specifically.
+ (s390x add_ssaaaa): Remove algsi/slgsi until we support z10.
+
+2011-10-09 Niels Möller <nisse@lysator.liu.se>
+
+ * mpn/generic/hgcd_matrix.c (mpn_hgcd_matrix_adjust): Declare
+ matrix argument const.
+
+2011-10-08 Niels Möller <nisse@lysator.liu.se>
+
+ * tests/mpn/t-hgcd_appr.c (hgcd_appr_valid_p): Adjusted the
+ allowed margin of non-minimality for hgcd_appr.
+
+ * mpn/generic/hgcd_appr.c (mpn_hgcd_appr): Fixed handling of
+ extra_bits, starting at zero, to ensure that we don't produce too
+ small remainders. Added a final reduction loop when we
+ otherwise terminate with extra_bits > 0, to make the returned
+ remainders closer to minimal.
+
+2011-10-07 Torbjorn Granlund <tege@gmplib.org>
+
+ * longlong.h (s390): Add 32-bit zarch umul_ppmm and udiv_qrnnd.
+ (s390): Overhaul 32-bit and 64-bit code.
+
+2011-10-07 Niels Möller <nisse@lysator.liu.se>
+
+ * tune/speed.h (speed_mpn_hgcd_appr): New prototype.
+ (SPEED_ROUTINE_MPN_HGCD_APPR_CALL): New macro.
+ * tune/common.c (speed_mpn_hgcd_appr): New function.
+ * tune/speed.c (routine): Added mpn_hgcd_appr.
+
+ * tests/mpn/t-hgcd_appr.c: New file.
+ * tests/mpn/Makefile.am (check_PROGRAMS): Added t-hgcd_appr.
+
+ * configure.in (gmp_mpn_functions): Added hgcd_step and hgcd_appr.
+
+ * gmp-impl.h: Added prototypes for mpn_hgcd_step,
+ mpn_hgcd_appr_itch and mpn_hgcd_appr.
+
+ * mpn/generic/hgcd_appr.c: New file.
+
+ * mpn/generic/hgcd_step.c: New file, extracted from hgcd.c.
+ (mpn_hgcd_step): Renamed, from...
+ * mpn/generic/hgcd.c (hgcd_step): ...old name. Renamed and moved
+ to hgcd_step.c.
+ (hgcd_hook): Also moved to hgcd_step.c.
+ (mpn_hgcd): Updated for hgcd_step renaming.
+
+2011-10-06 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/s390_64/invert_limb.asm: New file.
+
+2011-10-04 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/s390_64/submul_1.asm: New file.
+ * mpn/s390_32/esame/submul_1.asm: New file.
+
+ * mpn/generic/mulmid.c (mpn_mulmid): Move a TMP_DECL to block start.
+
+ * mpn/Makefile.am (TARG_DIST): Add s390_32 and s390_64, remove s390 and
+ z8000x.
+
+ * doc/gmp.texi (Custom Allocation): Rephrase a paragraph.
+
+ * demos/factorize.c: Run 25 Miller-Rabin tests.
+
+ * mpz/nextprime.c: Run 25 mpz_millerrabin tests (was 10).
+
+2011-10-03 Torbjorn Granlund <tege@gmplib.org>
+
+ * configure.in: Support s390x.
+
+ * longlong.h: Add support for 64-bit s390x.
+
+ * mpn/s390_64: New directory.
+ * mpn/s390_64/add_n.asm: New file.
+ * mpn/s390_64/sub_n.asm: New file.
+ * mpn/s390_64/mul_1.asm: New file.
+ * mpn/s390_64/addmul_1.asm: New file.
+ * mpn/s390_64/bdiv_dbm1c.asm: New file.
+ * mpn/s390_64/gmp-mparam.h: New file, taken from x86_64.
+
+ * mpn/s390_32: Directory renamed from mpn/s390.
+ * mpn/s390_32/gmp-mparam.h: New file, taken from x86_64.
+ * mpn/s390_32/esame/add_n.asm: New file.
+ * mpn/s390_32/esame/sub_n.asm: New file.
+ * mpn/s390_32/esame/mul_1.asm: New file.
+ * mpn/s390_32/esame/addmul_1.asm: New file.
+ * mpn/s390_32/esame/bdiv_dbm1c.asm: New file.
+
+2011-10-03 Niels Möller <nisse@lysator.liu.se>
+
+ * tests/mpn/Makefile.am (check_PROGRAMS): Added t-mulmid.
+ * tests/mpn/t-mulmid.c: New file.
+
+ mulmid-related assembly for x86_64, from David Harvey:
+ * mpn/asm-defs.m4 (define_mpn): Added [add,sub]_err[1,2,3]_n and
+ mulmid_basecase. Also use m4_not_for_expansion on the
+ corresponding OPERATION_* symbols.
+ * mpn/x86_64/aors_err1_n.asm: New file.
+ * mpn/x86_64/aors_err2_n.asm: Likewise.
+ * mpn/x86_64/aors_err3_n.asm: Likewise.
+ * mpn/x86_64/mulmid_basecase.asm: Likewise.
+ * mpn/x86_64/core2/aors_err1_n.asm: Likewise.
+ * mpn/x86_64/gmp-mparam.h (MULMID_TOOM42_THRESHOLD): New value.
+ * mpn/x86_64/core2/gmp-mparam.h (MULMID_TOOM42_THRESHOLD): Likewise.
+
+ Tuning of mulmid, from David Harvey:
+ * tune/Makefile.am (TUNE_MPN_SRCS_BASIC): Added mulmid.c
+ mulmid_n.c toom42_mulmid.c.
+ * tune/speed.h: Prototypes for mulmid-related functions.
+ (struct speed_params): Increased max number of sources to 5.
+ (SPEED_ROUTINE_MPN_BINARY_ERR_N_CALL): New macro.
+ (SPEED_ROUTINE_MPN_BINARY_ERR1_N): Likewise.
+ (SPEED_ROUTINE_MPN_BINARY_ERR2_N): Likewise.
+ (SPEED_ROUTINE_MPN_BINARY_ERR3_N): Likewise.
+ (SPEED_ROUTINE_MPN_MULMID): Likewise.
+ (SPEED_ROUTINE_MPN_MULMID_N): Likewise.
+ (SPEED_ROUTINE_MPN_TOOM42_MULMID): Likewise.
+ * tune/common.c (mpn_[add,sub]_err[1,2,3]_n): New functions.
+ (speed_mpn_mulmid_basecase): New function.
+ (speed_mpn_mulmid): New function.
+ (speed_mpn_mulmid_n): New function.
+ (speed_mpn_toom42_mulmid): New function.
+ * tune/speed.c (routine): Added mpn_[add,sub]_err[1,2,3]_n,
+ mpn_mulmid_basecase, mpn_toom42_mulmid, mpn_mulmid_n, and
+ mpn_mulmid.
+ * tune/tuneup.c (mulmid_toom42_threshold): New threshold variable.
+ (tune_mulmid): New function.
+ (all): Call tune_mulmid.
+
+ Testing of mulmid, from David Harvey:
+ * tests/refmpn.c (AORS_ERR1_N): New macro.
+ (refmpn_add_err1_n, refmpn_sub_err1_n): New functions.
+ (AORS_ERR2_N): New macro.
+ (refmpn_add_err2_n, refmpn_sub_err2_n): New functions.
+ (AORS_ERR3_N): New macro.
+ (refmpn_add_err3_n, refmpn_sub_err3_n): New functions.
+ (refmpn_mulmid_basecase): New function.
+ (refmpn_toom42_mulmid): New function, wrapper for
+ refmpn_mulmid_basecase.
+ (refmpn_mulmid_n): Likewise.
+ (refmpn_mulmid): Likewise.
+ * tests/tests.h: Prototypes for new functions.
+ * tests/devel/try.c (NUM_SOURCES): Increased to 5.
+ (struct try_t): Use NUM_SOURCES and NUM_DESTS constants.
+ (SIZE_4, SIZE_6, SIZE_DIFF_PLUS_3, SIZE_ODD): New constants.
+ (OVERLAP_NOT_DST2): New flag.
+ (param_init): New mulmid-related operation types.
+ (mpn_toom42_mulmid_fun): New function.
+ (choice_array): Added mulmid-related entries.
+ (overlap_array): Extended for larger NUM_SOURCES.
+ (OVERLAP_COUNT): Handle OVERLAP_NOT_DST2.
+ (call): Support mulmid-related functions.
+ (pointer_setup): Handle SIZE_4, SIZE_6, and SIZE_DIFF_PLUS_3.
+ (SIZE_ITERATION): Handle SIZE_ODD.
+ (SIZE2_FIRST): Handle SIZE_CEIL_HALF.
+ (SIZE2_LAST): Likewise.
+
+ Implementation of mulmid, from David Harvey:
+ * mpn/generic/add_err1_n.c (mpn_add_err1_n): New file and function.
+ * mpn/generic/add_err2_n.c (mpn_add_err2_n): Likewise.
+ * mpn/generic/add_err3_n.c (mpn_add_err3_n): Likewise.
+ * mpn/generic/sub_err1_n.c (mpn_sub_err1_n): Likewise.
+ * mpn/generic/sub_err2_n.c (mpn_sub_err2_n): Likewise.
+ * mpn/generic/sub_err3_n.c (mpn_sub_err3_n): Likewise.
+ * mpn/generic/mulmid_basecase.c (mpn_mulmid_basecase): Likewise.
+ * mpn/generic/mulmid_n.c (mpn_mulmid_n): Likewise.
+ * mpn/generic/toom42_mulmid.c (mpn_toom42_mulmid): Likewise.
+ * configure.in (gmp_mpn_functions): Added mulmid-related
+ functions.
+ (GMP_MULFUNC_CHOICES): Handle aors_err1_n, aors_err2_n, and
+ aors_err3_n.
+ * gmp-impl.h: Added prototypes for mulmid functions.
+ (MPN_TOOM42_MULMID_MINSIZE): New constant.
+ (MULMID_TOOM42_THRESHOLD): New threshold.
+ (mpn_toom42_mulmid_itch): New macro.
+
+2011-10-03 Niels Möller <nisse@lysator.liu.se>
+
+ * tune/tune-gcd-p.c (main): Fixed broken loop conditions.
+
+2011-09-26 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/sh/sh2/submul_1.asm: Make this old submul_1 implementation
+ actually compute intended function.
+
+ * longlong.h (SH): Recognise predefs for all SH processors as defined
+ by current gcc versions.
+
+2011-09-25 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/sh: Migrate files to '.asm'.
+ * configure.in: Recognise sh3 and sh4.
+
+2011-09-21 Marc Glisse <marc.glisse@inria.fr>
+
+ * gmpxx.h (mpz_class::swap): New function.
+ (mpq_class::swap): Likewise.
+ (mpf_class::swap): Likewise.
+ (swap): New function.
+ * tests/cxx/t-assign.cc: Test the above.
+ * doc/gmp.texi (swap): Document the above.
+
+2011-08-21 Marc Glisse <marc.glisse@inria.fr>
+
+ * tests/cxx/t-ops2.cc: check mul-div by 2.
+
+ * gmpxx.h (__GMPXX_CONSTANT): New macro (__builtin_constant_p).
+ (__gmp_binary_lshift): Move before multiplication. Optimize x << 0.
+ (__gmp_binary_rshift): Move before division. Optimize x >> 0.
+ (__gmp_binary_plus): Optimize x + 0. Rewrite rational + integer.
+ (__gmp_binary_minus): Optimize x - 0 and 0 - x.
+ Rewrite rational - integer.
+ (__gmp_binary_multiplies): Optimize x * 2^n.
+ (__gmp_binary_divides): Optimize x / 2^n.
+ (__gmp_binary_*): Deduplicate code for symmetric operations.
+
+2011-08-18 Torbjorn Granlund <tege@gmplib.org>
+
+ * printf/doprntf.c (__gmp_doprnt_mpf): For DOPRNT_CONV_FIXED, ask for
+ one more digit.
+
+2011-08-17 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpf/sub.c: Fix typo in copy condition. Delay an allocation.
+
+2011-08-12 Torbjorn Granlund <tege@gmplib.org>
+
+ * gmp-impl.h (LIMBS_PER_DIGIT_IN_BASE): Fix typo.
+
+2011-08-10 Torbjorn Granlund <tege@gmplib.org>
+
+ * gmp-impl.h (DIGITS_IN_BASEGT2_FROM_BITS): New.
+ (DIGITS_IN_BASE_FROM_BITS): Compute more accurate result.
+ (MPN_SIZEINBASE): Use DIGITS_IN_BASEGT2_FROM_BITS.
+
+ * tests/rand/t-lc2exp.c (check_bigc): Call abort after reporting error.
+
+2011-08-09 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpz/out_str.c (mpz_out_str): Reinsert accidentally deleted str_size
+ adjustment.
+
+ * gmp-impl.h (DIGITS_IN_BASE_FROM_BITS): Simplify, also avoiding
+ overflow for base 2.
+
+2011-08-07 Torbjorn Granlund <tege@gmplib.org>
+
+ * gmp-impl.h (struct bases): Add log2b and logb2 field, remove
+ chars_per_limb_exactly field.
+ (DIGITS_IN_BASE_FROM_BITS): New.
+ (DIGITS_IN_BASE_PER_LIMB): New.
+ (LIMBS_PER_DIGIT_IN_BASE): New.
+ * gen-bases.c: Generate log2b and logb2 fields; do not generate
+ chars_per_limb_exactly field.
+ * mpf/get_str.c mpf/out_str.c mpf/set_str.c mpn/generic/get_str.c
+ mpn/generic/sizeinbase.c mpq/get_str.c mpz/inp_str.c mpz/out_str.c
+ mpz/set_str.c printf/doprntf.c tune/speed.h tune/tuneup.c:
+ Use new macros.
+
+2011-08-04 Torbjorn Granlund <tege@gmplib.org>
+
+ * dumbmp.c (mpz_root): Reinsert accidentally removed line.
+
+2011-08-03 Torbjorn Granlund <tege@gmplib.org>
+
+ * dumbmp.c (mpz_tdiv_qr): Correctly handle dividend value being equal
+ to divisor value.
+ (mpz_root): Create reasonable starting approximation.
+ (mpz_sqrt): New function.
+ (mpz_mul_2exp): Add faster block shifting code, disabled for now.
+
+2011-07-15 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/arm/invert_limb.asm: Swap around some registers to silence 'as'
+ warnings.
+
+2011-07-14 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/generic/dcpi1_bdiv_q.c (mpn_dcpi1_bdiv_q): Get mpn_sub_1 size
+ argument right.
+
+2011-07-04 Torbjorn Granlund <tege@gmplib.org>
+
+ * tests/misc/t-locale.c: Disable test for mingw.
+
+ * configure.in (x86_64 *-*-mingw*): Handle also cygwin here; clear out
+ extra_functions_64.
+
+2011-07-02 Torbjorn Granlund <tege@gmplib.org>
+
+ * config.guess: Don't print newline in x86 cpuid function.
+ Rewrite x86-64 cpu recognition asm code to work under Windoze.
+
+2011-06-16 Torbjorn Granlund <tege@gmplib.org>
+
+ * acinclude.m4 (GMP_ASM_RODATA): Fix typo in 2011-04-20 change.
+
+ * configure.in: Surround tr ranges with [] for portability.
+
+2011-05-25 Niels Möller <nisse@lysator.liu.se>
+
+ * tune/tune-gcd-p.c (search): New function to search for minimum.
+ (main): Replaced slow linear search.
+
+2011-05-24 Niels Möller <nisse@lysator.liu.se>
+
+ * tune/Makefile.am (EXTRA_PROGRAMS): Added tune-gcd-p. Also added
+ related automake variables.
+
+ * mpn/Makefile.am (tune-gcd-p): Deleted target.
+
+ * tune/tune-gcd-p.c: New file, extracted from mpn/generic/gcd.c
+ and updated.
+ * mpn/generic/gcd.c: Deleted the corresponding code, including
+ main function.
+
+2011-05-23 Niels Möller <nisse@lysator.liu.se>
+
+ * mpz/jacobi.c (mpz_jacobi): Simplified by swapping operands when
+ needed, to get asize >= bsize. Use the reciprocity law generalized
+ to work when one operand is even.
+
+2011-05-22 Niels Möller <nisse@lysator.liu.se>
+
+ * mpz/jacobi.c (mpz_jacobi): Another bugfix for the asize == 1
+ case. Sometimes, powers of two in b were taken into account twice.
+
+2011-05-21 Niels Möller <nisse@lysator.liu.se>
+
+ * mpz/jacobi.c (mpz_jacobi): The handling of asize == 1 was
+ broken. Rewrote it.
+
+ * tests/mpz/t-jac.c (mpz_nextprime_step): Sanity check that prime
+ candidate and step have no common factor.
+ (check_data): Added some test cases related to the asize == 1 case
+ in mpz_jacobi.
+
+2011-05-20 Niels Möller <nisse@lysator.liu.se>
+
+ * gmp-impl.h: Jacobi-related prototypes.
+
+ * configure.in (gmp_mpn_functions): Added jacobi_2, jacobi,
+ hgcd2_jacobi, hgcd_jacobi, and removed jacobi_lehmer.
+
+ * mpz/jacobi.c (STRIP_TWOS): Deleted macro.
+ (mpz_jacobi): Partially rewritten, to no longer make the A
+ operand odd. Use new mpn_jacobi_n.
+
+ * mpn/generic/jacobi_lehmer.c: Deleted file.
+
+ * mpn/generic/jacobi.c (mpn_jacobi_n): New subquadratic jacobi
+ implementation. Supersedes jacobi_lehmer.c.
+
+ * mpn/generic/hgcd_jacobi.c (mpn_hgcd_jacobi): New file and
+ function. A copy of mpn_hgcd, using mpn_hgcd2_jacobi, and with calls to
+ mpn_jacobi_update when appropriate.
+
+ * mpn/generic/jacobi_2.c (mpn_jacobi_2): New file. Extracted from
+ jacobi_lehmer.c.
+ * mpn/generic/hgcd2_jacobi.c (mpn_hgcd2_jacobi): Likewise.
+
+ * mpn/generic/hgcd.c (hgcd_hook): Avoid using NULL.
+
+2011-05-19 Niels Möller <nisse@lysator.liu.se>
+
+ * tune/hgcd_lehmer.c (__gmpn_hgcd_itch): Don't rename symbols for
+ the functions moved to hgcd_matrix.c.
+
+ * configure.in (gmp_mpn_functions): Added hgcd_matrix.
+
+ * mpn/generic/hgcd.c (hgcd_matrix_update_1): Deleted. Several other
+ helper functions moved to hgcd_matrix.c, see below.
+ (hgcd_hook): New function.
+ (hgcd_step): Simplified, using mpn_gcd_subdiv_step and hgcd_hook.
+
+ * mpn/generic/hgcd_matrix.c: New file.
+ (mpn_hgcd_matrix_init): Moved here, from hgcd.c.
+ (mpn_hgcd_matrix_update_q): Likewise.
+ (mpn_hgcd_matrix_mul_1): Likewise.
+ (mpn_hgcd_matrix_mul): Likewise.
+ (mpn_hgcd_matrix_adjust): Likewise.
+
+ * mpn/generic/gcd_subdiv_step.c (mpn_gcd_subdiv_step): New
+ argument s, for use by hgcd.
+ * gmp-impl.h (mpn_gcd_subdiv_step): Update declaration.
+
+ * mpn/generic/gcd.c (mpn_gcd): Pass s = 0 to mpn_gcd_subdiv_step.
+ * mpn/generic/gcdext.c (mpn_gcdext): Likewise. Also added an ASSERT.
+ * mpn/generic/gcdext_lehmer.c (mpn_gcdext_lehmer_n): Likewise.
+ (mpn_gcdext_hook): Added some ASSERTs.
+ * mpn/generic/jacobi_lehmer.c (mpn_jacobi_lehmer): Likewise.
+
+2011-05-17 Niels Möller <nisse@lysator.liu.se>
+
+ * doc/gmp.texi (mpn_gcd, mpn_gcdext): Document input requirements:
+ Must have un >= vn > 0, and V normalized.
+ * mpn/generic/gcdext.c (mpn_gcdext): Added ASSERT for input
+ normalization.
+ * mpn/generic/gcd.c (mpn_gcd): Added ASSERTs for input
+ requirements.
+
+2011-05-15 Marc Glisse <marc.glisse@inria.fr>
+
+ * gmpxx.h (operator<<): Dedup.
+ * tests/cxx/t-iostream.cc: Test on compound types.
+
+ * gmpxx.h (__gmp_binary_expr): Let things happen in place: c=(a+b)/2.
+
+2011-05-10 Marc Glisse <marc.glisse@inria.fr>
+
+ * gmpxx.h (__gmp_unary_expr): Let things happen in place: c=-(a+b).
+ (operator>>): Clean the commenting out.
+ * tests/cxx/t-iostream.cc: New file.
+ * tests/cxx/Makefile.am: Added t-iostream.
+
+2011-05-10 Niels Möller <nisse@lysator.liu.se>
+
+ * doc/gmp.texi (mpz_gcd): Document that gcd(0,0) = 0.
+ (mpz_gcdext): Document range for cofactors.
+
+2011-05-09 Niels Möller <nisse@lysator.liu.se>
+
+ * mpz/gcdext.c (mpz_gcdext): Increased sp allocation to bsize+1 limbs.
+ * doc/gmp.texi (mpn_gcdext): Fixed documentation of allocation
+ requirements; one extra limb is still needed for S.
+
+2011-05-09 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/x86/fat/gmp-mparam.h (BMOD_1_TO_MOD_1_THRESHOLD): Define.
+ * mpn/x86_64/fat/gmp-mparam.h (BMOD_1_TO_MOD_1_THRESHOLD): Define.
+
+2011-05-08 Marc Glisse <marc.glisse@inria.fr>
+
+ * gmpxx.h: Replace unsigned long with mp_bitcnt_t in many places.
+ * doc/gmp.texi: Likewise.
+
+2011-05-06 Marc Glisse <marc.glisse@inria.fr>
+
+ * gmpxx.h (mpz_class): Make constructor from mp[qf]_class explicit.
+ (mpq_class): Make constructor from mpf_class explicit.
+ * doc/gmp.texi: Document the above.
+ * NEWS: Likewise, and mention the EOF istream fix.
+ * tests/cxx/t-mix.cc: New file.
+ * tests/cxx/Makefile.am: Added t-mix.
+
+ * tests/cxx/t-assign.cc: Minor tweak.
+ * tests/cxx/t-misc.cc: Likewise.
+
+ * gmpxx.h (__gmp_resolve_temp): Remove.
+ (__gmp_set_expr): Remove some overloads.
+ (mpq_class): mpz_init_set the numerator and denominator instead of
+ mpq_init + mpq_set.
+ (mpz_class): Dedup the string constructors.
+ (mpq_class): Likewise.
+
+ * tests/cxx/t-ops3.cc: New file.
+ * tests/cxx/Makefile.am: Added t-ops3.
+
+2011-05-05 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpz/gcdext.c: Correct sgn computation.
+ Use MPZ_REALLOC.
+
+2011-05-05 Marc Glisse <marc.glisse@inria.fr>
+
+ * mpn/x86_64/fat/fat.c: Update for Sandy Bridge.
+ * config.guess: warning to keep it in sync with fat.c.
+
+2011-05-05 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/x86_64/fat/fat_entry.asm (PIC_OR_DARWIN): New symbol. Use it to
+ work around Darwin problems.
+
+2011-05-04 Niels Möller <nisse@lysator.liu.se>
+
+ * mpz/gcdext.c (mpz_gcdext): Reduced temporary allocations. Use
+ mpz_divexact when computing the second cofactor.
+
+2011-05-03 David Harvey <dmharvey@cims.nyu.edu>
+
+ * configure.in: make invert_limb_table work correctly with
+ --disable-assembly (from Niels Moller)
+
+2011-05-02 Marc Glisse <marc.glisse@inria.fr>
+
+ * .bootstrap: libtoolize doesn't need -c.
+
+ * configfsf.guess: Update to version of 2011-02-02.
+ * configfsf.sub: Update to version of 2011-03-23.
+
+2011-05-02 Niels Möller <nisse@lysator.liu.se>
+
+ * mpz/gcdext.c (mpz_gcdext): Don't allocate extra limbs at the end
+ of mpn_gcdext parameters.
+
+ * doc/gmp.texi (mpn_gcdext): Updated doc.
+
+2011-05-01 Niels Möller <nisse@lysator.liu.se>
+
+ * mpn/generic/div_qr_2u_pi1.c (mpn_div_qr_2u_pi1): Fixed ASSERT.
+
+2011-04-30 Marc Glisse <marc.glisse@inria.fr>
+
+ * gmp-h.in (mpz_cdiv_q_2exp): Use mp_bitcnt_t to match the definition
+ and the documentation.
+ (mpz_remove): Likewise.
+ (mpf_eq): Likewise.
+
+ * ltmain.sh: Remove.
+ * .bootstrap: Let libtoolize generate ltmain.sh.
+
+ * tests/cxx/t-ops2.cc: Add a couple tests.
+ * tests/cxx/t-rand.cc: Likewise.
+
+ * doc/gmp.texi (mpf_urandomb): Make explicit the fact that it does not
+ change the precision.
+
+ * gmp-h.in (__GMP_EXTERN_INLINE): Recent g++ uses gnu_inline.
+
+2011-04-28 Torbjorn Granlund <tege@gmplib.org>
+
+ * configure.in (x86_64): Support bobcat specifically.
+ (x86): Match bobcat and bulldozer, handle like k10.
+
+2011-04-28 David Harvey <dmharvey@cims.nyu.edu>
+
+ * README.HG: update autotools version numbers.
+
+2011-04-27 Torbjorn Granlund <tege@gmplib.org>
+
+ * tune/speed.h (speed_cyclecounter): Always use PIC variant when
+ compiled with Apple's GCC.
+
+ * mpn/x86/darwin.m4 (LEA): Complete rewrite.
+ (m4append): New macro.
+
+2011-04-26 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/sparc32/sparc-defs.m4 (changecom): Don't redefine '!' as it
+ interferes with expressions.
+
+2011-04-20 Torbjorn Granlund <tege@gmplib.org>
+
+ * acinclude.m4 (GMP_ASM_RODATA): Make 'foo' larger to avoid clang
+ problems.
+
+2011-04-12 Niels Möller <nisse@lysator.liu.se>
+
+ * mpn/x86_64/invert_limb.asm [PIC]: Declare mpn_invert_limb_table
+ as .protected.
+
+2011-04-11 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/x86/k7/invert_limb.asm: Use deflit for Darwin bug workaround.
+ Undo 2011-03-28 change.
+
+ * mpn/asm-defs.m4 (define_mpn): Use deflit.
+
+2011-04-10 Niels Möller <nisse@lysator.liu.se>
+
+ * mpn/asm-defs.m4 (define_mpn): Added invert_limb_table.
+
+ * configure.in: Add invert_limb_table to extra_functions_64 on
+ x86_64.
+
+ * mpn/x86_64/invert_limb.asm: Changed references from approx_tab
+ to mpn_invert_limb_table.
+
+ * mpn/x86_64/invert_limb_table.asm (mpn_invert_limb_table): New
+ file. Extracted approximation table from invert_limb.asm, renamed
+ and made global.
+
+2011-03-30 Niels Möller <nisse@lysator.liu.se>
+
+ * mpn/x86_64/div_qr_2u_pi1.asm: New file.
+
+ * configure.in (gmp_mpn_functions): Add div_qr_2u_pi1.
+
+ * gmp-impl.h (mpn_div_qr_2u_pi1): Declare.
+
+ * mpn/generic/div_qr_2u_pi1.c (mpn_div_qr_2u_pi1): Moved to
+ separate file, from...
+ * mpn/generic/div_qr_2.c: ... old location.
+
+ * mpn/generic/div_qr_2n_pi1.c: Renamed file, from...
+ * mpn/generic/div_qr_2_pi1_norm.c: ...old name.
+ * mpn/x86_64/div_qr_2n_pi1.asm: Renamed file, from...
+ * mpn/x86_64/div_qr_2_pi1_norm.asm: ...old name.
+
+ * gmp-impl.h (mpn_div_qr_2n_pi1): Use new name in declaration.
+ * tune/speed.h (speed_mpn_div_qr_2n): Likewise.
+ (speed_mpn_div_qr_2u): Likewise.
+
+ * tune/tuneup.c (tune_div_qr_2): Use new name speed_mpn_div_qr_2n.
+
+ * tune/speed.c (routine): Use new names mpn_div_qr_2n and
+ mpn_div_qr_2u, also on the command line.
+
+ * tune/common.c (speed_mpn_div_qr_2n): Renamed, from...
+ (speed_mpn_div_qr_2_norm): ... old name.
+ (speed_mpn_div_qr_2u): Renamed, from...
+ (speed_mpn_div_qr_2_unnorm): ... old name.
+
+ * mpn/generic/div_qr_2_pi1_norm.c (mpn_div_qr_2n_pi1): Renamed,
+ from...
+ (mpn_div_qr_2_pi1_norm): ...old name.
+ * mpn/x86_64/div_qr_2_pi1_norm.asm: Likewise.
+
+ * mpn/generic/div_qr_2.c (mpn_div_qr_2n_pi2): Renamed, from...
+ (mpn_div_qr_2_pi2_norm): ... old name.
+ (mpn_div_qr_2u_pi1): Renamed, from...
+ (mpn_div_qr_2_pi1_unnorm): ... old name.
+ (mpn_div_qr_2): Call functions using new names.
+
+ * mpn/asm-defs.m4: Renamed div_qr_2-functions to new names.
+
+2011-03-29 Niels Möller <nisse@lysator.liu.se>
+
+ * mpn/x86_64/div_qr_2_pi1_norm.asm: Updated to use a separate rp
+ argument.
+
+ * gmp-impl.h (mpn_div_qr_2_pi1_norm): Updated declaration.
+ * gmp-h.in (mpn_div_qr_2): Likewise.
+
+ * tests/mpn/t-div.c (main): Adapted to new mpn_div_qr_2 interface.
+ * tune/speed.h (SPEED_ROUTINE_MPN_DIV_QR_2): Likewise.
+
+ * mpn/generic/div_qr_2.c (mpn_div_qr_2_pi2_norm): Added rp
+ argument. Don't clobber the input dividend.
+ (mpn_div_qr_2_pi1_unnorm): Likewise.
+ (mpn_div_qr_2): Likewise.
+ * mpn/generic/div_qr_2_pi1_norm.c (mpn_div_qr_2_pi1_norm): Likewise.
+
+2011-03-29 Niels Möller <nisse@lysator.liu.se>
+
+ * mpn/x86/k7/invert_limb.asm: Use mov rather than push and pop.
+ Earlier load of divisor from stack.
+
+2011-03-28 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/x86/k7/invert_limb.asm: Protect movzwl register parameters from
+ being interpreted as m4 macro parameters.
+
+2011-03-22 Niels Möller <nisse@lysator.liu.se>
+
+ * mpn/x86_64/div_qr_2_pi1_norm.asm: Copied optimized inner loop
+ from divrem_2.asm.
+
+ * mpn/x86_64/div_qr_2_pi1_norm.asm: First working, but poorly
+ optimized, implementation.
+
+ * mpn/asm-defs.m4 (define_mpn): Added div_qr_2_pi[12]_*norm.
+
+ * mpn/generic/div_qr_2_pi1_norm.c (mpn_div_qr_2_pi1_norm): Moved
+ to separate file, from...
+ * mpn/generic/div_qr_2.c: ... old location.
+
+ * gmp-impl.h (mpn_div_qr_2_pi1_norm): Declare.
+
+ * configure.in (gmp_mpn_functions): Added div_qr_2_pi1_norm.
+
+2011-03-22 Torbjorn Granlund <tege@gmplib.org>
+
+ * configure.in (powerpc): Reinsert lost AIX cpu_path 32-bit handling.
+ Reinsert lost linux/bsd cpu_path handling.
+
+ * mpn/generic/mod_1_1.c: Disable powerpc asm for _LONG_LONG_LIMB.
+ * mpn/generic/div_qr_2.c: Likewise.
+
+ * mpn/generic/div_qr_2.c: Use asm just for gcc.
+ Make powerpc add_sssaaaa work for 32-bit case, and use less strict
+ constraints.
+
+2011-03-21 Niels Möller <nisse@lysator.liu.se>
+
+ * tune/tuneup.c (div_qr_2_pi2_threshold): New global variable.
+ (tune_div_qr_2): New function.
+ (all): Call tune_div_qr_2.
+
+ * tune/Makefile.am (TUNE_MPN_SRCS_BASIC): Added div_qr_2.c.
+
+ * gmp-impl.h (DIV_QR_2_PI2_THRESHOLD): Setup for tuning.
+
+ New 4/2 division loop, based on Torbjörn's work:
+ * mpn/generic/div_qr_2.c (add_sssaaaa, add_csaac): New macros.
+ (udiv_qr_4by2): New macro.
+ (invert_4by2): New function.
+ (mpn_div_qr_2_pi2_norm): New function.
+ (DIV_QR_2_PI2_THRESHOLD): New threshold.
+ (mpn_div_qr_2_pi1_norm): Renamed, from...
+ (mpn_div_qr_2_norm): ... old name.
+ (mpn_div_qr_2_pi1_unnorm): Renamed, from...
+ (mpn_div_qr_2_unnorm): ... old name.
+ (mpn_div_qr_2): Use mpn_div_qr_2_pi2_norm for large enough
+ normalized divisors.
+
+ * gmp-impl.h (udiv_qr_3by2): Avoid a copy.
+
+2011-03-21 Torbjorn Granlund <tege@gmplib.org>
+
+ * configure.in (hppa): Under linux, treat 64-bit processors as if they
+ were 32-bit processors.
+
+ * mpn/generic/addcnd_n.c: New file.
+ * mpn/asm-defs.m4 (define_mpn): Add addcnd_n and subcnd_n.
+ * configure.in (gmp_mpn_functions): Add addcnd_n.
+ * gmp-impl.h (mpn_addcnd_n): Declare.
+
+ * mpn/generic/subcnd_n.c: Combine nails and non-nails functions.
+
+ * gmp-impl.h (invert_pi1): Prepend _ to local variables, protect
+ parameters within () where necessary.
+
+ * mpn/asm-defs.m4 (define_mpn): Add div_qr_2.
+ * configure.in (gmp_mpn_functions): Reinsert mercurial-bug-removed
+ line.
+
+2011-03-20 Torbjorn Granlund <tege@gmplib.org>
+
+ * configure.in (powerpc): Add cpu_path for all three ABIs.
+ Rename "aix64" to "mode64" for consistency.
+
+2011-03-16 Marc Glisse <marc.glisse@inria.fr>
+
+ * gmpxx.h (__gmp_binary_not_equal): Remove, use !__gmp_binary_equal.
+ (__gmp_binary_less_equal): Remove, use !__gmp_binary_greater.
+ (__gmp_binary_greater_equal): Remove, use !__gmp_binary_less.
+ * tests/cxx/t-ops2.cc: Typo.
+
+2011-03-20 Niels Möller <nisse@lysator.liu.se>
+
+ * tune/common.c (speed_mpn_div_qr_2_norm): New function.
+ (speed_mpn_div_qr_2_unnorm): New function.
+ * tune/speed.c (routine): Recognize above functions.
+ * tune/speed.h: Declarations for above functions.
+ (SPEED_ROUTINE_MPN_DIV_QR_2): New macro.
+
+ * tests/mpn/t-div.c (main): Added tests for mpn_divrem_2 and
+ mpn_div_qr_2.
+
+ * mpn/generic/div_qr_2.c (mpn_div_qr_2): New file and function.
+ Intended to eventually replace divrem_2.
+ * configure.in (gmp_mpn_functions): Add div_qr_2.
+
+2011-03-16 Marc Glisse <marc.glisse@inria.fr>
+
+ * gmpxx.h (__gmp_set_expr): Remove broken declarations.
+
+2011-03-19 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpz/fac_ui.c (mpz_fac_ui): Use MPZ_REALLOC for standard, conditional
+ reallocation.
+
+2011-03-19 Niels Möller <nisse@lysator.liu.se>
+
+ * mpn/generic/divrem_2.c (mpn_divrem_2): Fixed comment and assert
+ regarding q and n overlap.
+
+2011-03-16 Marc Glisse <marc.glisse@inria.fr>
+
+ * gmpxx.h (__mpz_set_ui_safe): New inline function.
+ (__mpz_set_si_safe): Likewise.
+ (__GMPXX_TMPZ_UI): Use the new function.
+ (__GMPXX_TMPZ_SI): Likewise.
+ (__GMPXX_TMPQ_UI): Likewise.
+ (__GMPXX_TMPQ_SI): Likewise.
+ * tests/cxx/t-ops2.cc: test converting 0 to stack mpq_t.
+
+2011-03-15 Marc Glisse <marc.glisse@inria.fr>
+
+ * gmpxx.h (__GMPXX_TMPQ_UI): New macro.
+ (__GMPXX_TMPQ_SI): New macro.
+ (struct __gmp_binary_multiplies): Rewrite, using the new macros.
+ (struct __gmp_binary_divides): Likewise.
+
+ * gmpxx.h (__GMPZ_ULI_LIMBS): Rewrite.
+ * tests/cxx/t-ops2.cc: test converting ULONG_MIN to stack mpq_t.
+
+2011-03-15 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+ * mpn/generic/toom_interpolate_16pts.c: Remove ambiguity.
+
+2011-03-14 Torbjorn Granlund <tege@gmplib.org>
+
+ * tune/tuneup.c (tune_mul): Set tuning min size considering print skew.
+
+ * doc/gmp.texi: Make reference to "Formatted I/O" chapters from type
+ specific I/O sections.
+
+ * mpn/alpha/add_n.asm: Add _nc entry point.
+ * mpn/alpha/sub_n.asm: Likewise.
+ * mpn/mips64/add_n.asm: Likewise.
+ * mpn/mips64/sub_n.asm: Likewise.
+ * mpn/sparc64/ultrasparc1234/add_n.asm: Likewise.
+ * mpn/sparc64/ultrasparc1234/sub_n.asm: Likewise.
+
+2011-03-13 Marc Glisse <marc.glisse@inria.fr>
+
+ * tests/cxx/t-ops2.cc: New file.
+ * tests/cxx/Makefile.am: Added t-ops2.
+
+2011-03-13 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/generic/toom32_mul.c (mpn_toom32_mul): Make 'hi' be limb-sized
+ for better code.
+
+ * gmp-impl.h (MPN_IORD_U): Handle x86_64 as well as x86_32. Generate
+ no code for incrementing by constant 0.
+
+2011-03-12 Marc Glisse <marc.glisse@inria.fr>
+
+ * gmpxx.h: Rename __GMPXX_TMP_* to __GMPXX_TMPZ_*. Use in more places.
+
+2011-03-12 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/powerpc64/rshift.asm: Accept/return values correctly also for
+ 32-bit ABI.
+ * mpn/powerpc64/lshift.asm: Likewise.
+
+ * tune/powerpc.asm: Use powerpc syntax, not power syntax.
+
+ * tune/common.c (speed_udiv_qrnnd_preinv1, etc): Remove.
+ * tune/speed.c (routine): Remove udiv_qrnnd_preinv1, etc.
+
+2011-03-12 Marc Glisse <marc.glisse@inria.fr>
+
+ * tests/cxx/t-istream.cc: Restrict mpq test in t-istream -s.
+
+ * gmpxx.h: Remove leftover #undefs.
+
+2011-03-11 Torbjorn Granlund <tege@gmplib.org>
+
+ * gmp-impl.h (udiv_qrnnd_preinv1, udiv_qrnnd_preinv2,
+ udiv_qrnnd_preinv2gen): Remove obsolete macros.
+ (udiv_qrnnd_preinv): New name for udiv_qrnnd_preinv3.
+
+2011-03-11 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+ * gmp-impl.h: Declare many mpn_{sub,add}lsh*_n_ip[12] functions/macros.
+ * mpn/generic/toom_interpolate_5pts.c: Use mpn_sublsh1_n_ip1.
+
+ * tests/devel/try.c: Tests for {add,sub}lsh*_n_ip[12].
+ * tests/refmpn.c: New reference for mpn_{add,sub}lsh*_n_ip[12].
+ * tests/tests.h: Declarations for reference functions above.
+
+ * tune/common.c: New speed_mpn_{add,sub}lsh*_n_ip[12] functions.
+ * tune/speed.h: Prototypes for functions above.
+ * tune/speed.c: Support for mpn_{add,sub}lsh*_n_ip[12].
+
+ * mpn/x86/k7/sublsh1_n.asm: Replaced generic sublsh1 code with faster _ip1.
+ * mpn/x86/atom/sublsh1_n.asm: Changed PROLOGUE accordingly.
+
+ * configure.in: Define HAVE_NATIVE_mpn_addlsh*_n*_ip[12].
+ * mpn/asm-defs.m4: Declare mpn_addlsh*_n*_ip[12].
+
+2011-03-10 Marc Glisse <marc.glisse@inria.fr>
+
+ * tests/cxx/t-istream.cc: Explicit conversion to streampos.
+
+2011-03-10 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/x86/atom/sse2/mul_basecase.asm: Suppress wind-down rp updates.
+
+ * Move new aorrlsh_n.asm to new k8 dir. Revert
+ mpn/x86_64/aorrlsh_n.asm.
+ * configure.in: Setup path for new k8 directory.
+
+2011-03-10 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+ * mpn/x86/pentium4/sse2/bdiv_dbm1c.asm: New file, was in atom.
+ * mpn/x86/atom/sse2/bdiv_dbm1c.asm: Grab file above.
+
+2011-03-09 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/x86_64/aorrlsh_n.asm: Complete rewrite.
+
+ * mpn/x86_64/core2/aorrlsh_n.asm: New file, grabbing another asm file.
+
+2011-03-09 Marc Glisse <marc.glisse@inria.fr>
+
+ * tests/cxx/t-ostream.cc: Use bool instead of int.
+ * tests/cxx/t-istream.cc: Likewise.
+ * tests/cxx/t-misc.cc: Likewise.
+
+ * cxx/ismpznw.cc: Don't clear eofbit.
+ * cxx/ismpq.cc: Likewise.
+ * cxx/ismpf.cc: Likewise.
+ * tests/cxx/t-istream.cc: Test accordingly.
+
+2011-03-09 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+ * mpn/x86/atom/sse2/bdiv_dbm1c.asm: New file.
+
+2011-03-09 Marc Glisse <marc.glisse@inria.fr>
+
+ * doc/gmp.texi: Remove void return type from constructors. Document
+ explicit constructors. Document mpf_class::mpf_class(mpf_t).
+
+2011-03-07 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+ * mpn/x86/atom/sse2/sqr_basecase.asm: Postponed pushes. Cleaned
+ outer loop exit.
+
+2011-03-07 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/x86_64/gcd_1.asm: Workaround Oracle assembler bug.
+
+ * mpn/x86/atom/sse2/mul_basecase.asm: Replace addmul_1 loops.
+ Tweak outer loop rp updates.
+
+2011-03-06 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/x86/atom/sse2/sqr_basecase.asm: New file.
+
+2011-03-05 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/x86_64/bdiv_dbm1c.asm: Write proper feed-in code.
+
+2011-03-04 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/x86_64/addmul_2.asm: Rewrite for linear performance.
+
+2011-03-03 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/generic/mod_1_1.c (add_mssaaaa): Canonicalise layout. Add arm
+ variant. Enable sparc64 code and powerpc code (the latter for 32-bit
+ and 64-bit).
+
+ * mpn/generic/sqrtrem.c (mpn_dc_sqrtrem): Use mpn_addlsh1_n.
+
+ * gmp-impl.h (mpn_addlsh_nc, mpn_rsblsh_nc): Declare.
+ * mpn/asm-defs.m4: Likewise.
+
+ * mpn/x86_64/coreisbr/aorrlsh_n.asm: Disable mpn_rsblsh_n due to
+ carry-in issues.
+ * mpn/x86_64/coreinhm/aorrlsh_n.asm: Likewise.
+ * mpn/x86_64/coreisbr/aorrlsh2_n.asm: Likewise.
+
+2011-03-03 Niels Möller <nisse@lysator.liu.se>
+
+ * mpn/generic/mod_1_1.c (add_mssaaaa): For x86 and x86_64, treat m
+ as an output operand only. Added sparc32 implementation. Also
+ added #if:ed out attempts at sparc64 and powerpc64.
+
+ * tune/tuneup.c (tune_mod_1): Record result of MOD_1_1P_METHOD
+ measurement for use by mpn_mod_1_tune. And omit measurement if
+ mpn_mod_1_1p is native assembly code.
+
+ * mpn/generic/mod_1.c (mpn_mod_1_1p) [TUNE_PROGRAM_BUILD]: Macro
+ to check mod_1_1p_method and call the right function.
+ (mpn_mod_1_1p_cps) [TUNE_PROGRAM_BUILD]: Likewise.
+
+ * gmp-impl.h (MOD_1_1P_METHOD) [TUNE_PROGRAM_BUILD]: Define macro.
+ (mod_1_1p_method) [TUNE_PROGRAM_BUILD]: Declare variable.
+
+2011-03-02 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/x86_64/coreinhm/aorrlsh_n.asm: New file.
+ * mpn/x86_64/coreisbr/aorrlsh_n.asm: New file.
+
+2011-03-01 Niels Möller <nisse@lysator.liu.se>
+
+ * mpn/x86_64/mod_1_1.asm (mpn_mod_1_1p_cps): Eliminated a neg and
+ two mov instructions.
+
+ * mpn/x86/k7/mod_1_1.asm (mpn_mod_1_1p_cps): Simplified
+ computation, analogous to recent x86_64/mod_1_1.asm changes.
+ (mpn_mod_1_1p): Corresponding changes. Don't shift b.
+
+ * mpn/sparc64/mod_1_4.c (mpn_mod_1s_4p_cps): Use udiv_rnnd_preinv
+ rather than udiv_rnd_preinv.
+ (mpn_mod_1s_4p): Likewise.
+
+2011-03-01 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/x86/pentium4/sse2/mul_1.asm: Swap entry insns to share more code
+ between entry points.
+ * mpn/x86/pentium4/sse2/addmul_1.asm: Likewise.
+
+ * mpz/divegcd.c: Rewrite, as per Marc Glisse's suggestion. Also fix
+ problem with passing a longlong limb to a _ui function.
+
+ * gmp-impl.h (udiv_qrnnd_preinv3): Cast truth value to mask's type.
+ (udiv_rnnd_preinv): Likewise.
+ * mpn/generic/mod_1_1.c (mpn_mod_1_1p): Likewise.
+
+2011-02-28 Niels Möller <nisse@lysator.liu.se>
+
+ * mpn/generic/mod_1_1.c (add_mssaaaa): Typo fix, define
+ add_mssaaaa, not add_sssaaaa.
+
+ * tune/tuneup.c (tune_mod_1): Measure mpn_mod_1_1_1 and
+ mpn_mod_1_1_2, to set MOD_1_1P_METHOD.
+
+ * tune/speed.c (routine): Added mpn_mod_1_1_1 and mpn_mod_1_1_2.
+
+ * tune/speed.h: Declare speed_mpn_mod_1_1_1, speed_mpn_mod_1_1_2,
+ mpn_mod_1_1p_1, mpn_mod_1_1p_2, mpn_mod_1_1p_cps_1, and
+ mpn_mod_1_1p_cps_2.
+
+ * tune/common.c (speed_mpn_mod_1_1_1): New function.
+ (speed_mpn_mod_1_1_2): New function.
+
+ * tune/Makefile.am (libspeed_la_SOURCES): Added mod_1_1-1.c
+ mod_1_1-2.c.
+
+ * tune/mod_1_1-1.c: New file.
+ * tune/mod_1_1-2.c: New file.
+
+ * mpn/generic/mod_1_1.c: Implemented an algorithm with fewer
+ multiplications, configured via MOD_1_1P_METHOD.
+
+ * mpn/x86_64/mod_1_1.asm (mpn_mod_1_1p_cps): Simplified
+ computation of B2modb, use B^2 mod (normalized b).
+ (mpn_mod_1_1p): Corresponding changes. Don't shift b.
+
+ * mpn/generic/mod_1_1.c (mpn_mod_1_1p_cps): Use udiv_rnnd_preinv rather
+ than udiv_rnd_preinv.
+ (mpn_mod_1_1p): Likewise.
+ * mpn/generic/mod_1_4.c: Analogous changes.
+ * mpn/generic/mod_1_3.c: Analogous changes.
+ * mpn/generic/mod_1_2.c: Analogous changes.
+ * mpn/generic/mod_1.c: Analogous changes.
+ * mpn/generic/pre_mod_1.c: Analogous changes.
+
+ * gmp-impl.h (udiv_qrnnd_preinv3): Eliminated unpredictable branch
+ using masking logic. Further optimization of the nl == constant 0
+ case, similar to udiv_rnd_preinv.
+ (udiv_rnnd_preinv): Likewise.
+ (udiv_rnd_preinv): Deleted, use udiv_rnnd_preinv with nl == 0
+ instead.
+
+ * tests/mpn/t-divrem_1.c (check_data): Added testcase to exercise
+ the nl == constant 0 special case in udiv_qrnnd_preinv3.
+
+2011-02-28 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/generic/rootrem.c (mpn_rootrem): Combine two similar scalar
+ divisions. Misc minor cleanup.
+
+ * mpn/x86/atom/sse2/aorsmul_1.asm: Shorten software pipeline.
+
+ * mpn/x86/atom/mul_basecase.asm: Remove file no longer used.
+
+ * mpn/generic/rootrem.c (mpn_rootrem_internal): Delay O(log(U))
+ allocations until they are known to be needed.
+
+2011-02-27 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+ * mpn/x86/atom/sse2/mul_1.asm: New code.
+
+2011-02-27 Niels Möller <nisse@lysator.liu.se>
+
+ * gmp-impl.h (udiv_rnnd_preinv): New macro.
+
+2011-02-27 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/x86/atom/sse2/mul_basecase.asm: New file.
+
+2011-02-26 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+ * mpn/x86/atom/sse2/aorsmul_1.asm: Optimise non-loop code.
+
+2011-02-26 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/powerpc64/mode64/aorsmul_1.asm: Add MULFUNC_PROLOGUE.
+ * mpn/m68k/mc68020/aorsmul_1.asm: Likewise.
+
+ * mpn/powerpc64/mode64/aorsmul_1.asm: Add missing MULFUNC_PROLOGUE.
+ * mpn/m68k/mc68020/aorsmul_1.asm: Likewise.
+
+2011-02-25 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/x86/atom/sse2/aorsmul_1.asm: New file.
+ * mpn/x86/atom/aorsmul_1.asm: File removed.
+
+2011-02-25 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+ * mpn/x86/atom/sse2/divrem_1.asm: New file (was in x86/atom).
+ * mpn/x86/atom/sse2/mul_1.asm: Likewise.
+ * mpn/x86/atom/sse2/popcount.asm: Likewise.
+ * mpn/x86/atom/divrem_1.asm: ReMoved (in sse2/ now).
+ * mpn/x86/atom/mul_1.asm: Likewise.
+ * mpn/x86/atom/popcount.asm: Likewise.
+
+ * configure.in: Set up mmx path for atom.
+ * mpn/x86/atom/mmx/copyd.asm: New file (was in x86/atom).
+ * mpn/x86/atom/mmx/copyi.asm: Likewise.
+ * mpn/x86/atom/mmx/hamdist.asm: Likewise.
+ * mpn/x86/atom/copyd.asm: ReMoved (in mmx/ now).
+ * mpn/x86/atom/copyi.asm: Likewise.
+ * mpn/x86/atom/hamdist.asm: Likewise.
+
+2011-02-24 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/x86/atom/sse2/mod_1_1.asm: New file.
+ * mpn/x86/atom/sse2/mod_1_4.asm: New file.
+ * configure.in: Set up sse2 path for atom.
+
+ * mpn/x86/p6/sse2/mod_1_1.asm: New file.
+ * mpn/x86/p6/sse2/mod_1_4.asm: Fix typo in MULFUNC_PROLOGUE.
+
+2011-02-24 Niels Möller <nisse@lysator.liu.se>
+
+ * mpn/x86/k7/mod_1_1.asm (mpn_mod_1_1p): Rewrite using the same
+ algorithm as the x86_64 version.
+
+2011-02-23 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+ * mpn/x86/atom/logops_n.asm: New file (same loop as aors_n).
+
+2011-02-23 Niels Möller <nisse@lysator.liu.se>
+
+ * mpn/x86_64/mod_1_1.asm (mpn_mod_1_1p): Shaved off one
+ instruction and one register in the inner loop. Rearranged
+ registers slightly, and no longer needs the callee-save register
+ %r12.
+
+2011-02-22 Torbjorn Granlund <tege@gmplib.org>
+
+ * configure.in: Export SHLD_SLOW and SHRD_SLOW to config.m4, also
+ fixing typo in exporting code.
+
+ * mpn/x86_64/nano/gmp-mparam.h (SHLD_SLOW, SHRD_SLOW): Define.
+ * mpn/x86_64/atom/gmp-mparam.h (SHLD_SLOW, SHRD_SLOW): Define.
+
+2011-02-22 Niels Möller <nisse@lysator.liu.se>
+
+ * mpn/x86_64/mod_1_1.asm (mpn_mod_1_1p): Rewrite.
+
+2011-02-22 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+ * mpn/x86/atom/lshiftc.asm: New file (a copy of lshift.asm with a handful of neg added).
+
+2011-02-21 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/x86/aors_n.asm: Move _nc entry to after main code. Align loop
+ and _n entry for claimed performance. Normalise mnemonic usage.
+
+ * mpn/x86/atom/aorrlsh1_n.asm: New file (code from rsblsh_1, slightly
+ slower for addlsh_1 for large operands, but much faster for small).
+ * mpn/x86/atom/addlsh1_n.asm: Remove.
+ * mpn/x86/atom/rsblsh1_n.asm: Remove.
+
+2011-02-20 Marc Glisse <marc.glisse@inria.fr>
+
+ * mpq/aors.c: Rewrite to remove redundant division.
+
+2011-02-20 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/x86/atom/lshift.asm: New file.
+ * mpn/x86/atom/rshift.asm: Normalise mnemonic usage.
+
+ * gmp-impl.h (mpn_divexact_by7): Relax inclusion condition.
+
+ * mpz/divegcd.c (mpz_divexact_by5): New conditionally enabled function.
+ (mpz_divexact_by3): Wrap inside appropriate conditions.
+ (mpz_divexact_gcd): Rewrite.
+
+ * mpn/x86/bdiv_dbm1c.asm: Save a jump.
+
+2011-02-20 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+ * mpn/x86/atom/aorslshC_n.asm: New file.
+ * mpn/x86/atom/sublsh2_n.asm: New file.
+
+ * mpn/x86/atom/aors_n.asm: New code.
+ * mpn/x86/atom/rshift.asm: Atom64 code adapted to 32-bit.
+ * mpn/x86/atom/lshift.asm: Likewise.
+
+2011-02-19 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/x86_64/atom/rsh1aors_n.asm: New file.
+
+ * mpn/x86_64/atom/lshift.asm: New file.
+ * mpn/x86_64/atom/rshift.asm: New file.
+ * mpn/x86_64/atom/lshiftc.asm: New file.
+
+2011-02-17 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+ * mpn/x86/atom/aorsmul_1.asm: Small improvements for small sizes.
+ * mpn/x86/atom/aorrlshC_n.asm: Tiny size improvements.
+
+2011-02-16 Torbjorn Granlund <tege@gmplib.org>
+
+ * configure.in: Fix k8/k10 32-bit path setup problem.
+
+2011-02-16 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+ * mpn/x86/atom/aorsmul_1.asm: Revive an old k7/aorsmul.
+
+2011-02-14 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+ * gmp-impl.h (mpn_sublsh_n): Declare.
+ * mpn/asm-defs.m4: Likewise.
+
+ * mpn/x86/atom/aorrlshC_n.asm: New file (was k7).
+ * mpn/x86/k7/aorrlshC_n.asm: ReMoved.
+ * mpn/x86/atom/aorrlsh2_n.asm: Grab atom/aorrlshC_n.asm.
+ * mpn/x86/atom/rsblsh1_n.asm: Grab atom/aorrlshC_n.asm.
+
+2011-02-13 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/x86_64/atom/aorrlsh2_n.asm: New file.
+
+2011-02-12 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/x86_64/aorrlsh_n.asm: Minor tweaks, update c/l numbers.
+
+ * mpn/x86_64/atom/sublsh1_n.asm: New file.
+
+ * mpn/x86_64/atom/aorrlsh1_n.asm: New file.
+
+2011-02-11 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/powerpc64/mode64/mod_1_1.asm: Fix Darwin syntax issues.
+
+2011-02-10 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/powerpc64/mode64/mod_1_4.asm: Tune away a cycle for 970.
+
+2011-02-11 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+ * mpn/x86/k7/addlsh1_n.asm: Faster core loop (Torbjorn's).
+
+ * configure.in: Add HAVE_NATIVE_{add,sub,rsb}lsh{,1,2}_nc.
+ * tests/tests.h: refmpn_{add,sub,rsb}lsh{,1,2}_nc prototypes.
+ * tests/refmpn.c: New refmpn_{add,sub,rsb}lsh{,1,2}_nc.
+ * tests/devel/try.c: Tests for mpn_{add,sub,rsb}lsh{,1,2}_nc.
+
+ * mpn/x86/k7/aorrlshC_n.asm: New file.
+ * mpn/x86/atom/aorrlsh2_n.asm: Grab k7/aorrlshC_n.asm.
+ * mpn/x86/atom/rsblsh1_n.asm: Grab k7/aorrlshC_n.asm.
+
+2011-02-06 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+ * mpn/x86/k7/addlsh1_n.asm: New file.
+ * mpn/x86/k7/sublsh1_n.asm: New file.
+ * mpn/x86/atom/addlsh1_n.asm: Grab k7/addlsh1_n.asm.
+ * mpn/x86/atom/sublsh1_n.asm: Grab k7/sublsh1_n.asm.
+
+2011-02-05 Torbjorn Granlund <tege@gmplib.org>
+
+ * gmp-impl.h (mpn_addlsh1_nc, mpn_addlsh2_nc, mpn_sublsh1_nc,
+ mpn_sublsh2_nc, mpn_rsblsh1_nc, mpn_rsblsh2_nc): Declare.
+ * mpn/asm-defs.m4: Likewise.
+
+ * mpn/x86_64/coreisbr/aorrlshC_n.asm: New file.
+ * mpn/x86_64/coreisbr/aorrlsh1_n.asm: New file.
+ * mpn/x86_64/coreisbr/aorrlsh2_n.asm: New file.
+
+ * mpn/x86_64/coreisbr/aors_n.asm: New file, based on old
+ atom/aors_n.asm.
+ * mpn/x86_64/atom/aors_n.asm: Grab coreisbr/aors_n.asm.
+
+2011-02-05 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+ * gmp-impl.h (mpn_toom6_mul_n_itch): Handle threshold == zero.
+ (mpn_toom8_mul_n_itch): Likewise.
+ (MPN_TOOM6H_MIN, MPN_TOOM8H_MIN): Define.
+ * tests/mpn/t-toom6h.c: No tests below MPN_TOOM6H_MIN.
+ * tests/mpn/t-toom8h.c: No tests below MPN_TOOM8H_MIN.
+
+ * mpz/lucnum_ui.c: Use mpn_addlsh2_n.
+
+2011-02-04 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/x86_64/atom/rsh1aors_n.asm: Add a MULFUNC_PROLOGUE.
+ * mpn/x86_64/atom/dive_1.asm: Likewise.
+ * mpn/x86_64/atom/popcount.asm: Likewise.
+ * mpn/x86_64/core2/popcount.asm: Likewise.
+ * mpn/x86_64/coreinhm/hamdist.asm: Likewise.
+ * mpn/x86_64/coreinhm/popcount.asm: Likewise.
+ * mpn/x86_64/nano/popcount.asm: Likewise.
+ * mpn/x86_64/pentium4/popcount.asm: Likewise.
+
+2011-02-04 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+ * mpn/x86/atom/mode1o.asm: New file, grabbing another asm file.
+ * mpn/x86/atom/mul_1.asm: Claim mul_1c.
+
+2011-02-02 Niels Möller <nisse@lysator.liu.se>
+
+ * tune/speed.h (SPEED_ROUTINE_MPN_HGCD_CALL): Fixed one
+ speed_operand_dst call.
+
+2011-02-01 Torbjorn Granlund <tege@gmplib.org>
+
+ * tune/speed.h (struct speed_params): Allow for 4 dst operands.
+ * tune/common.c (TOLERANCE): Increase from 0.5% to 1%.
+
+ * tune/speed.h (SPEED_ROUTINE_MPN_HGCD_CALL): New macro, mainly based
+ on old speed_mpn_hgcd, but with speed_operand_src calls (as suggested
+ by Niels).
+ * tune/common.c (speed_mpn_hgcd): Invoke SPEED_ROUTINE_MPN_HGCD_CALL.
+ (speed_mpn_hgcd_lehmer): Likewise.
+
+ * configure.in: Set up 32-bit x86 paths for new corei* CPU strings.
+
+2011-01-31 Torbjorn Granlund <tege@gmplib.org>
+
+ * config.guess: Recognise new Intel processors.
* config.guess: Support 'coreinhm' and 'coreisbr'.
* config.sub: Likewise.
* configure.in: Likewise.
-2011-01-25 Marco Bodrato <bodrato@mail.dm.unipi.it>
+2011-01-30 Torbjorn Granlund <tege@gmplib.org>
+
+ * configure.in: Support x86/geode.
+ * mpn/x86/geode/gmp-mparam.h: New file.
+
+2011-01-29 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+ * mpn/x86/atom/addlsh1_n.asm: Removed.
+ * mpn/x86/atom/rsh1add_n.asm: Likewise.
+
+2011-01-28 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/alpha/ev6/slot.pl: Add some missing insns.
+
+2011-01-28 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+ * mpn/x86/atom/copyd.asm: New file, grabbing another asm file.
+ * mpn/x86/atom/copyi.asm: Likewise.
+ * mpn/x86/atom/aors_n.asm: Likewise.
+ * mpn/x86/atom/addlsh1_n.asm: Likewise.
+ * mpn/x86/atom/aorsmul_1.asm: Likewise.
+ * mpn/x86/atom/bdiv_q_1.asm: Likewise.
+ * mpn/x86/atom/dive_1.asm: Likewise.
+ * mpn/x86/atom/divrem_1.asm: Likewise.
+ * mpn/x86/atom/hamdist.asm: Likewise.
+ * mpn/x86/atom/logops_n.asm: Likewise.
+ * mpn/x86/atom/lshift.asm: Likewise.
+ * mpn/x86/atom/mod_34lsub1.asm: Likewise.
+ * mpn/x86/atom/mul_1.asm: Likewise.
+ * mpn/x86/atom/mul_basecase.asm: Likewise.
+ * mpn/x86/atom/popcount.asm: Likewise.
+ * mpn/x86/atom/rsh1add_n.asm: Likewise.
+ * mpn/x86/atom/rshift.asm: Likewise.
+ * mpn/x86/atom/sqr_basecase.asm: Likewise.
+
+2011-01-27 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/x86_64/atom/rsh1aors_n.asm: New file, grabbing another asm file.
+ * mpn/x86_64/atom/popcount.asm: Likewise.
+ * mpn/x86_64/atom/dive_1.asm: Likewise.
+ * mpn/x86_64/nano/popcount.asm: Likewise.
+
+2011-01-26 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/alpha/invert_limb.asm: Complete rewrite.
+
+2011-01-25 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/powerpc32/invert_limb.asm: New file.
+
+2011-01-25 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+ * mpn/x86/pentium4/sse2/bdiv_q_1.asm: New file.
+ * mpn/x86/k7/bdiv_q_1.asm: New file.
+
+2011-01-24 Torbjorn Granlund <tege@gmplib.org>
+
+ * tune/tuneup.c (tune_mul_n, tune_sqr): Loop, re-measuring thresholds
+ until no tiny ranges remain.
+
+2011-01-23 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/ia64/mul_2.asm: Tweak to 1.5 c/l, less overhead.
+
+ * mpn/ia64/addmul_2.asm: Rewrite, adding mpn_addmul_2s entry point.
+
+2011-01-22 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/ia64/aors_n.asm: Fix some incorrect bundle types.
+
+ * mpn/ia64/sqr_diagonal.asm: Remove.
+
+ * mpn/ia64/sqr_diag_addlsh1.asm: New file.
+
+ * mpn/ia64/ia64-defs.m4: Define some shorter convenience mnemonics.
+
+ * mpn/generic/sqr_basecase.c (MPN_SQR_DIAG_ADDLSH1): New macro, using
+ new function mpn_sqr_diag_addlsh1 or defining its equivalent.
+
+ * gmp-impl.h (mpn_addmul_2s): Declare.
+ (mpn_sqr_diag_addlsh1): Declare.
+ * mpn/asm-defs.m4 (define_mpn): Add addmul_2s and sqr_diag_addlsh1.
+
+ * configure.in: Add HAVE_NATIVEs for mpn_sqr_diag_addlsh1 and
+ mpn_addmul_2s.
+ (gmp_mpn_functions_optional): Add sqr_diag_addlsh1.
+
+2011-01-21 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+ * tests/devel/try.c: Initial support for mpn_bdiv_q_1.
+ * mpn/x86/pentium/bdiv_q_1.asm: New file.
+ * mpn/x86/p6/bdiv_q_1.asm: New file.
+
+2011-01-20 Torbjorn Granlund <tege@gmplib.org>
+
+ * tune/speed.c (run_gnuplot): Update to current gnuplot syntax.
+
+ * mpn/powerpc64/mode64/aorsmul_1.asm: Trim away 0.5 c/l for submul_1
+ for POWER5.
+
+2011-01-19 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/x86_64/core2/rsh1aors_n.asm: New file.
+
+2011-01-18 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+ * mpn/x86/bdiv_q_1.asm: New file (same core alg. as dive_1).
+
+2011-01-15 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+ * mpn/generic/divexact.c: Avoid COPY if not needed.
+
+2011-01-14 Torbjorn Granlund <tege@gmplib.org>
+
+ * gmp-impl.h (struct cpuvec_t): Add field bmod_1_to_mod_1_threshold.
+ * configure.in (fat_thresholds): Add BMOD_1_TO_MOD_1_THRESHOLD.
+
+2011-01-13 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+ * mpz/mul.c: Remove redundant size computation.
+
+2011-01-08 Torbjorn Granlund <tege@gmplib.org>
+
+ * tests/devel/try.c (types enum): Add TYPE_MUL_5 and TYPE_MUL_6.
+ (param_init): Support new types.
+ (choice_array): Support testing of mpn_mul_5 and mpn_mul_6.
+ (call): Support new routines.
+
+ * tests/refmpn.c (refmpn_mul_5, refmpn_mul_6): New functions.
+ * tests/tests.h (refmpn_mul_5, refmpn_mul_6): Declare.
+ Remove parameter names from some other functions.
+
+ * gmp-impl.h (mpn_mul_5, mpn_mul_6): Declare.
+ * mpn/asm-defs.m4: Likewise, also declare mpn_addmul_5, mpn_addmul_6,
+ mpn_addmul_7, and mpn_addmul_8.
+
+ * configure.in (gmp_mpn_functions_optional): Add mul_5 and mul_6.
+
+ * tune/speed.c (routine): Add measuring of mpn_mul_5 and mpn_mul_6.
+ * tune/common.c (speed_mpn_mul_5, speed_mpn_mul_6): New functions.
+ * tune/speed.h: Declare new functions.
+
+2011-01-03 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+ * mpz/aors.h: Remove #ifdef BERKELEY_MP, and cleanup.
+ * mpz/cmp.c: Likewise.
+ * mpz/gcd.c: Likewise.
+ * mpz/mul.c: Likewise.
+ * mpz/powm.c: Likewise.
+ * mpz/set.c: Likewise.
+ * mpz/sqrtrem.c: Likewise.
+ * mpz/tdiv_qr.c: Likewise.
+
+2010-12-28 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/minithres/gmp-mparam.h: Update with several recent thresholds.
+
+2010-12-19 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/x86/k7/mod_1_1.asm: Canonicalise cmov forms.
+ * mpn/x86/k7/mod_1_4.asm: Likewise.
+ * mpn/x86/pentium4/sse2/mod_1_1.asm: Likewise.
+ * mpn/x86/pentium4/sse2/mod_1_4.asm: Likewise.
+ * mpn/x86_64/core2/divrem_1.asm: Likewise.
+ * mpn/x86_64/divrem_1.asm: Likewise.
+ * mpn/x86_64/mod_1_1.asm: Likewise.
+ * mpn/x86_64/mod_1_2.asm: Likewise.
+ * mpn/x86_64/mod_1_4.asm: Likewise.
+
+ * mpn/x86/k7/gcd_1.asm: Rewrite. Remove slow 'div' loop. Call
+ mpn_mod_1 for operands with more than BMOD_1_TO_MOD_1_THRESHOLD limbs.
+ Misc cleanups.
+
+2010-12-18 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/x86_64/gcd_1.asm: Call mpn_mod_1 for operands with more than
+ BMOD_1_TO_MOD_1_THRESHOLD limbs.
+
+ * configure.in: Generalise code for putting THRESHOLDs in config.m4.
+ Add BMOD_1_TO_MOD_1_THRESHOLD to list.
+
+ * mpn/x86_64/core2/divrem_1.asm: Tweak slightly, correct cycle counts.
+
+ * mpn/x86_64/addmul_2.asm: Remove constant index.
+ * mpn/x86_64/lshiftc.asm: Likewise.
+ * mpn/x86_64/pentium4/lshift.asm: Likewise.
+ * mpn/x86_64/pentium4/lshiftc.asm: Likewise.
+ * mpn/x86_64/pentium4/rshift.asm: Likewise.
+
+2010-12-16 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/x86_64/mod_34lsub1.asm: Complete rewrite.
+ * mpn/x86_64/pentium4/mod_34lsub1.asm: New file, old
+ mpn/x86_64/mod_34lsub1.asm.
+
+2010-12-15 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/powerpc64/vmx/popcount.asm: Rewrite to use vperm count table.
+
+2010-12-14 Torbjorn Granlund <tege@gmplib.org>
+
+ * mp-h.in: Remove.
+ * configure.in: Remove mp-h.in from AC_OUTPUT invocation.
+
+2010-12-13 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpz/mod.c: Rewrite.
+
+ * mpn/x86_64/corei/popcount.asm: New file.
+ * mpn/x86_64/corei/hamdist.asm: New file.
+
+ * mpn/x86_64/k10/hamdist.asm: New file.
+
+ * configure.in: Amend last change for lame /bin/sh.
+
+2010-12-12 Torbjorn Granlund <tege@gmplib.org>
+
+ * configure.in: Comment out M4=m4-not-needed.
+
+ * mpn/x86_64/k10/popcount.asm: New file.
+ * configure.in: Setup special path for k10 and later AMD CPUs.
+ Remove special x86_64'k8' path, since directory is non-existent.
+
+2010-12-11 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/sparc32/ultrasparct1: New directory.
+ * mpn/sparc32/ultrasparct1/add_n.asm: New file.
+ * mpn/sparc32/ultrasparct1/sub_n.asm: New file.
+ * mpn/sparc32/ultrasparct1/mul_1.asm: New file.
+ * mpn/sparc32/ultrasparct1/addmul_1.asm: New file.
+ * mpn/sparc32/ultrasparct1/submul_1.asm: New file.
+ * mpn/sparc32/ultrasparct1/sqr_diagonal.asm: New file.
+
+ * config.guess: Support Ultrasparc T2 and T3.
+ * config.sub: Likewise.
+ * configure.in: Likewise.
+
+ * config.guess: Generalise BSD Sparc recognition by allowing any
+ caps (needed for OpenBSD which spells things innovatively).
+
+2010-12-01 Torbjorn Granlund <tege@gmplib.org>
+
+ * config.guess: Match new AMD processors, allow finer distinctions
+ among old ones.
+ * acinclude.m4 (X86_64_PATTERN): Likewise.
+ * config.sub: Likewise.
+ * configure.in: Rudimentarily support new AMD processors.
+
+ * configure.in (--enable_assembly): New option.
+ (target none-*-*): Disable, give error.
+
+2010-11-29 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/x86/x86-defs.m4 (LEA): Support non-PIC code.
+ * mpn/x86/darwin.m4 (LEA): Likewise.
+
+ * tests/amd64call.asm: Rewrite for code size, and to match calls and
+ returns.
+
+ * tests/x86call.asm: Rewrite for code size, to support PIC, and to
+ match calls and returns.
+ * tests/x86check.c: Rewrite.
- * mpz/mul.c: Remove redundant size computation.
+2010-11-22 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpz/get_str.c: Make all bases either work or return an error.
+ * mpz/out_str.c: Likewise.
+ * mpq/get_str.c: Likewise.
+ * mpf/get_str.c: Likewise.
+
+2010-11-14 Torbjorn Granlund <tege@gmplib.org>
+
+ * tests/misc/t-printf.c: Add explicit casts for type conversions.
+ * mpn/generic/toom62_mul.c: Likewise.
+
+2010-11-13 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/generic/get_d.c: Misc cleanup. Fail with a syntax error for
+ non-IEEE fp formats.
+
+ * tests/devel/try.c (malloc_region): Add explicit casts for type
+ conversions.
+
+ * acinclude.m4 (GMP_ASM_RODATA): Make test code snippet C++ compatible.
+ (GMP_C_DOUBLE_FORMAT): Likewise.
+ (GMP_FUNC_VSNPRINTF): Likewise.
+
+ * config.guess (x86): Make test C snippet C++ compatible.
+
+2010-11-12 Torbjorn Granlund <tege@gmplib.org>
+
+ * Makefile.am: Remove mpbsd.
+ * configure.in: Remove mpbsd.
+ * doc/configuration: Remove mpbsd mentions.
+ * doc/gmp.texi: Remove mpbsd docs.
+ * tests/Makefile.am: Remove mpbsd.
+ * libmp.sym: Remove.
+ * mpbsd: Remove directory and files.
+ * tests/mpbsd: Remove directory and files.
2010-11-11 Torbjorn Granlund <tege@gmplib.org>
* mpn/x86_64/atom/aors_n.asm: Don't rely on ZF after 'bt' insn.
Use 64-bit 'test' to support operands of 2^32 limbs and more.
+ * rand: New directory, move rand*.c and randmt.h here.
+ * rand/Makefile.am: New file.
+ * Makefile.am (SUBDIRS): Add rand.
+ (RANDOM_OBJECTS): New variable.
+ (libgmp_la_SOURCES): Remove random objects.
+ (libgmp_la_DEPENDENCIES): Add RANDOM_OBJECTS.
+ * configure.in (AC_OUTPUT): Add rand/Makefile.
+
+ * ansi2knr.1: File removed.
+ * ansi2knr.c: File removed.
+
2010-11-10 Torbjorn Granlund <tege@gmplib.org>
- [These changes were made after the 5.0.2 release, but inserted here to
- match the change chronology of the main repository.]
+ Make it possible to compile GMP with g++:
+
+ * gmp-impl.h: Declare __gmp_digit_value_tab here.
+ * mpbsd/min.c: ...not here.
+ * mpbsd/xtom.c: ...nor here.
+ * mpf/set_str.c: ...nor here.
+ * mpz/inp_str.c: ...nor here.
+ * mpz/set_str.c: ...nor here.
+
+ * mpn/generic/toom43_mul.c: Add casts for logical operations on enums.
+ * mpn/generic/toom44_mul.c: Likewise.
+ * mpn/generic/toom4_sqr.c: Likewise.
+ * mpn/generic/toom52_mul.c: Likewise.
+ * mpn/generic/toom53_mul.c: Likewise.
+ * mpn/generic/toom62_mul.c: Likewise.
+
+ * mpz/clrbit.c: Clean up typing using MPZ_REALLOC.
+ * mpz/setbit.c: Likewise.
+
+ * mpz/powm.c: Avoid variable name 'new'.
+
+ * randlc2x.c: Add explicit casts for type conversions.
+ * tests/misc/t-printf.c: Likewise.
+ * tests/misc/t-scanf.c: Likewise.
+ * tests/misc.c: Likewise.
+ * tests/mpz/convert.c: Likewise.
+ * tests/refmpn.c: Likewise.
+
+ * tests/tests.h: Unconditionally use <sstream> for now.
+
+ * tests/memory.c: Include "tests.h".
* mp_get_fns.c: Add a __GMP_NOTHROW for coherency with prototype.
* mp_set_fns.c: Likewise.
* mpz/sizeinbase.c: Likewise.
* mpz/swap.c: Likewise.
* mpz/tstbit.c: Likewise.
+ * tal-reent.c: Likewise.
2010-11-09 Torbjorn Granlund <tege@gmplib.org>
- [This change was made after the 5.0.2 release, but inserted here to
- match the change chronology of the main repository.]
+ * configure.in: Get rid of K&R support.
+ * Makefile.am: Likewise.
+ * mpn/Makefile.am: Likewise.
+ * doc/configuration: Update docs wrt K&R support.
+ * doc/gmp.texi: Likewise.
* configure.in (AC_INIT): Amend bug reporting address with manual
reference.
2010-11-06 Torbjorn Granlund <tege@gmplib.org>
+ * config.guess: If cpuid says we have 32bit-only x86 but
+ configfsf.guess return x86_64, return the latter.
+
* mpn/x86_64/aors_n.asm: Rewrite not to rely on ZF after 'bt' insn.
+2010-10-09 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/generic/trialdiv.c: Update documentation.
+
2010-10-04 Torbjorn Granlund <tege@gmplib.org>
* mpn/x86_64/gcd_1.asm: Use m4_lshift to avoid << operator.
+ * mpn/x86_64/aorrlshC_n.asm: Likewise.
+ * mpn/x86_64/pentium4/aorslshC_n.asm: Likewise.
* mpn/x86/k7/gcd_1.asm: Likewise.
2010-08-20 Niels Möller <nisse@lysator.liu.se>
touch it. Fixed the case that no assembler files are used, and
GMP_PROG_M4 is omitted.
+2010-08-08 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/x86_64/fat/fat.c: Recognise many more processors.
+
+2010-06-30 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/x86_64/divrem_2.asm: Tune.
+
+2010-06-19 Niels Möller <nisse@lysator.liu.se>
+
+ * tune/speed.h (SPEED_ROUTINE_MPN_MOD_1_1): Pass normalized
+ divisor to the benchmarked function.
+
+2010-06-15 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/x86_64/mod_1_1.asm (mpn_mod_1_1p_cps): Rewrite.
+ * mpn/x86_64/mod_1_2.asm (mpn_mod_1s_2p_cps): Rewrite.
+ * mpn/x86_64/mod_1_4.asm (mpn_mod_1s_4p_cps): Rewrite.
+
+ * gmp-impl.h (udiv_rnd_preinv): Simplify.
+
+ * mpn/x86/k7/mod_1_1.asm: New file.
+ * mpn/x86/pentium4/sse2/mod_1_1.asm (mpn_mod_1_1p_cps): Rewrite.
+ * mpn/x86/k7/mod_1_4.asm (mpn_mod_1s_4p_cps): Rewrite.
+ * mpn/x86/pentium4/sse2/mod_1_4.asm (mpn_mod_1s_4p_cps): Rewrite.
+
+ * mpn/generic/mod_1_1.c (mpn_mod_1_1p_cps): Store results as they are
+ computed.
+ * mpn/generic/mod_1_2.c (mpn_mod_1s_2p_cps): Likewise.
+ * mpn/generic/mod_1_4.c (mpn_mod_1s_4p_cps): Likewise.
+
+ * mpn/x86/k7/invert_limb.asm: Moved from mpn/x86/invert_limb.asm.
+
2010-06-15 Niels Möller <nisse@lysator.liu.se>
* tests/mpn/Makefile.am (check_PROGRAMS): Added t-mod_1.
* tests/mpn/t-mod_1.c: New file.
+2010-05-25 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/generic/mu_div_qr.c (mpn_preinv_mu_div_qr_itch): Trim out space
+ for inverse, since that is passed in already.
+
2010-05-24 Torbjorn Granlund <tege@gmplib.org>
* mpn/generic/mu_div_qr.c (mpn_preinv_mu_div_qr_itch): New function.
* tune/speed.h (SPEED_ROUTINE_MPN_MUPI_DIV_QR): Pass parameters right
for new itch function.
+ * mpn/powerpc32/lshiftc.asm: New file.
+
+2010-05-22 Torbjorn Granlund <tege@gmplib.org>
+
+ * tune/tuneup.c (tune_mod_1): Revert to version of 2010-05-06.
+
+2010-05-17 Torbjorn Granlund <tege@gmplib.org>
+
+ * configure.in (ia64): Get 32-bit sizeof test right.
+
+ * tune/tuneup.c (tune_mod_1): Undo unintentional change to tuning of
+ PREINV_MOD_1_TO_MOD_1_THRESHOLD.
+
+2010-05-16 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/sparc64/mod_1.c: Rewrite.
+ * mpn/sparc64/sparc64.h (umul_ppmm_s): New macro.
+ * mpn/sparc64/mod_1_4.c: New file.
+
+ * mpn/generic/divrem_1.c: Minor cleanup.
+ * mpn/generic/mod_1.c: Likewise.
+ * mpn/generic/mod_1_1.c: Likewise.
+ * mpn/generic/mod_1_2.c: Likewise.
+ * mpn/generic/mod_1_3.c: Likewise.
+ * mpn/generic/mod_1_4.c: Likewise.
+
+ * configure.in (ia64-hpux): Do sizeof tests for 32-bit and 64-bit ABI.
+
+ * tune/tuneup.c (tune_mod_1): Completely finish MOD_1_N tuning before
+ tuning MOD_1U_TO_MOD_1_1_THRESHOLD.
+
2010-05-14 Torbjorn Granlund <tege@gmplib.org>
* mpn/generic/redc_2.c: Use asm code just for GNU C.
+2010-05-13 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/sparc64/ultrasparc1234: New directory. Move all code that uses
+ floating-point into this directory.
+ * configure.in: Point to ultrasparc1234 for appropriate CPUs.
+
+ * mpn/sparc64/ultrasparct1/add_n.asm: New file.
+ * mpn/sparc64/ultrasparct1/addlsh2_n.asm: New file.
+ * mpn/sparc64/ultrasparct1/addmul_1.asm: New file.
+ * mpn/sparc64/ultrasparct1/lshift.asm: New file.
+ * mpn/sparc64/ultrasparct1/mul_1.asm: New file.
+ * mpn/sparc64/ultrasparct1/rsblsh2_n.asm: New file.
+ * mpn/sparc64/ultrasparct1/rshift.asm: New file.
+ * mpn/sparc64/ultrasparct1/sublsh1_n.asm: New file.
+ * mpn/sparc64/ultrasparct1/sublshC_n.asm: New file.
+ * mpn/sparc64/ultrasparct1/addlsh1_n.asm: New file.
+ * mpn/sparc64/ultrasparct1/addlshC_n.asm: New file.
+ * mpn/sparc64/ultrasparct1/lshiftc.asm: New file.
+ * mpn/sparc64/ultrasparct1/rsblsh1_n.asm: New file.
+ * mpn/sparc64/ultrasparct1/rsblshC_n.asm: New file.
+ * mpn/sparc64/ultrasparct1/sub_n.asm: New file.
+ * mpn/sparc64/ultrasparct1/sublsh2_n.asm: New file.
+ * mpn/sparc64/ultrasparct1/submul_1.asm: New file.
+ * mpn/sparc64/ultrasparct1/gmp-mparam.h: New file.
+
+ * configure.in: Give ultrasparct1 and ultrasparct2 special code path.
+
+ * mpn/x86_64/pentium4/gmp-mparam.h: Disable mpn_addlsh_n, mpn_rsblsh_n.
+
+2010-05-12 Niels Möller <nisse@lysator.liu.se>
+
+ * mpz/jacobi.c (mpz_jacobi): Fixed off-by-one error in use of
+ scratch space.
+
+ * tune/common.c (speed_mpz_powm_sec): New function.
+ * tune/speed.h: Declare speed_mpz_powm_sec.
+ * tune/speed.c (routine): Added speed_mpz_powm_sec.
+
+ * tune/common.c (speed_mpn_addlsh_n, speed_mpn_sublsh_n)
+ (speed_mpn_rsblsh_n): New functions.
+ * tune/speed.h: Declare new functions.
+ * tune/speed.c (routine): Add new functions.
+
+2010-05-12 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/x86_64/mod_1_4.asm: Tune for more processors.
+
+ * mpn/x86_64/pentium4/lshiftc.asm: New file.
+
+2010-05-11 Niels Möller <nisse@lysator.liu.se>
+
+ * mpz/jacobi.c (mpz_jacobi): Deleted old implementation.
+ Reorganized new implementation, to handle small inputs efficiently.
+
+ * tests/mpz/t-jac.c (check_large_quotients): Reduced test sizes.
+ (check_data): One more input pair related to a fixed bug.
+ (main): Enable check_large_quotients.
+
+2010-05-10 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/x86_64/aorrlsh2_n.asm: Fix typo.
+
+2010-05-09 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/x86_64/aorrlshC_n.asm: New file based on aorrlsh2_n.asm.
+ * mpn/x86_64/aorrlsh2_n.asm: Now just include aorrlshC_n.asm.
+ * mpn/x86_64/core2/aorrlsh1_n.asm: New file, include ../aorrlshC_n.asm.
+ * mpn/x86_64/core2/aorrlsh2_n.asm: Likewise.
+
+ * mpn/x86_64/core2/sublshC_n.asm: New file based on aorslsh1_n.asm.
+ * mpn/x86_64/core2/aorslsh1_n.asm: Remove.
+ * mpn/x86_64/core2/sublsh1_n.asm: Just include sublshC_n.asm.
+ * mpn/x86_64/core2/sublsh2_n.asm: Likewise.
+
+2010-05-08 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/x86_64/atom/gmp-mparam.h: Disable mpn_rsh1add_n, mpn_rsh1sub_n.
+
+ * mpn/x86_64/pentium4/aorslshC_n.asm: New file based on aorslsh1_n.asm.
+ * mpn/x86_64/pentium4/aorslsh1_n.asm: Now just include aorslshC_n.asm.
+ * mpn/x86_64/pentium4/aorslsh2_n.asm: New file.
+
+2010-05-07 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/sparc64: Support operands of >= 2^32 limbs.
+
+ * mpn/sparc64/lshiftc.asm: New file.
+
+ * mpn/ia64/divrem_2.asm: Complete rewrite.
+
2010-05-06 Torbjorn Granlund <tege@gmplib.org>
- [This change was made after the 5.0.2 release, but inserted here to
- match the change chronology of the main repository.]
+ * tune/tuneup.c (all): Don't call tune_divrem_2.
+
+ * mpn/generic/divrem_2.c: Complete rewrite.
* tune/tuneup.c (tune_mod_1): Fix typo.
2010-05-05 Torbjorn Granlund <tege@gmplib.org>
- [These changes were made after the 5.0.2 release, but inserted here to
- match the change chronology of the main repository.]
+ * mpn/x86_64/mod_1_1.asm (mpn_mod_1_1p): Use macro register names.
+ (mpn_mod_1_1p_cps): Rewrite.
+
+ * mpn/generic/mod_1_1.c (mpn_mod_1_1p_cps): Micro-optimise.
* longlong.h: Undo 2009-03-01 change for powerpc64, it gives poor code.
+ * mpn/x86/pentium4/sse2/mod_1_1.asm: New file.
+
+ * mpn/powerpc64/mode64/mod_1_1.asm: New file.
+
* tune/tuneup.c (tune_mod_1): Use more typical divisor, for the benefit
of machines with early-out multipliers.
2010-05-04 Torbjorn Granlund <tege@gmplib.org>
- [This change was made after the 5.0.2 release, but inserted here to
- match the change chronology of the main repository.]
-
* tune/tuneup.c (tune_mod_1): Fix typo.
+ * mpn/generic/mod_1_1.c: Undo last change.
+ * mpn/x86_64/mod_1_1.asm: Likewise.
+
+2010-05-03 Niels Möller <nisse@lysator.liu.se>
+
+ * mpn/generic/jacobi_lehmer.c (jacobi_hook): New function.
+ (mpn_jacobi_subdiv_step): Deleted function.
+ (mpn_jacobi_lehmer): Use general mpn_gcd_subdiv_step.
+
+ * mpn/generic/gcd_subdiv_step.c (mpn_gcd_subdiv_step): Reorganized
+ to use a single hook function.
+ * mpn/generic/gcdext.c (mpn_gcdext): Adapted to new hook
+ interface.
+ * mpn/generic/gcdext_lehmer.c (mpn_gcdext_hook): New unified hook
+ function.
+ * mpn/generic/gcd.c (gcd_hook): Renamed from gcd_done, and adapted
+ to new hook interface.
+ * gmp-impl.h (gcd_subdiv_step_hook): New typedef, now a function
+ type, not a struct.
+ (mpn_gcdext_hook): Declare.
+
2010-05-03 Torbjorn Granlund <tege@gmplib.org>
- [This change was made after the 5.0.2 release, but inserted here to
- match the change chronology of the main repository.]
+ * mpn/generic/mod_1_1.c: Avoid multiply for 2 limb feed-in.
+ * mpn/generic/mod_1_2.c: Likewise.
+ * mpn/generic/mod_1_3.c: Likewise.
+ * mpn/generic/mod_1_4.c: Likewise.
+ * mpn/x86_64/mod_1_1.asm: Likewise.
+ * mpn/x86_64/mod_1_2.asm: Likewise.
+ * mpn/x86_64/mod_1_4.asm: Likewise.
+ * mpn/x86/k7/mod_1_4.asm: Likewise.
+ * mpn/x86/pentium4/sse2/mod_1_4.asm: Likewise.
+ * mpn/alpha/ev6/mod_1_4.asm: Likewise.
* tune/tuneup.c (tune_mod_1): Measure MOD_1_1_TO_MOD_1_2_THRESHOLD and
MOD_1_2_TO_MOD_1_4_THRESHOLD before MOD_1U_TO_MOD_1_1_THRESHOLD for
correctness.
+ * mpn/powerpc64/sqr_diagonal.asm: Complete rewrite.
+
+ * mpn/powerpc64/mode64/mod_1_4.asm: New file.
+
+2010-05-02 Torbjorn Granlund <tege@gmplib.org>
+
+ * config.guess: Recognise power7.
+
+ * configure.in: Major overhaul of powerpc support.
+
+ * mpn/powerpc64/p6/lshift.asm: New file.
+ * mpn/powerpc64/p6/lshiftc.asm: Likewise.
+ * mpn/powerpc64/p6/rshift.asm: Likewise.
+
+2010-04-30 Torbjorn Granlund <tege@gmplib.org>
+
+ * configure.in (powerpc64): Support CPU specific mode-less subdirs.
+
+ * mpn/powerpc64/aix.m4 (PROLOGUE_cpu): Use "named csect" making
+ requested alignment actually honoured.
+
+2010-04-30 Niels Möller <nisse@lysator.liu.se>
+
+ * mpn/generic/jacobi_lehmer.c (mpn_jacobi_2): Fixed handling of
+ the case bl == 1. Fixed missing application of reciprocity.
+
+2010-04-29 Niels Möller <nisse@lysator.liu.se>
+
+ * configure.in (gmp_mpn_functions): Deleted gcdext_subdiv_step.
+
+ * mpn/generic/gcdext.c (mpn_gcdext): Use new generalized
+ mpn_gcd_subdiv_step.
+
+ * mpn/generic/gcdext_lehmer.c (gcdext_update): New function.
+ (gcdext_done): New function.
+ (gcdext_hook): New const hook struct.
+ (mpn_gcdext_lehmer_n): Use new generalized mpn_gcd_subdiv_step.
+
+ * mpn/generic/gcd.c (gcd_done): New function.
+ (gcd_hook): New const hook struct.
+ (mpn_gcd): Adapted to new mpn_gcd_subdiv_step interface.
+
+ * mpn/generic/gcd_subdiv_step.c (mpn_gcd_subdiv_step): Reorganized
+ function. Added hook function pointers to the argument list, so
+ the same function can be used for gcd, gcdext, and jacobi.
+
+ * gmp-impl.h (struct gcd_subdiv_step_hook): New struct.
+ (mpn_gcdext_subdiv_step): Deleted prototype.
+ (struct gcdext_ctx): New struct.
+ (gcdext_hook): Declare const struct.
+ (mpn_gcd_subdiv_step): Updated prototype.
+
+ * mpn/generic/gcdext_subdiv_step.c: Deleted file.
+
+2010-04-28 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/powerpc64/lshift.asm: Rewrite.
+ * mpn/powerpc64/rshift.asm: Likewise.
+ * mpn/powerpc64/mode64/lshiftc.asm: New file.
+
+ * mpn/powerpc64/aix.m4: Align functions to 32-byte boundary.
+ * mpn/powerpc64/darwin.m4: Likewise.
+ * mpn/powerpc64/elf.m4: Likewise.
+
+2010-04-28 Niels Möller <nisse@lysator.liu.se>
+
+ * tests/mpz/t-jac.c (check_data): Added some more test cases.
+
+ * mpn/generic/jacobi_lehmer.c (mpn_jacobi_2): Bugfix, count
+ trailing zeros, not leading.
+
+2010-04-27 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/powerpc64/mode64/p6/mul_basecase.asm: New file.
+
+2010-04-23 Niels Möller <nisse@lysator.liu.se>
+
+ * gmp-impl.h (MPN_GCD_LEHMER_N_ITCH): Deleted.
+ (mpn_gcd_lehmer_n): Deleted declaration.
+
+ * mpn/generic/gcd.c (gcd_2): Moved from gcd_lehmer.c.
+ (mpn_gcd): Inlined the code from mpn_gcd_lehmer_n. Also use
+ MPN_GCD_SUBDIV_STEP_ITCH rather than MPN_GCD_LEHMER_N_ITCH.
+
+2010-04-22 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/powerpc64/mode64/bdiv_dbm1c.asm: Swap multiply insns to make them
+ consecutive, for the benefit of POWER6.
+
+ * mpn/powerpc64/mode64/p6/gmp-mparam.h: New file.
+
+2010-04-21 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/generic/gcd_lehmer.c: Deleted file.
+
+ * mpn/powerpc64/mode64/divrem_1.asm: Swap multiply insns to make them
+ consecutive, for the benefit of POWER6.
+ * mpn/powerpc64/mode64/dive_1.asm: Likewise.
+ * mpn/powerpc64/mode64/divrem_2.asm: Likewise.
+ * mpn/powerpc64/mode64/mul_1.asm: Likewise.
+ * mpn/powerpc64/mode64/aorsmul_1.asm: Likewise.
+
+ * mpn/powerpc64/mode64/aorslshC_n.asm: Swap ldx operands as a temporary
+ workaround for POWER6 pipeline glitch.
+
+2010-04-19 Niels Möller <nisse@lysator.liu.se>
+
+ * mpz/jacobi.c (mpz_jacobi): New implementation using
+ mpn_jacobi_lehmer. Currently #if:ed out.
+
+ * mpn/generic/jacbase.c (mpn_jacobi_base)
+ [JACOBI_BASE_METHOD < 4]: Support inputs with a >= b.
+
+ * gmp-impl.h (mpn_jacobi_lehmer): Added prototype.
+ (jacobi_table): Declare.
+ (mpn_jacobi_init): New inline function.
+ (mpn_jacobi_finish): Likewise.
+ (mpn_jacobi_update): Likewise.
+
+ * mpn/generic/jacobi_lehmer.c (mpn_jacobi_lehmer): New file, new
+ function.
+
+ * configure.in (gmp_mpn_functions): Added jacobi_lehmer.
+
+2010-04-14 Niels Möller <nisse@lysator.liu.se>
+
+ * configure.in (gmp_mpn_functions): Added
+ matrix22_mul1_inverse_vector.
+ * mpn/Makefile.am (nodist_EXTRA_libmpn_la_SOURCES): Added
+ matrix22_mul1_inverse_vector.c.
+
+ * gmp-impl.h (mpn_matrix22_mul1_inverse_vector): Updated for
+ rename of mpn_matrix22_mul1_inverse_vector.
+ * mpn/generic/gcd_lehmer.c (mpn_gcd_lehmer_n): Likewise.
+ * mpn/generic/gcdext_lehmer.c (mpn_gcdext_lehmer_n): Likewise.
+ * mpn/generic/hgcd.c (hgcd_step): Likewise.
+
+ * mpn/generic/matrix22_mul1_inverse_vector.c
+ (mpn_matrix22_mul1_inverse_vector): New file, function moved and
+ renamed...
+ * mpn/generic/hgcd2.c (mpn_hgcd_mul_matrix1_inverse_vector):
+ ...from here.
+
2010-04-12 Torbjorn Granlund <tege@gmplib.org>
* tests/mpn/t-toom6h.c (SIZE_LOG): Define.
2010-04-10 Torbjorn Granlund <tege@gmplib.org>
+ * mpn/ia64/lorrshift.asm: Rewrite feed-in and wind-down code.
+
+ * mpn/ia64/aorslsh1_n.asm: Adapt to new aorslsh1_n.
+ * mpn/ia64/aorslsh1_n.asm: Likewise.
+
+ * mpn/ia64/aors_n.asm: Complete rewrite.
+ * mpn/ia64/aorslsh1_n.asm: Likewise.
+
+ * mpn/ia64/add_n_sub_n.asm: Misc cleanups. Add slotting comments.
+
+ * mpn/ia64/lshiftc.asm: New file.
+
+ * mpn/x86_64/pentium4/gmp-mparam.h: No longer disable rsh1add_n and
+ rsh1sub_n; instead disable rsblsh1_n, addlsh2_n, rsblsh2_n.
+
* mpn/x86/divrem_2.asm: Use "orb" instead of "or" to work around
Solaris assembler bug.
* mpn/x86_64/divrem_2.asm: Likewise.
+ * mpn/x86/aors_n.asm: Use operand-less shift-by-1 insn form.
+ * mpn/x86/pentium/aors_n.asm: Likewise.
+ * mpn/x86_64/invert_limb.asm: Likewise.
+
+ * mpn/x86_64/pentium4/aors_n.asm: Let non-nc code fall into nc code.
+
+ * mpn/x86_64/pentium4/rsh1aors_n.asm: New file.
+
2010-03-25 Torbjorn Granlund <tege@gmplib.org>
+ * mpn/ia64/add_n_sub_n.asm: New file.
+
* mpn/generic/toom33_mul.c: Fix mpn_add_n_sub_n usage.
* mpn/generic/toom3_sqr.c: Likewise.
* mpn/generic/toom63_mul.c: Likewise.
+ * mpn/generic/add_n_sub_n.c: Renamed from addsub_n.c.
+
+2010-03-23 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/x86_64/divrem_2.asm: Use mpn_invert_limb instead of div insn.
+
+ * mpn/ia64/aorslshC_n.asm: New file, generalised from last iteration of
+ aorslsh1_n.asm.
+ * mpn/ia64/aorslsh1_n.asm: Use aorslshC_n.asm.
+ * mpn/ia64/aorslsh1_n.asm: New file, use aorslshC_n.asm.
+
+2010-03-20 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/powerpc64/mode64/invert_limb.asm: Rewrite to exploit cancellation
+ in the Newton iteration.
+
+2010-03-20 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+ * mpn/generic/toom_interpolate_8pts.c: Use mpn_sublsh2_n.
+
+2010-03-20 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/powerpc64/mode64/aorslshC_n.asm: New file, generalised from
+ last iteration of aorslsh1_n.asm.
+ * mpn/powerpc64/mode64/aorslsh1_n.asm: Use aorslshC_n.asm.
+ * mpn/powerpc64/mode64/aorslsh1_n.asm: New file, use aorslshC_n.asm.
+
2010-03-19 Torbjorn Granlund <tege@gmplib.org>
+ * mpn/x86_64/nano/dive_1.asm: New file.
+
+ * mpn/x86_64/divrem_1.asm: Avoid shld since it is slow on several CPU
+ types. Unconditionally provide code for normalised and unnormalised
+ divisors. Cleanup labels.
+
+ * mpn/x86_64/core2/divrem_1.asm: Remove special code for normalised
+ divisors. Cleanup labels.
+
* mpn/generic/toom_interpolate_6pts.c: Call mpn_sublsh2_n and
mpn_sublsh_n with correct args.
+ * tests/devel/try.c: Use enum for TYPE_*.
+
+ * tests/devel/try.c: Test mpn_sublsh2_n.
+ * tests/refmpn.c (refmpn_sublsh2_n): New function.
+ * tests/tests.h (refmpn_sublsh2_n): Declare.
+
+ * mpn/powerpc64/mode64/aorslsh1_n.asm: New file, with faster
+ mpn_addlsh1_n and mpn_sublsh1_n.
+ * mpn/powerpc64/mode64/addlsh1_n.asm: Delete.
+ * mpn/powerpc64/mode64/sublsh1_n.asm: Delete.
+
+2010-03-18 Torbjorn Granlund <tege@gmplib.org>
+
+ * configure.in (*-*-aix): Define gcc_32_cflags_maybe, ar_32_flags and
+ nm_32_flags.
+
+ * mpn/x86/pentium4/sse2/addlsh1_n.asm: Tune for slightly better speed.
+ Misc cleanups. Add cycle table.
+
+ * mpn/x86_64/copyi.asm: Update cycle table.
+ * mpn/x86_64/copyd.asm: Likewise.
+ * mpn/x86_64/rsh1aors_n.asm: Likewise.
+ * mpn/x86_64/dive_1.asm: Likewise.
+
+ * mpn/x86/pentium4/sse2/add_n.asm: Misc cleanups. Add cycle table.
+ * mpn/x86/pentium4/sse2/sub_n.asm: Likewise.
+
+2010-03-16 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/x86_64/divrem_1.asm: Use mpn_invert_limb instead of div insn.
+ * mpn/x86_64/core2/divrem_1.asm: Likewise.
+
+ * tune/speed.c (routine): Add FLAG_R_OPTIONAL for many binops.
+
+2010-03-15 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/alpha/ev6/mod_1_4.asm (mpn_mod_1s_4p_cps): Rewrite.
+
+ * mpn/ia64/aors_n.asm: Insert explicitly typed nops to trigger intended
+ bundling.
+ * mpn/ia64/aorslsh1_n.asm: Likewise.
+ * mpn/ia64/dive_1.asm: Likewise.
+
+2010-03-13 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/x86/pentium4/sse2/submul_1.asm: Rewrite.
+
+ * mpn/powerpc64/mode64/aorsmul_1.asm: New file, faster than old code
+ for both mpn_addmul_1 and mpn_submul_1.
+ * mpn/powerpc64/mode64/addmul_1.asm: Remove.
+ * mpn/powerpc64/mode64/submul_1.asm: Remove.
+
+2010-03-11 Niels Möller <nisse@lysator.liu.se>
+
+ * mpn/generic/gcd_lehmer.c (gcd_2): Use sub_ddmmss.
+
+ * mpn/generic/jacbase.c (mpn_jacobi_base): Reorganized the
+ JACOBI_BASE_METHOD 4 slightly. Now requires that b > 1.
+
+2010-03-10 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/x86_64/divrem_1.asm: Make fraction code take documented # of
+ cycles. Annotate code for more CPUs. Misc cleanups.
+ * mpn/x86_64/core2/divrem_1.asm: Annotate code for more CPUs.
+
+ * mpn/alpha/ev6/mod_1_4.asm: New file.
+
+ * mpn/ia64/mod_34lsub1.asm: New file.
+
+ * doc/gmp.texi (Language Bindings): Update Python site, add Ruby.
+
+2010-03-10 Niels Möller <nisse@lysator.liu.se>
+
+ * tune/tuneup.c (tune_jacobi_base): Consider mpn_jacobi_base_4.
+ * tune/speed.c (routine): Added mpn_jacobi_base_4.
+ * tune/common.c (speed_mpn_jacobi_base_4): New function.
+ * tune/speed.h (speed_mpn_jacobi_base_4): Declare it.
+ * tune/Makefile.am (libspeed_la_SOURCES): Added jacbase4.c.
+ * tune/jacbase4.c: New file.
+
+ * mpn/generic/jacbase.c (mpn_jacobi_base): New function, for
+ JACOBI_BASE_METHOD 4.
+
+2010-03-09 Niels Möller <nisse@lysator.liu.se>
+
+ * tests/mpz/t-jac.c (check_large_quotients): Also generate inputs
+ with large quotients and a large gcd.
+
+2010-03-09 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+ * tests/mpz/t-bin.c (randomwalk): New test-generator function.
+
+2010-03-07 Torbjorn Granlund <tege@gmplib.org>
+
+ * tune/speed.c (routine): Force r argument for several mod_1 calls.
+
2010-03-06 Torbjorn Granlund <tege@gmplib.org>
+ * mpn/x86_64/divrem_1.asm: Disable SPECIAL_CODE_FOR_NORMALIZED_DIVISOR.
+ Misc clean up.
+
+ * mpn/x86_64/mod_1_1.asm: New file.
+ * mpn/x86_64/mod_1_2.asm: New file.
+ * mpn/x86_64/mod_1_4.asm: Update cycle counts.
+
* tests/tests.h (TESTS_REPS): Fix typo.
+2010-03-03 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/x86_64/core2/divrem_1.asm: New file.
+
+2010-02-26 Niels Möller <nisse@lysator.liu.se>
+
+ * tune/speed.c (routine): Added udiv_qrnnd_preinv3.
+
+ * tune/common.c (speed_udiv_qrnnd_preinv3): New function.
+ * tune/speed.h: Added prototype for it.
+
+2010-02-26 Niels Möller <nisse@lysator.liu.se>
+
+ * tests/mpz/t-jac.c (check_large_quotients): New test. Currently
+ disabled, since it's quite slow.
+ (mpz_nextprime_step): New function.
+
2010-02-26 Torbjorn Granlund <tege@gmplib.org>
* mpn/pa64/aors_n.asm: Fix typo in last change.
+2010-02-25 Niels Möller <nisse@lysator.liu.se>
+
+ * tests/mpz/t-jac.c (ref_jacobi): New reference implementation,
+ using factorization and legendre symbols computed by powm.
+
+ * tests/devel/try.c (param_init, call): Don't pass negative values
+ for the second argument to mpz_jacobi and refmpz_jacobi.
+
+ * tests/refmpz.c (refmpz_jacobi): Require that b is odd and positive.
+
+ * tests/devel/try.c (param_init): Support mpz_legendre.
+ (choice_array): Added mpz_kronecker (apparently forgotten) and
+ mpz_legendre.
+ (call): Added TYPE_MPZ_LEGENDRE.
+ (try_one): Added support for DATA_SRC1_ODD_PRIME.
+
+ * tests/refmpz.c (refmpz_legendre): Rewrote using powm.
+
2010-02-25 Torbjorn Granlund <tege@gmplib.org>
+ * config.guess: Make "corei" default for unrecognised Intel P6 CPUs.
+
* tests/mpz/t-perfpow.c (check_random): Use mp_limb_t type for limb
variables.
+ * tests/mpn/t-toom6h.c (COUNT): Define.
+ * tests/mpn/t-toom8h.c (COUNT): Define.
+
* tests/mpn/t-div.c: Cast a switch index to placate HP's cc.
* tests/mpn/t-bdiv.c: Likewise.
* mpn/pa64/aors_n.asm: Fix support of the 2.0n ABI.
+2010-02-24 Marco Bodrato <bodrato@mail.dm.unipi.it>
+
+ * tests/mpz/t-bin.c (data): Replace (2k,k), tested by twos ().
+ * tests/mpf/t-inp_str.c (data): Test also "+" in the exponent.
+
2010-02-23 Torbjorn Granlund <tege@gmplib.org>
- * mpn/generic/mod_1_3.c: Cast a switch index.
+ * mpn/generic/mod_1_3.c: Cast a switch index to placate HP's cc.
* mpn/generic/sqrtrem.c: Use CNST_LIMB.
+2010-02-20 Niels Möller <nisse@lysator.liu.se>
+
+ * tune/speed.h (mpn_gcd_accel): Deleted prototype.
+ (mpn_hgcd_lehmer): New prototype.
+ (MPN_HGCD_LEHMER_ITCH): New macro (previously in gmp-impl.h).
+
+ * tune/Makefile.am (libspeed_la_SOURCES): Added hgcd_lehmer.c.
+ * tune/hgcd_lehmer.c: New file.
+ * tune/gcd_accel.c: Deleted obsolete file.
+
+ * gmp-impl.h (MPN_HGCD_LEHMER_ITCH): Deleted macro.
+
+ * mpn/generic/hgcd.c (mpn_hgcd_lehmer): Deleted function.
+ (mpn_hgcd): Don't call mpn_hgcd_lehmer, instead use inlined loop
+ around hgcd_step.
+ (mpn_hgcd_itch): Substitute n for MPN_HGCD_LEHMER_ITCH (n).
+
+2010-02-19 Niels Möller <nisse@lysator.liu.se>
+
+ * Makefile.am (mpn/jacobitab.h): Added the rules needed to
+ generate this file.
+
+ * gen-jacobitab.c: New file.
+
+2010-02-19 Torbjorn Granlund <tege@gmplib.org>
+
+ * mpn/generic/powm.c: Honour SQR_BASECASE_THRESHOLD in innerloop
+ expansions.
+
+2010-02-16 Niels Möller <nisse@lysator.liu.se>
+
+ * tune/time.c (cgt_works_p): Added rudimentary sanity check for
+ clock_gettime working.
+
+2010-02-15 Niels Möller <nisse@lysator.liu.se>
+
+ * tune/time.c (speed_time_init): Make use of cycle counter
+ configurable, via the speed_option_cycles_broken flag.
+ * tune/common.c (speed_option_cycles_broken): New global variable.
+ (speed_option_set): Recognize option "cycles-broken".
+
+ * tune/time.c (cycles_works_p): Deleted hack to disable cycle
+ counter on linux. Needs to be replaced by something more
+ selective.
+
+2010-02-11 Niels Möller <nisse@lysator.liu.se>
+
+ * tune/time.c (speed_time_init): Fix speed_time_string when using
+ clock_gettime.
+ (cycles_works_p): On linux, don't use the cycle counter.
+
+ * tune/Makefile.am: Add $(TUNE_LIBS) when linking programs.
+
+ * configure.in: Check if -lrt is needed for clock_gettime, and if
+ so, add that flag to TUNE_LIBS.
+
2010-02-07 Torbjorn Granlund <tege@gmplib.org>
* tune/tuneup.c (tune_redc): Set min_size and min_is_always when
* mpn/generic/mu_div_q.c (mpn_mu_div_q_itch): New function.
-2009-12-22 Niels Möller <<nisse@lysator.liu.se>>
+2009-12-22 Niels Möller <nisse@lysator.liu.se>
* mpn/generic/sbpi1_div_q.c: Use udiv_qr_3by2. Intended to change
nothing after preprocessing.
* tune/Makefile.am (libspeed_la_SOURCES): Remove sb_div.c and sb_inv.c.
(TUNE_MPN_SRCS_BASIC): Remove sb_divrem_mn.c.
* tune/common.c (speed_mpn_dcpi1_div_qr_n): New function.
- Remove mpn_sb_divrem_mn related functions.
+ Remove mpn_sb_divrem_mn related functions.
* tune/speed.c (routine): Remove entries related to mpn_dc_divrem and
mpn_sb_divrem.
(routine): New entry for mpn_dc_div_qr_n.
* nextprime.c: New file.
* gmp-impl.h (gmp_primesieve_t, gmp_init_primesieve, gmp_nextprime):
- Declare
+ Declare.
* Makefile.am (libgmp_la_SOURCES): Add nextprime.c.
2009-06-11 Torbjorn Granlund <tege@gmplib.org>
* mpn/Makefile.am: Remove incorrect comment.
* mpn/Makefile.in: Regenerate.
- * gmp.h: Rename most of the random number functions, structs and
- some of the struct members.
+ * gmp.h: Rename most of the random number functions, structs and some
+ of the struct members.
* rand.c (gmp_randinit): Likewise.
* randclr.c (gmp_randclear): Likewise.
* randlc.c (gmp_randinit_lc): Likewise.
* mpz_do_sqrt: Simplify special case for U == 0.
* m*sqrt*.c, mpz_perfsqr.c (mpz_perfect_square_p):
- Rename _mpz_impl_sqrt to _mpz_do_sqrt.
+ Rename _mpz_impl_sqrt to _mpz_do_sqrt.
Fri Dec 13 12:52:28 1991 Torbjorn Granlund (tege@zevs.sics.se)
# Copyright 1991, 1993, 1994, 1996, 1997, 1999, 2000, 2001, 2002, 2003, 2004,
-# 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
+# 2006, 2007, 2008, 2009, 2011, 2012, 2013 Free Software Foundation, Inc.
#
# This file is part of the GNU MP Library.
#
# Makefiles in subdirectories, but here we must omit it so automake gives
# the actual ansi2knr build rule, not "cd $(top_builddir) && make ansi2knr".
#
-AUTOMAKE_OPTIONS = 1.8 gnu no-dependencies ansi2knr
+# AUTOMAKE_OPTIONS = 1.8 gnu no-dependencies
# Libtool -version-info for libgmp.la and libmp.la. See "Versioning" in the
# 5.0.3 10:3:0 6:3:2 4:23:1
# 5.0.4 10:4:0 6:4:2 4:24:1
# 5.0.5 10:5:0 6:5:2 4:25:1
+# 5.1.0 11:0:1 7:0:3 -
+# 5.1.1 11:1:1 7:1:3 -
+# 5.1.2 11:2:1 7:2:3 -
+# 5.1.3 11:3:1 7:3:3 -
#
# Starting at 3:0:0 is a slight abuse of the versioning system, but it
# ensures we're past soname libgmp.so.2, which was used on Debian GNU/Linux
# it's still good to get the shared library filename (like
# libgmpxx.so.3.0.4) incrementing, to make it clear which GMP it's from.
-LIBGMP_LT_CURRENT = 10
-LIBGMP_LT_REVISION = 5
-LIBGMP_LT_AGE = 0
+LIBGMP_LT_CURRENT = 11
+LIBGMP_LT_REVISION = 3
+LIBGMP_LT_AGE = 1
-LIBGMPXX_LT_CURRENT = 6
-LIBGMPXX_LT_REVISION = 5
-LIBGMPXX_LT_AGE = 2
+LIBGMPXX_LT_CURRENT = 7
+LIBGMPXX_LT_REVISION = 3
+LIBGMPXX_LT_AGE = 3
-LIBMP_LT_CURRENT = 4
-LIBMP_LT_REVISION = 25
-LIBMP_LT_AGE = 1
-
-SUBDIRS = tests mpn mpz mpq mpf printf scanf cxx mpbsd demos tune doc
+SUBDIRS = tests mpn mpz mpq mpf printf scanf rand cxx demos tune doc
EXTRA_DIST = configfsf.guess configfsf.sub .gdbinit INSTALL.autoconf
#
includeexecdir = $(exec_prefix)/include
include_HEADERS = $(GMPXX_HEADERS_OPTION)
-nodist_includeexec_HEADERS = gmp.h $(MPBSD_HEADERS_OPTION)
-lib_LTLIBRARIES = libgmp.la $(GMPXX_LTLIBRARIES_OPTION) $(MPBSD_LTLIBRARIES_OPTION)
+nodist_includeexec_HEADERS = gmp.h
+lib_LTLIBRARIES = libgmp.la $(GMPXX_LTLIBRARIES_OPTION)
BUILT_SOURCES = gmp.h
mpz/cong$U.lo mpz/cong_2exp$U.lo mpz/cong_ui$U.lo \
mpz/divexact$U.lo mpz/divegcd$U.lo mpz/dive_ui$U.lo \
mpz/divis$U.lo mpz/divis_ui$U.lo mpz/divis_2exp$U.lo mpz/dump$U.lo \
- mpz/export$U.lo mpz/fac_ui$U.lo mpz/fdiv_q$U.lo \
+ mpz/export$U.lo mpz/mfac_uiui$U.lo \
+ mpz/2fac_ui$U.lo mpz/fac_ui$U.lo mpz/oddfac_1$U.lo mpz/prodlimbs$U.lo \
mpz/fdiv_q_ui$U.lo mpz/fdiv_qr$U.lo mpz/fdiv_qr_ui$U.lo \
- mpz/fdiv_r$U.lo mpz/fdiv_r_ui$U.lo \
+ mpz/fdiv_r$U.lo mpz/fdiv_r_ui$U.lo mpz/fdiv_q$U.lo \
mpz/fdiv_ui$U.lo mpz/fib_ui$U.lo mpz/fib2_ui$U.lo mpz/fits_sint$U.lo \
mpz/fits_slong$U.lo mpz/fits_sshort$U.lo mpz/fits_uint$U.lo \
mpz/fits_ulong$U.lo mpz/fits_ushort$U.lo mpz/gcd$U.lo \
mpz/gcd_ui$U.lo mpz/gcdext$U.lo mpz/get_d$U.lo mpz/get_d_2exp$U.lo \
mpz/get_si$U.lo mpz/get_str$U.lo mpz/get_ui$U.lo mpz/getlimbn$U.lo \
mpz/hamdist$U.lo \
- mpz/import$U.lo mpz/init$U.lo mpz/init2$U.lo mpz/inits$U.lo \
+ mpz/import$U.lo mpz/init$U.lo mpz/init2$U.lo mpz/inits$U.lo \
mpz/inp_raw$U.lo mpz/inp_str$U.lo mpz/invert$U.lo \
mpz/ior$U.lo mpz/iset$U.lo mpz/iset_d$U.lo mpz/iset_si$U.lo \
mpz/iset_str$U.lo mpz/iset_ui$U.lo mpz/jacobi$U.lo mpz/kronsz$U.lo \
mpz/n_pow_ui$U.lo mpz/neg$U.lo mpz/nextprime$U.lo \
mpz/out_raw$U.lo mpz/out_str$U.lo mpz/perfpow$U.lo mpz/perfsqr$U.lo \
mpz/popcount$U.lo mpz/pow_ui$U.lo mpz/powm$U.lo mpz/powm_sec$U.lo \
- mpz/powm_ui$U.lo mpz/pprime_p$U.lo mpz/random$U.lo mpz/random2$U.lo \
+ mpz/powm_ui$U.lo mpz/primorial_ui$U.lo \
+ mpz/pprime_p$U.lo mpz/random$U.lo mpz/random2$U.lo \
mpz/realloc$U.lo mpz/realloc2$U.lo mpz/remove$U.lo \
mpz/root$U.lo mpz/rootrem$U.lo mpz/rrandomb$U.lo mpz/scan0$U.lo \
mpz/scan1$U.lo mpz/set$U.lo mpz/set_d$U.lo mpz/set_f$U.lo \
scanf/scanf$U.lo scanf/sscanf$U.lo scanf/sscanffuns$U.lo \
scanf/vfscanf$U.lo scanf/vscanf$U.lo scanf/vsscanf$U.lo
+RANDOM_OBJECTS = \
+ rand/rand$U.lo rand/randclr$U.lo rand/randdef$U.lo rand/randiset$U.lo \
+ rand/randlc2s$U.lo rand/randlc2x$U.lo rand/randmt$U.lo \
+ rand/randmts$U.lo rand/rands$U.lo rand/randsd$U.lo rand/randsdui$U.lo \
+ rand/randbui$U.lo rand/randmui$U.lo
+
# no $U for C++ files
CXX_OBJECTS = \
cxx/isfuns.lo cxx/ismpf.lo cxx/ismpq.lo cxx/ismpz.lo cxx/ismpznw.lo \
- cxx/osdoprnti.lo cxx/osfuns.lo \
+ cxx/limits.lo cxx/osdoprnti.lo cxx/osfuns.lo \
cxx/osmpf.lo cxx/osmpq.lo cxx/osmpz.lo
-MPBSD_OBJECTS = mpbsd/add$U.lo mpbsd/tdiv_qr$U.lo mpbsd/set$U.lo \
- mpbsd/powm$U.lo mpbsd/sub$U.lo mpbsd/cmp$U.lo mpbsd/mfree$U.lo \
- mpbsd/mtox$U.lo mpbsd/realloc$U.lo mpbsd/gcd$U.lo mpbsd/itom$U.lo \
- mpbsd/min$U.lo mpbsd/mul$U.lo mpbsd/mout$U.lo mpbsd/rpow$U.lo \
- mpbsd/sdiv$U.lo mpbsd/sqrtrem$U.lo mpbsd/xtom$U.lo
-
-
# In libtool 1.5 it doesn't work to build libgmp.la from the convenience
# libraries like mpz/libmpz.la. Or rather it works, but it ends up putting
# PIC objects into libgmp.a if shared and static are both built. (The PIC
# -export-symbols, since the tune and speed programs, and perhaps some of
# the test programs, want to access undocumented symbols.
-libgmp_la_SOURCES = gmp-impl.h longlong.h randmt.h \
+libgmp_la_SOURCES = gmp-impl.h longlong.h \
assert.c compat.c errno.c extract-dbl.c invalid.c memory.c \
mp_bpl.c mp_clz_tab.c mp_dv_tab.c mp_minv_tab.c mp_get_fns.c mp_set_fns.c \
- rand.c randclr.c randdef.c randiset.c randlc2s.c randlc2x.c randmt.c \
- randmts.c rands.c randsd.c randsdui.c randbui.c randmui.c version.c \
- nextprime.c
+ version.c nextprime.c primesieve.c
EXTRA_libgmp_la_SOURCES = tal-debug.c tal-notreent.c tal-reent.c
libgmp_la_DEPENDENCIES = @TAL_OBJECT@ \
$(MPF_OBJECTS) $(MPZ_OBJECTS) $(MPQ_OBJECTS) \
$(MPN_OBJECTS) @mpn_objs_in_libgmp@ \
- $(PRINTF_OBJECTS) $(SCANF_OBJECTS)
+ $(PRINTF_OBJECTS) $(SCANF_OBJECTS) $(RANDOM_OBJECTS)
libgmp_la_LIBADD = $(libgmp_la_DEPENDENCIES)
libgmp_la_LDFLAGS = $(GMP_LDFLAGS) $(LIBGMP_LDFLAGS) \
-version-info $(LIBGMP_LT_CURRENT):$(LIBGMP_LT_REVISION):$(LIBGMP_LT_AGE)
-version-info $(LIBGMPXX_LT_CURRENT):$(LIBGMPXX_LT_REVISION):$(LIBGMPXX_LT_AGE)
-# The selected mpz objects here support mpz/powm.c (built as mpbsd/powm.lo)
-# and can probably be removed when that switches to an mpn implementation.
-# (Apart from mpz/n_pow_ui$U.lo, which supports mpbsd/rpow.c)
-
-if WANT_MPBSD
-MPBSD_HEADERS_OPTION = mp.h
-MPBSD_LTLIBRARIES_OPTION = libmp.la
-endif
-BUILT_SOURCES += mp.h
-libmp_la_SOURCES = assert.c errno.c memory.c mp_bpl.c mp_clz_tab.c \
- mp_dv_tab.c mp_minv_tab.c mp_get_fns.c mp_set_fns.c nextprime.c
-libmp_la_DEPENDENCIES = $(srcdir)/libmp.sym \
- @TAL_OBJECT@ $(MPBSD_OBJECTS) $(MPN_OBJECTS) @mpn_objs_in_libmp@ \
- mpz/add$U.lo mpz/gcdext$U.lo mpz/invert$U.lo mpz/mul$U.lo \
- mpz/n_pow_ui$U.lo mpz/realloc$U.lo mpz/set$U.lo mpz/sub$U.lo \
- mpz/tdiv_q$U.lo
-libmp_la_LIBADD = $(libmp_la_DEPENDENCIES)
-libmp_la_LDFLAGS = $(GMP_LDFLAGS) \
- -version-info $(LIBMP_LT_CURRENT):$(LIBMP_LT_REVISION):$(LIBMP_LT_AGE) \
- -export-symbols $(srcdir)/libmp.sym
-EXTRA_DIST += libmp.sym
-
install-data-hook:
@echo ''
@echo ''
-# The ansi2knr setups for the build programs are the same as the normal
-# automake ansi2knr rules, but using $(CC_FOR_BUILD) instead of $(CC).
-#
# The "test -f" support for srcdir!=builddir is similar to the automake .c.o
# etc rules, but with each foo.c explicitly, since $< is not portable
# outside an inference rule.
# the .h files are not properly expressed for the various objects that use
# them.
-EXTRA_DIST += dumbmp.c
-
-mpz/fac_ui.h: gen-fac_ui$(EXEEXT_FOR_BUILD)
- ./gen-fac_ui $(GMP_LIMB_BITS) $(GMP_NAIL_BITS) >mpz/fac_ui.h || (rm -f mpz/fac_ui.h; exit 1)
-BUILT_SOURCES += mpz/fac_ui.h
+EXTRA_DIST += bootstrap.c
-gen-fac_ui$(EXEEXT_FOR_BUILD): gen-fac_ui$(U_FOR_BUILD).c dumbmp.c
- $(CC_FOR_BUILD) `test -f 'gen-fac_ui$(U_FOR_BUILD).c' || echo '$(srcdir)/'`gen-fac_ui$(U_FOR_BUILD).c -o gen-fac_ui$(EXEEXT_FOR_BUILD)
-DISTCLEANFILES += gen-fac_ui$(EXEEXT_FOR_BUILD)
-EXTRA_DIST += gen-fac_ui.c
+fac_table.h: gen-fac$(EXEEXT_FOR_BUILD)
+ ./gen-fac $(GMP_LIMB_BITS) $(GMP_NAIL_BITS) >fac_table.h || (rm -f fac_table.h; exit 1)
+BUILT_SOURCES += fac_table.h
-gen-fac_ui_.c: gen-fac_ui.c $(ANSI2KNR)
- $(CPP_FOR_BUILD) `if test -f $(srcdir)/gen-fac_ui.c; then echo $(srcdir)/gen-fac_ui.c; else echo gen-fac_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > gen-fac_ui_.c || rm -f gen-fac_ui_.c
+gen-fac$(EXEEXT_FOR_BUILD): gen-fac$(U_FOR_BUILD).c bootstrap.c
+ $(CC_FOR_BUILD) `test -f 'gen-fac$(U_FOR_BUILD).c' || echo '$(srcdir)/'`gen-fac$(U_FOR_BUILD).c -o gen-fac$(EXEEXT_FOR_BUILD)
+DISTCLEANFILES += gen-fac$(EXEEXT_FOR_BUILD)
+EXTRA_DIST += gen-fac.c
fib_table.h: gen-fib$(EXEEXT_FOR_BUILD)
./gen-fib table $(GMP_LIMB_BITS) $(GMP_NAIL_BITS) >mpn/fib_table.c || (rm -f mpn/fib_table.c; exit 1)
BUILT_SOURCES += mpn/fib_table.c
-gen-fib$(EXEEXT_FOR_BUILD): gen-fib$(U_FOR_BUILD).c dumbmp.c
+gen-fib$(EXEEXT_FOR_BUILD): gen-fib$(U_FOR_BUILD).c bootstrap.c
$(CC_FOR_BUILD) `test -f 'gen-fib$(U_FOR_BUILD).c' || echo '$(srcdir)/'`gen-fib$(U_FOR_BUILD).c -o gen-fib$(EXEEXT_FOR_BUILD)
DISTCLEANFILES += gen-fib$(EXEEXT_FOR_BUILD)
EXTRA_DIST += gen-fib.c
-gen-fib_.c: gen-fib.c $(ANSI2KNR)
- $(CPP_FOR_BUILD) `if test -f $(srcdir)/gen-fib.c; then echo $(srcdir)/gen-fib.c; else echo gen-fib.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > gen-fib_.c || rm -f gen-fib_.c
-
mp_bases.h: gen-bases$(EXEEXT_FOR_BUILD)
./gen-bases header $(GMP_LIMB_BITS) $(GMP_NAIL_BITS) >mp_bases.h || (rm -f mp_bases.h; exit 1)
./gen-bases table $(GMP_LIMB_BITS) $(GMP_NAIL_BITS) >mpn/mp_bases.c || (rm -f mpn/mp_bases.c; exit 1)
BUILT_SOURCES += mpn/mp_bases.c
-gen-bases$(EXEEXT_FOR_BUILD): gen-bases$(U_FOR_BUILD).c dumbmp.c
+gen-bases$(EXEEXT_FOR_BUILD): gen-bases$(U_FOR_BUILD).c bootstrap.c
$(CC_FOR_BUILD) `test -f 'gen-bases$(U_FOR_BUILD).c' || echo '$(srcdir)/'`gen-bases$(U_FOR_BUILD).c -o gen-bases$(EXEEXT_FOR_BUILD) $(LIBM_FOR_BUILD)
DISTCLEANFILES += gen-bases$(EXEEXT_FOR_BUILD)
EXTRA_DIST += gen-bases.c
-gen-bases_.c: gen-bases.c $(ANSI2KNR)
- $(CPP_FOR_BUILD) `if test -f $(srcdir)/gen-bases.c; then echo $(srcdir)/gen-bases.c; else echo gen-bases.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > gen-bases_.c || rm -f gen-bases_.c
-
-
trialdivtab.h: gen-trialdivtab$(EXEEXT_FOR_BUILD)
./gen-trialdivtab $(GMP_LIMB_BITS) 8000 >trialdivtab.h || (rm -f trialdivtab.h; exit 1)
BUILT_SOURCES += trialdivtab.h
-gen-trialdivtab$(EXEEXT_FOR_BUILD): gen-trialdivtab$(U_FOR_BUILD).c dumbmp.c
+gen-trialdivtab$(EXEEXT_FOR_BUILD): gen-trialdivtab$(U_FOR_BUILD).c bootstrap.c
$(CC_FOR_BUILD) `test -f 'gen-trialdivtab$(U_FOR_BUILD).c' || echo '$(srcdir)/'`gen-trialdivtab$(U_FOR_BUILD).c -o gen-trialdivtab$(EXEEXT_FOR_BUILD) $(LIBM_FOR_BUILD)
DISTCLEANFILES += gen-trialdivtab$(EXEEXT_FOR_BUILD)
EXTRA_DIST += gen-trialdivtab.c
-gen-trialdivtab_.c: gen-trialdivtab.c $(ANSI2KNR)
- $(CPP_FOR_BUILD) `if test -f $(srcdir)/gen-trialdivtab.c; then echo $(srcdir)/gen-trialdivtab.c; else echo gen-trialdivtab.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > gen-trialdivtab_.c || rm -f gen-trialdivtab_.c
+mpn/jacobitab.h: gen-jacobitab$(EXEEXT_FOR_BUILD)
+ ./gen-jacobitab >mpn/jacobitab.h || (rm -f mpn/jacobitab.h; exit 1)
+BUILT_SOURCES += mpn/jacobitab.h
+gen-jacobitab$(EXEEXT_FOR_BUILD): gen-jacobitab$(U_FOR_BUILD).c
+ $(CC_FOR_BUILD) `test -f 'gen-jacobitab$(U_FOR_BUILD).c' || echo '$(srcdir)/'`gen-jacobitab$(U_FOR_BUILD).c -o gen-jacobitab$(EXEEXT_FOR_BUILD)
+DISTCLEANFILES += gen-jacobitab$(EXEEXT_FOR_BUILD)
+EXTRA_DIST += gen-jacobitab.c
mpn/perfsqr.h: gen-psqr$(EXEEXT_FOR_BUILD)
./gen-psqr $(GMP_LIMB_BITS) $(GMP_NAIL_BITS) >mpn/perfsqr.h || (rm -f mpn/perfsqr.h; exit 1)
BUILT_SOURCES += mpn/perfsqr.h
-gen-psqr$(EXEEXT_FOR_BUILD): gen-psqr$(U_FOR_BUILD).c dumbmp.c
+gen-psqr$(EXEEXT_FOR_BUILD): gen-psqr$(U_FOR_BUILD).c bootstrap.c
$(CC_FOR_BUILD) `test -f 'gen-psqr$(U_FOR_BUILD).c' || echo '$(srcdir)/'`gen-psqr$(U_FOR_BUILD).c -o gen-psqr$(EXEEXT_FOR_BUILD) $(LIBM_FOR_BUILD)
DISTCLEANFILES += gen-psqr$(EXEEXT_FOR_BUILD)
EXTRA_DIST += gen-psqr.c
-gen-psqr_.c: gen-psqr.c $(ANSI2KNR)
- $(CPP_FOR_BUILD) `if test -f $(srcdir)/gen-psqr.c; then echo $(srcdir)/gen-psqr.c; else echo gen-psqr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > gen-psqr_.c || rm -f gen-psqr_.c
-
+# Distribute mini-gmp. Test sources copied by dist-hook.
+EXTRA_DIST += mini-gmp/README mini-gmp/mini-gmp.c mini-gmp/mini-gmp.h \
+ mini-gmp/tests/Makefile mini-gmp/tests/run-tests
# Avoid: CVS - cvs directories
# *~ - emacs backups
dist-hook:
-find $(distdir) \( -name CVS -type d \) -o -name "*~" -o -name ".#*" \
| xargs rm -rf
+ cp "$(srcdir)"/mini-gmp/tests/*.[ch] "$(distdir)/mini-gmp/tests"
# grep -F $(VERSION) $(srcdir)/Makefile.am \
-# | grep -q "^# *$(VERSION) *$(LIBGMP_LT_CURRENT):$(LIBGMP_LT_REVISION):$(LIBGMP_LT_AGE) *$(LIBGMPXX_LT_CURRENT):$(LIBGMPXX_LT_REVISION):$(LIBGMPXX_LT_AGE) *$(LIBMP_LT_CURRENT):$(LIBMP_LT_REVISION):$(LIBMP_LT_AGE)"
+# | grep -q "^# *$(VERSION) *$(LIBGMP_LT_CURRENT):$(LIBGMP_LT_REVISION):$(LIBGMP_LT_AGE) *$(LIBGMPXX_LT_CURRENT):$(LIBGMPXX_LT_REVISION):$(LIBGMPXX_LT_AGE)"
# test -z "`sed -n 's/^# *[0-9]*\.[0-9]*\.[0-9]* *\([0-9]*:[0-9]*:[0-9]*\) *\([0-9]*:[0-9]*:[0-9]*\) *\([0-9]*:[0-9]*:[0-9]*\).*/A\1\nB\2\nC\3/p' $(srcdir)/Makefile.am | grep -v 'A6:3:3\|B3:5:0\|C4:7:1' | sort | uniq -d`"
+
+.PHONY: check-mini-gmp clean-mini-gmp
+
+check-mini-gmp:
+ abs_srcdir="`cd $(srcdir) && pwd`" ; \
+ $(MKDIR_P) mini-gmp/tests \
+ && cd mini-gmp/tests \
+ && LD_LIBRARY_PATH="../../.libs:$$LD_LIBRARY_PATH" \
+ DYLD_LIBRARY_PATH="../../.libs:$$DYLD_LIBRARY_PATH" \
+ $(MAKE) -f "$$abs_srcdir/mini-gmp/tests/Makefile" \
+ VPATH="$$abs_srcdir/mini-gmp/tests" \
+ srcdir="$$abs_srcdir/mini-gmp/tests" \
+ MINI_GMP_DIR="$$abs_srcdir/mini-gmp" \
+ LDFLAGS="-L../../.libs" \
+ LIBS="-lgmp -lm" \
+ CC="$(CC_FOR_BUILD)" EXTRA_CFLAGS="-g -I../.." check
+
+clean-mini-gmp:
+ if [ -d mini-gmp/tests ] ; then \
+ abs_srcdir="`cd $(srcdir) && pwd`" ; \
+ cd mini-gmp/tests \
+ && $(MAKE) -f "$$abs_srcdir/mini-gmp/tests/Makefile" clean ; \
+ fi
+
+clean-local: clean-mini-gmp
+distclean-local: clean-mini-gmp
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.6 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@SET_MAKE@
# Copyright 1991, 1993, 1994, 1996, 1997, 1999, 2000, 2001, 2002, 2003, 2004,
-# 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
+# 2006, 2007, 2008, 2009, 2011, 2012, 2013 Free Software Foundation, Inc.
#
# This file is part of the GNU MP Library.
#
# You should have received a copy of the GNU Lesser General Public License
# along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+# The following options are the same as AM_INIT_AUTOMAKE in configure.in,
+# except no $(top_builddir) on ansi2knr. That directory is wanted for the
+# Makefiles in subdirectories, but here we must omit it so automake gives
+# the actual ansi2knr build rule, not "cd $(top_builddir) && make ansi2knr".
+#
+# AUTOMAKE_OPTIONS = 1.8 gnu no-dependencies
+
+# Libtool -version-info for libgmp.la and libmp.la. See "Versioning" in the
+# libtool manual.
+#
+# CURRENT:REVISION:AGE
+#
+# 1. No interfaces changed, only implementations (good): Increment REVISION.
+#
+# 2. Interfaces added, none removed (good): Increment CURRENT, increment
+# AGE, set REVISION to 0.
+#
+# 3. Interfaces removed (BAD, breaks upward compatibility): Increment
+# CURRENT, set AGE and REVISION to 0.
+#
+# Do this separately for libgmp, libgmpxx and libmp, and only for releases.
+#
+# GMP -version-info
+# release libgmp libgmpxx libmp
+# 2.0.x - - -
+# 3.0 3:0:0 - 3:0:0
+# 3.0.1 3:1:0 - 3:0:0
+# 3.1 4:0:1 - 4:0:1
+# 3.1.1 4:1:1 - 4:1:1
+# 4.0 5:0:2 3:0:0 4:2:1
+# 4.0.1 5:1:2 3:1:0 4:3:1
+# 4.1 6:0:3 3:2:0 4:4:1
+# 4.1.1 6:1:3 3:3:0 4:5:1
+# 4.1.2 6:2:3 3:4:0 4:6:1
+# 4.1.3 6:3:3 3:5:0 4:7:1
+# 4.1.4 6:3:3 3:5:0 4:7:1 WRONG, same as 4.1.3!
+# 4.2 6:0:3 3:2:0 4:4:1 REALLY WRONG, same as 4.1!
+# 4.2.1 7:1:4 4:1:1 4:10:1 WRONG for libgmpxx
+# 4.2.2 7:2:4 4:2:0 4:11:1
+# 4.2.3 7:3:4 4:3:0 4:12:1
+# 4.2.4 7:4:4 4:4:0 4:13:1
+# 4.3.0 8:0:5 5:0:1 4:14:1
+# 4.3.1 8:1:5 5:1:1 4:15:1 WRONG Really used same as 4.3.0
+# 4.3.2 8:2:5 5:2:1 4:16:1
+# 5.0.0 9:0:6 6:0:2 4:20:1 Should have been 10:0:0
+# 5.0.1 10:1:0 6:1:2 4:21:1
+# 5.0.2 10:2:0 6:2:2 4:22:1
+# 5.0.3 10:3:0 6:3:2 4:23:1
+# 5.0.4 10:4:0 6:4:2 4:24:1
+# 5.0.5 10:5:0 6:5:2 4:25:1
+# 5.1.0 11:0:1 7:0:3 -
+# 5.1.1 11:1:1 7:1:3 -
+# 5.1.2 11:2:1 7:2:3 -
+# 5.1.3 11:3:1 7:3:3 -
+#
+# Starting at 3:0:0 is a slight abuse of the versioning system, but it
+# ensures we're past soname libgmp.so.2, which was used on Debian GNU/Linux
+# packages of gmp 2. Pretend gmp 2 was 2:0:0, so the interface changes for
+# gmp 3 mean 3:0:0 is right.
+#
+# We interpret "implementation changed" in item "1." above as meaning any
+# release, ie. the REVISION is incremented every time (if nothing else).
+# Even if we thought the code generated will be identical on all systems,
+# it's still good to get the shared library filename (like
+# libgmpxx.so.3.0.4) incrementing, to make it clear which GMP it's from.
+
VPATH = @srcdir@
+am__make_dryrun = \
+ { \
+ am__dry=no; \
+ case $$MAKEFLAGS in \
+ *\\[\ \ ]*) \
+ echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \
+ | grep '^AM OK$$' >/dev/null || am__dry=yes;; \
+ *) \
+ for am__flg in $$MAKEFLAGS; do \
+ case $$am__flg in \
+ *=*|--*) ;; \
+ *n*) am__dry=yes; break;; \
+ esac; \
+ done;; \
+ esac; \
+ test $$am__dry = yes; \
+ }
pkgdatadir = $(datadir)/@PACKAGE@
pkgincludedir = $(includedir)/@PACKAGE@
pkglibdir = $(libdir)/@PACKAGE@
POST_UNINSTALL = :
build_triplet = @build@
host_triplet = @host@
-ANSI2KNR = @ANSI2KNR@
subdir = .
DIST_COMMON = README $(am__configure_deps) $(am__include_HEADERS_DIST) \
$(srcdir)/Makefile.am $(srcdir)/Makefile.in \
- $(srcdir)/config.in $(srcdir)/gmp-h.in $(srcdir)/mp-h.in \
- $(top_srcdir)/configure AUTHORS COPYING COPYING.LIB ChangeLog \
- INSTALL NEWS ansi2knr.1 ansi2knr.c config.guess config.sub \
- install-sh ltmain.sh missing ylwrap
+ $(srcdir)/config.in $(srcdir)/gmp-h.in $(top_srcdir)/configure \
+ AUTHORS COPYING COPYING.LIB ChangeLog INSTALL NEWS \
+ config.guess config.sub install-sh ltmain.sh missing ylwrap
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
- $(top_srcdir)/configure.in
+ $(top_srcdir)/configure.ac
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
$(ACLOCAL_M4)
am__CONFIG_DISTCLEAN_FILES = config.status config.cache config.log \
configure.lineno config.status.lineno
mkinstalldirs = $(install_sh) -d
CONFIG_HEADER = config.h
-CONFIG_CLEAN_FILES = gmp.h mp.h gmp-mparam.h
+CONFIG_CLEAN_FILES = gmp.h gmp-mparam.h
CONFIG_CLEAN_VPATH_FILES =
am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
am__vpath_adj = case $$p in \
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
am__installdirs = "$(DESTDIR)$(libdir)" "$(DESTDIR)$(includedir)" \
"$(DESTDIR)$(includeexecdir)"
LTLIBRARIES = $(lib_LTLIBRARIES)
am__DEPENDENCIES_1 = $(MPF_OBJECTS) $(MPZ_OBJECTS) $(MPQ_OBJECTS) \
- $(MPN_OBJECTS) $(PRINTF_OBJECTS) $(SCANF_OBJECTS)
-am_libgmp_la_OBJECTS = assert$U.lo compat$U.lo errno$U.lo \
- extract-dbl$U.lo invalid$U.lo memory$U.lo mp_bpl$U.lo \
- mp_clz_tab$U.lo mp_dv_tab$U.lo mp_minv_tab$U.lo \
- mp_get_fns$U.lo mp_set_fns$U.lo rand$U.lo randclr$U.lo \
- randdef$U.lo randiset$U.lo randlc2s$U.lo randlc2x$U.lo \
- randmt$U.lo randmts$U.lo rands$U.lo randsd$U.lo randsdui$U.lo \
- randbui$U.lo randmui$U.lo version$U.lo nextprime$U.lo
+ $(MPN_OBJECTS) $(PRINTF_OBJECTS) $(SCANF_OBJECTS) \
+ $(RANDOM_OBJECTS)
+am_libgmp_la_OBJECTS = assert.lo compat.lo errno.lo extract-dbl.lo \
+ invalid.lo memory.lo mp_bpl.lo mp_clz_tab.lo mp_dv_tab.lo \
+ mp_minv_tab.lo mp_get_fns.lo mp_set_fns.lo version.lo \
+ nextprime.lo primesieve.lo
libgmp_la_OBJECTS = $(am_libgmp_la_OBJECTS)
libgmp_la_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \
$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
$(LIBTOOLFLAGS) --mode=link $(CXXLD) $(AM_CXXFLAGS) \
$(CXXFLAGS) $(libgmpxx_la_LDFLAGS) $(LDFLAGS) -o $@
@WANT_CXX_TRUE@am_libgmpxx_la_rpath = -rpath $(libdir)
-am__DEPENDENCIES_2 = $(srcdir)/libmp.sym $(MPBSD_OBJECTS) \
- $(MPN_OBJECTS) mpz/add$U.lo mpz/gcdext$U.lo mpz/invert$U.lo \
- mpz/mul$U.lo mpz/n_pow_ui$U.lo mpz/realloc$U.lo mpz/set$U.lo \
- mpz/sub$U.lo mpz/tdiv_q$U.lo
-am_libmp_la_OBJECTS = assert$U.lo errno$U.lo memory$U.lo mp_bpl$U.lo \
- mp_clz_tab$U.lo mp_dv_tab$U.lo mp_minv_tab$U.lo \
- mp_get_fns$U.lo mp_set_fns$U.lo nextprime$U.lo
-libmp_la_OBJECTS = $(am_libmp_la_OBJECTS)
-libmp_la_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
- --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(libmp_la_LDFLAGS) \
- $(LDFLAGS) -o $@
-@WANT_MPBSD_TRUE@am_libmp_la_rpath = -rpath $(libdir)
DEFAULT_INCLUDES = -I.@am__isrc@
depcomp =
am__depfiles_maybe =
--mode=link $(CXXLD) $(AM_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
$(LDFLAGS) -o $@
SOURCES = $(libgmp_la_SOURCES) $(EXTRA_libgmp_la_SOURCES) \
- $(libgmpxx_la_SOURCES) $(libmp_la_SOURCES)
+ $(libgmpxx_la_SOURCES)
DIST_SOURCES = $(libgmp_la_SOURCES) $(EXTRA_libgmp_la_SOURCES) \
- $(libgmpxx_la_SOURCES) $(libmp_la_SOURCES)
+ $(libgmpxx_la_SOURCES)
RECURSIVE_TARGETS = all-recursive check-recursive dvi-recursive \
html-recursive info-recursive install-data-recursive \
install-dvi-recursive install-exec-recursive \
install-pdf-recursive install-ps-recursive install-recursive \
installcheck-recursive installdirs-recursive pdf-recursive \
ps-recursive uninstall-recursive
+am__can_run_installinfo = \
+ case $$AM_UPDATE_INFO_DIR in \
+ n|no|NO) false;; \
+ *) (install-info --version) >/dev/null 2>&1;; \
+ esac
am__include_HEADERS_DIST = gmpxx.h
HEADERS = $(include_HEADERS) $(nodist_includeexec_HEADERS)
RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \
distdir = $(PACKAGE)-$(VERSION)
top_distdir = $(distdir)
am__remove_distdir = \
- { test ! -d "$(distdir)" \
- || { find "$(distdir)" -type d ! -perm -200 -exec chmod u+w {} ';' \
- && rm -fr "$(distdir)"; }; }
+ if test -d "$(distdir)"; then \
+ find "$(distdir)" -type d ! -perm -200 -exec chmod u+w {} ';' \
+ && rm -rf "$(distdir)" \
+ || { sleep 5 && rm -rf "$(distdir)"; }; \
+ else :; fi
am__relativize = \
dir0=`pwd`; \
sed_first='s,^\([^/]*\)/.*$$,\1,'; \
DIST_ARCHIVES = $(distdir).tar.gz
GZIP_ENV = --best
distuninstallcheck_listfiles = find . -type f -print
+am__distuninstallcheck_listfiles = $(distuninstallcheck_listfiles) \
+ | sed 's|^\./|$(prefix)/|' | grep -v '$(infodir)/dir$$'
distcleancheck_listfiles = find . -type f -print
ABI = @ABI@
ACLOCAL = @ACLOCAL@
SPEED_CYCLECOUNTER_OBJ = @SPEED_CYCLECOUNTER_OBJ@
STRIP = @STRIP@
TAL_OBJECT = @TAL_OBJECT@
+TUNE_LIBS = @TUNE_LIBS@
TUNE_SQR_OBJ = @TUNE_SQR_OBJ@
-U = @U@
U_FOR_BUILD = @U_FOR_BUILD@
VERSION = @VERSION@
WITH_READLINE_01 = @WITH_READLINE_01@
mkdir_p = @mkdir_p@
mpn_objects = @mpn_objects@
mpn_objs_in_libgmp = @mpn_objs_in_libgmp@
-mpn_objs_in_libmp = @mpn_objs_in_libmp@
oldincludedir = @oldincludedir@
pdfdir = @pdfdir@
prefix = @prefix@
top_build_prefix = @top_build_prefix@
top_builddir = @top_builddir@
top_srcdir = @top_srcdir@
+LIBGMP_LT_CURRENT = 11
+LIBGMP_LT_REVISION = 3
+LIBGMP_LT_AGE = 1
+LIBGMPXX_LT_CURRENT = 7
+LIBGMPXX_LT_REVISION = 3
+LIBGMPXX_LT_AGE = 3
+SUBDIRS = tests mpn mpz mpq mpf printf scanf rand cxx demos tune doc
-# The following options are the same as AM_INIT_AUTOMAKE in configure.in,
-# except no $(top_builddir) on ansi2knr. That directory is wanted for the
-# Makefiles in subdirectories, but here we must omit it so automake gives
-# the actual ansi2knr build rule, not "cd $(top_builddir) && make ansi2knr".
-#
-AUTOMAKE_OPTIONS = 1.8 gnu no-dependencies ansi2knr
-
-# Libtool -version-info for libgmp.la and libmp.la. See "Versioning" in the
-# libtool manual.
-#
-# CURRENT:REVISION:AGE
-#
-# 1. No interfaces changed, only implementations (good): Increment REVISION.
-#
-# 2. Interfaces added, none removed (good): Increment CURRENT, increment
-# AGE, set REVISION to 0.
-#
-# 3. Interfaces removed (BAD, breaks upward compatibility): Increment
-# CURRENT, set AGE and REVISION to 0.
-#
-# Do this separately for libgmp, libgmpxx and libmp, and only for releases.
-#
-# GMP -version-info
-# release libgmp libgmpxx libmp
-# 2.0.x - - -
-# 3.0 3:0:0 - 3:0:0
-# 3.0.1 3:1:0 - 3:0:0
-# 3.1 4:0:1 - 4:0:1
-# 3.1.1 4:1:1 - 4:1:1
-# 4.0 5:0:2 3:0:0 4:2:1
-# 4.0.1 5:1:2 3:1:0 4:3:1
-# 4.1 6:0:3 3:2:0 4:4:1
-# 4.1.1 6:1:3 3:3:0 4:5:1
-# 4.1.2 6:2:3 3:4:0 4:6:1
-# 4.1.3 6:3:3 3:5:0 4:7:1
-# 4.1.4 6:3:3 3:5:0 4:7:1 WRONG, same as 4.1.3!
-# 4.2 6:0:3 3:2:0 4:4:1 REALLY WRONG, same as 4.1!
-# 4.2.1 7:1:4 4:1:1 4:10:1 WRONG for libgmpxx
-# 4.2.2 7:2:4 4:2:0 4:11:1
-# 4.2.3 7:3:4 4:3:0 4:12:1
-# 4.2.4 7:4:4 4:4:0 4:13:1
-# 4.3.0 8:0:5 5:0:1 4:14:1
-# 4.3.1 8:1:5 5:1:1 4:15:1 WRONG Really used same as 4.3.0
-# 4.3.2 8:2:5 5:2:1 4:16:1
-# 5.0.0 9:0:6 6:0:2 4:20:1 Should have been 10:0:0
-# 5.0.1 10:1:0 6:1:2 4:21:1
-# 5.0.2 10:2:0 6:2:2 4:22:1
-# 5.0.3 10:3:0 6:3:2 4:23:1
-# 5.0.4 10:4:0 6:4:2 4:24:1
-# 5.0.5 10:5:0 6:5:2 4:25:1
-#
-# Starting at 3:0:0 is a slight abuse of the versioning system, but it
-# ensures we're past soname libgmp.so.2, which was used on Debian GNU/Linux
-# packages of gmp 2. Pretend gmp 2 was 2:0:0, so the interface changes for
-# gmp 3 mean 3:0:0 is right.
-#
-# We interpret "implementation changed" in item "1." above as meaning any
-# release, ie. the REVISION is incremented every time (if nothing else).
-# Even if we thought the code generated will be identical on all systems,
-# it's still good to get the shared library filename (like
-# libgmpxx.so.3.0.4) incrementing, to make it clear which GMP it's from.
-LIBGMP_LT_CURRENT = 10
-LIBGMP_LT_REVISION = 5
-LIBGMP_LT_AGE = 0
-LIBGMPXX_LT_CURRENT = 6
-LIBGMPXX_LT_REVISION = 5
-LIBGMPXX_LT_AGE = 2
-LIBMP_LT_CURRENT = 4
-LIBMP_LT_REVISION = 25
-LIBMP_LT_AGE = 1
-SUBDIRS = tests mpn mpz mpq mpf printf scanf cxx mpbsd demos tune doc
-
-# The ansi2knr setups for the build programs are the same as the normal
-# automake ansi2knr rules, but using $(CC_FOR_BUILD) instead of $(CC).
-#
# The "test -f" support for srcdir!=builddir is similar to the automake .c.o
# etc rules, but with each foo.c explicitly, since $< is not portable
# outside an inference rule.
# build-system stuff over and done with at the start. Also, dependencies on
# the .h files are not properly expressed for the various objects that use
# them.
+
+# Distribute mini-gmp. Test sources copied by dist-hook.
EXTRA_DIST = configfsf.guess configfsf.sub .gdbinit INSTALL.autoconf \
- gmpxx.h libmp.sym dumbmp.c gen-fac_ui.c gen-fib.c gen-bases.c \
- gen-trialdivtab.c gen-psqr.c
+ gmpxx.h bootstrap.c gen-fac.c gen-fib.c gen-bases.c \
+ gen-trialdivtab.c gen-jacobitab.c gen-psqr.c mini-gmp/README \
+ mini-gmp/mini-gmp.c mini-gmp/mini-gmp.h \
+ mini-gmp/tests/Makefile mini-gmp/tests/run-tests
@WANT_CXX_TRUE@GMPXX_HEADERS_OPTION = gmpxx.h
# gmp.h and mp.h are architecture dependent, mainly since they encode the
#
includeexecdir = $(exec_prefix)/include
include_HEADERS = $(GMPXX_HEADERS_OPTION)
-nodist_includeexec_HEADERS = gmp.h $(MPBSD_HEADERS_OPTION)
-lib_LTLIBRARIES = libgmp.la $(GMPXX_LTLIBRARIES_OPTION) $(MPBSD_LTLIBRARIES_OPTION)
-BUILT_SOURCES = gmp.h mp.h mpz/fac_ui.h fib_table.h mpn/fib_table.c \
- mp_bases.h mpn/mp_bases.c trialdivtab.h mpn/perfsqr.h
+nodist_includeexec_HEADERS = gmp.h
+lib_LTLIBRARIES = libgmp.la $(GMPXX_LTLIBRARIES_OPTION)
+BUILT_SOURCES = gmp.h fac_table.h fib_table.h mpn/fib_table.c \
+ mp_bases.h mpn/mp_bases.c trialdivtab.h mpn/jacobitab.h \
+ mpn/perfsqr.h
DISTCLEANFILES = $(BUILT_SOURCES) config.m4 @gmp_srclinks@ \
- gen-fac_ui$(EXEEXT_FOR_BUILD) gen-fib$(EXEEXT_FOR_BUILD) \
+ gen-fac$(EXEEXT_FOR_BUILD) gen-fib$(EXEEXT_FOR_BUILD) \
gen-bases$(EXEEXT_FOR_BUILD) \
- gen-trialdivtab$(EXEEXT_FOR_BUILD) gen-psqr$(EXEEXT_FOR_BUILD)
+ gen-trialdivtab$(EXEEXT_FOR_BUILD) \
+ gen-jacobitab$(EXEEXT_FOR_BUILD) gen-psqr$(EXEEXT_FOR_BUILD)
# Tell gmp.h it's building gmp, not an application, used by windows DLL stuff.
INCLUDES = -D__GMP_WITHIN_GMP
mpz/cong$U.lo mpz/cong_2exp$U.lo mpz/cong_ui$U.lo \
mpz/divexact$U.lo mpz/divegcd$U.lo mpz/dive_ui$U.lo \
mpz/divis$U.lo mpz/divis_ui$U.lo mpz/divis_2exp$U.lo mpz/dump$U.lo \
- mpz/export$U.lo mpz/fac_ui$U.lo mpz/fdiv_q$U.lo \
+ mpz/export$U.lo mpz/mfac_uiui$U.lo \
+ mpz/2fac_ui$U.lo mpz/fac_ui$U.lo mpz/oddfac_1$U.lo mpz/prodlimbs$U.lo \
mpz/fdiv_q_ui$U.lo mpz/fdiv_qr$U.lo mpz/fdiv_qr_ui$U.lo \
- mpz/fdiv_r$U.lo mpz/fdiv_r_ui$U.lo \
+ mpz/fdiv_r$U.lo mpz/fdiv_r_ui$U.lo mpz/fdiv_q$U.lo \
mpz/fdiv_ui$U.lo mpz/fib_ui$U.lo mpz/fib2_ui$U.lo mpz/fits_sint$U.lo \
mpz/fits_slong$U.lo mpz/fits_sshort$U.lo mpz/fits_uint$U.lo \
mpz/fits_ulong$U.lo mpz/fits_ushort$U.lo mpz/gcd$U.lo \
mpz/gcd_ui$U.lo mpz/gcdext$U.lo mpz/get_d$U.lo mpz/get_d_2exp$U.lo \
mpz/get_si$U.lo mpz/get_str$U.lo mpz/get_ui$U.lo mpz/getlimbn$U.lo \
mpz/hamdist$U.lo \
- mpz/import$U.lo mpz/init$U.lo mpz/init2$U.lo mpz/inits$U.lo \
+ mpz/import$U.lo mpz/init$U.lo mpz/init2$U.lo mpz/inits$U.lo \
mpz/inp_raw$U.lo mpz/inp_str$U.lo mpz/invert$U.lo \
mpz/ior$U.lo mpz/iset$U.lo mpz/iset_d$U.lo mpz/iset_si$U.lo \
mpz/iset_str$U.lo mpz/iset_ui$U.lo mpz/jacobi$U.lo mpz/kronsz$U.lo \
mpz/n_pow_ui$U.lo mpz/neg$U.lo mpz/nextprime$U.lo \
mpz/out_raw$U.lo mpz/out_str$U.lo mpz/perfpow$U.lo mpz/perfsqr$U.lo \
mpz/popcount$U.lo mpz/pow_ui$U.lo mpz/powm$U.lo mpz/powm_sec$U.lo \
- mpz/powm_ui$U.lo mpz/pprime_p$U.lo mpz/random$U.lo mpz/random2$U.lo \
+ mpz/powm_ui$U.lo mpz/primorial_ui$U.lo \
+ mpz/pprime_p$U.lo mpz/random$U.lo mpz/random2$U.lo \
mpz/realloc$U.lo mpz/realloc2$U.lo mpz/remove$U.lo \
mpz/root$U.lo mpz/rootrem$U.lo mpz/rrandomb$U.lo mpz/scan0$U.lo \
mpz/scan1$U.lo mpz/set$U.lo mpz/set_d$U.lo mpz/set_f$U.lo \
scanf/scanf$U.lo scanf/sscanf$U.lo scanf/sscanffuns$U.lo \
scanf/vfscanf$U.lo scanf/vscanf$U.lo scanf/vsscanf$U.lo
+RANDOM_OBJECTS = \
+ rand/rand$U.lo rand/randclr$U.lo rand/randdef$U.lo rand/randiset$U.lo \
+ rand/randlc2s$U.lo rand/randlc2x$U.lo rand/randmt$U.lo \
+ rand/randmts$U.lo rand/rands$U.lo rand/randsd$U.lo rand/randsdui$U.lo \
+ rand/randbui$U.lo rand/randmui$U.lo
+
# no $U for C++ files
CXX_OBJECTS = \
cxx/isfuns.lo cxx/ismpf.lo cxx/ismpq.lo cxx/ismpz.lo cxx/ismpznw.lo \
- cxx/osdoprnti.lo cxx/osfuns.lo \
+ cxx/limits.lo cxx/osdoprnti.lo cxx/osfuns.lo \
cxx/osmpf.lo cxx/osmpq.lo cxx/osmpz.lo
-MPBSD_OBJECTS = mpbsd/add$U.lo mpbsd/tdiv_qr$U.lo mpbsd/set$U.lo \
- mpbsd/powm$U.lo mpbsd/sub$U.lo mpbsd/cmp$U.lo mpbsd/mfree$U.lo \
- mpbsd/mtox$U.lo mpbsd/realloc$U.lo mpbsd/gcd$U.lo mpbsd/itom$U.lo \
- mpbsd/min$U.lo mpbsd/mul$U.lo mpbsd/mout$U.lo mpbsd/rpow$U.lo \
- mpbsd/sdiv$U.lo mpbsd/sqrtrem$U.lo mpbsd/xtom$U.lo
-
# In libtool 1.5 it doesn't work to build libgmp.la from the convenience
# libraries like mpz/libmpz.la. Or rather it works, but it ends up putting
# Currently, for libgmp, unlike libmp below, we're not using
# -export-symbols, since the tune and speed programs, and perhaps some of
# the test programs, want to access undocumented symbols.
-libgmp_la_SOURCES = gmp-impl.h longlong.h randmt.h \
+libgmp_la_SOURCES = gmp-impl.h longlong.h \
assert.c compat.c errno.c extract-dbl.c invalid.c memory.c \
mp_bpl.c mp_clz_tab.c mp_dv_tab.c mp_minv_tab.c mp_get_fns.c mp_set_fns.c \
- rand.c randclr.c randdef.c randiset.c randlc2s.c randlc2x.c randmt.c \
- randmts.c rands.c randsd.c randsdui.c randbui.c randmui.c version.c \
- nextprime.c
+ version.c nextprime.c primesieve.c
EXTRA_libgmp_la_SOURCES = tal-debug.c tal-notreent.c tal-reent.c
libgmp_la_DEPENDENCIES = @TAL_OBJECT@ \
$(MPF_OBJECTS) $(MPZ_OBJECTS) $(MPQ_OBJECTS) \
$(MPN_OBJECTS) @mpn_objs_in_libgmp@ \
- $(PRINTF_OBJECTS) $(SCANF_OBJECTS)
+ $(PRINTF_OBJECTS) $(SCANF_OBJECTS) $(RANDOM_OBJECTS)
libgmp_la_LIBADD = $(libgmp_la_DEPENDENCIES)
libgmp_la_LDFLAGS = $(GMP_LDFLAGS) $(LIBGMP_LDFLAGS) \
libgmpxx_la_LDFLAGS = $(GMP_LDFLAGS) $(LIBGMPXX_LDFLAGS) \
-version-info $(LIBGMPXX_LT_CURRENT):$(LIBGMPXX_LT_REVISION):$(LIBGMPXX_LT_AGE)
-
-# The selected mpz objects here support mpz/powm.c (built as mpbsd/powm.lo)
-# and can probably be removed when that switches to an mpn implementation.
-# (Apart from mpz/n_pow_ui$U.lo, which supports mpbsd/rpow.c)
-@WANT_MPBSD_TRUE@MPBSD_HEADERS_OPTION = mp.h
-@WANT_MPBSD_TRUE@MPBSD_LTLIBRARIES_OPTION = libmp.la
-libmp_la_SOURCES = assert.c errno.c memory.c mp_bpl.c mp_clz_tab.c \
- mp_dv_tab.c mp_minv_tab.c mp_get_fns.c mp_set_fns.c nextprime.c
-
-libmp_la_DEPENDENCIES = $(srcdir)/libmp.sym \
- @TAL_OBJECT@ $(MPBSD_OBJECTS) $(MPN_OBJECTS) @mpn_objs_in_libmp@ \
- mpz/add$U.lo mpz/gcdext$U.lo mpz/invert$U.lo mpz/mul$U.lo \
- mpz/n_pow_ui$U.lo mpz/realloc$U.lo mpz/set$U.lo mpz/sub$U.lo \
- mpz/tdiv_q$U.lo
-
-libmp_la_LIBADD = $(libmp_la_DEPENDENCIES)
-libmp_la_LDFLAGS = $(GMP_LDFLAGS) \
- -version-info $(LIBMP_LT_CURRENT):$(LIBMP_LT_REVISION):$(LIBMP_LT_AGE) \
- -export-symbols $(srcdir)/libmp.sym
-
all: $(BUILT_SOURCES) config.h
$(MAKE) $(AM_MAKEFLAGS) all-recursive
.SUFFIXES:
.SUFFIXES: .c .cc .lo .o .obj
-am--refresh:
+am--refresh: Makefile
@:
$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps)
@for dep in $?; do \
$(am__aclocal_m4_deps):
config.h: stamp-h1
- @if test ! -f $@; then \
- rm -f stamp-h1; \
- $(MAKE) $(AM_MAKEFLAGS) stamp-h1; \
- else :; fi
+ @if test ! -f $@; then rm -f stamp-h1; else :; fi
+ @if test ! -f $@; then $(MAKE) $(AM_MAKEFLAGS) stamp-h1; else :; fi
stamp-h1: $(srcdir)/config.in $(top_builddir)/config.status
@rm -f stamp-h1
-rm -f config.h stamp-h1
gmp.h: $(top_builddir)/config.status $(srcdir)/gmp-h.in
cd $(top_builddir) && $(SHELL) ./config.status $@
-mp.h: $(top_builddir)/config.status $(srcdir)/mp-h.in
- cd $(top_builddir) && $(SHELL) ./config.status $@
install-libLTLIBRARIES: $(lib_LTLIBRARIES)
@$(NORMAL_INSTALL)
- test -z "$(libdir)" || $(MKDIR_P) "$(DESTDIR)$(libdir)"
@list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \
list2=; for p in $$list; do \
if test -f $$p; then \
else :; fi; \
done; \
test -z "$$list2" || { \
+ echo " $(MKDIR_P) '$(DESTDIR)$(libdir)'"; \
+ $(MKDIR_P) "$(DESTDIR)$(libdir)" || exit 1; \
echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(libdir)'"; \
$(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(libdir)"; \
}
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-libgmp.la: $(libgmp_la_OBJECTS) $(libgmp_la_DEPENDENCIES)
+libgmp.la: $(libgmp_la_OBJECTS) $(libgmp_la_DEPENDENCIES) $(EXTRA_libgmp_la_DEPENDENCIES)
$(libgmp_la_LINK) -rpath $(libdir) $(libgmp_la_OBJECTS) $(libgmp_la_LIBADD) $(LIBS)
-libgmpxx.la: $(libgmpxx_la_OBJECTS) $(libgmpxx_la_DEPENDENCIES)
+libgmpxx.la: $(libgmpxx_la_OBJECTS) $(libgmpxx_la_DEPENDENCIES) $(EXTRA_libgmpxx_la_DEPENDENCIES)
$(libgmpxx_la_LINK) $(am_libgmpxx_la_rpath) $(libgmpxx_la_OBJECTS) $(libgmpxx_la_LIBADD) $(LIBS)
-libmp.la: $(libmp_la_OBJECTS) $(libmp_la_DEPENDENCIES)
- $(libmp_la_LINK) $(am_libmp_la_rpath) $(libmp_la_OBJECTS) $(libmp_la_LIBADD) $(LIBS)
mostlyclean-compile:
-rm -f *.$(OBJEXT)
distclean-compile:
-rm -f *.tab.c
-./ansi2knr: ansi2knr.$(OBJEXT)
- $(LINK) ansi2knr.$(OBJEXT) $(LIBS)
-ansi2knr.$(OBJEXT): $(CONFIG_HEADER)
-
-clean-krextra:
- -rm -f ansi2knr
-
-mostlyclean-kr:
- -test "$U" = "" || rm -f *_.c
.c.o:
$(COMPILE) -c $<
.c.lo:
$(LTCOMPILE) -c -o $@ $<
-assert_.c: assert.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/assert.c; then echo $(srcdir)/assert.c; else echo assert.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-compat_.c: compat.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/compat.c; then echo $(srcdir)/compat.c; else echo compat.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-errno_.c: errno.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/errno.c; then echo $(srcdir)/errno.c; else echo errno.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-extract-dbl_.c: extract-dbl.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/extract-dbl.c; then echo $(srcdir)/extract-dbl.c; else echo extract-dbl.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-invalid_.c: invalid.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/invalid.c; then echo $(srcdir)/invalid.c; else echo invalid.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-memory_.c: memory.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/memory.c; then echo $(srcdir)/memory.c; else echo memory.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mp_bpl_.c: mp_bpl.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mp_bpl.c; then echo $(srcdir)/mp_bpl.c; else echo mp_bpl.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mp_clz_tab_.c: mp_clz_tab.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mp_clz_tab.c; then echo $(srcdir)/mp_clz_tab.c; else echo mp_clz_tab.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mp_dv_tab_.c: mp_dv_tab.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mp_dv_tab.c; then echo $(srcdir)/mp_dv_tab.c; else echo mp_dv_tab.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mp_get_fns_.c: mp_get_fns.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mp_get_fns.c; then echo $(srcdir)/mp_get_fns.c; else echo mp_get_fns.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mp_minv_tab_.c: mp_minv_tab.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mp_minv_tab.c; then echo $(srcdir)/mp_minv_tab.c; else echo mp_minv_tab.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mp_set_fns_.c: mp_set_fns.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mp_set_fns.c; then echo $(srcdir)/mp_set_fns.c; else echo mp_set_fns.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-nextprime_.c: nextprime.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/nextprime.c; then echo $(srcdir)/nextprime.c; else echo nextprime.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-rand_.c: rand.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/rand.c; then echo $(srcdir)/rand.c; else echo rand.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-randbui_.c: randbui.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/randbui.c; then echo $(srcdir)/randbui.c; else echo randbui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-randclr_.c: randclr.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/randclr.c; then echo $(srcdir)/randclr.c; else echo randclr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-randdef_.c: randdef.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/randdef.c; then echo $(srcdir)/randdef.c; else echo randdef.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-randiset_.c: randiset.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/randiset.c; then echo $(srcdir)/randiset.c; else echo randiset.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-randlc2s_.c: randlc2s.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/randlc2s.c; then echo $(srcdir)/randlc2s.c; else echo randlc2s.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-randlc2x_.c: randlc2x.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/randlc2x.c; then echo $(srcdir)/randlc2x.c; else echo randlc2x.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-randmt_.c: randmt.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/randmt.c; then echo $(srcdir)/randmt.c; else echo randmt.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-randmts_.c: randmts.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/randmts.c; then echo $(srcdir)/randmts.c; else echo randmts.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-randmui_.c: randmui.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/randmui.c; then echo $(srcdir)/randmui.c; else echo randmui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-rands_.c: rands.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/rands.c; then echo $(srcdir)/rands.c; else echo rands.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-randsd_.c: randsd.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/randsd.c; then echo $(srcdir)/randsd.c; else echo randsd.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-randsdui_.c: randsdui.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/randsdui.c; then echo $(srcdir)/randsdui.c; else echo randsdui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-tal-debug_.c: tal-debug.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/tal-debug.c; then echo $(srcdir)/tal-debug.c; else echo tal-debug.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-tal-notreent_.c: tal-notreent.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/tal-notreent.c; then echo $(srcdir)/tal-notreent.c; else echo tal-notreent.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-tal-reent_.c: tal-reent.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/tal-reent.c; then echo $(srcdir)/tal-reent.c; else echo tal-reent.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-version_.c: version.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/version.c; then echo $(srcdir)/version.c; else echo version.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-assert_.$(OBJEXT) assert_.lo compat_.$(OBJEXT) compat_.lo \
-errno_.$(OBJEXT) errno_.lo extract-dbl_.$(OBJEXT) extract-dbl_.lo \
-invalid_.$(OBJEXT) invalid_.lo memory_.$(OBJEXT) memory_.lo \
-mp_bpl_.$(OBJEXT) mp_bpl_.lo mp_clz_tab_.$(OBJEXT) mp_clz_tab_.lo \
-mp_dv_tab_.$(OBJEXT) mp_dv_tab_.lo mp_get_fns_.$(OBJEXT) \
-mp_get_fns_.lo mp_minv_tab_.$(OBJEXT) mp_minv_tab_.lo \
-mp_set_fns_.$(OBJEXT) mp_set_fns_.lo nextprime_.$(OBJEXT) \
-nextprime_.lo rand_.$(OBJEXT) rand_.lo randbui_.$(OBJEXT) randbui_.lo \
-randclr_.$(OBJEXT) randclr_.lo randdef_.$(OBJEXT) randdef_.lo \
-randiset_.$(OBJEXT) randiset_.lo randlc2s_.$(OBJEXT) randlc2s_.lo \
-randlc2x_.$(OBJEXT) randlc2x_.lo randmt_.$(OBJEXT) randmt_.lo \
-randmts_.$(OBJEXT) randmts_.lo randmui_.$(OBJEXT) randmui_.lo \
-rands_.$(OBJEXT) rands_.lo randsd_.$(OBJEXT) randsd_.lo \
-randsdui_.$(OBJEXT) randsdui_.lo tal-debug_.$(OBJEXT) tal-debug_.lo \
-tal-notreent_.$(OBJEXT) tal-notreent_.lo tal-reent_.$(OBJEXT) \
-tal-reent_.lo version_.$(OBJEXT) version_.lo : $(ANSI2KNR)
.cc.o:
$(CXXCOMPILE) -c -o $@ $<
-rm -f libtool config.lt
install-includeHEADERS: $(include_HEADERS)
@$(NORMAL_INSTALL)
- test -z "$(includedir)" || $(MKDIR_P) "$(DESTDIR)$(includedir)"
@list='$(include_HEADERS)'; test -n "$(includedir)" || list=; \
+ if test -n "$$list"; then \
+ echo " $(MKDIR_P) '$(DESTDIR)$(includedir)'"; \
+ $(MKDIR_P) "$(DESTDIR)$(includedir)" || exit 1; \
+ fi; \
for p in $$list; do \
if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
echo "$$d$$p"; \
@$(NORMAL_UNINSTALL)
@list='$(include_HEADERS)'; test -n "$(includedir)" || list=; \
files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \
- test -n "$$files" || exit 0; \
- echo " ( cd '$(DESTDIR)$(includedir)' && rm -f" $$files ")"; \
- cd "$(DESTDIR)$(includedir)" && rm -f $$files
+ dir='$(DESTDIR)$(includedir)'; $(am__uninstall_files_from_dir)
install-nodist_includeexecHEADERS: $(nodist_includeexec_HEADERS)
@$(NORMAL_INSTALL)
- test -z "$(includeexecdir)" || $(MKDIR_P) "$(DESTDIR)$(includeexecdir)"
@list='$(nodist_includeexec_HEADERS)'; test -n "$(includeexecdir)" || list=; \
+ if test -n "$$list"; then \
+ echo " $(MKDIR_P) '$(DESTDIR)$(includeexecdir)'"; \
+ $(MKDIR_P) "$(DESTDIR)$(includeexecdir)" || exit 1; \
+ fi; \
for p in $$list; do \
if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
echo "$$d$$p"; \
@$(NORMAL_UNINSTALL)
@list='$(nodist_includeexec_HEADERS)'; test -n "$(includeexecdir)" || list=; \
files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \
- test -n "$$files" || exit 0; \
- echo " ( cd '$(DESTDIR)$(includeexecdir)' && rm -f" $$files ")"; \
- cd "$(DESTDIR)$(includeexecdir)" && rm -f $$files
+ dir='$(DESTDIR)$(includeexecdir)'; $(am__uninstall_files_from_dir)
# This directory's subdirectories are mostly independent; you can cd
# into them and run `make' without going through this Makefile.
done
@list='$(DIST_SUBDIRS)'; for subdir in $$list; do \
if test "$$subdir" = .; then :; else \
- test -d "$(distdir)/$$subdir" \
- || $(MKDIR_P) "$(distdir)/$$subdir" \
- || exit 1; \
- fi; \
- done
- @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \
- if test "$$subdir" = .; then :; else \
+ $(am__make_dryrun) \
+ || test -d "$(distdir)/$$subdir" \
+ || $(MKDIR_P) "$(distdir)/$$subdir" \
+ || exit 1; \
dir1=$$subdir; dir2="$(distdir)/$$subdir"; \
$(am__relativize); \
new_distdir=$$reldir; \
$(am__remove_distdir)
dist-bzip2: distdir
- tardir=$(distdir) && $(am__tar) | bzip2 -9 -c >$(distdir).tar.bz2
+ tardir=$(distdir) && $(am__tar) | BZIP2=$${BZIP2--9} bzip2 -c >$(distdir).tar.bz2
+ $(am__remove_distdir)
+
+dist-lzip: distdir
+ tardir=$(distdir) && $(am__tar) | lzip -c $${LZIP_OPT--9} >$(distdir).tar.lz
$(am__remove_distdir)
dist-lzma: distdir
$(am__remove_distdir)
dist-xz: distdir
- tardir=$(distdir) && $(am__tar) | xz -c >$(distdir).tar.xz
+ tardir=$(distdir) && $(am__tar) | XZ_OPT=$${XZ_OPT--e} xz -c >$(distdir).tar.xz
$(am__remove_distdir)
dist-tarZ: distdir
bzip2 -dc $(distdir).tar.bz2 | $(am__untar) ;;\
*.tar.lzma*) \
lzma -dc $(distdir).tar.lzma | $(am__untar) ;;\
+ *.tar.lz*) \
+ lzip -dc $(distdir).tar.lz | $(am__untar) ;;\
*.tar.xz*) \
xz -dc $(distdir).tar.xz | $(am__untar) ;;\
*.tar.Z*) \
*.zip*) \
unzip $(distdir).zip ;;\
esac
- chmod -R a-w $(distdir); chmod a+w $(distdir)
+ chmod -R a-w $(distdir); chmod u+w $(distdir)
mkdir $(distdir)/_build
mkdir $(distdir)/_inst
chmod a-w $(distdir)
&& am__cwd=`pwd` \
&& $(am__cd) $(distdir)/_build \
&& ../configure --srcdir=.. --prefix="$$dc_install_base" \
+ $(AM_DISTCHECK_CONFIGURE_FLAGS) \
$(DISTCHECK_CONFIGURE_FLAGS) \
&& $(MAKE) $(AM_MAKEFLAGS) \
&& $(MAKE) $(AM_MAKEFLAGS) dvi \
list='$(DIST_ARCHIVES)'; for i in $$list; do echo $$i; done) | \
sed -e 1h -e 1s/./=/g -e 1p -e 1x -e '$$p' -e '$$x'
distuninstallcheck:
- @$(am__cd) '$(distuninstallcheck_dir)' \
- && test `$(distuninstallcheck_listfiles) | wc -l` -le 1 \
+ @test -n '$(distuninstallcheck_dir)' || { \
+ echo 'ERROR: trying to run $@ with an empty' \
+ '$$(distuninstallcheck_dir)' >&2; \
+ exit 1; \
+ }; \
+ $(am__cd) '$(distuninstallcheck_dir)' || { \
+ echo 'ERROR: cannot chdir into $(distuninstallcheck_dir)' >&2; \
+ exit 1; \
+ }; \
+ test `$(am__distuninstallcheck_listfiles) | wc -l` -eq 0 \
|| { echo "ERROR: files left after uninstall:" ; \
if test -n "$(DESTDIR)"; then \
echo " (check DESTDIR support)"; \
check-am: all-am
check: $(BUILT_SOURCES)
$(MAKE) $(AM_MAKEFLAGS) check-recursive
-all-am: Makefile $(ANSI2KNR) $(LTLIBRARIES) $(HEADERS) config.h
+all-am: Makefile $(LTLIBRARIES) $(HEADERS) config.h
installdirs: installdirs-recursive
installdirs-am:
for dir in "$(DESTDIR)$(libdir)" "$(DESTDIR)$(includedir)" "$(DESTDIR)$(includeexecdir)"; do \
installcheck: installcheck-recursive
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
-test -z "$(BUILT_SOURCES)" || rm -f $(BUILT_SOURCES)
clean: clean-recursive
-clean-am: clean-generic clean-krextra clean-libLTLIBRARIES \
- clean-libtool mostlyclean-am
+clean-am: clean-generic clean-libLTLIBRARIES clean-libtool clean-local \
+ mostlyclean-am
distclean: distclean-recursive
-rm -f $(am__CONFIG_DISTCLEAN_FILES)
-rm -f Makefile
distclean-am: clean-am distclean-compile distclean-generic \
- distclean-hdr distclean-libtool distclean-tags
+ distclean-hdr distclean-libtool distclean-local distclean-tags
dvi: dvi-recursive
mostlyclean: mostlyclean-recursive
-mostlyclean-am: mostlyclean-compile mostlyclean-generic mostlyclean-kr \
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
mostlyclean-libtool
pdf: pdf-recursive
.PHONY: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) CTAGS GTAGS \
all all-am am--refresh check check-am clean clean-generic \
- clean-krextra clean-libLTLIBRARIES clean-libtool ctags \
+ clean-libLTLIBRARIES clean-libtool clean-local ctags \
ctags-recursive dist dist-all dist-bzip2 dist-gzip dist-hook \
- dist-lzma dist-shar dist-tarZ dist-xz dist-zip distcheck \
- distclean distclean-compile distclean-generic distclean-hdr \
- distclean-libtool distclean-tags distcleancheck distdir \
- distuninstallcheck dvi dvi-am html html-am info info-am \
- install install-am install-data install-data-am \
- install-data-hook install-dvi install-dvi-am install-exec \
- install-exec-am install-html install-html-am \
+ dist-lzip dist-lzma dist-shar dist-tarZ dist-xz dist-zip \
+ distcheck distclean distclean-compile distclean-generic \
+ distclean-hdr distclean-libtool distclean-local distclean-tags \
+ distcleancheck distdir distuninstallcheck dvi dvi-am html \
+ html-am info info-am install install-am install-data \
+ install-data-am install-data-hook install-dvi install-dvi-am \
+ install-exec install-exec-am install-html install-html-am \
install-includeHEADERS install-info install-info-am \
install-libLTLIBRARIES install-man \
install-nodist_includeexecHEADERS install-pdf install-pdf-am \
install-ps install-ps-am install-strip installcheck \
installcheck-am installdirs installdirs-am maintainer-clean \
maintainer-clean-generic mostlyclean mostlyclean-compile \
- mostlyclean-generic mostlyclean-kr mostlyclean-libtool pdf \
- pdf-am ps ps-am tags tags-recursive uninstall uninstall-am \
+ mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
+ tags tags-recursive uninstall uninstall-am \
uninstall-includeHEADERS uninstall-libLTLIBRARIES \
uninstall-nodist_includeexecHEADERS
@echo '+-------------------------------------------------------------+'
@echo ''
-mpz/fac_ui.h: gen-fac_ui$(EXEEXT_FOR_BUILD)
- ./gen-fac_ui $(GMP_LIMB_BITS) $(GMP_NAIL_BITS) >mpz/fac_ui.h || (rm -f mpz/fac_ui.h; exit 1)
-
-gen-fac_ui$(EXEEXT_FOR_BUILD): gen-fac_ui$(U_FOR_BUILD).c dumbmp.c
- $(CC_FOR_BUILD) `test -f 'gen-fac_ui$(U_FOR_BUILD).c' || echo '$(srcdir)/'`gen-fac_ui$(U_FOR_BUILD).c -o gen-fac_ui$(EXEEXT_FOR_BUILD)
+fac_table.h: gen-fac$(EXEEXT_FOR_BUILD)
+ ./gen-fac $(GMP_LIMB_BITS) $(GMP_NAIL_BITS) >fac_table.h || (rm -f fac_table.h; exit 1)
-gen-fac_ui_.c: gen-fac_ui.c $(ANSI2KNR)
- $(CPP_FOR_BUILD) `if test -f $(srcdir)/gen-fac_ui.c; then echo $(srcdir)/gen-fac_ui.c; else echo gen-fac_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > gen-fac_ui_.c || rm -f gen-fac_ui_.c
+gen-fac$(EXEEXT_FOR_BUILD): gen-fac$(U_FOR_BUILD).c bootstrap.c
+ $(CC_FOR_BUILD) `test -f 'gen-fac$(U_FOR_BUILD).c' || echo '$(srcdir)/'`gen-fac$(U_FOR_BUILD).c -o gen-fac$(EXEEXT_FOR_BUILD)
fib_table.h: gen-fib$(EXEEXT_FOR_BUILD)
./gen-fib header $(GMP_LIMB_BITS) $(GMP_NAIL_BITS) >fib_table.h || (rm -f fib_table.h; exit 1)
mpn/fib_table.c: gen-fib$(EXEEXT_FOR_BUILD)
./gen-fib table $(GMP_LIMB_BITS) $(GMP_NAIL_BITS) >mpn/fib_table.c || (rm -f mpn/fib_table.c; exit 1)
-gen-fib$(EXEEXT_FOR_BUILD): gen-fib$(U_FOR_BUILD).c dumbmp.c
+gen-fib$(EXEEXT_FOR_BUILD): gen-fib$(U_FOR_BUILD).c bootstrap.c
$(CC_FOR_BUILD) `test -f 'gen-fib$(U_FOR_BUILD).c' || echo '$(srcdir)/'`gen-fib$(U_FOR_BUILD).c -o gen-fib$(EXEEXT_FOR_BUILD)
-gen-fib_.c: gen-fib.c $(ANSI2KNR)
- $(CPP_FOR_BUILD) `if test -f $(srcdir)/gen-fib.c; then echo $(srcdir)/gen-fib.c; else echo gen-fib.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > gen-fib_.c || rm -f gen-fib_.c
-
mp_bases.h: gen-bases$(EXEEXT_FOR_BUILD)
./gen-bases header $(GMP_LIMB_BITS) $(GMP_NAIL_BITS) >mp_bases.h || (rm -f mp_bases.h; exit 1)
mpn/mp_bases.c: gen-bases$(EXEEXT_FOR_BUILD)
./gen-bases table $(GMP_LIMB_BITS) $(GMP_NAIL_BITS) >mpn/mp_bases.c || (rm -f mpn/mp_bases.c; exit 1)
-gen-bases$(EXEEXT_FOR_BUILD): gen-bases$(U_FOR_BUILD).c dumbmp.c
+gen-bases$(EXEEXT_FOR_BUILD): gen-bases$(U_FOR_BUILD).c bootstrap.c
$(CC_FOR_BUILD) `test -f 'gen-bases$(U_FOR_BUILD).c' || echo '$(srcdir)/'`gen-bases$(U_FOR_BUILD).c -o gen-bases$(EXEEXT_FOR_BUILD) $(LIBM_FOR_BUILD)
-gen-bases_.c: gen-bases.c $(ANSI2KNR)
- $(CPP_FOR_BUILD) `if test -f $(srcdir)/gen-bases.c; then echo $(srcdir)/gen-bases.c; else echo gen-bases.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > gen-bases_.c || rm -f gen-bases_.c
-
trialdivtab.h: gen-trialdivtab$(EXEEXT_FOR_BUILD)
./gen-trialdivtab $(GMP_LIMB_BITS) 8000 >trialdivtab.h || (rm -f trialdivtab.h; exit 1)
-gen-trialdivtab$(EXEEXT_FOR_BUILD): gen-trialdivtab$(U_FOR_BUILD).c dumbmp.c
+gen-trialdivtab$(EXEEXT_FOR_BUILD): gen-trialdivtab$(U_FOR_BUILD).c bootstrap.c
$(CC_FOR_BUILD) `test -f 'gen-trialdivtab$(U_FOR_BUILD).c' || echo '$(srcdir)/'`gen-trialdivtab$(U_FOR_BUILD).c -o gen-trialdivtab$(EXEEXT_FOR_BUILD) $(LIBM_FOR_BUILD)
-gen-trialdivtab_.c: gen-trialdivtab.c $(ANSI2KNR)
- $(CPP_FOR_BUILD) `if test -f $(srcdir)/gen-trialdivtab.c; then echo $(srcdir)/gen-trialdivtab.c; else echo gen-trialdivtab.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > gen-trialdivtab_.c || rm -f gen-trialdivtab_.c
+mpn/jacobitab.h: gen-jacobitab$(EXEEXT_FOR_BUILD)
+ ./gen-jacobitab >mpn/jacobitab.h || (rm -f mpn/jacobitab.h; exit 1)
+
+gen-jacobitab$(EXEEXT_FOR_BUILD): gen-jacobitab$(U_FOR_BUILD).c
+ $(CC_FOR_BUILD) `test -f 'gen-jacobitab$(U_FOR_BUILD).c' || echo '$(srcdir)/'`gen-jacobitab$(U_FOR_BUILD).c -o gen-jacobitab$(EXEEXT_FOR_BUILD)
mpn/perfsqr.h: gen-psqr$(EXEEXT_FOR_BUILD)
./gen-psqr $(GMP_LIMB_BITS) $(GMP_NAIL_BITS) >mpn/perfsqr.h || (rm -f mpn/perfsqr.h; exit 1)
-gen-psqr$(EXEEXT_FOR_BUILD): gen-psqr$(U_FOR_BUILD).c dumbmp.c
+gen-psqr$(EXEEXT_FOR_BUILD): gen-psqr$(U_FOR_BUILD).c bootstrap.c
$(CC_FOR_BUILD) `test -f 'gen-psqr$(U_FOR_BUILD).c' || echo '$(srcdir)/'`gen-psqr$(U_FOR_BUILD).c -o gen-psqr$(EXEEXT_FOR_BUILD) $(LIBM_FOR_BUILD)
-gen-psqr_.c: gen-psqr.c $(ANSI2KNR)
- $(CPP_FOR_BUILD) `if test -f $(srcdir)/gen-psqr.c; then echo $(srcdir)/gen-psqr.c; else echo gen-psqr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > gen-psqr_.c || rm -f gen-psqr_.c
-
# Avoid: CVS - cvs directories
# *~ - emacs backups
# .#* - cvs merge originals
dist-hook:
-find $(distdir) \( -name CVS -type d \) -o -name "*~" -o -name ".#*" \
| xargs rm -rf
+ cp "$(srcdir)"/mini-gmp/tests/*.[ch] "$(distdir)/mini-gmp/tests"
# grep -F $(VERSION) $(srcdir)/Makefile.am \
-# | grep -q "^# *$(VERSION) *$(LIBGMP_LT_CURRENT):$(LIBGMP_LT_REVISION):$(LIBGMP_LT_AGE) *$(LIBGMPXX_LT_CURRENT):$(LIBGMPXX_LT_REVISION):$(LIBGMPXX_LT_AGE) *$(LIBMP_LT_CURRENT):$(LIBMP_LT_REVISION):$(LIBMP_LT_AGE)"
+# | grep -q "^# *$(VERSION) *$(LIBGMP_LT_CURRENT):$(LIBGMP_LT_REVISION):$(LIBGMP_LT_AGE) *$(LIBGMPXX_LT_CURRENT):$(LIBGMPXX_LT_REVISION):$(LIBGMPXX_LT_AGE)"
# test -z "`sed -n 's/^# *[0-9]*\.[0-9]*\.[0-9]* *\([0-9]*:[0-9]*:[0-9]*\) *\([0-9]*:[0-9]*:[0-9]*\) *\([0-9]*:[0-9]*:[0-9]*\).*/A\1\nB\2\nC\3/p' $(srcdir)/Makefile.am | grep -v 'A6:3:3\|B3:5:0\|C4:7:1' | sort | uniq -d`"
+.PHONY: check-mini-gmp clean-mini-gmp
+
+check-mini-gmp:
+ abs_srcdir="`cd $(srcdir) && pwd`" ; \
+ $(MKDIR_P) mini-gmp/tests \
+ && cd mini-gmp/tests \
+ && LD_LIBRARY_PATH="../../.libs:$$LD_LIBRARY_PATH" \
+ DYLD_LIBRARY_PATH="../../.libs:$$DYLD_LIBRARY_PATH" \
+ $(MAKE) -f "$$abs_srcdir/mini-gmp/tests/Makefile" \
+ VPATH="$$abs_srcdir/mini-gmp/tests" \
+ srcdir="$$abs_srcdir/mini-gmp/tests" \
+ MINI_GMP_DIR="$$abs_srcdir/mini-gmp" \
+ LDFLAGS="-L../../.libs" \
+ LIBS="-lgmp -lm" \
+ CC="$(CC_FOR_BUILD)" EXTRA_CFLAGS="-g -I../.." check
+
+clean-mini-gmp:
+ if [ -d mini-gmp/tests ] ; then \
+ abs_srcdir="`cd $(srcdir) && pwd`" ; \
+ cd mini-gmp/tests \
+ && $(MAKE) -f "$$abs_srcdir/mini-gmp/tests/Makefile" clean ; \
+ fi
+
+clean-local: clean-mini-gmp
+distclean-local: clean-mini-gmp
+
# Tell versions [3.59,3.63) of GNU make to not export all variables.
# Otherwise a system limit (for SysV at least) may be exceeded.
.NOEXPORT:
Copyright 1996, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008,
-2009, 2010, 2011, 2012 Free Software Foundation, Inc.
+2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
Verbatim copying and distribution of this entire article is permitted in any
medium, provided this notice is preserved.
+Changes between GMP version 5.1.2 and 5.1.3
+
+ BUGS FIXED
+ * The internal functions mpn_sbpi1_div_qr_sec and mpn_sbpi1_div_r_sec could
+ compute garbage with a low probability. They are now rewritten, and the
+ test code has been improved.
+
+ * A bug in the ia64 implementation of mpn_divrem_2, clobbering some
+ callee-save registers, has been fixed. This is an internal
+ function, with the bug manifesting itself as miscomputation in,
+ e.g., mpn_sqrtrem.
+
+ * The documentation now correctly says 'const' for input arguments.
+
+ SPEEDUPS
+ * None.
+
+ FEATURES
+ * None.
+
+ MISC
+ * None.
+
+Changes between GMP version 5.1.1 and 5.1.2
+
+ BUGS FIXED
+ * A bug in mpz_powm_ui triggered by base arguments of at least 15000 decimal
+ digits or mod arguments of at least 7500 decimal digits has been fixed.
+
+ * An AMD Bulldozer specific bug affecting the 64-bit Windows ABI has been
+ fixed. This bug was in a key function (mpn_mul_1) and made both Bulldozer
+ specific builds and fat builds run on Bulldozer completely non-functional.
+
+ SPEEDUPS
+ * None.
+
+ FEATURES
+ * None.
+
+ MISC
+ * Fixes and generalisations to the test suite.
+
+ * Minor portability enhancements.
+
+
+Changes between GMP version 5.1.0 and 5.1.1
+
+ BUGS FIXED
+ * On Windows 64-bit, an error causing link errors about
+ __gmp_binvert_limb_table has been fixed.
+
+ * Aarch64 alias ARM64 support now works.
+
+ * A possible buffer overrun in mpz_ior has been fixed.
+
+ * A rare sign flip in mpz_remove has been fixed.
+
+ * A bug causing problems with mpf numbers with absolute value >= 2^31 has
+ been fixed.
+
+ * Several bugs in mini-gmp have been fixed.
+
+ * A bug caused by automake, related to the 'distcheck' target, has been fixed
+ by upgrading the automake used for GMP release engineering.
+
+ SPEEDUPS
+ * None.
+
+ FEATURES
+ * Preliminary support for the x32 ABI under x86-64.
+
+ MISC
+ * The mini-gmp testsuite now tests the entire set of functions.
+
+ * Various improvements of the GMP testsuite.
+
+
+Changes between GMP version 5.0.* and 5.1.0
+
+ BUGS FIXED
+ * When reading a C++ number (like mpz_class) from an istream reaches the end
+ of the stream, the eofbit is now set.
+
+ * The result sign of mpz_rootrem's remainder is now always correct.
+
+ * The mpz_remove function now handles negative divisors.
+
+ * Contains all fixes from release 5.0.5.
+
+ SPEEDUPS
+ * The n-factorial and n-over-k functions have been reimplemented for great
+ speedups for small and large operands.
+
+ * New subquadratic algorithm for the Kronecker/Jacobi/Legendre symbol.
+
+ * Major speedup for ARM, in particular ARM Cortex-A9 and A15, thanks to broad
+ assembly support.
+
+ * Significant speedup for POWER6 and POWER7 thanks to improved assembly.
+
+ * The performance under M$ Windows' 64-bit ABI has been greatly improved
+ thanks to complete assembly support.
+
+ * Minor speed improvements of many functions and for many platforms.
+
+ FEATURES
+ * Many new CPUs recognised.
+
+ * New functions for multi-factorials, and primorial: mpz_2fac_ui,
+ mpz_mfac_uiui and mpz_primorial_ui.
+
+ * The mpz_powm_sec function now uses side-channel silent division for
+ converting into Montgomery residues.
+
+ * The fat binary mechanism is now more robust in its CPU recognition.
+
+ MISC
+ * Inclusion of assembly code is now controlled by the configure options
+ --enable-assembly and --disable-assembly. The "none" CPU target is gone.
+
+ * In C++, the conversions mpq_class->mpz_class, mpf_class->mpz_class and
+ mpf_class->mpq_class are now explicit.
+
+ * Includes "mini-gmp", a small, portable, but less efficient, implementation
+ of a subset of GMP's mpn and mpz interfaces. Used in GMP bootstrap, but it
+ can also be bundled with applications as a fallback when the real GMP
+ library is unavailable.
+
+ * The ABIs under AIX are no longer called aix32 and aix64, but mode64 and 32.
+ This is more consistent with other powerpc systems.
+
+ * The coverage of the testsuite has been improved, using the lcov tool. See
+ also http://gmplib.org/devel/lcov/.
+
+ * It is now possible to compile GMP using a C++ compiler.
+
+ * K&R C compilers are no longer supported.
+
+ * The BSD MP compatibility functions have been removed.
+
+
Changes between GMP version 5.0.4 and 5.0.5
BUGS FIXED
as one would have hoped CPU traps of some 'illegal instruction' sort).
* A bug affecting recent Intel Sandy Bridge CPUs resulting in configuration
- failures has been fixed,
+ failures has been fixed.
SPEEDUPS
* None.
])
-dnl GMP_H_ANSI
-dnl ----------
-dnl Check whether gmp.h recognises the compiler as ANSI capable.
-
-AC_DEFUN([GMP_H_ANSI],
-[AC_REQUIRE([AC_PROG_CC_STDC])
-case $ac_cv_prog_cc_stdc in
- no)
- ;;
- *)
- AC_TRY_COMPILE(
-GMP_INCLUDE_GMP_H
-[#if ! __GMP_HAVE_PROTOTYPES
-die die die
-#endif
-],,,
- [AC_MSG_WARN([gmp.h doesnt recognise compiler as ANSI, prototypes and "const" will be unavailable])])
- ;;
-esac
-])
-
-
dnl GMP_H_EXTERN_INLINE
dnl -------------------
dnl If the compiler has an "inline" of some sort, check whether the
-# generated automatically by aclocal 1.11.1 -*- Autoconf -*-
+# generated automatically by aclocal 1.11.6 -*- Autoconf -*-
# Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
-# 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
+# 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation,
+# Inc.
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
m4_ifndef([AC_AUTOCONF_VERSION],
[m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl
-m4_if(m4_defn([AC_AUTOCONF_VERSION]), [2.65],,
-[m4_warning([this file was generated for autoconf 2.65.
+m4_if(m4_defn([AC_AUTOCONF_VERSION]), [2.69],,
+[m4_warning([this file was generated for autoconf 2.69.
You have another version of autoconf. It may work, but is not guaranteed to.
If you have problems, you may need to regenerate the build system entirely.
To do so, use the procedure documented by the package, typically `autoreconf'.])])
m4_ifndef([_LT_PROG_FC], [AC_DEFUN([_LT_PROG_FC])])
m4_ifndef([_LT_PROG_CXX], [AC_DEFUN([_LT_PROG_CXX])])
-# Copyright (C) 2002, 2003, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
+# Copyright (C) 2002, 2003, 2005, 2006, 2007, 2008, 2011 Free Software
+# Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
+# serial 1
+
# AM_AUTOMAKE_VERSION(VERSION)
# ----------------------------
# Automake X.Y traces this macro to ensure aclocal.m4 has been
[am__api_version='1.11'
dnl Some users find AM_AUTOMAKE_VERSION and mistake it for a way to
dnl require some minimum version. Point them to the right macro.
-m4_if([$1], [1.11.1], [],
+m4_if([$1], [1.11.6], [],
[AC_FATAL([Do not call $0, use AM_INIT_AUTOMAKE([$1]).])])dnl
])
# Call AM_AUTOMAKE_VERSION and AM_AUTOMAKE_VERSION so they can be traced.
# This function is AC_REQUIREd by AM_INIT_AUTOMAKE.
AC_DEFUN([AM_SET_CURRENT_AUTOMAKE_VERSION],
-[AM_AUTOMAKE_VERSION([1.11.1])dnl
+[AM_AUTOMAKE_VERSION([1.11.6])dnl
m4_ifndef([AC_AUTOCONF_VERSION],
[m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl
_AM_AUTOCONF_VERSION(m4_defn([AC_AUTOCONF_VERSION]))])
# AM_AUX_DIR_EXPAND -*- Autoconf -*-
-# Copyright (C) 2001, 2003, 2005 Free Software Foundation, Inc.
+# Copyright (C) 2001, 2003, 2005, 2011 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
+# serial 1
+
# For projects using AC_CONFIG_AUX_DIR([foo]), Autoconf sets
# $ac_aux_dir to `$srcdir/foo'. In other projects, it is set to
# `$srcdir', `$srcdir/..', or `$srcdir/../..'.
Usually this means the macro was only invoked conditionally.]])
fi])])
-# Copyright (C) 1996, 1997, 2000, 2001, 2003, 2005
-# Free Software Foundation, Inc.
-#
-# This file is free software; the Free Software Foundation
-# gives unlimited permission to copy and/or distribute it,
-# with or without modifications, as long as this notice is preserved.
-
-# serial 8
-
-# AM_CONFIG_HEADER is obsolete. It has been replaced by AC_CONFIG_HEADERS.
-AU_DEFUN([AM_CONFIG_HEADER], [AC_CONFIG_HEADERS($@)])
-
# Do all the work for Automake. -*- Autoconf -*-
# Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
done
echo "timestamp for $_am_arg" >`AS_DIRNAME(["$_am_arg"])`/stamp-h[]$_am_stamp_count])
-# Copyright (C) 2001, 2003, 2005, 2008 Free Software Foundation, Inc.
+# Copyright (C) 2001, 2003, 2005, 2008, 2011 Free Software Foundation,
+# Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
+# serial 1
+
# AM_PROG_INSTALL_SH
# ------------------
# Define $install_sh.
# Add --enable-maintainer-mode option to configure. -*- Autoconf -*-
# From Jim Meyering
-# Copyright (C) 1996, 1998, 2000, 2001, 2002, 2003, 2004, 2005, 2008
-# Free Software Foundation, Inc.
+# Copyright (C) 1996, 1998, 2000, 2001, 2002, 2003, 2004, 2005, 2008,
+# 2011 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
[disable], [m4_define([am_maintainer_other], [enable])],
[m4_define([am_maintainer_other], [enable])
m4_warn([syntax], [unexpected argument to AM@&t@_MAINTAINER_MODE: $1])])
-AC_MSG_CHECKING([whether to am_maintainer_other maintainer-specific portions of Makefiles])
+AC_MSG_CHECKING([whether to enable maintainer-specific portions of Makefiles])
dnl maintainer-mode's default is 'disable' unless 'enable' is passed
AC_ARG_ENABLE([maintainer-mode],
[ --][am_maintainer_other][-maintainer-mode am_maintainer_other make rules and dependencies not useful
fi
])
-# Copyright (C) 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
+# Copyright (C) 2003, 2004, 2005, 2006, 2011 Free Software Foundation,
+# Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
+# serial 1
+
# AM_PROG_MKDIR_P
# ---------------
# Check for `mkdir -p'.
# Helper functions for option handling. -*- Autoconf -*-
-# Copyright (C) 2001, 2002, 2003, 2005, 2008 Free Software Foundation, Inc.
+# Copyright (C) 2001, 2002, 2003, 2005, 2008, 2010 Free Software
+# Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
-# serial 4
+# serial 5
# _AM_MANGLE_OPTION(NAME)
# -----------------------
[[_AM_OPTION_]m4_bpatsubst($1, [[^a-zA-Z0-9_]], [_])])
# _AM_SET_OPTION(NAME)
-# ------------------------------
+# --------------------
# Set option NAME. Presently that only means defining a flag for this option.
AC_DEFUN([_AM_SET_OPTION],
[m4_define(_AM_MANGLE_OPTION([$1]), 1)])
# _AM_SET_OPTIONS(OPTIONS)
-# ----------------------------------
+# ------------------------
# OPTIONS is a space-separated list of Automake options.
AC_DEFUN([_AM_SET_OPTIONS],
[m4_foreach_w([_AM_Option], [$1], [_AM_SET_OPTION(_AM_Option)])])
AC_DEFUN([_AM_IF_OPTION],
[m4_ifset(_AM_MANGLE_OPTION([$1]), [$2], [$3])])
-# Copyright (C) 1996, 1997, 1998, 2000, 2001, 2002, 2003, 2005, 2006
-# Free Software Foundation, Inc.
-#
-# This file is free software; the Free Software Foundation
-# gives unlimited permission to copy and/or distribute it,
-# with or without modifications, as long as this notice is preserved.
-
-# serial 5
-
-AC_DEFUN([AM_C_PROTOTYPES],
-[AC_REQUIRE([AC_C_PROTOTYPES])
-if test "$ac_cv_prog_cc_stdc" != no; then
- U= ANSI2KNR=
-else
- U=_ ANSI2KNR=./ansi2knr
-fi
-# Ensure some checks needed by ansi2knr itself.
-AC_REQUIRE([AC_HEADER_STDC])
-AC_CHECK_HEADERS([string.h])
-AC_SUBST([U])dnl
-AC_SUBST([ANSI2KNR])dnl
-_AM_SUBST_NOTMAKE([ANSI2KNR])dnl
-])
-
-AU_DEFUN([fp_C_PROTOTYPES], [AM_C_PROTOTYPES])
-
# Check to make sure that the build environment is sane. -*- Autoconf -*-
# Copyright (C) 1996, 1997, 2000, 2001, 2003, 2005, 2008
fi
AC_MSG_RESULT(yes)])
-# Copyright (C) 2001, 2003, 2005 Free Software Foundation, Inc.
+# Copyright (C) 2001, 2003, 2005, 2011 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
+# serial 1
+
# AM_PROG_INSTALL_STRIP
# ---------------------
# One issue with vendor `install' (even GNU) is that you can't
INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s"
AC_SUBST([INSTALL_STRIP_PROGRAM])])
-# Copyright (C) 2006, 2008 Free Software Foundation, Inc.
+# Copyright (C) 2006, 2008, 2010 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
-# serial 2
+# serial 3
# _AM_SUBST_NOTMAKE(VARIABLE)
# ---------------------------
AC_DEFUN([_AM_SUBST_NOTMAKE])
# AM_SUBST_NOTMAKE(VARIABLE)
-# ---------------------------
+# --------------------------
# Public sister of _AM_SUBST_NOTMAKE.
AC_DEFUN([AM_SUBST_NOTMAKE], [_AM_SUBST_NOTMAKE($@)])
# Check how to create a tarball. -*- Autoconf -*-
-# Copyright (C) 2004, 2005 Free Software Foundation, Inc.
+# Copyright (C) 2004, 2005, 2012 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# a tarball read from stdin.
# $(am__untar) < result.tar
AC_DEFUN([_AM_PROG_TAR],
-[# Always define AMTAR for backward compatibility.
-AM_MISSING_PROG([AMTAR], [tar])
+[# Always define AMTAR for backward compatibility. Yes, it's still used
+# in the wild :-( We should find a proper way to deprecate it ...
+AC_SUBST([AMTAR], ['$${TAR-tar}'])
m4_if([$1], [v7],
- [am__tar='${AMTAR} chof - "$$tardir"'; am__untar='${AMTAR} xf -'],
+ [am__tar='$${TAR-tar} chof - "$$tardir"' am__untar='$${TAR-tar} xf -'],
[m4_case([$1], [ustar],, [pax],,
[m4_fatal([Unknown tar format])])
AC_MSG_CHECKING([how to create a $1 tar archive])
+++ /dev/null
-.TH ANSI2KNR 1 "19 Jan 1996"
-.SH NAME
-ansi2knr \- convert ANSI C to Kernighan & Ritchie C
-.SH SYNOPSIS
-.I ansi2knr
-[--varargs] input_file [output_file]
-.SH DESCRIPTION
-If no output_file is supplied, output goes to stdout.
-.br
-There are no error messages.
-.sp
-.I ansi2knr
-recognizes function definitions by seeing a non-keyword identifier at the left
-margin, followed by a left parenthesis, with a right parenthesis as the last
-character on the line, and with a left brace as the first token on the
-following line (ignoring possible intervening comments). It will recognize a
-multi-line header provided that no intervening line ends with a left or right
-brace or a semicolon. These algorithms ignore whitespace and comments, except
-that the function name must be the first thing on the line.
-.sp
-The following constructs will confuse it:
-.br
- - Any other construct that starts at the left margin and follows the
-above syntax (such as a macro or function call).
-.br
- - Some macros that tinker with the syntax of the function header.
-.sp
-The --varargs switch is obsolete, and is recognized only for
-backwards compatibility. The present version of
-.I ansi2knr
-will always attempt to convert a ... argument to va_alist and va_dcl.
-.SH AUTHOR
-L. Peter Deutsch <ghost@aladdin.com> wrote the original ansi2knr and
-continues to maintain the current version; most of the code in the current
-version is his work. ansi2knr also includes contributions by Francois
-Pinard <pinard@iro.umontreal.ca> and Jim Avera <jima@netcom.com>.
+++ /dev/null
-/* Copyright (C) 1989, 2000 Aladdin Enterprises. All rights reserved. */
-
-/*$Id$*/
-/* Convert ANSI C function definitions to K&R ("traditional C") syntax */
-
-/*
-ansi2knr is distributed in the hope that it will be useful, but WITHOUT ANY
-WARRANTY. No author or distributor accepts responsibility to anyone for the
-consequences of using it or for whether it serves any particular purpose or
-works at all, unless he says so in writing. Refer to the GNU General Public
-License (the "GPL") for full details.
-
-Everyone is granted permission to copy, modify and redistribute ansi2knr,
-but only under the conditions described in the GPL. A copy of this license
-is supposed to have been given to you along with ansi2knr so you can know
-your rights and responsibilities. It should be in a file named COPYLEFT,
-or, if there is no file named COPYLEFT, a file named COPYING. Among other
-things, the copyright notice and this notice must be preserved on all
-copies.
-
-We explicitly state here what we believe is already implied by the GPL: if
-the ansi2knr program is distributed as a separate set of sources and a
-separate executable file which are aggregated on a storage medium together
-with another program, this in itself does not bring the other program under
-the GPL, nor does the mere fact that such a program or the procedures for
-constructing it invoke the ansi2knr executable bring any other part of the
-program under the GPL.
-*/
-
-/*
- * Usage:
- ansi2knr [--filename FILENAME] [INPUT_FILE [OUTPUT_FILE]]
- * --filename provides the file name for the #line directive in the output,
- * overriding input_file (if present).
- * If no input_file is supplied, input is read from stdin.
- * If no output_file is supplied, output goes to stdout.
- * There are no error messages.
- *
- * ansi2knr recognizes function definitions by seeing a non-keyword
- * identifier at the left margin, followed by a left parenthesis, with a
- * right parenthesis as the last character on the line, and with a left
- * brace as the first token on the following line (ignoring possible
- * intervening comments and/or preprocessor directives), except that a line
- * consisting of only
- * identifier1(identifier2)
- * will not be considered a function definition unless identifier2 is
- * the word "void", and a line consisting of
- * identifier1(identifier2, <<arbitrary>>)
- * will not be considered a function definition.
- * ansi2knr will recognize a multi-line header provided that no intervening
- * line ends with a left or right brace or a semicolon. These algorithms
- * ignore whitespace, comments, and preprocessor directives, except that
- * the function name must be the first thing on the line. The following
- * constructs will confuse it:
- * - Any other construct that starts at the left margin and
- * follows the above syntax (such as a macro or function call).
- * - Some macros that tinker with the syntax of function headers.
- */
-
-/*
- * The original and principal author of ansi2knr is L. Peter Deutsch
- * <ghost@aladdin.com>. Other authors are noted in the change history
- * that follows (in reverse chronological order):
-
- lpd 2000-04-12 backs out Eggert's changes because of bugs:
- - concatlits didn't declare the type of its bufend argument;
- - concatlits didn't recognize when it was inside a comment;
- - scanstring could scan backward past the beginning of the string; when
- - the check for \ + newline in scanstring was unnecessary.
-
- 2000-03-05 Paul Eggert <eggert@twinsun.com>
-
- Add support for concatenated string literals.
- * ansi2knr.c (concatlits): New decl.
- (main): Invoke concatlits to concatenate string literals.
- (scanstring): Handle backslash-newline correctly. Work with
- character constants. Fix bug when scanning backwards through
- backslash-quote. Check for unterminated strings.
- (convert1): Parse character constants, too.
- (appendline, concatlits): New functions.
- * ansi2knr.1: Document this.
-
- lpd 1999-08-17 added code to allow preprocessor directives
- wherever comments are allowed
- lpd 1999-04-12 added minor fixes from Pavel Roskin
- <pavel_roskin@geocities.com> for clean compilation with
- gcc -W -Wall
- lpd 1999-03-22 added hack to recognize lines consisting of
- identifier1(identifier2, xxx) as *not* being procedures
- lpd 1999-02-03 made indentation of preprocessor commands consistent
- lpd 1999-01-28 fixed two bugs: a '/' in an argument list caused an
- endless loop; quoted strings within an argument list
- confused the parser
- lpd 1999-01-24 added a check for write errors on the output,
- suggested by Jim Meyering <meyering@ascend.com>
- lpd 1998-11-09 added further hack to recognize identifier(void)
- as being a procedure
- lpd 1998-10-23 added hack to recognize lines consisting of
- identifier1(identifier2) as *not* being procedures
- lpd 1997-12-08 made input_file optional; only closes input and/or
- output file if not stdin or stdout respectively; prints
- usage message on stderr rather than stdout; adds
- --filename switch (changes suggested by
- <ceder@lysator.liu.se>)
- lpd 1996-01-21 added code to cope with not HAVE_CONFIG_H and with
- compilers that don't understand void, as suggested by
- Tom Lane
- lpd 1996-01-15 changed to require that the first non-comment token
- on the line following a function header be a left brace,
- to reduce sensitivity to macros, as suggested by Tom Lane
- <tgl@sss.pgh.pa.us>
- lpd 1995-06-22 removed #ifndefs whose sole purpose was to define
- undefined preprocessor symbols as 0; changed all #ifdefs
- for configuration symbols to #ifs
- lpd 1995-04-05 changed copyright notice to make it clear that
- including ansi2knr in a program does not bring the entire
- program under the GPL
- lpd 1994-12-18 added conditionals for systems where ctype macros
- don't handle 8-bit characters properly, suggested by
- Francois Pinard <pinard@iro.umontreal.ca>;
- removed --varargs switch (this is now the default)
- lpd 1994-10-10 removed CONFIG_BROKETS conditional
- lpd 1994-07-16 added some conditionals to help GNU `configure',
- suggested by Francois Pinard <pinard@iro.umontreal.ca>;
- properly erase prototype args in function parameters,
- contributed by Jim Avera <jima@netcom.com>;
- correct error in writeblanks (it shouldn't erase EOLs)
- lpd 1989-xx-xx original version
- */
-
-/* Most of the conditionals here are to make ansi2knr work with */
-/* or without the GNU configure machinery. */
-
-#if HAVE_CONFIG_H
-# include <config.h>
-#endif
-
-#include <stdio.h>
-#include <ctype.h>
-
-#if HAVE_CONFIG_H
-
-/*
- For properly autoconfiguring ansi2knr, use AC_CONFIG_HEADER(config.h).
- This will define HAVE_CONFIG_H and so, activate the following lines.
- */
-
-# if STDC_HEADERS || HAVE_STRING_H
-# include <string.h>
-# else
-# include <strings.h>
-# endif
-
-#else /* not HAVE_CONFIG_H */
-
-/* Otherwise do it the hard way */
-
-# ifdef BSD
-# include <strings.h>
-# else
-# ifdef VMS
- extern int strlen(), strncmp();
-# else
-# include <string.h>
-# endif
-# endif
-
-#endif /* not HAVE_CONFIG_H */
-
-#if STDC_HEADERS
-# include <stdlib.h>
-#else
-/*
- malloc and free should be declared in stdlib.h,
- but if you've got a K&R compiler, they probably aren't.
- */
-# ifdef MSDOS
-# include <malloc.h>
-# else
-# ifdef VMS
- extern char *malloc();
- extern void free();
-# else
- extern char *malloc();
- extern int free();
-# endif
-# endif
-
-#endif
-
-/* Define NULL (for *very* old compilers). */
-#ifndef NULL
-# define NULL (0)
-#endif
-
-/*
- * The ctype macros don't always handle 8-bit characters correctly.
- * Compensate for this here.
- */
-#ifdef isascii
-# undef HAVE_ISASCII /* just in case */
-# define HAVE_ISASCII 1
-#else
-#endif
-#if STDC_HEADERS || !HAVE_ISASCII
-# define is_ascii(c) 1
-#else
-# define is_ascii(c) isascii(c)
-#endif
-
-#define is_space(c) (is_ascii(c) && isspace(c))
-#define is_alpha(c) (is_ascii(c) && isalpha(c))
-#define is_alnum(c) (is_ascii(c) && isalnum(c))
-
-/* Scanning macros */
-#define isidchar(ch) (is_alnum(ch) || (ch) == '_')
-#define isidfirstchar(ch) (is_alpha(ch) || (ch) == '_')
-
-/* Forward references */
-char *ppdirforward();
-char *ppdirbackward();
-char *skipspace();
-char *scanstring();
-int writeblanks();
-int test1();
-int convert1();
-
-/* The main program */
-int
-main(argc, argv)
- int argc;
- char *argv[];
-{ FILE *in = stdin;
- FILE *out = stdout;
- char *filename = 0;
- char *program_name = argv[0];
- char *output_name = 0;
-#define bufsize 5000 /* arbitrary size */
- char *buf;
- char *line;
- char *more;
- char *usage =
- "Usage: ansi2knr [--filename FILENAME] [INPUT_FILE [OUTPUT_FILE]]\n";
- /*
- * In previous versions, ansi2knr recognized a --varargs switch.
- * If this switch was supplied, ansi2knr would attempt to convert
- * a ... argument to va_alist and va_dcl; if this switch was not
- * supplied, ansi2knr would simply drop any such arguments.
- * Now, ansi2knr always does this conversion, and we only
- * check for this switch for backward compatibility.
- */
- int convert_varargs = 1;
- int output_error;
-
- while ( argc > 1 && argv[1][0] == '-' ) {
- if ( !strcmp(argv[1], "--varargs") ) {
- convert_varargs = 1;
- argc--;
- argv++;
- continue;
- }
- if ( !strcmp(argv[1], "--filename") && argc > 2 ) {
- filename = argv[2];
- argc -= 2;
- argv += 2;
- continue;
- }
- fprintf(stderr, "%s: Unrecognized switch: %s\n", program_name,
- argv[1]);
- fprintf(stderr, usage);
- exit(1);
- }
- switch ( argc )
- {
- default:
- fprintf(stderr, usage);
- exit(0);
- case 3:
- output_name = argv[2];
- out = fopen(output_name, "w");
- if ( out == NULL ) {
- fprintf(stderr, "%s: Cannot open output file %s\n",
- program_name, output_name);
- exit(1);
- }
- /* falls through */
- case 2:
- in = fopen(argv[1], "r");
- if ( in == NULL ) {
- fprintf(stderr, "%s: Cannot open input file %s\n",
- program_name, argv[1]);
- exit(1);
- }
- if ( filename == 0 )
- filename = argv[1];
- /* falls through */
- case 1:
- break;
- }
- if ( filename )
- fprintf(out, "#line 1 \"%s\"\n", filename);
- buf = malloc(bufsize);
- if ( buf == NULL )
- {
- fprintf(stderr, "Unable to allocate read buffer!\n");
- exit(1);
- }
- line = buf;
- while ( fgets(line, (unsigned)(buf + bufsize - line), in) != NULL )
- {
-test: line += strlen(line);
- switch ( test1(buf) )
- {
- case 2: /* a function header */
- convert1(buf, out, 1, convert_varargs);
- break;
- case 1: /* a function */
- /* Check for a { at the start of the next line. */
- more = ++line;
-f: if ( line >= buf + (bufsize - 1) ) /* overflow check */
- goto wl;
- if ( fgets(line, (unsigned)(buf + bufsize - line), in) == NULL )
- goto wl;
- switch ( *skipspace(ppdirforward(more), 1) )
- {
- case '{':
- /* Definitely a function header. */
- convert1(buf, out, 0, convert_varargs);
- fputs(more, out);
- break;
- case 0:
- /* The next line was blank or a comment: */
- /* keep scanning for a non-comment. */
- line += strlen(line);
- goto f;
- default:
- /* buf isn't a function header, but */
- /* more might be. */
- fputs(buf, out);
- strcpy(buf, more);
- line = buf;
- goto test;
- }
- break;
- case -1: /* maybe the start of a function */
- if ( line != buf + (bufsize - 1) ) /* overflow check */
- continue;
- /* falls through */
- default: /* not a function */
-wl: fputs(buf, out);
- break;
- }
- line = buf;
- }
- if ( line != buf )
- fputs(buf, out);
- free(buf);
- if ( output_name ) {
- output_error = ferror(out);
- output_error |= fclose(out);
- } else { /* out == stdout */
- fflush(out);
- output_error = ferror(out);
- }
- if ( output_error ) {
- fprintf(stderr, "%s: error writing to %s\n", program_name,
- (output_name ? output_name : "stdout"));
- exit(1);
- }
- if ( in != stdin )
- fclose(in);
- return 0;
-}
-
-/*
- * Skip forward or backward over one or more preprocessor directives.
- */
-char *
-ppdirforward(p)
- char *p;
-{
- for (; *p == '#'; ++p) {
- for (; *p != '\r' && *p != '\n'; ++p)
- if (*p == 0)
- return p;
- if (*p == '\r' && p[1] == '\n')
- ++p;
- }
- return p;
-}
-char *
-ppdirbackward(p, limit)
- char *p;
- char *limit;
-{
- char *np = p;
-
- for (;; p = --np) {
- if (*np == '\n' && np[-1] == '\r')
- --np;
- for (; np > limit && np[-1] != '\r' && np[-1] != '\n'; --np)
- if (np[-1] == 0)
- return np;
- if (*np != '#')
- return p;
- }
-}
-
-/*
- * Skip over whitespace, comments, and preprocessor directives,
- * in either direction.
- */
-char *
-skipspace(p, dir)
- char *p;
- int dir; /* 1 for forward, -1 for backward */
-{
- for ( ; ; ) {
- while ( is_space(*p) )
- p += dir;
- if ( !(*p == '/' && p[dir] == '*') )
- break;
- p += dir; p += dir;
- while ( !(*p == '*' && p[dir] == '/') ) {
- if ( *p == 0 )
- return p; /* multi-line comment?? */
- p += dir;
- }
- p += dir; p += dir;
- }
- return p;
-}
-
-/* Scan over a quoted string, in either direction. */
-char *
-scanstring(p, dir)
- char *p;
- int dir;
-{
- for (p += dir; ; p += dir)
- if (*p == '"' && p[-dir] != '\\')
- return p + dir;
-}
-
-/*
- * Write blanks over part of a string.
- * Don't overwrite end-of-line characters.
- */
-int
-writeblanks(start, end)
- char *start;
- char *end;
-{ char *p;
- for ( p = start; p < end; p++ )
- if ( *p != '\r' && *p != '\n' )
- *p = ' ';
- return 0;
-}
-
-/*
- * Test whether the string in buf is a function definition.
- * The string may contain and/or end with a newline.
- * Return as follows:
- * 0 - definitely not a function definition;
- * 1 - definitely a function definition;
- * 2 - definitely a function prototype (NOT USED);
- * -1 - may be the beginning of a function definition,
- * append another line and look again.
- * The reason we don't attempt to convert function prototypes is that
- * Ghostscript's declaration-generating macros look too much like
- * prototypes, and confuse the algorithms.
- */
-int
-test1(buf)
- char *buf;
-{ char *p = buf;
- char *bend;
- char *endfn;
- int contin;
-
- if ( !isidfirstchar(*p) )
- return 0; /* no name at left margin */
- bend = skipspace(ppdirbackward(buf + strlen(buf) - 1, buf), -1);
- switch ( *bend )
- {
- case ';': contin = 0 /*2*/; break;
- case ')': contin = 1; break;
- case '{': return 0; /* not a function */
- case '}': return 0; /* not a function */
- default: contin = -1;
- }
- while ( isidchar(*p) )
- p++;
- endfn = p;
- p = skipspace(p, 1);
- if ( *p++ != '(' )
- return 0; /* not a function */
- p = skipspace(p, 1);
- if ( *p == ')' )
- return 0; /* no parameters */
- /* Check that the apparent function name isn't a keyword. */
- /* We only need to check for keywords that could be followed */
- /* by a left parenthesis (which, unfortunately, is most of them). */
- { static char *words[] =
- { "asm", "auto", "case", "char", "const", "double",
- "extern", "float", "for", "if", "int", "long",
- "register", "return", "short", "signed", "sizeof",
- "static", "switch", "typedef", "unsigned",
- "void", "volatile", "while", 0
- };
- char **key = words;
- char *kp;
- unsigned len = endfn - buf;
-
- while ( (kp = *key) != 0 )
- { if ( strlen(kp) == len && !strncmp(kp, buf, len) )
- return 0; /* name is a keyword */
- key++;
- }
- }
- {
- char *id = p;
- int len;
- /*
- * Check for identifier1(identifier2) and not
- * identifier1(void), or identifier1(identifier2, xxxx).
- */
-
- while ( isidchar(*p) )
- p++;
- len = p - id;
- p = skipspace(p, 1);
- if (*p == ',' ||
- (*p == ')' && (len != 4 || strncmp(id, "void", 4)))
- )
- return 0; /* not a function */
- }
- /*
- * If the last significant character was a ), we need to count
- * parentheses, because it might be part of a formal parameter
- * that is a procedure.
- */
- if (contin > 0) {
- int level = 0;
-
- for (p = skipspace(buf, 1); *p; p = skipspace(p + 1, 1))
- level += (*p == '(' ? 1 : *p == ')' ? -1 : 0);
- if (level > 0)
- contin = -1;
- }
- return contin;
-}
-
-/* Convert a recognized function definition or header to K&R syntax. */
-int
-convert1(buf, out, header, convert_varargs)
- char *buf;
- FILE *out;
- int header; /* Boolean */
- int convert_varargs; /* Boolean */
-{ char *endfn;
- char *p;
- /*
- * The breaks table contains pointers to the beginning and end
- * of each argument.
- */
- char **breaks;
- unsigned num_breaks = 2; /* for testing */
- char **btop;
- char **bp;
- char **ap;
- char *vararg = 0;
-
- /* Pre-ANSI implementations don't agree on whether strchr */
- /* is called strchr or index, so we open-code it here. */
- for ( endfn = buf; *(endfn++) != '('; )
- ;
-top: p = endfn;
- breaks = (char **)malloc(sizeof(char *) * num_breaks * 2);
- if ( breaks == NULL )
- { /* Couldn't allocate break table, give up */
- fprintf(stderr, "Unable to allocate break table!\n");
- fputs(buf, out);
- return -1;
- }
- btop = breaks + num_breaks * 2 - 2;
- bp = breaks;
- /* Parse the argument list */
- do
- { int level = 0;
- char *lp = NULL;
- char *rp = NULL;
- char *end = NULL;
-
- if ( bp >= btop )
- { /* Filled up break table. */
- /* Allocate a bigger one and start over. */
- free((char *)breaks);
- num_breaks <<= 1;
- goto top;
- }
- *bp++ = p;
- /* Find the end of the argument */
- for ( ; end == NULL; p++ )
- { switch(*p)
- {
- case ',':
- if ( !level ) end = p;
- break;
- case '(':
- if ( !level ) lp = p;
- level++;
- break;
- case ')':
- if ( --level < 0 ) end = p;
- else rp = p;
- break;
- case '/':
- if (p[1] == '*')
- p = skipspace(p, 1) - 1;
- break;
- case '"':
- p = scanstring(p, 1) - 1;
- break;
- default:
- ;
- }
- }
- /* Erase any embedded prototype parameters. */
- if ( lp && rp )
- writeblanks(lp + 1, rp);
- p--; /* back up over terminator */
- /* Find the name being declared. */
- /* This is complicated because of procedure and */
- /* array modifiers. */
- for ( ; ; )
- { p = skipspace(p - 1, -1);
- switch ( *p )
- {
- case ']': /* skip array dimension(s) */
- case ')': /* skip procedure args OR name */
- { int level = 1;
- while ( level )
- switch ( *--p )
- {
- case ']': case ')':
- level++;
- break;
- case '[': case '(':
- level--;
- break;
- case '/':
- if (p > buf && p[-1] == '*')
- p = skipspace(p, -1) + 1;
- break;
- case '"':
- p = scanstring(p, -1) + 1;
- break;
- default: ;
- }
- }
- if ( *p == '(' && *skipspace(p + 1, 1) == '*' )
- { /* We found the name being declared */
- while ( !isidfirstchar(*p) )
- p = skipspace(p, 1) + 1;
- goto found;
- }
- break;
- default:
- goto found;
- }
- }
-found: if ( *p == '.' && p[-1] == '.' && p[-2] == '.' )
- { if ( convert_varargs )
- { *bp++ = "va_alist";
- vararg = p-2;
- }
- else
- { p++;
- if ( bp == breaks + 1 ) /* sole argument */
- writeblanks(breaks[0], p);
- else
- writeblanks(bp[-1] - 1, p);
- bp--;
- }
- }
- else
- { while ( isidchar(*p) ) p--;
- *bp++ = p+1;
- }
- p = end;
- }
- while ( *p++ == ',' );
- *bp = p;
- /* Make a special check for 'void' arglist */
- if ( bp == breaks+2 )
- { p = skipspace(breaks[0], 1);
- if ( !strncmp(p, "void", 4) )
- { p = skipspace(p+4, 1);
- if ( p == breaks[2] - 1 )
- { bp = breaks; /* yup, pretend arglist is empty */
- writeblanks(breaks[0], p + 1);
- }
- }
- }
- /* Put out the function name and left parenthesis. */
- p = buf;
- while ( p != endfn ) putc(*p, out), p++;
- /* Put out the declaration. */
- if ( header )
- { fputs(");", out);
- for ( p = breaks[0]; *p; p++ )
- if ( *p == '\r' || *p == '\n' )
- putc(*p, out);
- }
- else
- { for ( ap = breaks+1; ap < bp; ap += 2 )
- { p = *ap;
- while ( isidchar(*p) )
- putc(*p, out), p++;
- if ( ap < bp - 1 )
- fputs(", ", out);
- }
- fputs(") ", out);
- /* Put out the argument declarations */
- for ( ap = breaks+2; ap <= bp; ap += 2 )
- (*ap)[-1] = ';';
- if ( vararg != 0 )
- { *vararg = 0;
- fputs(breaks[0], out); /* any prior args */
- fputs("va_dcl", out); /* the final arg */
- fputs(bp[0], out);
- }
- else
- fputs(breaks[0], out);
- }
- free((char *)breaks);
- return 0;
-}
--- /dev/null
+/* Functions needed for bootstrapping the gmp build, based on mini-gmp.
+
+Copyright 2001, 2002, 2004, 2011, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+
+#include "mini-gmp/mini-gmp.c"
+
+#define MIN(l,o) ((l) < (o) ? (l) : (o))
+#define PTR(x) ((x)->_mp_d)
+#define SIZ(x) ((x)->_mp_size)
+
+#define xmalloc gmp_default_alloc
+/* Return 1 if t is prime and 0 otherwise.  Values below 32 are answered
+   from a bit mask; larger values are checked by trial division.  */
+int
+isprime (unsigned long int t)
+{
+ unsigned long int q, r, d;
+ /* Bit i of 0xa08a28ac is set exactly for the primes i below 32
+    (2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31).  */
+ if (t < 32)
+ return (0xa08a28acUL >> t) & 1;
+ if ((t & 1) == 0)
+ return 0;
+ /* The small odd primes 3, 5 and 7 are tested one by one.  */
+ if (t % 3 == 0)
+ return 0;
+ if (t % 5 == 0)
+ return 0;
+ if (t % 7 == 0)
+ return 0;
+ /* Trial-divide by d = 11, 13, 17, 19, 23, 25, ... (increments of 2 and
+    4 alternate).  A quotient q smaller than d means d*d > t, so t is
+    reported prime; a zero remainder means d divides t, so the loop is
+    left and 0 is returned below.  */
+ for (d = 11;;)
+ {
+ q = t / d;
+ r = t - q * d;
+ if (q < d)
+ return 1;
+ if (r == 0)
+ break;
+ d += 2;
+ q = t / d;
+ r = t - q * d;
+ if (q < d)
+ return 1;
+ if (r == 0)
+ break;
+ d += 4;
+ }
+ return 0;
+}
+/* Return the smallest e >= 0 such that 2^e >= n, i.e. the base-2
+   logarithm of n rounded up.  n must be at least 1 (asserted).
+   NOTE(review): 1 << e is an int shift, so an n larger than the biggest
+   power of two representable in int would overflow it -- confirm that
+   callers only pass small values.  */
+int
+log2_ceil (int n)
+{
+ int e;
+ assert (n >= 1);
+ for (e = 0; ; e++)
+ if ((1 << e) >= n)
+ break;
+ return e;
+}
+
+/* Set inv to the inverse of d, in the style of invert_limb, ie. for
+ udiv_qrnnd_preinv.
+
+ With norm = numb_bits - (bit length of d), the value stored is
+ floor (2^(2*numb_bits - norm) / d) - 2^numb_bits.
+
+ d must be positive and must fit in numb_bits bits; both conditions
+ are asserted.  The temporary t is cleared before returning. */
+void
+mpz_preinv_invert (mpz_t inv, mpz_t d, int numb_bits)
+{
+ mpz_t t;
+ int norm;
+ assert (SIZ(d) > 0);
+
+ norm = numb_bits - mpz_sizeinbase (d, 2);
+ assert (norm >= 0);
+ mpz_init_set_ui (t, 1L);
+ mpz_mul_2exp (t, t, 2*numb_bits - norm);
+ mpz_tdiv_q (inv, t, d);
+ mpz_set_ui (t, 1L);
+ mpz_mul_2exp (t, t, numb_bits);
+ mpz_sub (inv, inv, t);
+
+ mpz_clear (t);
+}
+
+/* Calculate r satisfying r*a == 1 mod 2^n.
+
+ a must be odd (asserted).  The inverse is built one bit at a time,
+ starting from 1: whenever bit i of inv*a is set, bit i of inv is set
+ as well, which adds a*2^i to the product and, a being odd, clears
+ that bit of the product without touching the lower ones.  r is
+ written only once the result has been checked, after the last use
+ of a. */
+void
+mpz_invert_2exp (mpz_t r, mpz_t a, unsigned long n)
+{
+ unsigned long i;
+ mpz_t inv, prod;
+
+ assert (mpz_odd_p (a));
+
+ mpz_init_set_ui (inv, 1L);
+ mpz_init (prod);
+
+ for (i = 1; i < n; i++)
+ {
+ mpz_mul (prod, inv, a);
+ if (mpz_tstbit (prod, i) != 0)
+ mpz_setbit (inv, i);
+ }
+ /* Self-check: the low n bits of inv*a must equal 1.  */
+ mpz_mul (prod, inv, a);
+ mpz_tdiv_r_2exp (prod, prod, n);
+ assert (mpz_cmp_ui (prod, 1L) == 0);
+
+ mpz_set (r, inv);
+
+ mpz_clear (inv);
+ mpz_clear (prod);
+}
+
+/* Calculate r satisfying r*a == 1 mod 2^n, for an unsigned long a.
+ a is copied into a temporary mpz_t and the work is done by
+ mpz_invert_2exp, which asserts that a is odd. */
+void
+mpz_invert_ui_2exp (mpz_t r, unsigned long a, unsigned long n)
+{
+ mpz_t az;
+ mpz_init_set_ui (az, a);
+ mpz_invert_2exp (r, az, n);
+ mpz_clear (az);
+}
# GMP config.guess wrapper.
-# Copyright 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2008, 2011 Free Software
-# Foundation, Inc.
+# Copyright 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2008, 2011, 2012 Free
+# Software Foundation, Inc.
#
# This file is part of the GNU MP Library.
#
rm -f $dummy.s $dummy.o $dummy
;;
+arm*-*-*)
+ cpu_code=`sed -n 's/^CPU part.*\(0x.*\)$/\1/p' /proc/cpuinfo 2>/dev/null`
+ case "$cpu_code" in
+ 0x210) exact_cpu="armxscale";; # v5 pxa250
+ 0x290) exact_cpu="armxscale";; # v5 pxa260
+ 0x2d0) exact_cpu="armxscale";; # v5 pxa255
+ 0x2d0) exact_cpu="armxscale";; # v5 pxa261
+ 0x2d0) exact_cpu="armxscale";; # v5 pxa262
+ 0x411) exact_cpu="armxscale";; # v5 pxa270
+ 0x915) exact_cpu="armti915t";; # v?
+ 0x925) exact_cpu="armti925t";; # v?
+ 0x926) exact_cpu="arm926";; # v5
+ 0x946) exact_cpu="arm946";; # v5
+ 0x966) exact_cpu="arm966";; # v5
+ 0xa11) exact_cpu="armsa1";; # v4 strongarm/sa1100
+ 0xa26) exact_cpu="arm1026";; # v5
+ 0xb02) exact_cpu="arm11mpcore";; # v6
+ 0xb11) exact_cpu="armsa1";; # v4 strongarm/sa1110
+ 0xb36) exact_cpu="arm1136";; # v6
+ 0xb56) exact_cpu="arm1156";; # v6t2
+ 0xb76) exact_cpu="arm1176";; # v6
+ 0xc05) exact_cpu="armcortexa5";; # v7a
+ 0xc08) exact_cpu="armcortexa8";; # v7a
+ 0xc09) exact_cpu="armcortexa9";; # v7a
+ 0xc0f) exact_cpu="armcortexa15";; # v7a
+ 0xc14) exact_cpu="armcortexr4";; # v7r
+ 0xc15) exact_cpu="armcortexr5";; # v7r
+ 0xc23) exact_cpu="armcortexm3";; # v7m
+ esac
+ ;;
+
ia64*-*-*)
# CPUID[3] bits 24 to 31 is the processor family. itanium2 is documented
# as 0x1f, plain itanium has been seen returning 0x07 on two systems, but
else if (model == 0x2d) cpu_64bit = 1, modelstr = "coreisbr"; /* SBC-EP */
else if (model == 0x2e) cpu_64bit = 1, modelstr = "coreinhm"; /* NHM Beckton */
else if (model == 0x2f) cpu_64bit = 1, modelstr = "coreiwsm"; /* WSM Eagleton */
+ else if (model == 0x3a) cpu_64bit = 1, modelstr = "coreisbr"; /* IBR */
+ else if (model == 0x3c) cpu_64bit = 1, modelstr = "coreisbr"; /* Haswell */
+ else if (model == 0x36) cpu_64bit = 1, modelstr = "atom"; /* Cedarview/Saltwell */
else cpu_64bit = 1, modelstr = "corei"; /* default */
break;
case 15:
case 21: /* Bulldozer */
cpu_64bit = 1, modelstr = "bulldozer";
break;
- case 22: /* AMD Internal, assume future bulldozer */
- cpu_64bit = 1, modelstr = "bulldozer";
+ case 22: /* jaguar, an improved bobcat */
+ cpu_64bit = 1, modelstr = "bobcat";
break;
}
}
-/* config.in. Generated from configure.in by autoheader. */
+/* config.in. Generated from configure.ac by autoheader. */
/*
Copyright 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
-2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
+2007, 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
/* Define to 1 if you have the `clock' function. */
#undef HAVE_CLOCK
-/* Define to 1 if you have the `clock_gettime' function. */
+/* Define to 1 if you have the `clock_gettime' function */
#undef HAVE_CLOCK_GETTIME
/* Define to 1 if you have the `cputime' function. */
#undef HAVE_NATIVE_mpn_add_n_sub_n
#undef HAVE_NATIVE_mpn_add_nc
#undef HAVE_NATIVE_mpn_addaddmul_1msb0
+#undef HAVE_NATIVE_mpn_addcnd_n
#undef HAVE_NATIVE_mpn_addlsh1_n
#undef HAVE_NATIVE_mpn_addlsh2_n
#undef HAVE_NATIVE_mpn_addlsh_n
+#undef HAVE_NATIVE_mpn_addlsh1_nc
+#undef HAVE_NATIVE_mpn_addlsh2_nc
+#undef HAVE_NATIVE_mpn_addlsh_nc
+#undef HAVE_NATIVE_mpn_addlsh1_n_ip1
+#undef HAVE_NATIVE_mpn_addlsh2_n_ip1
+#undef HAVE_NATIVE_mpn_addlsh_n_ip1
+#undef HAVE_NATIVE_mpn_addlsh1_nc_ip1
+#undef HAVE_NATIVE_mpn_addlsh2_nc_ip1
+#undef HAVE_NATIVE_mpn_addlsh_nc_ip1
+#undef HAVE_NATIVE_mpn_addlsh1_n_ip2
+#undef HAVE_NATIVE_mpn_addlsh2_n_ip2
+#undef HAVE_NATIVE_mpn_addlsh_n_ip2
+#undef HAVE_NATIVE_mpn_addlsh1_nc_ip2
+#undef HAVE_NATIVE_mpn_addlsh2_nc_ip2
+#undef HAVE_NATIVE_mpn_addlsh_nc_ip2
#undef HAVE_NATIVE_mpn_addmul_1c
#undef HAVE_NATIVE_mpn_addmul_2
#undef HAVE_NATIVE_mpn_addmul_3
#undef HAVE_NATIVE_mpn_addmul_6
#undef HAVE_NATIVE_mpn_addmul_7
#undef HAVE_NATIVE_mpn_addmul_8
+#undef HAVE_NATIVE_mpn_addmul_2s
#undef HAVE_NATIVE_mpn_and_n
#undef HAVE_NATIVE_mpn_andn_n
#undef HAVE_NATIVE_mpn_bdiv_dbm1c
#undef HAVE_NATIVE_mpn_com
#undef HAVE_NATIVE_mpn_copyd
#undef HAVE_NATIVE_mpn_copyi
+#undef HAVE_NATIVE_mpn_div_qr_2
#undef HAVE_NATIVE_mpn_divexact_1
#undef HAVE_NATIVE_mpn_divexact_by3c
#undef HAVE_NATIVE_mpn_divrem_1
#undef HAVE_NATIVE_mpn_mul_2
#undef HAVE_NATIVE_mpn_mul_3
#undef HAVE_NATIVE_mpn_mul_4
+#undef HAVE_NATIVE_mpn_mul_5
+#undef HAVE_NATIVE_mpn_mul_6
#undef HAVE_NATIVE_mpn_mul_basecase
#undef HAVE_NATIVE_mpn_nand_n
#undef HAVE_NATIVE_mpn_nior_n
#undef HAVE_NATIVE_mpn_rsblsh1_n
#undef HAVE_NATIVE_mpn_rsblsh2_n
#undef HAVE_NATIVE_mpn_rsblsh_n
+#undef HAVE_NATIVE_mpn_rsblsh1_nc
+#undef HAVE_NATIVE_mpn_rsblsh2_nc
+#undef HAVE_NATIVE_mpn_rsblsh_nc
#undef HAVE_NATIVE_mpn_rsh1add_n
#undef HAVE_NATIVE_mpn_rsh1add_nc
#undef HAVE_NATIVE_mpn_rsh1sub_n
#undef HAVE_NATIVE_mpn_rshift
#undef HAVE_NATIVE_mpn_sqr_basecase
#undef HAVE_NATIVE_mpn_sqr_diagonal
+#undef HAVE_NATIVE_mpn_sqr_diag_addlsh1
#undef HAVE_NATIVE_mpn_sub_n
#undef HAVE_NATIVE_mpn_sub_nc
+#undef HAVE_NATIVE_mpn_subcnd_n
#undef HAVE_NATIVE_mpn_sublsh1_n
#undef HAVE_NATIVE_mpn_sublsh2_n
#undef HAVE_NATIVE_mpn_sublsh_n
+#undef HAVE_NATIVE_mpn_sublsh1_nc
+#undef HAVE_NATIVE_mpn_sublsh2_nc
+#undef HAVE_NATIVE_mpn_sublsh_nc
+#undef HAVE_NATIVE_mpn_sublsh1_n_ip1
+#undef HAVE_NATIVE_mpn_sublsh2_n_ip1
+#undef HAVE_NATIVE_mpn_sublsh_n_ip1
+#undef HAVE_NATIVE_mpn_sublsh1_nc_ip1
+#undef HAVE_NATIVE_mpn_sublsh2_nc_ip1
+#undef HAVE_NATIVE_mpn_sublsh_nc_ip1
#undef HAVE_NATIVE_mpn_submul_1c
+#undef HAVE_NATIVE_mpn_tabselect
#undef HAVE_NATIVE_mpn_udiv_qrnnd
#undef HAVE_NATIVE_mpn_udiv_qrnnd_r
#undef HAVE_NATIVE_mpn_umul_ppmm
/* Define to 1 if you have the `vsnprintf' function and it works properly. */
#undef HAVE_VSNPRINTF
+/* Define to 1 for Windows/64 */
+#undef HOST_DOS64
+
/* Assembler local label prefix */
#undef LSYM_PREFIX
/* Define to the version of this package. */
#undef PACKAGE_VERSION
-/* Define to 1 if the C compiler supports function prototypes. */
-#undef PROTOTYPES
-
/* Define as the return type of signal handlers (`int' or `void'). */
#undef RETSIGTYPE
/* Version number of package */
#undef VERSION
+/* Defined to 1 as per --enable-assembly */
+#undef WANT_ASSEMBLY
+
/* Define to 1 to enable ASSERT checking, per --enable-assert */
#undef WANT_ASSERT
`char[]'. */
#undef YYTEXT_POINTER
-/* Define like PROTOTYPES; this can be used by system headers. */
-#undef __PROTOTYPES
-
/* Define to `__inline__' or `__inline' if that's what the C compiler
calls it, or to nothing if 'inline' is not supported under any name. */
#ifndef __cplusplus
z900esa | z990esa | z9esa | z10esa | z196esa)
test_cpu=s390;;
+armxscale | armxscale | armxscale | armxscale | armxscale | armxscale | \
+armti915t | armti925t | arm926 | arm946 | arm966 | armsa1 | arm1026 | \
+arm11mpcore | armsa1 | arm1136 | arm1156 | arm1176 | armcortexa5 | \
+armcortexa8 | armcortexa9 | armcortexa15 | armcortexr4 | armcortexr5 | \
+armcortexm3)
+ test_cpu="arm";;
+
*)
# Don't need or want to change the given name, just run configfsf.sub
$SHELL $configfsf_sub "$given_full"
# Attempt to guess a canonical system name.
# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
# 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
-# 2011 Free Software Foundation, Inc.
+# 2011, 2012 Free Software Foundation, Inc.
-timestamp='2011-02-02'
+timestamp='2012-09-25'
# This file is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
-# 02110-1301, USA.
+# along with this program; if not, see <http://www.gnu.org/licenses/>.
#
# As a special exception to the GNU General Public License, if you
# distribute this file as part of a program that contains a
Originally written by Per Bothner.
Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
-2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free
-Software Foundation, Inc.
+2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
+Free Software Foundation, Inc.
This is free software; see the source for copying conditions. There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in
*:NetBSD:*:*)
# NetBSD (nbsd) targets should (where applicable) match one or
- # more of the tupples: *-*-netbsdelf*, *-*-netbsdaout*,
+ # more of the tuples: *-*-netbsdelf*, *-*-netbsdaout*,
# *-*-netbsdecoff* and *-*-netbsd*. For targets that recently
# switched to ELF, *-*-netbsd* would select the old
# object file format. This provides both forward
# CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM is used.
echo "${machine}-${os}${release}"
exit ;;
+ *:Bitrig:*:*)
+ UNAME_MACHINE_ARCH=`arch | sed 's/Bitrig.//'`
+ echo ${UNAME_MACHINE_ARCH}-unknown-bitrig${UNAME_RELEASE}
+ exit ;;
*:OpenBSD:*:*)
UNAME_MACHINE_ARCH=`arch | sed 's/OpenBSD.//'`
echo ${UNAME_MACHINE_ARCH}-unknown-openbsd${UNAME_RELEASE}
arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*)
echo arm-acorn-riscix${UNAME_RELEASE}
exit ;;
- arm:riscos:*:*|arm:RISCOS:*:*)
+ arm*:riscos:*:*|arm*:RISCOS:*:*)
echo arm-unknown-riscos
exit ;;
SR2?01:HI-UX/MPP:*:* | SR8000:HI-UX/MPP:*:*)
echo ${UNAME_MACHINE}-unknown-bsdi${UNAME_RELEASE}
exit ;;
*:FreeBSD:*:*)
- case ${UNAME_MACHINE} in
- pc98)
- echo i386-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;;
+ UNAME_PROCESSOR=`/usr/bin/uname -p`
+ case ${UNAME_PROCESSOR} in
amd64)
echo x86_64-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;;
*)
- echo ${UNAME_MACHINE}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;;
+ echo ${UNAME_PROCESSOR}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` ;;
esac
exit ;;
i*:CYGWIN*:*)
echo ${UNAME_MACHINE}-pc-cygwin
exit ;;
+ *:MINGW64*:*)
+ echo ${UNAME_MACHINE}-pc-mingw64
+ exit ;;
*:MINGW*:*)
echo ${UNAME_MACHINE}-pc-mingw32
exit ;;
+ i*:MSYS*:*)
+ echo ${UNAME_MACHINE}-pc-msys
+ exit ;;
i*:windows32*:*)
# uname -m includes "-pc" on this system.
echo ${UNAME_MACHINE}-mingw32
i*86:Minix:*:*)
echo ${UNAME_MACHINE}-pc-minix
exit ;;
+ aarch64:Linux:*:*)
+ echo ${UNAME_MACHINE}-unknown-linux-gnu
+ exit ;;
+ aarch64_be:Linux:*:*)
+ UNAME_MACHINE=aarch64_be
+ echo ${UNAME_MACHINE}-unknown-linux-gnu
+ exit ;;
alpha:Linux:*:*)
case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in
EV5) UNAME_MACHINE=alphaev5 ;;
then
echo ${UNAME_MACHINE}-unknown-linux-gnu
else
- echo ${UNAME_MACHINE}-unknown-linux-gnueabi
+ if echo __ARM_PCS_VFP | $CC_FOR_BUILD -E - 2>/dev/null \
+ | grep -q __ARM_PCS_VFP
+ then
+ echo ${UNAME_MACHINE}-unknown-linux-gnueabi
+ else
+ echo ${UNAME_MACHINE}-unknown-linux-gnueabihf
+ fi
fi
exit ;;
avr32*:Linux:*:*)
echo ${UNAME_MACHINE}-unknown-linux-gnu
exit ;;
cris:Linux:*:*)
- echo cris-axis-linux-gnu
+ echo ${UNAME_MACHINE}-axis-linux-gnu
exit ;;
crisv32:Linux:*:*)
- echo crisv32-axis-linux-gnu
+ echo ${UNAME_MACHINE}-axis-linux-gnu
exit ;;
frv:Linux:*:*)
- echo frv-unknown-linux-gnu
+ echo ${UNAME_MACHINE}-unknown-linux-gnu
+ exit ;;
+ hexagon:Linux:*:*)
+ echo ${UNAME_MACHINE}-unknown-linux-gnu
exit ;;
i*86:Linux:*:*)
LIBC=gnu
test x"${CPU}" != x && { echo "${CPU}-unknown-linux-gnu"; exit; }
;;
or32:Linux:*:*)
- echo or32-unknown-linux-gnu
+ echo ${UNAME_MACHINE}-unknown-linux-gnu
exit ;;
padre:Linux:*:*)
echo sparc-unknown-linux-gnu
echo ${UNAME_MACHINE}-unknown-linux-gnu
exit ;;
tile*:Linux:*:*)
- echo ${UNAME_MACHINE}-tilera-linux-gnu
+ echo ${UNAME_MACHINE}-unknown-linux-gnu
exit ;;
vax:Linux:*:*)
echo ${UNAME_MACHINE}-dec-linux-gnu
exit ;;
x86_64:Linux:*:*)
- echo x86_64-unknown-linux-gnu
+ echo ${UNAME_MACHINE}-unknown-linux-gnu
exit ;;
xtensa*:Linux:*:*)
echo ${UNAME_MACHINE}-unknown-linux-gnu
BePC:Haiku:*:*) # Haiku running on Intel PC compatible.
echo i586-pc-haiku
exit ;;
+ x86_64:Haiku:*:*)
+ echo x86_64-unknown-haiku
+ exit ;;
SX-4:SUPER-UX:*:*)
echo sx4-nec-superux${UNAME_RELEASE}
exit ;;
NEO-?:NONSTOP_KERNEL:*:*)
echo neo-tandem-nsk${UNAME_RELEASE}
exit ;;
- NSE-?:NONSTOP_KERNEL:*:*)
+ NSE-*:NONSTOP_KERNEL:*:*)
echo nse-tandem-nsk${UNAME_RELEASE}
exit ;;
NSR-?:NONSTOP_KERNEL:*:*)
i*86:AROS:*:*)
echo ${UNAME_MACHINE}-pc-aros
exit ;;
+ x86_64:VMkernel:*:*)
+ echo ${UNAME_MACHINE}-unknown-esx
+ exit ;;
esac
-#echo '(No uname command or uname output not recognized.)' 1>&2
-#echo "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" 1>&2
-
eval $set_cc_for_build
cat >$dummy.c <<EOF
#ifdef _SEQUENT_
# Configuration validation subroutine script.
# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
# 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
-# 2011 Free Software Foundation, Inc.
+# 2011, 2012 Free Software Foundation, Inc.
-timestamp='2011-03-23'
+timestamp='2012-12-06'
# This file is (in principle) common to ALL GNU software.
# The presence of a machine in this file suggests that SOME GNU software
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
-# 02110-1301, USA.
+# along with this program; if not, see <http://www.gnu.org/licenses/>.
#
# As a special exception to the GNU General Public License, if you
# distribute this file as part of a program that contains a
GNU config.sub ($timestamp)
Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
-2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free
-Software Foundation, Inc.
+2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
+Free Software Foundation, Inc.
This is free software; see the source for copying conditions. There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
maybe_os=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'`
case $maybe_os in
nto-qnx* | linux-gnu* | linux-android* | linux-dietlibc | linux-newlib* | \
- linux-uclibc* | uclinux-uclibc* | uclinux-gnu* | kfreebsd*-gnu* | \
+ linux-musl* | linux-uclibc* | uclinux-uclibc* | uclinux-gnu* | kfreebsd*-gnu* | \
knetbsd*-gnu* | netbsd*-gnu* | \
kopensolaris*-gnu* | \
storm-chaos* | os2-emx* | rtmk-nova*)
os=-$maybe_os
basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'`
;;
+ android-linux)
+ os=-linux-android
+ basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'`-unknown
+ ;;
*)
basic_machine=`echo $1 | sed 's/-[^-]*$//'`
if [ $basic_machine != $1 ]
-convergent* | -ncr* | -news | -32* | -3600* | -3100* | -hitachi* |\
-c[123]* | -convex* | -sun | -crds | -omron* | -dg | -ultra | -tti* | \
-harris | -dolphin | -highlevel | -gould | -cbm | -ns | -masscomp | \
- -apple | -axis | -knuth | -cray | -microblaze)
+ -apple | -axis | -knuth | -cray | -microblaze*)
os=
basic_machine=$1
;;
-isc*)
basic_machine=`echo $1 | sed -e 's/86-.*/86-pc/'`
;;
+ -lynx*178)
+ os=-lynxos178
+ ;;
+ -lynx*5)
+ os=-lynxos5
+ ;;
-lynx*)
os=-lynxos
;;
# Some are omitted here because they have special meanings below.
1750a | 580 \
| a29k \
+ | aarch64 | aarch64_be \
| alpha | alphaev[4-8] | alphaev56 | alphaev6[78] | alphapca5[67] \
| alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] | alpha64pca5[67] \
| am33_2.0 \
- | arc | arm | arm[bl]e | arme[lb] | armv[2345] | armv[345][lb] | avr | avr32 \
+ | arc \
+ | arm | arm[bl]e | arme[lb] | armv[2-8] | armv[3-8][lb] | armv7[arm] \
+ | avr | avr32 \
+ | be32 | be64 \
| bfin \
| c4x | clipper \
| d10v | d30v | dlx | dsp16xx \
+ | epiphany \
| fido | fr30 | frv \
| h8300 | h8500 | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \
+ | hexagon \
| i370 | i860 | i960 | ia64 \
| ip2k | iq2000 \
+ | le32 | le64 \
| lm32 \
| m32c | m32r | m32rle | m68000 | m68k | m88k \
- | maxq | mb | microblaze | mcore | mep | metag \
+ | maxq | mb | microblaze | microblazeel | mcore | mep | metag \
| mips | mipsbe | mipseb | mipsel | mipsle \
| mips16 \
| mips64 | mips64el \
| pdp10 | pdp11 | pj | pjl \
| powerpc | powerpc64 | powerpc64le | powerpcle \
| pyramid \
- | rx \
+ | rl78 | rx \
| score \
| sh | sh[1234] | sh[24]a | sh[24]aeb | sh[23]e | sh[34]eb | sheb | shbe | shle | sh[1234]le | sh3ele \
| sh64 | sh64le \
| spu \
| tahoe | tic4x | tic54x | tic55x | tic6x | tic80 | tron \
| ubicom32 \
- | v850 | v850e \
+ | v850 | v850e | v850e1 | v850e2 | v850es | v850e2v3 \
| we32k \
| x86 | xc16x | xstormy16 | xtensa \
| z8k | z80)
c6x)
basic_machine=tic6x-unknown
;;
- m6811 | m68hc11 | m6812 | m68hc12 | picochip)
- # Motorola 68HC11/12.
+ m6811 | m68hc11 | m6812 | m68hc12 | m68hcs12x | picochip)
basic_machine=$basic_machine-unknown
os=-none
;;
strongarm | thumb | xscale)
basic_machine=arm-unknown
;;
-
+ xgate)
+ basic_machine=$basic_machine-unknown
+ os=-none
+ ;;
xscaleeb)
basic_machine=armeb-unknown
;;
# Recognize the basic CPU types with company name.
580-* \
| a29k-* \
+ | aarch64-* | aarch64_be-* \
| alpha-* | alphaev[4-8]-* | alphaev56-* | alphaev6[78]-* \
| alpha64-* | alpha64ev[4-8]-* | alpha64ev56-* | alpha64ev6[78]-* \
| alphapca5[67]-* | alpha64pca5[67]-* | arc-* \
| arm-* | armbe-* | armle-* | armeb-* | armv*-* \
| avr-* | avr32-* \
+ | be32-* | be64-* \
| bfin-* | bs2000-* \
| c[123]* | c30-* | [cjt]90-* | c4x-* \
| clipper-* | craynv-* | cydra-* \
| f30[01]-* | f700-* | fido-* | fr30-* | frv-* | fx80-* \
| h8300-* | h8500-* \
| hppa-* | hppa1.[01]-* | hppa2.0-* | hppa2.0[nw]-* | hppa64-* \
+ | hexagon-* \
| i*86-* | i860-* | i960-* | ia64-* \
| ip2k-* | iq2000-* \
+ | le32-* | le64-* \
| lm32-* \
| m32c-* | m32r-* | m32rle-* \
| m68000-* | m680[012346]0-* | m68360-* | m683?2-* | m68k-* \
- | m88110-* | m88k-* | maxq-* | mcore-* | metag-* | microblaze-* \
+ | m88110-* | m88k-* | maxq-* | mcore-* | metag-* \
+ | microblaze-* | microblazeel-* \
| mips-* | mipsbe-* | mipseb-* | mipsel-* | mipsle-* \
| mips16-* \
| mips64-* | mips64el-* \
| pdp10-* | pdp11-* | pj-* | pjl-* | pn-* | power-* \
| powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* \
| pyramid-* \
- | romp-* | rs6000-* | rx-* \
+ | rl78-* | romp-* | rs6000-* | rx-* \
| sh-* | sh[1234]-* | sh[24]a-* | sh[24]aeb-* | sh[23]e-* | sh[34]eb-* | sheb-* | shbe-* \
| shle-* | sh[1234]le-* | sh3ele-* | sh64-* | sh64le-* \
| sparc-* | sparc64-* | sparc64b-* | sparc64v-* | sparc86x-* | sparclet-* \
| sparcv8-* | sparcv9-* | sparcv9b-* | sparcv9v-* | sv1-* | sx?-* \
| tahoe-* \
| tic30-* | tic4x-* | tic54x-* | tic55x-* | tic6x-* | tic80-* \
- | tile-* | tilegx-* \
+ | tile*-* \
| tron-* \
| ubicom32-* \
- | v850-* | v850e-* | vax-* \
+ | v850-* | v850e-* | v850e1-* | v850es-* | v850e2-* | v850e2v3-* \
+ | vax-* \
| we32k-* \
| x86-* | x86_64-* | xc16x-* | xps100-* \
| xstormy16-* | xtensa*-* \
i370-ibm* | ibm*)
basic_machine=i370-ibm
;;
-# I'm not sure what "Sysv32" means. Should this be sysv3.2?
i*86v32)
basic_machine=`echo $1 | sed -e 's/86.*/86-pc/'`
os=-sysv32
basic_machine=ns32k-utek
os=-sysv
;;
- microblaze)
+ microblaze*)
basic_machine=microblaze-xilinx
;;
+ mingw64)
+ basic_machine=x86_64-pc
+ os=-mingw64
+ ;;
mingw32)
basic_machine=i386-pc
os=-mingw32
ms1-*)
basic_machine=`echo $basic_machine | sed -e 's/ms1-/mt-/'`
;;
+ msys)
+ basic_machine=i386-pc
+ os=-msys
+ ;;
mvs)
basic_machine=i370-ibm
os=-mvs
;;
+ nacl)
+ basic_machine=le32-unknown
+ os=-nacl
+ ;;
ncr3000)
basic_machine=i486-ncr
os=-sysv4
basic_machine=i586-unknown
os=-pw32
;;
- rdos)
+ rdos | rdos64)
+ basic_machine=x86_64-pc
+ os=-rdos
+ ;;
+ rdos32)
basic_machine=i386-pc
os=-rdos
;;
basic_machine=t90-cray
os=-unicos
;;
- # This must be matched before tile*.
- tilegx*)
- basic_machine=tilegx-unknown
- os=-linux-gnu
- ;;
tile*)
- basic_machine=tile-unknown
+ basic_machine=$basic_machine-unknown
os=-linux-gnu
;;
tx39)
| -nindy* | -vxsim* | -vxworks* | -ebmon* | -hms* | -mvs* \
| -clix* | -riscos* | -uniplus* | -iris* | -rtu* | -xenix* \
| -hiux* | -386bsd* | -knetbsd* | -mirbsd* | -netbsd* \
- | -openbsd* | -solidbsd* \
+ | -bitrig* | -openbsd* | -solidbsd* \
| -ekkobsd* | -kfreebsd* | -freebsd* | -riscix* | -lynxos* \
| -bosx* | -nextstep* | -cxux* | -aout* | -elf* | -oabi* \
| -ptx* | -coff* | -ecoff* | -winnt* | -domain* | -vsta* \
| -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* \
| -chorusos* | -chorusrdb* | -cegcc* \
- | -cygwin* | -pe* | -psos* | -moss* | -proelf* | -rtems* \
- | -mingw32* | -linux-gnu* | -linux-android* \
- | -linux-newlib* | -linux-uclibc* \
+ | -cygwin* | -msys* | -pe* | -psos* | -moss* | -proelf* | -rtems* \
+ | -mingw32* | -mingw64* | -linux-gnu* | -linux-android* \
+ | -linux-newlib* | -linux-musl* | -linux-uclibc* \
| -uxpv* | -beos* | -mpeix* | -udk* \
| -interix* | -uwin* | -mks* | -rhapsody* | -darwin* | -opened* \
| -openstep* | -oskit* | -conix* | -pw32* | -nonstopux* \
c4x-* | tic4x-*)
os=-coff
;;
+ hexagon-*)
+ os=-elf
+ ;;
tic54x-*)
os=-coff
;;
;;
m68000-sun)
os=-sunos3
- # This also exists in the configure program, but was not the
- # default.
- # os=-sunos4
;;
m68*-cisco)
os=-aout
#! /bin/sh
-# From configure.in Revision.
+# From configure.ac Revision.
# Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.65 for GNU MP 5.0.5.
+# Generated by GNU Autoconf 2.69 for GNU MP 5.1.3.
#
# Report bugs to <gmp-bugs@gmplib.org, see http://gmplib.org/manual/Reporting-Bugs.html>.
#
#
#
# Copyright 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
-# 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
+# 2007, 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
#
# This file is part of the GNU MP Library.
#
#
#
#
-# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
-# 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
#
#
# This configure script is free software; the Free Software Foundation
IFS=" "" $as_nl"
# Find who we are. Look in the path if we contain no directory separator.
+as_myself=
case $0 in #((
*[\\/]* ) as_myself=$0 ;;
*) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
# CDPATH.
(unset CDPATH) >/dev/null 2>&1 && unset CDPATH
+# Use a proper internal environment variable to ensure we don't fall
+ # into an infinite loop, continuously re-executing ourselves.
+ if test x"${_as_can_reexec}" != xno && test "x$CONFIG_SHELL" != x; then
+ _as_can_reexec=no; export _as_can_reexec;
+ # We cannot yet assume a decent shell, so we have to provide a
+# neutralization value for shells without unset; and this also
+# works around shells that cannot unset nonexistent variables.
+# Preserve -v and -x to the replacement shell.
+BASH_ENV=/dev/null
+ENV=/dev/null
+(unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV
+case $- in # ((((
+ *v*x* | *x*v* ) as_opts=-vx ;;
+ *v* ) as_opts=-v ;;
+ *x* ) as_opts=-x ;;
+ * ) as_opts= ;;
+esac
+exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"}
+# Admittedly, this is quite paranoid, since all the known shells bail
+# out after a failed `exec'.
+$as_echo "$0: could not re-execute with $CONFIG_SHELL" >&2
+as_fn_exit 255
+ fi
+ # We don't want this to propagate to other subprocesses.
+ { _as_can_reexec=; unset _as_can_reexec;}
if test "x$CONFIG_SHELL" = x; then
as_bourne_compatible="if test -n \"\${ZSH_VERSION+set}\" && (emulate sh) >/dev/null 2>&1; then :
emulate sh
else
exitcode=1; echo positional parameters were not saved.
fi
-test x\$exitcode = x0 || exit 1"
+test x\$exitcode = x0 || exit 1
+test -x / || exit 1"
as_suggested=" as_lineno_1=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_1a=\$LINENO
as_lineno_2=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_2a=\$LINENO
eval 'test \"x\$as_lineno_1'\$as_run'\" != \"x\$as_lineno_2'\$as_run'\" &&
if test "x$CONFIG_SHELL" != x; then :
- # We cannot yet assume a decent shell, so we have to provide a
- # neutralization value for shells without unset; and this also
- # works around shells that cannot unset nonexistent variables.
- BASH_ENV=/dev/null
- ENV=/dev/null
- (unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV
- export CONFIG_SHELL
- exec "$CONFIG_SHELL" "$as_myself" ${1+"$@"}
+ export CONFIG_SHELL
+ # We cannot yet assume a decent shell, so we have to provide a
+# neutralization value for shells without unset; and this also
+# works around shells that cannot unset nonexistent variables.
+# Preserve -v and -x to the replacement shell.
+BASH_ENV=/dev/null
+ENV=/dev/null
+(unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV
+case $- in # ((((
+ *v*x* | *x*v* ) as_opts=-vx ;;
+ *v* ) as_opts=-v ;;
+ *x* ) as_opts=-x ;;
+ * ) as_opts= ;;
+esac
+exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"}
+# Admittedly, this is quite paranoid, since all the known shells bail
+# out after a failed `exec'.
+$as_echo "$0: could not re-execute with $CONFIG_SHELL" >&2
+exit 255
fi
if test x$as_have_required = xno; then :
test -d "$as_dir" && break
done
test -z "$as_dirs" || eval "mkdir $as_dirs"
- } || test -d "$as_dir" || as_fn_error "cannot create directory $as_dir"
+ } || test -d "$as_dir" || as_fn_error $? "cannot create directory $as_dir"
} # as_fn_mkdir_p
+
+# as_fn_executable_p FILE
+# -----------------------
+# Test if FILE is an executable regular file.
+as_fn_executable_p ()
+{
+ test -f "$1" && test -x "$1"
+} # as_fn_executable_p
# as_fn_append VAR VALUE
# ----------------------
# Append the text in VALUE to the end of the definition contained in VAR. Take
fi # as_fn_arith
-# as_fn_error ERROR [LINENO LOG_FD]
-# ---------------------------------
+# as_fn_error STATUS ERROR [LINENO LOG_FD]
+# ----------------------------------------
# Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are
# provided, also output the error to LOG_FD, referencing LINENO. Then exit the
-# script with status $?, using 1 if that was 0.
+# script with STATUS, using 1 if that was 0.
as_fn_error ()
{
- as_status=$?; test $as_status -eq 0 && as_status=1
- if test "$3"; then
- as_lineno=${as_lineno-"$2"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
- $as_echo "$as_me:${as_lineno-$LINENO}: error: $1" >&$3
+ as_status=$1; test $as_status -eq 0 && as_status=1
+ if test "$4"; then
+ as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+ $as_echo "$as_me:${as_lineno-$LINENO}: error: $2" >&$4
fi
- $as_echo "$as_me: error: $1" >&2
+ $as_echo "$as_me: error: $2" >&2
as_fn_exit $as_status
} # as_fn_error
chmod +x "$as_me.lineno" ||
{ $as_echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2; as_fn_exit 1; }
+ # If we had to re-execute with $CONFIG_SHELL, we are guaranteed to have
+ # already done that, so make sure we don't try to do so again and fall
+ # into an infinite loop. This has already happened in practice.
+ _as_can_reexec=no; export _as_can_reexec
# Don't try to exec as it changes $[0], causing all sort of problems
# (the dirname of $[0] is not the place where we might find the
# original and so on. Autoconf is especially sensitive to this).
# ... but there are two gotchas:
# 1) On MSYS, both `ln -s file dir' and `ln file dir' fail.
# 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable.
- # In both cases, we have to default to `cp -p'.
+ # In both cases, we have to default to `cp -pR'.
ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe ||
- as_ln_s='cp -p'
+ as_ln_s='cp -pR'
elif ln conf$$.file conf$$ 2>/dev/null; then
as_ln_s=ln
else
- as_ln_s='cp -p'
+ as_ln_s='cp -pR'
fi
else
- as_ln_s='cp -p'
+ as_ln_s='cp -pR'
fi
rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file
rmdir conf$$.dir 2>/dev/null
as_mkdir_p=false
fi
-if test -x / >/dev/null 2>&1; then
- as_test_x='test -x'
-else
- if ls -dL / >/dev/null 2>&1; then
- as_ls_L_option=L
- else
- as_ls_L_option=
- fi
- as_test_x='
- eval sh -c '\''
- if test -d "$1"; then
- test -d "$1/.";
- else
- case $1 in #(
- -*)set "./$1";;
- esac;
- case `ls -ld'$as_ls_L_option' "$1" 2>/dev/null` in #((
- ???[sx]*):;;*)false;;esac;fi
- '\'' sh
- '
-fi
-as_executable_p=$as_test_x
+as_test_x='test -x'
+as_executable_p=as_fn_executable_p
# Sed expression to map a string onto a valid CPP name.
as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'"
exec 6>&1
# Name of the host.
-# hostname on some systems (SVR3.2, Linux) returns a bogus exit status,
+# hostname on some systems (SVR3.2, old GNU/Linux) returns a bogus exit status,
# so uname gets run too.
ac_hostname=`(hostname || uname -n) 2>/dev/null | sed 1q`
# Identity of this package.
PACKAGE_NAME='GNU MP'
PACKAGE_TARNAME='gmp'
-PACKAGE_VERSION='5.0.5'
-PACKAGE_STRING='GNU MP 5.0.5'
+PACKAGE_VERSION='5.1.3'
+PACKAGE_STRING='GNU MP 5.1.3'
PACKAGE_BUGREPORT='gmp-bugs@gmplib.org, see http://gmplib.org/manual/Reporting-Bugs.html'
PACKAGE_URL='http://www.gnu.org/software/gmp/'
gmp_srclinks
mpn_objs_in_libgmp
mpn_objects
-mpn_objs_in_libmp
GMP_LIMB_BITS
M4
+TUNE_LIBS
TAL_OBJECT
LIBM
ENABLE_STATIC_FALSE
DUMPBIN
AR
ASMFLAGS
-ANSI2KNR
-U
EGREP
GREP
CXXCPP
HAVE_HOST_CPU_FAMILY_power
ABI
GMP_NAIL_BITS
-WANT_MPBSD_FALSE
-WANT_MPBSD_TRUE
MAINT
MAINTAINER_MODE_FALSE
MAINTAINER_MODE_TRUE
enable_assert
enable_alloca
enable_cxx
+enable_assembly
enable_fft
enable_old_fft_full
-enable_mpbsd
enable_nails
enable_profiling
with_readline
fi
case $ac_option in
- *=*) ac_optarg=`expr "X$ac_option" : '[^=]*=\(.*\)'` ;;
- *) ac_optarg=yes ;;
+ *=?*) ac_optarg=`expr "X$ac_option" : '[^=]*=\(.*\)'` ;;
+ *=) ac_optarg= ;;
+ *) ac_optarg=yes ;;
esac
# Accept the important Cygnus configure options, so we can diagnose typos.
ac_useropt=`expr "x$ac_option" : 'x-*disable-\(.*\)'`
# Reject names that are not valid shell variable names.
expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null &&
- as_fn_error "invalid feature name: $ac_useropt"
+ as_fn_error $? "invalid feature name: $ac_useropt"
ac_useropt_orig=$ac_useropt
ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'`
case $ac_user_opts in
ac_useropt=`expr "x$ac_option" : 'x-*enable-\([^=]*\)'`
# Reject names that are not valid shell variable names.
expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null &&
- as_fn_error "invalid feature name: $ac_useropt"
+ as_fn_error $? "invalid feature name: $ac_useropt"
ac_useropt_orig=$ac_useropt
ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'`
case $ac_user_opts in
ac_useropt=`expr "x$ac_option" : 'x-*with-\([^=]*\)'`
# Reject names that are not valid shell variable names.
expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null &&
- as_fn_error "invalid package name: $ac_useropt"
+ as_fn_error $? "invalid package name: $ac_useropt"
ac_useropt_orig=$ac_useropt
ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'`
case $ac_user_opts in
ac_useropt=`expr "x$ac_option" : 'x-*without-\(.*\)'`
# Reject names that are not valid shell variable names.
expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null &&
- as_fn_error "invalid package name: $ac_useropt"
+ as_fn_error $? "invalid package name: $ac_useropt"
ac_useropt_orig=$ac_useropt
ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'`
case $ac_user_opts in
| --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*)
x_libraries=$ac_optarg ;;
- -*) as_fn_error "unrecognized option: \`$ac_option'
-Try \`$0 --help' for more information."
+ -*) as_fn_error $? "unrecognized option: \`$ac_option'
+Try \`$0 --help' for more information"
;;
*=*)
# Reject names that are not valid shell variable names.
case $ac_envvar in #(
'' | [0-9]* | *[!_$as_cr_alnum]* )
- as_fn_error "invalid variable name: \`$ac_envvar'" ;;
+ as_fn_error $? "invalid variable name: \`$ac_envvar'" ;;
esac
eval $ac_envvar=\$ac_optarg
export $ac_envvar ;;
$as_echo "$as_me: WARNING: you should use --build, --host, --target" >&2
expr "x$ac_option" : ".*[^-._$as_cr_alnum]" >/dev/null &&
$as_echo "$as_me: WARNING: invalid host type: $ac_option" >&2
- : ${build_alias=$ac_option} ${host_alias=$ac_option} ${target_alias=$ac_option}
+ : "${build_alias=$ac_option} ${host_alias=$ac_option} ${target_alias=$ac_option}"
;;
esac
if test -n "$ac_prev"; then
ac_option=--`echo $ac_prev | sed 's/_/-/g'`
- as_fn_error "missing argument to $ac_option"
+ as_fn_error $? "missing argument to $ac_option"
fi
if test -n "$ac_unrecognized_opts"; then
case $enable_option_checking in
no) ;;
- fatal) as_fn_error "unrecognized options: $ac_unrecognized_opts" ;;
+ fatal) as_fn_error $? "unrecognized options: $ac_unrecognized_opts" ;;
*) $as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2 ;;
esac
fi
[\\/$]* | ?:[\\/]* ) continue;;
NONE | '' ) case $ac_var in *prefix ) continue;; esac;;
esac
- as_fn_error "expected an absolute directory name for --$ac_var: $ac_val"
+ as_fn_error $? "expected an absolute directory name for --$ac_var: $ac_val"
done
# There might be people who depend on the old broken behavior: `$host'
if test "x$host_alias" != x; then
if test "x$build_alias" = x; then
cross_compiling=maybe
- $as_echo "$as_me: WARNING: If you wanted to set the --build type, don't use --host.
- If a cross compiler is detected then cross compile mode will be used." >&2
elif test "x$build_alias" != "x$host_alias"; then
cross_compiling=yes
fi
ac_pwd=`pwd` && test -n "$ac_pwd" &&
ac_ls_di=`ls -di .` &&
ac_pwd_ls_di=`cd "$ac_pwd" && ls -di .` ||
- as_fn_error "working directory cannot be determined"
+ as_fn_error $? "working directory cannot be determined"
test "X$ac_ls_di" = "X$ac_pwd_ls_di" ||
- as_fn_error "pwd does not report name of working directory"
+ as_fn_error $? "pwd does not report name of working directory"
# Find the source files, if location was not specified.
fi
if test ! -r "$srcdir/$ac_unique_file"; then
test "$ac_srcdir_defaulted" = yes && srcdir="$ac_confdir or .."
- as_fn_error "cannot find sources ($ac_unique_file) in $srcdir"
+ as_fn_error $? "cannot find sources ($ac_unique_file) in $srcdir"
fi
ac_msg="sources are in $srcdir, but \`cd $srcdir' does not work"
ac_abs_confdir=`(
- cd "$srcdir" && test -r "./$ac_unique_file" || as_fn_error "$ac_msg"
+ cd "$srcdir" && test -r "./$ac_unique_file" || as_fn_error $? "$ac_msg"
pwd)`
# When building in place, set srcdir=.
if test "$ac_abs_confdir" = "$ac_pwd"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
-\`configure' configures GNU MP 5.0.5 to adapt to many kinds of systems.
+\`configure' configures GNU MP 5.1.3 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
--help=short display options specific to this package
--help=recursive display the short help of all the included packages
-V, --version display version information and exit
- -q, --quiet, --silent do not print \`checking...' messages
+ -q, --quiet, --silent do not print \`checking ...' messages
--cache-file=FILE cache test results in FILE [disabled]
-C, --config-cache alias for \`--cache-file=config.cache'
-n, --no-create do not create output files
if test -n "$ac_init_help"; then
case $ac_init_help in
- short | recursive ) echo "Configuration of GNU MP 5.0.5:";;
+ short | recursive ) echo "Configuration of GNU MP 5.1.3:";;
esac
cat <<\_ACEOF
--enable-assert enable ASSERT checking [[default=no]]
--enable-alloca how to get temp memory [[default=reentrant]]
--enable-cxx enable C++ support [[default=no]]
+ --enable-assembly enable the use of assembly loops [[default=yes]]
--enable-fft enable FFTs for multiplication [[default=yes]]
--enable-old-fft-full enable old mpn_mul_fft_full for multiplication
[[default=no]]
- --enable-mpbsd build Berkeley MP compatibility library
- [[default=no]]
--enable-nails use nails on limbs [[default=no]]
--enable-profiling build with profiler support [[default=no]]
--enable-fat build a fat binary on systems that support it
CXXFLAGS C++ compiler flags
CXXCPP C++ preprocessor
M4 m4 macro processor
- YACC The `Yet Another C Compiler' implementation to use. Defaults to
- the first program found out of: `bison -y', `byacc', `yacc'.
+ YACC The `Yet Another Compiler Compiler' implementation to use.
+ Defaults to the first program found out of: `bison -y', `byacc',
+ `yacc'.
YFLAGS The list of arguments that will be passed by default to $YACC.
This script will default YFLAGS to the empty string to avoid a
default value of `-d' given by some make applications.
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
-GNU MP configure 5.0.5
-generated by GNU Autoconf 2.65
+GNU MP configure 5.1.3
+generated by GNU Autoconf 2.69
-Copyright (C) 2009 Free Software Foundation, Inc.
+Copyright (C) 2012 Free Software Foundation, Inc.
This configure script is free software; the Free Software Foundation
gives unlimited permission to copy, distribute and modify it.
Copyright 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
-2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
+2007, 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
ac_retval=1
fi
- eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
+ eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
as_fn_set_status $ac_retval
} # ac_fn_c_try_compile
mv -f conftest.er1 conftest.err
fi
$as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; } >/dev/null && {
+ test $ac_status = 0; } > conftest.i && {
test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" ||
test ! -s conftest.err
}; then :
ac_retval=1
fi
- eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
+ eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
as_fn_set_status $ac_retval
} # ac_fn_c_try_cpp
ac_retval=1
fi
- eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
+ eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
as_fn_set_status $ac_retval
} # ac_fn_cxx_try_compile
mv -f conftest.er1 conftest.err
fi
$as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; } >/dev/null && {
+ test $ac_status = 0; } > conftest.i && {
test -z "$ac_cxx_preproc_warn_flag$ac_cxx_werror_flag" ||
test ! -s conftest.err
}; then :
ac_retval=1
fi
- eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
+ eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
as_fn_set_status $ac_retval
} # ac_fn_cxx_try_cpp
-# ac_fn_c_try_run LINENO
-# ----------------------
-# Try to link conftest.$ac_ext, and return whether this succeeded. Assumes
-# that executables *can* be run.
-ac_fn_c_try_run ()
+# ac_fn_c_try_link LINENO
+# -----------------------
+# Try to link conftest.$ac_ext, and return whether this succeeded.
+ac_fn_c_try_link ()
{
as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+ rm -f conftest.$ac_objext conftest$ac_exeext
if { { ac_try="$ac_link"
case "(($ac_try" in
*\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
esac
eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
$as_echo "$ac_try_echo"; } >&5
- (eval "$ac_link") 2>&5
- ac_status=$?
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; } && { ac_try='./conftest$ac_exeext'
- { { case "(($ac_try" in
- *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
- *) ac_try_echo=$ac_try;;
-esac
-eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
-$as_echo "$ac_try_echo"; } >&5
- (eval "$ac_try") 2>&5
+ (eval "$ac_link") 2>conftest.err
ac_status=$?
+ if test -s conftest.err; then
+ grep -v '^ *+' conftest.err >conftest.er1
+ cat conftest.er1 >&5
+ mv -f conftest.er1 conftest.err
+ fi
$as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; }; }; then :
+ test $ac_status = 0; } && {
+ test -z "$ac_c_werror_flag" ||
+ test ! -s conftest.err
+ } && test -s conftest$ac_exeext && {
+ test "$cross_compiling" = yes ||
+ test -x conftest$ac_exeext
+ }; then :
ac_retval=0
else
- $as_echo "$as_me: program exited with status $ac_status" >&5
- $as_echo "$as_me: failed program was:" >&5
+ $as_echo "$as_me: failed program was:" >&5
sed 's/^/| /' conftest.$ac_ext >&5
- ac_retval=$ac_status
+ ac_retval=1
fi
+ # Delete the IPA/IPO (Inter Procedural Analysis/Optimization) information
+ # created by the PGI compiler (conftest_ipa8_conftest.oo), as it would
+ # interfere with the next link command; also delete a directory that is
+ # left behind by Apple's compiler. We do this before executing the actions.
rm -rf conftest.dSYM conftest_ipa8_conftest.oo
- eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
+ eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
as_fn_set_status $ac_retval
-} # ac_fn_c_try_run
-
-# ac_fn_c_check_header_mongrel LINENO HEADER VAR INCLUDES
-# -------------------------------------------------------
-# Tests whether HEADER exists, giving a warning if it cannot be compiled using
-# the include files in INCLUDES and setting the cache variable VAR
-# accordingly.
-ac_fn_c_check_header_mongrel ()
-{
- as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
- if { as_var=$3; eval "test \"\${$as_var+set}\" = set"; }; then :
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
-$as_echo_n "checking for $2... " >&6; }
-if { as_var=$3; eval "test \"\${$as_var+set}\" = set"; }; then :
- $as_echo_n "(cached) " >&6
-fi
-eval ac_res=\$$3
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
-$as_echo "$ac_res" >&6; }
-else
- # Is the header compilable?
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 usability" >&5
-$as_echo_n "checking $2 usability... " >&6; }
-cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-$4
-#include <$2>
-_ACEOF
-if ac_fn_c_try_compile "$LINENO"; then :
- ac_header_compiler=yes
-else
- ac_header_compiler=no
-fi
-rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_compiler" >&5
-$as_echo "$ac_header_compiler" >&6; }
-
-# Is the header present?
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 presence" >&5
-$as_echo_n "checking $2 presence... " >&6; }
-cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-#include <$2>
-_ACEOF
-if ac_fn_c_try_cpp "$LINENO"; then :
- ac_header_preproc=yes
-else
- ac_header_preproc=no
-fi
-rm -f conftest.err conftest.$ac_ext
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_preproc" >&5
-$as_echo "$ac_header_preproc" >&6; }
-
-# So? What about this header?
-case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in #((
- yes:no: )
- { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" >&5
-$as_echo "$as_me: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" >&2;}
- { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5
-$as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;}
- ;;
- no:yes:* )
- { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: present but cannot be compiled" >&5
-$as_echo "$as_me: WARNING: $2: present but cannot be compiled" >&2;}
- { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: check for missing prerequisite headers?" >&5
-$as_echo "$as_me: WARNING: $2: check for missing prerequisite headers?" >&2;}
- { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: see the Autoconf documentation" >&5
-$as_echo "$as_me: WARNING: $2: see the Autoconf documentation" >&2;}
- { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: section \"Present But Cannot Be Compiled\"" >&5
-$as_echo "$as_me: WARNING: $2: section \"Present But Cannot Be Compiled\"" >&2;}
- { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5
-$as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;}
-( cat <<\_ASBOX
-## ssssssssssssssssssssssssssssssssss ##
-## Report this to gmp-bugs@gmplib.org ##
-## ssssssssssssssssssssssssssssssssss ##
-_ASBOX
- ) | sed "s/^/$as_me: WARNING: /" >&2
- ;;
-esac
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
-$as_echo_n "checking for $2... " >&6; }
-if { as_var=$3; eval "test \"\${$as_var+set}\" = set"; }; then :
- $as_echo_n "(cached) " >&6
-else
- eval "$3=\$ac_header_compiler"
-fi
-eval ac_res=\$$3
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
-$as_echo "$ac_res" >&6; }
-fi
- eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
-
-} # ac_fn_c_check_header_mongrel
+} # ac_fn_c_try_link
# ac_fn_c_check_header_compile LINENO HEADER VAR INCLUDES
# -------------------------------------------------------
as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
$as_echo_n "checking for $2... " >&6; }
-if { as_var=$3; eval "test \"\${$as_var+set}\" = set"; }; then :
+if eval \${$3+:} false; then :
$as_echo_n "(cached) " >&6
else
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
eval ac_res=\$$3
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
$as_echo "$ac_res" >&6; }
- eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
+ eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
} # ac_fn_c_check_header_compile
-# ac_fn_c_try_link LINENO
-# -----------------------
-# Try to link conftest.$ac_ext, and return whether this succeeded.
-ac_fn_c_try_link ()
+# ac_fn_c_try_run LINENO
+# ----------------------
+# Try to link conftest.$ac_ext, and return whether this succeeded. Assumes
+# that executables *can* be run.
+ac_fn_c_try_run ()
{
as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
- rm -f conftest.$ac_objext conftest$ac_exeext
if { { ac_try="$ac_link"
case "(($ac_try" in
*\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
esac
eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
$as_echo "$ac_try_echo"; } >&5
- (eval "$ac_link") 2>conftest.err
+ (eval "$ac_link") 2>&5
ac_status=$?
- if test -s conftest.err; then
- grep -v '^ *+' conftest.err >conftest.er1
- cat conftest.er1 >&5
- mv -f conftest.er1 conftest.err
- fi
$as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; } && {
- test -z "$ac_c_werror_flag" ||
- test ! -s conftest.err
- } && test -s conftest$ac_exeext && {
- test "$cross_compiling" = yes ||
- $as_test_x conftest$ac_exeext
- }; then :
+ test $ac_status = 0; } && { ac_try='./conftest$ac_exeext'
+ { { case "(($ac_try" in
+ *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+ *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+$as_echo "$ac_try_echo"; } >&5
+ (eval "$ac_try") 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; }; then :
ac_retval=0
else
- $as_echo "$as_me: failed program was:" >&5
+ $as_echo "$as_me: program exited with status $ac_status" >&5
+ $as_echo "$as_me: failed program was:" >&5
sed 's/^/| /' conftest.$ac_ext >&5
- ac_retval=1
+ ac_retval=$ac_status
fi
- # Delete the IPA/IPO (Inter Procedural Analysis/Optimization) information
- # created by the PGI compiler (conftest_ipa8_conftest.oo), as it would
- # interfere with the next link command; also delete a directory that is
- # left behind by Apple's compiler. We do this before executing the actions.
rm -rf conftest.dSYM conftest_ipa8_conftest.oo
- eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
+ eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
as_fn_set_status $ac_retval
-} # ac_fn_c_try_link
+} # ac_fn_c_try_run
# ac_fn_c_check_func LINENO FUNC VAR
# ----------------------------------
as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
$as_echo_n "checking for $2... " >&6; }
-if { as_var=$3; eval "test \"\${$as_var+set}\" = set"; }; then :
+if eval \${$3+:} false; then :
$as_echo_n "(cached) " >&6
else
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
eval ac_res=\$$3
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
$as_echo "$ac_res" >&6; }
- eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
+ eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
} # ac_fn_c_check_func
test ! -s conftest.err
} && test -s conftest$ac_exeext && {
test "$cross_compiling" = yes ||
- $as_test_x conftest$ac_exeext
+ test -x conftest$ac_exeext
}; then :
ac_retval=0
else
# interfere with the next link command; also delete a directory that is
# left behind by Apple's compiler. We do this before executing the actions.
rm -rf conftest.dSYM conftest_ipa8_conftest.oo
- eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
+ eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
as_fn_set_status $ac_retval
} # ac_fn_cxx_try_link
test ! -s conftest.err
} && test -s conftest$ac_exeext && {
test "$cross_compiling" = yes ||
- $as_test_x conftest$ac_exeext
+ test -x conftest$ac_exeext
}; then :
ac_retval=0
else
# interfere with the next link command; also delete a directory that is
# left behind by Apple's compiler. We do this before executing the actions.
rm -rf conftest.dSYM conftest_ipa8_conftest.oo
- eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
+ eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
as_fn_set_status $ac_retval
} # ac_fn_f77_try_link
-# ac_fn_c_check_decl LINENO SYMBOL VAR
-# ------------------------------------
-# Tests whether SYMBOL is declared, setting cache variable VAR accordingly.
+# ac_fn_c_check_header_mongrel LINENO HEADER VAR INCLUDES
+# -------------------------------------------------------
+# Tests whether HEADER exists, giving a warning if it cannot be compiled using
+# the include files in INCLUDES and setting the cache variable VAR
+# accordingly.
+ac_fn_c_check_header_mongrel ()
+{
+ as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+ if eval \${$3+:} false; then :
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
+$as_echo_n "checking for $2... " >&6; }
+if eval \${$3+:} false; then :
+ $as_echo_n "(cached) " >&6
+fi
+eval ac_res=\$$3
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
+$as_echo "$ac_res" >&6; }
+else
+ # Is the header compilable?
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 usability" >&5
+$as_echo_n "checking $2 usability... " >&6; }
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+$4
+#include <$2>
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ ac_header_compiler=yes
+else
+ ac_header_compiler=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_compiler" >&5
+$as_echo "$ac_header_compiler" >&6; }
+
+# Is the header present?
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking $2 presence" >&5
+$as_echo_n "checking $2 presence... " >&6; }
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#include <$2>
+_ACEOF
+if ac_fn_c_try_cpp "$LINENO"; then :
+ ac_header_preproc=yes
+else
+ ac_header_preproc=no
+fi
+rm -f conftest.err conftest.i conftest.$ac_ext
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_preproc" >&5
+$as_echo "$ac_header_preproc" >&6; }
+
+# So? What about this header?
+case $ac_header_compiler:$ac_header_preproc:$ac_c_preproc_warn_flag in #((
+ yes:no: )
+ { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" >&5
+$as_echo "$as_me: WARNING: $2: accepted by the compiler, rejected by the preprocessor!" >&2;}
+ { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5
+$as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;}
+ ;;
+ no:yes:* )
+ { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: present but cannot be compiled" >&5
+$as_echo "$as_me: WARNING: $2: present but cannot be compiled" >&2;}
+ { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: check for missing prerequisite headers?" >&5
+$as_echo "$as_me: WARNING: $2: check for missing prerequisite headers?" >&2;}
+ { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: see the Autoconf documentation" >&5
+$as_echo "$as_me: WARNING: $2: see the Autoconf documentation" >&2;}
+ { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: section \"Present But Cannot Be Compiled\"" >&5
+$as_echo "$as_me: WARNING: $2: section \"Present But Cannot Be Compiled\"" >&2;}
+ { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5
+$as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;}
+( $as_echo "## ssssssssssssssssssssssssssssssssss ##
+## Report this to gmp-bugs@gmplib.org ##
+## ssssssssssssssssssssssssssssssssss ##"
+ ) | sed "s/^/$as_me: WARNING: /" >&2
+ ;;
+esac
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
+$as_echo_n "checking for $2... " >&6; }
+if eval \${$3+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ eval "$3=\$ac_header_compiler"
+fi
+eval ac_res=\$$3
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
+$as_echo "$ac_res" >&6; }
+fi
+ eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
+
+} # ac_fn_c_check_header_mongrel
+
+# ac_fn_c_check_decl LINENO SYMBOL VAR INCLUDES
+# ---------------------------------------------
+# Tests whether SYMBOL is declared in INCLUDES, setting cache variable VAR
+# accordingly.
ac_fn_c_check_decl ()
{
as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $2 is declared" >&5
-$as_echo_n "checking whether $2 is declared... " >&6; }
-if { as_var=$3; eval "test \"\${$as_var+set}\" = set"; }; then :
+ as_decl_name=`echo $2|sed 's/ *(.*//'`
+ as_decl_use=`echo $2|sed -e 's/(/((/' -e 's/)/) 0&/' -e 's/,/) 0& (/g'`
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $as_decl_name is declared" >&5
+$as_echo_n "checking whether $as_decl_name is declared... " >&6; }
+if eval \${$3+:} false; then :
$as_echo_n "(cached) " >&6
else
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
int
main ()
{
-#ifndef $2
- (void) $2;
+#ifndef $as_decl_name
+#ifdef __cplusplus
+ (void) $as_decl_use;
+#else
+ (void) $as_decl_name;
+#endif
#endif
;
eval ac_res=\$$3
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
$as_echo "$ac_res" >&6; }
- eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
+ eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
} # ac_fn_c_check_decl
as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
$as_echo_n "checking for $2... " >&6; }
-if { as_var=$3; eval "test \"\${$as_var+set}\" = set"; }; then :
+if eval \${$3+:} false; then :
$as_echo_n "(cached) " >&6
else
eval "$3=no"
eval ac_res=\$$3
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
$as_echo "$ac_res" >&6; }
- eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
+ eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
} # ac_fn_c_check_type
as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2.$3" >&5
$as_echo_n "checking for $2.$3... " >&6; }
-if { as_var=$4; eval "test \"\${$as_var+set}\" = set"; }; then :
+if eval \${$4+:} false; then :
$as_echo_n "(cached) " >&6
else
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
eval ac_res=\$$4
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
$as_echo "$ac_res" >&6; }
- eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
+ eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
} # ac_fn_c_check_member
ac_fn_cxx_check_header_mongrel ()
{
as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
- if { as_var=$3; eval "test \"\${$as_var+set}\" = set"; }; then :
+ if eval \${$3+:} false; then :
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
$as_echo_n "checking for $2... " >&6; }
-if { as_var=$3; eval "test \"\${$as_var+set}\" = set"; }; then :
+if eval \${$3+:} false; then :
$as_echo_n "(cached) " >&6
fi
eval ac_res=\$$3
else
ac_header_preproc=no
fi
-rm -f conftest.err conftest.$ac_ext
+rm -f conftest.err conftest.i conftest.$ac_ext
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_header_preproc" >&5
$as_echo "$ac_header_preproc" >&6; }
$as_echo "$as_me: WARNING: $2: section \"Present But Cannot Be Compiled\"" >&2;}
{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $2: proceeding with the compiler's result" >&5
$as_echo "$as_me: WARNING: $2: proceeding with the compiler's result" >&2;}
-( cat <<\_ASBOX
-## ssssssssssssssssssssssssssssssssss ##
+( $as_echo "## ssssssssssssssssssssssssssssssssss ##
## Report this to gmp-bugs@gmplib.org ##
-## ssssssssssssssssssssssssssssssssss ##
-_ASBOX
+## ssssssssssssssssssssssssssssssssss ##"
) | sed "s/^/$as_me: WARNING: /" >&2
;;
esac
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
$as_echo_n "checking for $2... " >&6; }
-if { as_var=$3; eval "test \"\${$as_var+set}\" = set"; }; then :
+if eval \${$3+:} false; then :
$as_echo_n "(cached) " >&6
else
eval "$3=\$ac_header_compiler"
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
$as_echo "$ac_res" >&6; }
fi
- eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
+ eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
} # ac_fn_cxx_check_header_mongrel
as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
$as_echo_n "checking for $2... " >&6; }
-if { as_var=$3; eval "test \"\${$as_var+set}\" = set"; }; then :
+if eval \${$3+:} false; then :
$as_echo_n "(cached) " >&6
else
eval "$3=no"
eval ac_res=\$$3
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
$as_echo "$ac_res" >&6; }
- eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
+ eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
} # ac_fn_cxx_check_type
main ()
{
static int test_array [1 - 2 * !(($2) >= 0)];
-test_array [0] = 0
+test_array [0] = 0;
+return test_array [0];
;
return 0;
main ()
{
static int test_array [1 - 2 * !(($2) <= $ac_mid)];
-test_array [0] = 0
+test_array [0] = 0;
+return test_array [0];
;
return 0;
main ()
{
static int test_array [1 - 2 * !(($2) < 0)];
-test_array [0] = 0
+test_array [0] = 0;
+return test_array [0];
;
return 0;
main ()
{
static int test_array [1 - 2 * !(($2) >= $ac_mid)];
-test_array [0] = 0
+test_array [0] = 0;
+return test_array [0];
;
return 0;
main ()
{
static int test_array [1 - 2 * !(($2) <= $ac_mid)];
-test_array [0] = 0
+test_array [0] = 0;
+return test_array [0];
;
return 0;
rm -f conftest.val
fi
- eval $as_lineno_stack; test "x$as_lineno_stack" = x && { as_lineno=; unset as_lineno;}
+ eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
as_fn_set_status $ac_retval
} # ac_fn_c_compute_int
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
-It was created by GNU MP $as_me 5.0.5, which was
-generated by GNU Autoconf 2.65. Invocation command line was
+It was created by GNU MP $as_me 5.1.3, which was
+generated by GNU Autoconf 2.69. Invocation command line was
$ $0 $@
{
echo
- cat <<\_ASBOX
-## ---------------- ##
+ $as_echo "## ---------------- ##
## Cache variables. ##
-## ---------------- ##
-_ASBOX
+## ---------------- ##"
echo
# The following way of writing the cache mishandles newlines in values,
(
)
echo
- cat <<\_ASBOX
-## ----------------- ##
+ $as_echo "## ----------------- ##
## Output variables. ##
-## ----------------- ##
-_ASBOX
+## ----------------- ##"
echo
for ac_var in $ac_subst_vars
do
echo
if test -n "$ac_subst_files"; then
- cat <<\_ASBOX
-## ------------------- ##
+ $as_echo "## ------------------- ##
## File substitutions. ##
-## ------------------- ##
-_ASBOX
+## ------------------- ##"
echo
for ac_var in $ac_subst_files
do
fi
if test -s confdefs.h; then
- cat <<\_ASBOX
-## ----------- ##
+ $as_echo "## ----------- ##
## confdefs.h. ##
-## ----------- ##
-_ASBOX
+## ----------- ##"
echo
cat confdefs.h
echo
ac_site_file1=NONE
ac_site_file2=NONE
if test -n "$CONFIG_SITE"; then
- ac_site_file1=$CONFIG_SITE
+ # We do not want a PATH search for config.site.
+ case $CONFIG_SITE in #((
+ -*) ac_site_file1=./$CONFIG_SITE;;
+ */*) ac_site_file1=$CONFIG_SITE;;
+ *) ac_site_file1=./$CONFIG_SITE;;
+ esac
elif test "x$prefix" != xNONE; then
ac_site_file1=$prefix/share/config.site
ac_site_file2=$prefix/etc/config.site
{ $as_echo "$as_me:${as_lineno-$LINENO}: loading site script $ac_site_file" >&5
$as_echo "$as_me: loading site script $ac_site_file" >&6;}
sed 's/^/| /' "$ac_site_file" >&5
- . "$ac_site_file"
+ . "$ac_site_file" \
+ || { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "failed to load site script $ac_site_file
+See \`config.log' for more details" "$LINENO" 5; }
fi
done
$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
{ $as_echo "$as_me:${as_lineno-$LINENO}: error: changes in the environment can compromise the build" >&5
$as_echo "$as_me: error: changes in the environment can compromise the build" >&2;}
- as_fn_error "run \`make distclean' and/or \`rm $cache_file' and start over" "$LINENO" 5
+ as_fn_error $? "run \`make distclean' and/or \`rm $cache_file' and start over" "$LINENO" 5
fi
## -------------------- ##
## Main body of script. ##
+
# If --target is not used then $target_alias is empty, but if say
# "./configure athlon-pc-freebsd3.5" is used, then all three of
# $build_alias, $host_alias and $target_alias are set to
# "athlon-pc-freebsd3.5".
#
if test -n "$target_alias" && test "$target_alias" != "$host_alias"; then
- as_fn_error "--target is not appropriate for GMP
+ as_fn_error $? "--target is not appropriate for GMP
Use --build=CPU-VENDOR-OS if you need to specify your CPU and/or system
explicitly. Use --host if cross-compiling (see \"Installing GMP\" in the
manual for more on this)." "$LINENO" 5
ac_aux_dir=
for ac_dir in "$srcdir" "$srcdir/.." "$srcdir/../.."; do
- for ac_t in install-sh install.sh shtool; do
- if test -f "$ac_dir/$ac_t"; then
- ac_aux_dir=$ac_dir
- ac_install_sh="$ac_aux_dir/$ac_t -c"
- break 2
- fi
- done
+ if test -f "$ac_dir/install-sh"; then
+ ac_aux_dir=$ac_dir
+ ac_install_sh="$ac_aux_dir/install-sh -c"
+ break
+ elif test -f "$ac_dir/install.sh"; then
+ ac_aux_dir=$ac_dir
+ ac_install_sh="$ac_aux_dir/install.sh -c"
+ break
+ elif test -f "$ac_dir/shtool"; then
+ ac_aux_dir=$ac_dir
+ ac_install_sh="$ac_aux_dir/shtool install -c"
+ break
+ fi
done
if test -z "$ac_aux_dir"; then
- as_fn_error "cannot find install-sh, install.sh, or shtool in \"$srcdir\" \"$srcdir/..\" \"$srcdir/../..\"" "$LINENO" 5
+ as_fn_error $? "cannot find install-sh, install.sh, or shtool in \"$srcdir\" \"$srcdir/..\" \"$srcdir/../..\"" "$LINENO" 5
fi
# These three variables are undocumented and unsupported,
# Make sure we can run config.sub.
$SHELL "$ac_aux_dir/config.sub" sun4 >/dev/null 2>&1 ||
- as_fn_error "cannot run $SHELL $ac_aux_dir/config.sub" "$LINENO" 5
+ as_fn_error $? "cannot run $SHELL $ac_aux_dir/config.sub" "$LINENO" 5
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking build system type" >&5
$as_echo_n "checking build system type... " >&6; }
-if test "${ac_cv_build+set}" = set; then :
+if ${ac_cv_build+:} false; then :
$as_echo_n "(cached) " >&6
else
ac_build_alias=$build_alias
test "x$ac_build_alias" = x &&
ac_build_alias=`$SHELL "$ac_aux_dir/config.guess"`
test "x$ac_build_alias" = x &&
- as_fn_error "cannot guess build type; you must specify one" "$LINENO" 5
+ as_fn_error $? "cannot guess build type; you must specify one" "$LINENO" 5
ac_cv_build=`$SHELL "$ac_aux_dir/config.sub" $ac_build_alias` ||
- as_fn_error "$SHELL $ac_aux_dir/config.sub $ac_build_alias failed" "$LINENO" 5
+ as_fn_error $? "$SHELL $ac_aux_dir/config.sub $ac_build_alias failed" "$LINENO" 5
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_build" >&5
$as_echo "$ac_cv_build" >&6; }
case $ac_cv_build in
*-*-*) ;;
-*) as_fn_error "invalid value of canonical build" "$LINENO" 5;;
+*) as_fn_error $? "invalid value of canonical build" "$LINENO" 5;;
esac
build=$ac_cv_build
ac_save_IFS=$IFS; IFS='-'
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking host system type" >&5
$as_echo_n "checking host system type... " >&6; }
-if test "${ac_cv_host+set}" = set; then :
+if ${ac_cv_host+:} false; then :
$as_echo_n "(cached) " >&6
else
if test "x$host_alias" = x; then
ac_cv_host=$ac_cv_build
else
ac_cv_host=`$SHELL "$ac_aux_dir/config.sub" $host_alias` ||
- as_fn_error "$SHELL $ac_aux_dir/config.sub $host_alias failed" "$LINENO" 5
+ as_fn_error $? "$SHELL $ac_aux_dir/config.sub $host_alias failed" "$LINENO" 5
fi
fi
$as_echo "$ac_cv_host" >&6; }
case $ac_cv_host in
*-*-*) ;;
-*) as_fn_error "invalid value of canonical host" "$LINENO" 5;;
+*) as_fn_error $? "invalid value of canonical host" "$LINENO" 5;;
esac
host=$ac_cv_host
ac_save_IFS=$IFS; IFS='-'
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for a BSD-compatible install" >&5
$as_echo_n "checking for a BSD-compatible install... " >&6; }
if test -z "$INSTALL"; then
-if test "${ac_cv_path_install+set}" = set; then :
+if ${ac_cv_path_install+:} false; then :
$as_echo_n "(cached) " >&6
else
as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
# by default.
for ac_prog in ginstall scoinst install; do
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_prog$ac_exec_ext" && $as_test_x "$as_dir/$ac_prog$ac_exec_ext"; }; then
+ if as_fn_executable_p "$as_dir/$ac_prog$ac_exec_ext"; then
if test $ac_prog = install &&
grep dspmsg "$as_dir/$ac_prog$ac_exec_ext" >/dev/null 2>&1; then
# AIX install. It has an incompatible calling convention.
'
case `pwd` in
*[\\\"\#\$\&\'\`$am_lf]*)
- as_fn_error "unsafe absolute working directory name" "$LINENO" 5;;
+ as_fn_error $? "unsafe absolute working directory name" "$LINENO" 5;;
esac
case $srcdir in
*[\\\"\#\$\&\'\`$am_lf\ \ ]*)
- as_fn_error "unsafe srcdir value: \`$srcdir'" "$LINENO" 5;;
+ as_fn_error $? "unsafe srcdir value: \`$srcdir'" "$LINENO" 5;;
esac
# Do `set' in a subshell so we don't clobber the current shell's
# if, for instance, CONFIG_SHELL is bash and it inherits a
# broken ls alias from the environment. This has actually
# happened. Such a system could not be considered "sane".
- as_fn_error "ls -t appears to fail. Make sure there is not a broken
+ as_fn_error $? "ls -t appears to fail. Make sure there is not a broken
alias in your environment" "$LINENO" 5
fi
# Ok.
:
else
- as_fn_error "newly created file is older than distributed files!
+ as_fn_error $? "newly created file is older than distributed files!
Check your system clock" "$LINENO" 5
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
set dummy ${ac_tool_prefix}strip; ac_word=$2
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_STRIP+set}" = set; then :
+if ${ac_cv_prog_STRIP+:} false; then :
$as_echo_n "(cached) " >&6
else
if test -n "$STRIP"; then
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
ac_cv_prog_STRIP="${ac_tool_prefix}strip"
$as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
set dummy strip; ac_word=$2
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_ac_ct_STRIP+set}" = set; then :
+if ${ac_cv_prog_ac_ct_STRIP+:} false; then :
$as_echo_n "(cached) " >&6
else
if test -n "$ac_ct_STRIP"; then
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
ac_cv_prog_ac_ct_STRIP="strip"
$as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for a thread-safe mkdir -p" >&5
$as_echo_n "checking for a thread-safe mkdir -p... " >&6; }
if test -z "$MKDIR_P"; then
- if test "${ac_cv_path_mkdir+set}" = set; then :
+ if ${ac_cv_path_mkdir+:} false; then :
$as_echo_n "(cached) " >&6
else
as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
test -z "$as_dir" && as_dir=.
for ac_prog in mkdir gmkdir; do
for ac_exec_ext in '' $ac_executable_extensions; do
- { test -f "$as_dir/$ac_prog$ac_exec_ext" && $as_test_x "$as_dir/$ac_prog$ac_exec_ext"; } || continue
+ as_fn_executable_p "$as_dir/$ac_prog$ac_exec_ext" || continue
case `"$as_dir/$ac_prog$ac_exec_ext" --version 2>&1` in #(
'mkdir (GNU coreutils) '* | \
'mkdir (coreutils) '* | \
set dummy $ac_prog; ac_word=$2
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_AWK+set}" = set; then :
+if ${ac_cv_prog_AWK+:} false; then :
$as_echo_n "(cached) " >&6
else
if test -n "$AWK"; then
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
ac_cv_prog_AWK="$ac_prog"
$as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
$as_echo_n "checking whether ${MAKE-make} sets \$(MAKE)... " >&6; }
set x ${MAKE-make}
ac_make=`$as_echo "$2" | sed 's/+/p/g; s/[^a-zA-Z0-9_]/_/g'`
-if { as_var=ac_cv_prog_make_${ac_make}_set; eval "test \"\${$as_var+set}\" = set"; }; then :
+if eval \${ac_cv_prog_make_${ac_make}_set+:} false; then :
$as_echo_n "(cached) " >&6
else
cat >conftest.make <<\_ACEOF
all:
@echo '@@@%%%=$(MAKE)=@@@%%%'
_ACEOF
-# GNU make sometimes prints "make[1]: Entering...", which would confuse us.
+# GNU make sometimes prints "make[1]: Entering ...", which would confuse us.
case `${MAKE-make} -f conftest.make 2>/dev/null` in
*@@@%%%=?*=@@@%%%*)
eval ac_cv_prog_make_${ac_make}_set=yes;;
am__isrc=' -I$(srcdir)'
# test to see if srcdir already configured
if test -f $srcdir/config.status; then
- as_fn_error "source directory already configured; run \"make distclean\" there first" "$LINENO" 5
+ as_fn_error $? "source directory already configured; run \"make distclean\" there first" "$LINENO" 5
fi
fi
# Define the identity of the package.
PACKAGE='gmp'
- VERSION='5.0.5'
+ VERSION='5.1.3'
cat >>confdefs.h <<_ACEOF
# We need awk for the "check" target. The system "awk" is bad on
# some platforms.
-# Always define AMTAR for backward compatibility.
-
-AMTAR=${AMTAR-"${am_missing_run}tar"}
+# Always define AMTAR for backward compatibility. Yes, it's still used
+# in the wild :-( We should find a proper way to deprecate it ...
+AMTAR='$${TAR-tar}'
-am__tar='${AMTAR} chof - "$$tardir"'; am__untar='${AMTAR} xf -'
+am__tar='$${TAR-tar} chof - "$$tardir"' am__untar='$${TAR-tar} xf -'
if test "${enable_assert+set}" = set; then :
enableval=$enable_assert; case $enableval in
yes|no) ;;
-*) as_fn_error "bad value $enableval for --enable-assert, need yes or no" "$LINENO" 5 ;;
+*) as_fn_error $? "bad value $enableval for --enable-assert, need yes or no" "$LINENO" 5 ;;
esac
else
enable_assert=no
yes|no|reentrant|notreentrant) ;;
debug) ;;
*)
- as_fn_error "bad value $enableval for --enable-alloca, need one of:
+ as_fn_error $? "bad value $enableval for --enable-alloca, need one of:
yes no reentrant notreentrant alloca malloc-reentrant malloc-notreentrant debug" "$LINENO" 5 ;;
esac
else
if test "${enable_cxx+set}" = set; then :
enableval=$enable_cxx; case $enableval in
yes|no|detect) ;;
-*) as_fn_error "bad value $enableval for --enable-cxx, need yes/no/detect" "$LINENO" 5 ;;
+*) as_fn_error $? "bad value $enableval for --enable-cxx, need yes/no/detect" "$LINENO" 5 ;;
esac
else
enable_cxx=no
-# Check whether --enable-fft was given.
-if test "${enable_fft+set}" = set; then :
- enableval=$enable_fft; case $enableval in
+# Check whether --enable-assembly was given.
+if test "${enable_assembly+set}" = set; then :
+ enableval=$enable_assembly; case $enableval in
yes|no) ;;
-*) as_fn_error "bad value $enableval for --enable-fft, need yes or no" "$LINENO" 5 ;;
+*) as_fn_error $? "bad value $enableval for --enable-assembly, need yes or no" "$LINENO" 5 ;;
esac
else
- enable_fft=yes
+ enable_assembly=yes
fi
-if test "$enable_fft" = "yes"; then
+if test "$enable_assembly" = "yes"; then
-$as_echo "#define WANT_FFT 1" >>confdefs.h
+$as_echo "#define WANT_ASSEMBLY 1" >>confdefs.h
fi
-# Check whether --enable-old-fft-full was given.
-if test "${enable_old_fft_full+set}" = set; then :
- enableval=$enable_old_fft_full; case $enableval in
+# Check whether --enable-fft was given.
+if test "${enable_fft+set}" = set; then :
+ enableval=$enable_fft; case $enableval in
yes|no) ;;
-*) as_fn_error "bad value $enableval for --enable-old-fft-full, need yes or no" "$LINENO" 5 ;;
+*) as_fn_error $? "bad value $enableval for --enable-fft, need yes or no" "$LINENO" 5 ;;
esac
else
- enable_old_fft_full=no
+ enable_fft=yes
fi
-if test "$enable_old_fft_full" = "yes"; then
+if test "$enable_fft" = "yes"; then
-$as_echo "#define WANT_OLD_FFT_FULL 1" >>confdefs.h
+$as_echo "#define WANT_FFT 1" >>confdefs.h
fi
-# Check whether --enable-mpbsd was given.
-if test "${enable_mpbsd+set}" = set; then :
- enableval=$enable_mpbsd; case $enableval in
+# Check whether --enable-old-fft-full was given.
+if test "${enable_old_fft_full+set}" = set; then :
+ enableval=$enable_old_fft_full; case $enableval in
yes|no) ;;
-*) as_fn_error "bad value $enableval for --enable-mpbsd, need yes or no" "$LINENO" 5 ;;
+*) as_fn_error $? "bad value $enableval for --enable-old-fft-full, need yes or no" "$LINENO" 5 ;;
esac
else
- enable_mpbsd=no
-fi
-
- if test "$enable_mpbsd" = "yes"; then
- WANT_MPBSD_TRUE=
- WANT_MPBSD_FALSE='#'
-else
- WANT_MPBSD_TRUE='#'
- WANT_MPBSD_FALSE=
+ enable_old_fft_full=no
fi
+if test "$enable_old_fft_full" = "yes"; then
+
+$as_echo "#define WANT_OLD_FFT_FULL 1" >>confdefs.h
+
+fi
+
# Check whether --enable-nails was given.
if test "${enable_nails+set}" = set; then :
enableval=$enable_nails; case $enableval in
yes|no|[02468]|[0-9][02468]) ;;
*[13579])
- as_fn_error "bad value $enableval for --enable-nails, only even nail sizes supported" "$LINENO" 5 ;;
+ as_fn_error $? "bad value $enableval for --enable-nails, only even nail sizes supported" "$LINENO" 5 ;;
*)
- as_fn_error "bad value $enableval for --enable-nails, need yes/no/number" "$LINENO" 5 ;;
+ as_fn_error $? "bad value $enableval for --enable-nails, need yes/no/number" "$LINENO" 5 ;;
esac
else
enable_nails=no
if test "${enable_profiling+set}" = set; then :
enableval=$enable_profiling; case $enableval in
no|prof|gprof|instrument) ;;
-*) as_fn_error "bad value $enableval for --enable-profiling, need no/prof/gprof/instrument" "$LINENO" 5 ;;
+*) as_fn_error $? "bad value $enableval for --enable-profiling, need no/prof/gprof/instrument" "$LINENO" 5 ;;
esac
else
enable_profiling=no
if test "${with_readline+set}" = set; then :
withval=$with_readline; case $withval in
yes|no|detect) ;;
-*) as_fn_error "bad value $withval for --with-readline, need yes/no/detect" "$LINENO" 5 ;;
+*) as_fn_error $? "bad value $withval for --with-readline, need yes/no/detect" "$LINENO" 5 ;;
esac
else
with_readline=detect
if test "${enable_fat+set}" = set; then :
enableval=$enable_fat; case $enableval in
yes|no) ;;
-*) as_fn_error "bad value $enableval for --enable-fat, need yes or no" "$LINENO" 5 ;;
+*) as_fn_error $? "bad value $enableval for --enable-fat, need yes or no" "$LINENO" 5 ;;
esac
else
enable_fat=no
if test "${enable_minithres+set}" = set; then :
enableval=$enable_minithres; case $enableval in
yes|no) ;;
-*) as_fn_error "bad value $enableval for --enable-minithres, need yes or no" "$LINENO" 5 ;;
+*) as_fn_error $? "bad value $enableval for --enable-minithres, need yes or no" "$LINENO" 5 ;;
esac
else
enable_minithres=no
+if test $enable_fat = yes && test $enable_assembly = no ; then
+ as_fn_error $? "when doing a fat build, disabling assembly will not work" "$LINENO" 5
+fi
+
tmp_host=`echo $host_cpu | sed 's/\./_/'`
cat >>confdefs.h <<_ACEOF
arm*-*-*)
- path="arm"
gcc_cflags="$gcc_cflags $fomit_frame_pointer"
+ gcc_cflags_optlist="arch tune"
+ gcc_cflags_maybe="-marm"
gcc_testlist="gcc-arm-umodsi"
echo "include_mpn(\`arm/arm-defs.m4')" >> $gmp_tmpconfigm4i
+ CALLING_CONVENTIONS_OBJS='arm32call.lo arm32check.lo'
+
+ case $host_cpu in
+ armsa1 | armv4*)
+ path="arm"
+ gcc_cflags_arch="-march=armv4"
+ ;;
+ armxscale | arm926 | arm946 | arm966 | arm1026 | armv5*)
+ path="arm/v5 arm"
+ gcc_cflags_arch="-march=armv5"
+ ;;
+ arm11mpcore | arm1136 | arm1176 | armv6*)
+ path="arm/v6 arm/v5 arm"
+ gcc_cflags_arch="-march=armv6"
+ ;;
+ arm1156)
+ path="arm/v6t2 arm/v6 arm/v5 arm"
+ gcc_cflags_arch="-march=armv6t2"
+ ;;
+ armcortexa9)
+ path="arm/v7a/cora9 arm/v6t2 arm/v6 arm/v5 arm"
+ gcc_cflags_arch="-march=armv7-a"
+ gcc_cflags_tune="-mtune=cortex-a9"
+ ;;
+ armcortexa15)
+ path="arm/v7a/cora15 arm/v6t2 arm/v6 arm/v5 arm"
+ gcc_cflags_arch="-march=armv7-a"
+ gcc_cflags_tune="-mtune=cortex-a15 -mtune=cortex-a9"
+ ;;
+ armcortexa5 | armcortexa8 | armv7a*)
+ path="arm/v6t2 arm/v6 arm/v5 arm"
+ gcc_cflags_arch="-march=armv7-a"
+ ;;
+ *)
+ path="arm"
+ ;;
+ esac
;;
# -mpa-risc-2-0 is only an optional flag, in case an old gcc is
# used. Assembler support for 2.0 is essential though, for our asm
# files.
- gcc_20n_cflags="-O2"
+ gcc_20n_cflags="$gcc_cflags"
gcc_20n_cflags_optlist="arch"
gcc_20n_cflags_arch="-mpa-risc-2-0 -mpa-risc-1-1"
gcc_20n_testlist="sizeof-long-4 hppa-level-2.0"
esac
cclist_20w="gcc cc"
- gcc_20w_cflags="-O2 -mpa-risc-2-0"
+ gcc_20w_cflags="$gcc_cflags -mpa-risc-2-0"
cc_20w_cflags="+DD64 +O2"
cc_20w_testlist="hpc-hppa-2-0"
path_20w="pa64"
echo "include_mpn(\`ia64/ia64-defs.m4')" >> $gmp_tmpconfigm4i
SPEED_CYCLECOUNTER_OBJ=ia64.lo
+ any_32_testlist="sizeof-long-4"
case $host_cpu in
itanium) path="ia64/itanium ia64" ;;
# let us use whatever seems to work.
#
abilist="32 64"
+ any_64_testlist="sizeof-long-8"
cclist_32="gcc cc"
path_32="ia64"
cc_32_cflags=""
cc_32_cflags_optlist="opt"
cc_32_cflags_opt="+O3 +O2 +O1"
- gcc_32_cflags="-milp32 -O2"
+ gcc_32_cflags="$gcc_cflags -milp32"
limb_32=longlong
SPEED_CYCLECOUNTER_OBJ_32=ia64.lo
cyclecounter_size_32=2
cc_64_cppflags="+DD64"
cc_64_cflags_optlist="opt"
cc_64_cflags_opt="+O3 +O2 +O1"
- gcc_64_cflags="$gcc_64_cflags -mlp64"
+ gcc_64_cflags="$gcc_cflags -mlp64"
;;
esac
;;
abilist="n32 64 o32"
cclist_n32="gcc cc"
- gcc_n32_cflags="-O2 -mabi=n32"
+ gcc_n32_cflags="$gcc_cflags -mabi=n32"
cc_n32_cflags="-O2 -n32" # no -g, it disables all optimizations
limb_n32=longlong
path_n32="mips64"
cclist_64="gcc cc"
- gcc_64_cflags="$gcc_64_cflags -mabi=64"
+ gcc_64_cflags="$gcc_cflags -mabi=64"
gcc_64_ldflags="-Wc,-mabi=64"
cc_64_cflags="-O2 -64" # no -g, it disables all optimizations
cc_64_ldflags="-Wc,-64"
*-*-aix*)
# On AIX a true 64-bit ABI is available.
# Need -Wc to pass object type flags through to the linker.
- abilist="aix64 $abilist"
- cclist_aix64="gcc xlc"
- gcc_aix64_cflags="-O2 -maix64 -mpowerpc64"
- gcc_aix64_cflags_optlist="cpu"
- gcc_aix64_ldflags="-Wc,-maix64"
- xlc_aix64_cflags="-O2 -q64 -qmaxmem=20000"
- xlc_aix64_cflags_optlist="arch"
- xlc_aix64_ldflags="-Wc,-q64"
+ abilist="mode64 $abilist"
+ cclist_mode64="gcc xlc"
+ gcc_mode64_cflags="$gcc_cflags -maix64 -mpowerpc64"
+ gcc_mode64_cflags_optlist="cpu"
+ gcc_mode64_ldflags="-Wc,-maix64"
+ xlc_mode64_cflags="-O2 -q64 -qmaxmem=20000"
+ xlc_mode64_cflags_optlist="arch"
+ xlc_mode64_ldflags="-Wc,-q64"
# Must indicate object type to ar and nm
- ar_aix64_flags="-X64"
- nm_aix64_flags="-X64"
- path_aix64=""
- for i in $cpu_path; do path_aix64="${path_aix64}powerpc64/mode64/$i "; done
- path_aix64="${path_aix64}powerpc64/mode64 $vmx_path powerpc64"
+ ar_mode64_flags="-X64"
+ nm_mode64_flags="-X64"
+ path_mode64=""
+ p=""
+ for i in $cpu_path
+ do path_mode64="${path_mode64}powerpc64/mode64/$i "
+ path_mode64="${path_mode64}powerpc64/$i "
+ p="${p} powerpc32/$i "
+ done
+ path_mode64="${path_mode64}powerpc64/mode64 $vmx_path powerpc64"
+ path="$p $path"
# grab this object, though it's not a true cycle counter routine
- SPEED_CYCLECOUNTER_OBJ_aix64=powerpc64.lo
- cyclecounter_size_aix64=0
+ SPEED_CYCLECOUNTER_OBJ_mode64=powerpc64.lo
+ cyclecounter_size_mode64=0
;;
*-*-darwin*)
# On Darwin we can use 64-bit instructions with a longlong limb,
gcc_mode32_cflags_optlist="subtype cpu opt"
gcc_mode32_cflags_subtype="-force_cpusubtype_ALL"
gcc_mode32_cflags_opt="-O3 -O2 -O1"
- path_mode32="powerpc64/mode32 $vmx_path powerpc64"
limb_mode32=longlong
cclist_mode64="gcc"
gcc_mode64_cflags="-m64"
gcc_mode64_cflags_optlist="cpu opt"
gcc_mode64_cflags_opt="-O3 -O2 -O1"
path_mode64=""
- for i in $cpu_path; do path_mode64="${path_mode64}powerpc64/mode64/$i "; done
+ path_mode32=""
+ p=""
+ for i in $cpu_path
+ do path_mode64="${path_mode64}powerpc64/mode64/$i "
+ path_mode64="${path_mode64}powerpc64/$i "
+ path_mode32="${path_mode32}powerpc64/mode32/$i "
+ path_mode32="${path_mode32}powerpc64/$i "
+ p="${p} powerpc32/$i "
+ done
path_mode64="${path_mode64}powerpc64/mode64 $vmx_path powerpc64"
+ path_mode32="${path_mode32}powerpc64/mode32 $vmx_path powerpc64"
+ path="$p $path"
SPEED_CYCLECOUNTER_OBJ_mode64=powerpc64.lo
cyclecounter_size_mode64=0
any_mode64_testlist="sizeof-long-8"
gcc_mode32_cflags="-mpowerpc64"
gcc_mode32_cflags_optlist="cpu opt"
gcc_mode32_cflags_opt="-O3 -O2 -O1"
- path_mode32="powerpc64/mode32 $vmx_path powerpc64"
limb_mode32=longlong
cclist_mode64="gcc gcc64"
gcc_mode64_cflags_maybe="-m64"
gcc_mode64_cflags_optlist="cpu opt"
gcc_mode64_cflags_opt="-O3 -O2 -O1"
path_mode64=""
- for i in $cpu_path; do path_mode64="${path_mode64}powerpc64/mode64/$i "; done
+ path_mode32=""
+ p=""
+ for i in $cpu_path
+ do path_mode64="${path_mode64}powerpc64/mode64/$i "
+ path_mode64="${path_mode64}powerpc64/$i "
+ path_mode32="${path_mode32}powerpc64/mode32/$i "
+ path_mode32="${path_mode32}powerpc64/$i "
+ p="${p} powerpc32/$i "
+ done
path_mode64="${path_mode64}powerpc64/mode64 $vmx_path powerpc64"
+ path_mode32="${path_mode32}powerpc64/mode32 $vmx_path powerpc64"
+ path="$p $path"
SPEED_CYCLECOUNTER_OBJ_mode64=powerpc64.lo
cyclecounter_size_mode64=0
any_mode64_testlist="sizeof-long-8"
path="sparc32/v8 sparc32" ;;
supersparc)
path="sparc32/v8/supersparc sparc32/v8 sparc32" ;;
- sparc64 | sparcv9* | ultrasparc*)
+ sparc64 | sparcv9* | ultrasparc | ultrasparc[234]*)
path="sparc32/v9 sparc32/v8 sparc32" ;;
+ ultrasparct[12345])
+ path="sparc32/ultrasparct1 sparc32/v8 sparc32" ;;
*)
path="sparc32" ;;
esac
# abilist="64" only.
#
case $host_cpu in
+ ultrasparct[345])
+ gcc_32_cflags="$gcc_cflags -Wa,-xarch=v8plusd" ;;
sparc64 | sparcv9* | ultrasparc*)
- gcc_cflags="$gcc_cflags -Wa,-xarch=v8plus" ;;
- *)
- gcc_cflags="$gcc_cflags" ;;
+ gcc_32_cflags="$gcc_cflags -Wa,-xarch=v8plus" ;;
esac
gcc_32_cflags_maybe="-m32"
gcc_cflags_optlist="cpu"
case $host_cpu in
sparcv8 | microsparc | supersparc | turbosparc)
cc_cflags_arch="-xarch=v8" ;;
+ ultrasparct[345]) cc_cflags_arch="-xarch=v8plusd" ;;
sparc64 | sparcv9* | ultrasparc*) cc_cflags_arch="-xarch=v8plus" ;;
*) cc_cflags_arch="-xarch=v7" ;;
esac
case $host_cpu in
ultrasparc | ultrasparc2 | ultrasparc2i)
- path_64="sparc64/ultrasparc12 sparc64" ;;
+ path_64="sparc64/ultrasparc1234 sparc64" ;;
ultrasparc[34])
path_64="sparc64/ultrasparc34 sparc64/ultrasparc1234 sparc64" ;;
- ultrasparct[1234])
- path_64="sparc64" ;;
+ ultrasparct[12345])
+ path_64="sparc64/ultrasparct1 sparc64" ;;
*)
path_64="sparc64"
esac
# it until we're sure. (Might want -xarch=v9a or -xarch=v9b for the
# higher cpu types instead.)
#
- gcc_64_cflags="$gcc_64_cflags -m64 -mptr64"
+ gcc_64_cflags="$gcc_cflags -m64 -mptr64"
gcc_64_ldflags="-Wc,-m64"
gcc_64_cflags_optlist="cpu"
# VAX
+ vax*-*-*elf*)
+ # Use elf conventions (i.e., '%' register prefix, no global prefix)
+ #
+
+echo "include_mpn(\`vax/elf.m4')" >> $gmp_tmpconfigm4i
+
+ gcc_cflags="$gcc_cflags $fomit_frame_pointer"
+ path="vax"
+ extra_functions="udiv_w_sdiv"
+ ;;
vax*-*-*)
- # Currently gcc (version 3.0) on vax always uses a frame pointer
- # (config/vax/vax.h FRAME_POINTER_REQUIRED=1), so -fomit-frame-pointer
- # will be ignored.
+ # Default to aout conventions (i.e., no register prefix, '_' global prefix)
#
gcc_cflags="$gcc_cflags $fomit_frame_pointer"
path="vax"
gcc_cflags_cpu="-mtune=bdver1 -mtune=amdfam10 -mtune=k8"
gcc_cflags_arch="-march=bdver1 -march=amdfam10 -march=k8 -march=k8~-mno-sse2"
path="x86/bd1 x86/k7/mmx x86/k7 x86"
- path_64="x86_64/bd1 x86_64"
+ path_64="x86_64/bd1 x86_64/k10 x86_64/k8 x86_64"
;;
core2)
gcc_cflags_cpu="-mtune=core2 -mtune=k8"
path="x86/coreinhm x86/p6/sse2 x86/p6/p3mmx x86/p6/mmx x86/p6 x86"
path_64="x86_64/coreinhm x86_64/core2 x86_64"
;;
- coreisbr)
+ coreisbr | coreihwl | coreibwl)
gcc_cflags_cpu="-mtune=corei7 -mtune=core2 -mtune=k8"
gcc_cflags_arch="-march=corei7 -march=core2 -march=core2~-mno-sse2 -march=k8 -march=k8~-mno-sse2"
path="x86/coreisbr x86/p6/sse2 x86/p6/p3mmx x86/p6/mmx x86/p6 x86"
case $host in
athlon64-*-* | k8-*-* | k10-*-* | bobcat-*-* | bulldozer-*-* | pentium4-*-* | atom-*-* | core2-*-* | corei*-*-* | x86_64-*-* | nano-*-*)
cclist_64="gcc"
- gcc_64_cflags="$gcc_64_cflags -m64"
+ gcc_64_cflags="$gcc_cflags -m64"
gcc_64_cflags_optlist="cpu arch"
CALLING_CONVENTIONS_OBJS_64='amd64call.lo amd64check$U.lo'
SPEED_CYCLECOUNTER_OBJ_64=x86_64.lo
cyclecounter_size_64=2
- abilist="64 32"
+
+ cclist_x32="gcc"
+ gcc_x32_cflags="$gcc_cflags -mx32"
+ gcc_x32_cflags_optlist="$gcc_64_cflags_optlist"
+ CALLING_CONVENTIONS_OBJS_x32="$CALLING_CONVENTIONS_OBJS_64"
+ SPEED_CYCLECOUNTER_OBJ_x32="$SPEED_CYCLECOUNTER_OBJ_64"
+ cyclecounter_size_x32="$cyclecounter_size_64"
+ path_x32="$path_64"
+ limb_x32=longlong
+ any_x32_testlist="sizeof-long-4"
+
+ abilist="64 x32 32"
+ if test "$enable_assembly" = "yes" ; then
+ extra_functions_64="invert_limb_table"
+ extra_functions_x32=$extra_functions_64
+ fi
case $host in
*-*-solaris*)
;;
*-*-mingw* | *-*-cygwin)
limb_64=longlong
- path_64="" # Windows amd64 calling conventions are *different*
- # Silence many pedantic warnings for w64. FIXME.
- gcc_64_cflags="$gcc_64_cflags -std=gnu99"
+ CALLING_CONVENTIONS_OBJS_64=""
+
+$as_echo "#define HOST_DOS64 1" >>confdefs.h
+
+ GMP_NONSTD_ABI_64=DOS64
;;
esac
;;
;;
- # Special CPU "none" selects generic C. -DNO_ASM is used to disable gcc
- # asm blocks in longlong.h (since they're driven by cpp pre-defined
- # symbols like __alpha rather than the configured $host_cpu).
- #
+ # Special CPU "none" used to select generic C, now this is obsolete.
none-*-*)
- abilist="long longlong"
- cclist_long=$cclist
- gcc_long_cflags=$gcc_cflags
- gcc_long_cppflags="-DNO_ASM"
- cc_long_cflags=$cc_cflags
- cclist_longlong=$cclist
- gcc_longlong_cflags=$gcc_cflags
- gcc_longlong_cppflags="-DNO_ASM"
- cc_longlong_cflags=$cc_cflags
- limb_longlong=longlong
+ enable_assembly=no
+ { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: the \"none\" host is obsolete, use --disable-assembly" >&5
+$as_echo "$as_me: WARNING: the \"none\" host is obsolete, use --disable-assembly" >&2;}
;;
esac
if test $abi = "$ABI"; then found=yes; break; fi
done
if test $found = no; then
- as_fn_error "ABI=$ABI is not among the following valid choices: $abilist" "$LINENO" 5
+ as_fn_error $? "ABI=$ABI is not among the following valid choices: $abilist" "$LINENO" 5
fi
abilist="$ABI"
fi
# C on MS-DOS systems).
#
if test $found_compiler = no && test -n "$path"; then
- as_fn_error "could not find a working compiler, see config.log for details" "$LINENO" 5
+ as_fn_error $? "could not find a working compiler, see config.log for details" "$LINENO" 5
fi
case $host in
gcc_cflags_cpu=""
gcc_cflags_arch=""
+ fat_functions="add_n addmul_1 bdiv_dbm1c com copyd copyi dive_1 divrem_1
+ gcd_1 lshift lshiftc mod_1 mod_1_1 mod_1_1_cps mod_1_2
+ mod_1_2_cps mod_1_4 mod_1_4_cps mod_34lsub1 mode1o mul_1
+ mul_basecase mullo_basecase pre_divrem_1 pre_mod_1 redc_1
+ redc_2 rshift sqr_basecase sub_n submul_1"
+
if test "$abi" = 32; then
extra_functions="$extra_functions fat fat_entry"
path="x86/fat x86"
fat_path="x86 x86/fat x86/i486
x86/k6 x86/k6/mmx x86/k6/k62mmx
x86/k7 x86/k7/mmx
+ x86/k8 x86/k10 x86/bobcat
x86/pentium x86/pentium/mmx
x86/p6 x86/p6/mmx x86/p6/p3mmx x86/p6/sse2
- x86/pentium4 x86/pentium4/mmx x86/pentium4/sse2"
+ x86/pentium4 x86/pentium4/mmx x86/pentium4/sse2
+ x86/core2 x86/coreinhm x86/coreisbr
+ x86/atom x86/atom/mmx x86/atom/sse2 x86/nano"
fi
if test "$abi" = 64; then
gcc_64_cflags=""
extra_functions_64="$extra_functions_64 fat fat_entry"
path_64="x86_64/fat x86_64"
- fat_path="x86_64 x86_64/fat x86_64/pentium4 x86_64/core2 x86_64/coreinhm x86_64/coreisbr x86_64/atom x86_64/nano"
+ fat_path="x86_64 x86_64/fat
+ x86_64/k8 x86_64/k10 x86_64/bd1 x86_64/bobcat
+ x86_64/pentium4 x86_64/core2 x86_64/coreinhm x86_64/coreisbr
+ x86_64/atom x86_64/nano"
+ fat_functions="$fat_functions addmul_2 addlsh1_n addlsh2_n sublsh1_n"
fi
- fat_functions="add_n addmul_1 copyd copyi
- dive_1 diveby3 divrem_1 gcd_1 lshift
- mod_1 mod_34lsub1 mode1o mul_1 mul_basecase
- pre_divrem_1 pre_mod_1 rshift
- sqr_basecase sub_n submul_1"
fat_thresholds="MUL_TOOM22_THRESHOLD MUL_TOOM33_THRESHOLD
- SQR_TOOM2_THRESHOLD SQR_TOOM3_THRESHOLD"
+ SQR_TOOM2_THRESHOLD SQR_TOOM3_THRESHOLD
+ BMOD_1_TO_MOD_1_THRESHOLD"
fi
;;
esac
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the operating system supports XMM registers" >&5
$as_echo_n "checking whether the operating system supports XMM registers... " >&6; }
-if test "${gmp_cv_os_x86_xmm+set}" = set; then :
+if ${gmp_cv_os_x86_xmm+:} false; then :
$as_echo_n "(cached) " >&6
else
if test "$build" = "$host"; then
CC="$cc"
CFLAGS="$cflags"
CPPFLAGS="$cppflags"
-
+ eval GMP_NONSTD_ABI=\"\$GMP_NONSTD_ABI_$ABI\"
# Could easily have this in config.h too, if desired.
ABI_nodots=`echo $ABI | sed 's/\./_/'`
#
eval tmp=\"\$CALLING_CONVENTIONS_OBJS$abi1\"
test -n "$tmp" || eval tmp=\"\$CALLING_CONVENTIONS_OBJS$abi2\"
- CALLING_CONVENTIONS_OBJS="$tmp"
+ if test "$enable_assembly" = "yes"; then
+ CALLING_CONVENTIONS_OBJS="$tmp"
+ else
+ CALLING_CONVENTIONS_OBJS=""
+ fi
if test -n "$CALLING_CONVENTIONS_OBJS"; then
set dummy ${ac_tool_prefix}gcc; ac_word=$2
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_CC+set}" = set; then :
+if ${ac_cv_prog_CC+:} false; then :
$as_echo_n "(cached) " >&6
else
if test -n "$CC"; then
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
ac_cv_prog_CC="${ac_tool_prefix}gcc"
$as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
set dummy gcc; ac_word=$2
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_ac_ct_CC+set}" = set; then :
+if ${ac_cv_prog_ac_ct_CC+:} false; then :
$as_echo_n "(cached) " >&6
else
if test -n "$ac_ct_CC"; then
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
ac_cv_prog_ac_ct_CC="gcc"
$as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
set dummy ${ac_tool_prefix}cc; ac_word=$2
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_CC+set}" = set; then :
+if ${ac_cv_prog_CC+:} false; then :
$as_echo_n "(cached) " >&6
else
if test -n "$CC"; then
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
ac_cv_prog_CC="${ac_tool_prefix}cc"
$as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
set dummy cc; ac_word=$2
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_CC+set}" = set; then :
+if ${ac_cv_prog_CC+:} false; then :
$as_echo_n "(cached) " >&6
else
if test -n "$CC"; then
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
if test "$as_dir/$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then
ac_prog_rejected=yes
continue
set dummy $ac_tool_prefix$ac_prog; ac_word=$2
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_CC+set}" = set; then :
+if ${ac_cv_prog_CC+:} false; then :
$as_echo_n "(cached) " >&6
else
if test -n "$CC"; then
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
ac_cv_prog_CC="$ac_tool_prefix$ac_prog"
$as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
set dummy $ac_prog; ac_word=$2
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_ac_ct_CC+set}" = set; then :
+if ${ac_cv_prog_ac_ct_CC+:} false; then :
$as_echo_n "(cached) " >&6
else
if test -n "$ac_ct_CC"; then
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
ac_cv_prog_ac_ct_CC="$ac_prog"
$as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
test -z "$CC" && { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error "no acceptable C compiler found in \$PATH
-See \`config.log' for more details." "$LINENO" 5; }
+as_fn_error $? "no acceptable C compiler found in \$PATH
+See \`config.log' for more details" "$LINENO" 5; }
# Provide some information about the compiler.
$as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler version" >&5
{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-{ as_fn_set_status 77
-as_fn_error "C compiler cannot create executables
-See \`config.log' for more details." "$LINENO" 5; }; }
+as_fn_error 77 "C compiler cannot create executables
+See \`config.log' for more details" "$LINENO" 5; }
else
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
$as_echo "yes" >&6; }
else
{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error "cannot compute suffix of executables: cannot compile and link
-See \`config.log' for more details." "$LINENO" 5; }
+as_fn_error $? "cannot compute suffix of executables: cannot compile and link
+See \`config.log' for more details" "$LINENO" 5; }
fi
rm -f conftest conftest$ac_cv_exeext
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_exeext" >&5
else
{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error "cannot run C compiled programs.
+as_fn_error $? "cannot run C compiled programs.
If you meant to cross compile, use \`--host'.
-See \`config.log' for more details." "$LINENO" 5; }
+See \`config.log' for more details" "$LINENO" 5; }
fi
fi
fi
ac_clean_files=$ac_clean_files_save
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for suffix of object files" >&5
$as_echo_n "checking for suffix of object files... " >&6; }
-if test "${ac_cv_objext+set}" = set; then :
+if ${ac_cv_objext+:} false; then :
$as_echo_n "(cached) " >&6
else
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error "cannot compute suffix of object files: cannot compile
-See \`config.log' for more details." "$LINENO" 5; }
+as_fn_error $? "cannot compute suffix of object files: cannot compile
+See \`config.log' for more details" "$LINENO" 5; }
fi
rm -f conftest.$ac_cv_objext conftest.$ac_ext
fi
ac_objext=$OBJEXT
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are using the GNU C compiler" >&5
$as_echo_n "checking whether we are using the GNU C compiler... " >&6; }
-if test "${ac_cv_c_compiler_gnu+set}" = set; then :
+if ${ac_cv_c_compiler_gnu+:} false; then :
$as_echo_n "(cached) " >&6
else
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
ac_save_CFLAGS=$CFLAGS
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CC accepts -g" >&5
$as_echo_n "checking whether $CC accepts -g... " >&6; }
-if test "${ac_cv_prog_cc_g+set}" = set; then :
+if ${ac_cv_prog_cc_g+:} false; then :
$as_echo_n "(cached) " >&6
else
ac_save_c_werror_flag=$ac_c_werror_flag
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $CC option to accept ISO C89" >&5
$as_echo_n "checking for $CC option to accept ISO C89... " >&6; }
-if test "${ac_cv_prog_cc_c89+set}" = set; then :
+if ${ac_cv_prog_cc_c89+:} false; then :
$as_echo_n "(cached) " >&6
else
ac_cv_prog_cc_c89=no
/* end confdefs.h. */
#include <stdarg.h>
#include <stdio.h>
-#include <sys/types.h>
-#include <sys/stat.h>
+struct stat;
/* Most of the following tests are stolen from RCS 5.7's src/conf.sh. */
struct buf { int x; };
FILE * (*rcsopen) (struct buf *, struct stat *, int);
*) :
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $CC option to accept ISO C99" >&5
$as_echo_n "checking for $CC option to accept ISO C99... " >&6; }
-if test "${ac_cv_prog_cc_c99+set}" = set; then :
+if ${ac_cv_prog_cc_c99+:} false; then :
$as_echo_n "(cached) " >&6
else
ac_cv_prog_cc_c99=no
return 0;
}
_ACEOF
-for ac_arg in '' -std=gnu99 -std=c99 -c99 -AC99 -xc99=all -qlanglvl=extc99
+for ac_arg in '' -std=gnu99 -std=c99 -c99 -AC99 -D_STDC_C99= -qlanglvl=extc99
do
CC="$ac_save_CC $ac_arg"
if ac_fn_c_try_compile "$LINENO"; then :
else
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $CC option to accept ISO C89" >&5
$as_echo_n "checking for $CC option to accept ISO C89... " >&6; }
-if test "${ac_cv_prog_cc_c89+set}" = set; then :
+if ${ac_cv_prog_cc_c89+:} false; then :
$as_echo_n "(cached) " >&6
else
ac_cv_prog_cc_c89=no
/* end confdefs.h. */
#include <stdarg.h>
#include <stdio.h>
-#include <sys/types.h>
-#include <sys/stat.h>
+struct stat;
/* Most of the following tests are stolen from RCS 5.7's src/conf.sh. */
struct buf { int x; };
FILE * (*rcsopen) (struct buf *, struct stat *, int);
esac
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $CC option to accept ISO Standard C" >&5
$as_echo_n "checking for $CC option to accept ISO Standard C... " >&6; }
- if test "${ac_cv_prog_cc_stdc+set}" = set; then :
+ if ${ac_cv_prog_cc_stdc+:} false; then :
$as_echo_n "(cached) " >&6
fi
CPP=
fi
if test -z "$CPP"; then
- if test "${ac_cv_prog_CPP+set}" = set; then :
+ if ${ac_cv_prog_CPP+:} false; then :
$as_echo_n "(cached) " >&6
else
# Double quotes because CPP needs to be expanded
# Broken: fails on valid input.
continue
fi
-rm -f conftest.err conftest.$ac_ext
+rm -f conftest.err conftest.i conftest.$ac_ext
# OK, works on sane cases. Now check whether nonexistent headers
# can be detected and how.
ac_preproc_ok=:
break
fi
-rm -f conftest.err conftest.$ac_ext
+rm -f conftest.err conftest.i conftest.$ac_ext
done
# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped.
-rm -f conftest.err conftest.$ac_ext
+rm -f conftest.i conftest.err conftest.$ac_ext
if $ac_preproc_ok; then :
break
fi
# Broken: fails on valid input.
continue
fi
-rm -f conftest.err conftest.$ac_ext
+rm -f conftest.err conftest.i conftest.$ac_ext
# OK, works on sane cases. Now check whether nonexistent headers
# can be detected and how.
ac_preproc_ok=:
break
fi
-rm -f conftest.err conftest.$ac_ext
+rm -f conftest.err conftest.i conftest.$ac_ext
done
# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped.
-rm -f conftest.err conftest.$ac_ext
+rm -f conftest.i conftest.err conftest.$ac_ext
if $ac_preproc_ok; then :
else
{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error "C preprocessor \"$CPP\" fails sanity check
-See \`config.log' for more details." "$LINENO" 5; }
+as_fn_error $? "C preprocessor \"$CPP\" fails sanity check
+See \`config.log' for more details" "$LINENO" 5; }
fi
ac_ext=c
-case $ac_cv_prog_cc_stdc in
- no)
- ;;
- *)
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-#define __GMP_WITHIN_CONFIGURE 1 /* ignore template stuff */
-#define GMP_NAIL_BITS $GMP_NAIL_BITS
-#define GMP_LIMB_BITS 123
-$DEFN_LONG_LONG_LIMB
-#include "$srcdir/gmp-h.in"
-
-#if ! __GMP_HAVE_PROTOTYPES
-die die die
-#endif
-
-int
-main ()
-{
-
- ;
- return 0;
-}
-_ACEOF
-if ac_fn_c_try_compile "$LINENO"; then :
-
-else
- { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: gmp.h doesnt recognise compiler as ANSI, prototypes and \"const\" will be unavailable" >&5
-$as_echo "$as_me: WARNING: gmp.h doesnt recognise compiler as ANSI, prototypes and \"const\" will be unavailable" >&2;}
-fi
-rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
- ;;
-esac
-
-
-
# The C compiler on the build system, and associated tests.
if test -n "$CC_FOR_BUILD"; then
if test "$cc_for_build_works" = yes; then
:
else
- as_fn_error "Specified CC_FOR_BUILD doesn't seem to work" "$LINENO" 5
+ as_fn_error $? "Specified CC_FOR_BUILD doesn't seem to work" "$LINENO" 5
fi
elif test -n "$HOST_CC"; then
if test "$cc_for_build_works" = yes; then
CC_FOR_BUILD=$HOST_CC
else
- as_fn_error "Specified HOST_CC doesn't seem to work" "$LINENO" 5
+ as_fn_error $? "Specified HOST_CC doesn't seem to work" "$LINENO" 5
fi
else
done
if test -z "$CC_FOR_BUILD"; then
- as_fn_error "Cannot find a build system compiler" "$LINENO" 5
+ as_fn_error $? "Cannot find a build system compiler" "$LINENO" 5
fi
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for build system preprocessor" >&5
$as_echo_n "checking for build system preprocessor... " >&6; }
if test -z "$CPP_FOR_BUILD"; then
- if test "${gmp_cv_prog_cpp_for_build+set}" = set; then :
+ if ${gmp_cv_prog_cpp_for_build+:} false; then :
$as_echo_n "(cached) " >&6
else
cat >conftest.c <<EOF
done
rm -f conftest* a.out b.out a.exe a_out.exe
if test -z "$gmp_cv_prog_cpp_for_build"; then
- as_fn_error "Cannot find build system C preprocessor." "$LINENO" 5
+ as_fn_error $? "Cannot find build system C preprocessor." "$LINENO" 5
fi
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for build system executable suffix" >&5
$as_echo_n "checking for build system executable suffix... " >&6; }
-if test "${gmp_cv_prog_exeext_for_build+set}" = set; then :
+if ${gmp_cv_prog_exeext_for_build+:} false; then :
$as_echo_n "(cached) " >&6
else
cat >conftest.c <<EOF
done
rm -f conftest*
if test "${gmp_cv_prog_exeext_for_build+set}" != set; then
- as_fn_error "Cannot determine executable suffix" "$LINENO" 5
+ as_fn_error $? "Cannot determine executable suffix" "$LINENO" 5
fi
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether build system compiler is ANSI" >&5
$as_echo_n "checking whether build system compiler is ANSI... " >&6; }
-if test "${gmp_cv_c_for_build_ansi+set}" = set; then :
+if ${gmp_cv_c_for_build_ansi+:} false; then :
$as_echo_n "(cached) " >&6
else
cat >conftest.c <<EOF
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for build system compiler math library" >&5
$as_echo_n "checking for build system compiler math library... " >&6; }
-if test "${gmp_cv_check_libm_for_build+set}" = set; then :
+if ${gmp_cv_check_libm_for_build+:} false; then :
$as_echo_n "(cached) " >&6
else
cat >conftest.c <<EOF
set dummy $ac_tool_prefix$ac_prog; ac_word=$2
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_CXX+set}" = set; then :
+if ${ac_cv_prog_CXX+:} false; then :
$as_echo_n "(cached) " >&6
else
if test -n "$CXX"; then
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
ac_cv_prog_CXX="$ac_tool_prefix$ac_prog"
$as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
set dummy $ac_prog; ac_word=$2
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_ac_ct_CXX+set}" = set; then :
+if ${ac_cv_prog_ac_ct_CXX+:} false; then :
$as_echo_n "(cached) " >&6
else
if test -n "$ac_ct_CXX"; then
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
ac_cv_prog_ac_ct_CXX="$ac_prog"
$as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are using the GNU C++ compiler" >&5
$as_echo_n "checking whether we are using the GNU C++ compiler... " >&6; }
-if test "${ac_cv_cxx_compiler_gnu+set}" = set; then :
+if ${ac_cv_cxx_compiler_gnu+:} false; then :
$as_echo_n "(cached) " >&6
else
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
ac_save_CXXFLAGS=$CXXFLAGS
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CXX accepts -g" >&5
$as_echo_n "checking whether $CXX accepts -g... " >&6; }
-if test "${ac_cv_prog_cxx_g+set}" = set; then :
+if ${ac_cv_prog_cxx_g+:} false; then :
$as_echo_n "(cached) " >&6
else
ac_save_cxx_werror_flag=$ac_cxx_werror_flag
# If --enable-cxx=yes but a C++ compiler can't be found, then abort.
if test $want_cxx = no && test $enable_cxx = yes; then
- as_fn_error "C++ compiler not available, see config.log for details" "$LINENO" 5
+ as_fn_error $? "C++ compiler not available, see config.log for details" "$LINENO" 5
fi
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to run the C++ preprocessor" >&5
$as_echo_n "checking how to run the C++ preprocessor... " >&6; }
if test -z "$CXXCPP"; then
- if test "${ac_cv_prog_CXXCPP+set}" = set; then :
+ if ${ac_cv_prog_CXXCPP+:} false; then :
$as_echo_n "(cached) " >&6
else
# Double quotes because CXXCPP needs to be expanded
# Broken: fails on valid input.
continue
fi
-rm -f conftest.err conftest.$ac_ext
+rm -f conftest.err conftest.i conftest.$ac_ext
# OK, works on sane cases. Now check whether nonexistent headers
# can be detected and how.
ac_preproc_ok=:
break
fi
-rm -f conftest.err conftest.$ac_ext
+rm -f conftest.err conftest.i conftest.$ac_ext
done
# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped.
-rm -f conftest.err conftest.$ac_ext
+rm -f conftest.i conftest.err conftest.$ac_ext
if $ac_preproc_ok; then :
break
fi
# Broken: fails on valid input.
continue
fi
-rm -f conftest.err conftest.$ac_ext
+rm -f conftest.err conftest.i conftest.$ac_ext
# OK, works on sane cases. Now check whether nonexistent headers
# can be detected and how.
ac_preproc_ok=:
break
fi
-rm -f conftest.err conftest.$ac_ext
+rm -f conftest.err conftest.i conftest.$ac_ext
done
# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped.
-rm -f conftest.err conftest.$ac_ext
+rm -f conftest.i conftest.err conftest.$ac_ext
if $ac_preproc_ok; then :
else
{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error "C++ preprocessor \"$CXXCPP\" fails sanity check
-See \`config.log' for more details." "$LINENO" 5; }
+as_fn_error $? "C++ preprocessor \"$CXXCPP\" fails sanity check
+See \`config.log' for more details" "$LINENO" 5; }
fi
ac_ext=c
# deciding the compiler.
#
+
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for grep that handles long lines and -e" >&5
$as_echo_n "checking for grep that handles long lines and -e... " >&6; }
-if test "${ac_cv_path_GREP+set}" = set; then :
+if ${ac_cv_path_GREP+:} false; then :
$as_echo_n "(cached) " >&6
else
if test -z "$GREP"; then
for ac_prog in grep ggrep; do
for ac_exec_ext in '' $ac_executable_extensions; do
ac_path_GREP="$as_dir/$ac_prog$ac_exec_ext"
- { test -f "$ac_path_GREP" && $as_test_x "$ac_path_GREP"; } || continue
+ as_fn_executable_p "$ac_path_GREP" || continue
# Check for GNU ac_path_GREP and select it if it is found.
# Check for GNU $ac_path_GREP
case `"$ac_path_GREP" --version 2>&1` in
done
IFS=$as_save_IFS
if test -z "$ac_cv_path_GREP"; then
- as_fn_error "no acceptable grep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5
+ as_fn_error $? "no acceptable grep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5
fi
else
ac_cv_path_GREP=$GREP
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for egrep" >&5
$as_echo_n "checking for egrep... " >&6; }
-if test "${ac_cv_path_EGREP+set}" = set; then :
+if ${ac_cv_path_EGREP+:} false; then :
$as_echo_n "(cached) " >&6
else
if echo a | $GREP -E '(a|b)' >/dev/null 2>&1
for ac_prog in egrep; do
for ac_exec_ext in '' $ac_executable_extensions; do
ac_path_EGREP="$as_dir/$ac_prog$ac_exec_ext"
- { test -f "$ac_path_EGREP" && $as_test_x "$ac_path_EGREP"; } || continue
+ as_fn_executable_p "$ac_path_EGREP" || continue
# Check for GNU ac_path_EGREP and select it if it is found.
# Check for GNU $ac_path_EGREP
case `"$ac_path_EGREP" --version 2>&1` in
done
IFS=$as_save_IFS
if test -z "$ac_cv_path_EGREP"; then
- as_fn_error "no acceptable egrep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5
+ as_fn_error $? "no acceptable egrep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5
fi
else
ac_cv_path_EGREP=$EGREP
case "$path $fat_path" in
*mmx*) { $as_echo "$as_me:${as_lineno-$LINENO}: checking if the assembler knows about MMX instructions" >&5
$as_echo_n "checking if the assembler knows about MMX instructions... " >&6; }
-if test "${gmp_cv_asm_x86_mmx+set}" = set; then :
+if ${gmp_cv_asm_x86_mmx+:} false; then :
$as_echo_n "(cached) " >&6
else
cat >conftest.s <<EOF
case "$path $fat_path" in
*sse2*) { $as_echo "$as_me:${as_lineno-$LINENO}: checking if the assembler knows about SSE2 instructions" >&5
$as_echo_n "checking if the assembler knows about SSE2 instructions... " >&6; }
-if test "${gmp_cv_asm_x86_sse2+set}" = set; then :
+if ${gmp_cv_asm_x86_sse2+:} false; then :
$as_echo_n "(cached) " >&6
else
cat >conftest.s <<EOF
tmp_path=
for i in $fat_path; do
- case $i in
- */sse2) ;;
- *) tmp_path="$tmp_path $i" ;;
- esac
-done
-fat_path="$tmp_path"
-
-
- ;;
-esac
- ;;
- esac
- fi
- ;;
-esac
-
-
-cat >&5 <<EOF
-Decided:
-ABI=$ABI
-CC=$CC
-CFLAGS=$CFLAGS
-CPPFLAGS=$CPPFLAGS
-GMP_LDFLAGS=$GMP_LDFLAGS
-CXX=$CXX
-CXXFLAGS=$CXXFLAGS
-path=$path
-EOF
-echo "using ABI=\"$ABI\""
-echo " CC=\"$CC\""
-echo " CFLAGS=\"$CFLAGS\""
-echo " CPPFLAGS=\"$CPPFLAGS\""
-if test $want_cxx = yes; then
- echo " CXX=\"$CXX\""
- echo " CXXFLAGS=\"$CXXFLAGS\""
-fi
-echo " MPN_PATH=\"$path\""
-
-
-# Automake ansi2knr support.
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for function prototypes" >&5
-$as_echo_n "checking for function prototypes... " >&6; }
-if test "$ac_cv_prog_cc_c89" != no; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
-$as_echo "yes" >&6; }
-
-$as_echo "#define PROTOTYPES 1" >>confdefs.h
-
-
-$as_echo "#define __PROTOTYPES 1" >>confdefs.h
-
-else
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-fi
-
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for ANSI C header files" >&5
-$as_echo_n "checking for ANSI C header files... " >&6; }
-if test "${ac_cv_header_stdc+set}" = set; then :
- $as_echo_n "(cached) " >&6
-else
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-#include <stdlib.h>
-#include <stdarg.h>
-#include <string.h>
-#include <float.h>
-
-int
-main ()
-{
-
- ;
- return 0;
-}
-_ACEOF
-if ac_fn_c_try_compile "$LINENO"; then :
- ac_cv_header_stdc=yes
-else
- ac_cv_header_stdc=no
-fi
-rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
-
-if test $ac_cv_header_stdc = yes; then
- # SunOS 4.x string.h does not declare mem*, contrary to ANSI.
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-#include <string.h>
-
-_ACEOF
-if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
- $EGREP "memchr" >/dev/null 2>&1; then :
-
-else
- ac_cv_header_stdc=no
-fi
-rm -f conftest*
-
-fi
-
-if test $ac_cv_header_stdc = yes; then
- # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI.
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-#include <stdlib.h>
-
-_ACEOF
-if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
- $EGREP "free" >/dev/null 2>&1; then :
-
-else
- ac_cv_header_stdc=no
-fi
-rm -f conftest*
-
-fi
-
-if test $ac_cv_header_stdc = yes; then
- # /bin/cc in Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi.
- if test "$cross_compiling" = yes; then :
- :
-else
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-#include <ctype.h>
-#include <stdlib.h>
-#if ((' ' & 0x0FF) == 0x020)
-# define ISLOWER(c) ('a' <= (c) && (c) <= 'z')
-# define TOUPPER(c) (ISLOWER(c) ? 'A' + ((c) - 'a') : (c))
-#else
-# define ISLOWER(c) \
- (('a' <= (c) && (c) <= 'i') \
- || ('j' <= (c) && (c) <= 'r') \
- || ('s' <= (c) && (c) <= 'z'))
-# define TOUPPER(c) (ISLOWER(c) ? ((c) | 0x40) : (c))
-#endif
-
-#define XOR(e, f) (((e) && !(f)) || (!(e) && (f)))
-int
-main ()
-{
- int i;
- for (i = 0; i < 256; i++)
- if (XOR (islower (i), ISLOWER (i))
- || toupper (i) != TOUPPER (i))
- return 2;
- return 0;
-}
-_ACEOF
-if ac_fn_c_try_run "$LINENO"; then :
-
-else
- ac_cv_header_stdc=no
-fi
-rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
- conftest.$ac_objext conftest.beam conftest.$ac_ext
-fi
-
-fi
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_header_stdc" >&5
-$as_echo "$ac_cv_header_stdc" >&6; }
-if test $ac_cv_header_stdc = yes; then
-
-$as_echo "#define STDC_HEADERS 1" >>confdefs.h
-
-fi
-
-# On IRIX 5.3, sys/types and inttypes.h are conflicting.
-for ac_header in sys/types.h sys/stat.h stdlib.h string.h memory.h strings.h \
- inttypes.h stdint.h unistd.h
-do :
- as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh`
-ac_fn_c_check_header_compile "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default
-"
-eval as_val=\$$as_ac_Header
- if test "x$as_val" = x""yes; then :
- cat >>confdefs.h <<_ACEOF
-#define `$as_echo "HAVE_$ac_header" | $as_tr_cpp` 1
-_ACEOF
-
-fi
-
+ case $i in
+ */sse2) ;;
+ *) tmp_path="$tmp_path $i" ;;
+ esac
done
+fat_path="$tmp_path"
+ ;;
+esac
+ ;;
+ esac
+ fi
+ ;;
+esac
-if test "$ac_cv_prog_cc_stdc" != no; then
- U= ANSI2KNR=
-else
- U=_ ANSI2KNR=./ansi2knr
-fi
-# Ensure some checks needed by ansi2knr itself.
-
-for ac_header in string.h
-do :
- ac_fn_c_check_header_mongrel "$LINENO" "string.h" "ac_cv_header_string_h" "$ac_includes_default"
-if test "x$ac_cv_header_string_h" = x""yes; then :
- cat >>confdefs.h <<_ACEOF
-#define HAVE_STRING_H 1
-_ACEOF
+if test "$enable_assembly" = "no"; then
+ path="generic"
+ CFLAGS="$CFLAGS -DNO_ASM"
+# for abi in $abilist; do
+# eval unset "path_\$abi"
+# eval gcc_${abi}_cflags=\"\$gcc_${abi}_cflags -DNO_ASM\"
+# done
fi
-done
+cat >&5 <<EOF
+Decided:
+ABI=$ABI
+CC=$CC
+CFLAGS=$CFLAGS
+CPPFLAGS=$CPPFLAGS
+GMP_LDFLAGS=$GMP_LDFLAGS
+CXX=$CXX
+CXXFLAGS=$CXXFLAGS
+path=$path
+EOF
+echo "using ABI=\"$ABI\""
+echo " CC=\"$CC\""
+echo " CFLAGS=\"$CFLAGS\""
+echo " CPPFLAGS=\"$CPPFLAGS\""
+if test $want_cxx = yes; then
+ echo " CXX=\"$CXX\""
+ echo " CXXFLAGS=\"$CXXFLAGS\""
+fi
+echo " MPN_PATH=\"$path\""
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether assembler supports --noexecstack option" >&5
$as_echo_n "checking whether assembler supports --noexecstack option... " >&6; }
-if test "${cl_cv_as_noexecstack+set}" = set; then :
+if ${cl_cv_as_noexecstack+:} false; then :
$as_echo_n "(cached) " >&6
else
cat > conftest.c <<EOF
set dummy ${ac_tool_prefix}ar; ac_word=$2
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_AR+set}" = set; then :
+if ${ac_cv_prog_AR+:} false; then :
$as_echo_n "(cached) " >&6
else
if test -n "$AR"; then
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
ac_cv_prog_AR="${ac_tool_prefix}ar"
$as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
set dummy ar; ac_word=$2
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_ac_ct_AR+set}" = set; then :
+if ${ac_cv_prog_ac_ct_AR+:} false; then :
$as_echo_n "(cached) " >&6
else
if test -n "$ac_ct_AR"; then
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
ac_cv_prog_ac_ct_AR="ar"
$as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
gmp_user_NM=$NM
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for BSD- or MS-compatible name lister (nm)" >&5
$as_echo_n "checking for BSD- or MS-compatible name lister (nm)... " >&6; }
-if test "${lt_cv_path_NM+set}" = set; then :
+if ${lt_cv_path_NM+:} false; then :
$as_echo_n "(cached) " >&6
else
if test -n "$NM"; then
set dummy $ac_tool_prefix$ac_prog; ac_word=$2
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_DUMPBIN+set}" = set; then :
+if ${ac_cv_prog_DUMPBIN+:} false; then :
$as_echo_n "(cached) " >&6
else
if test -n "$DUMPBIN"; then
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
ac_cv_prog_DUMPBIN="$ac_tool_prefix$ac_prog"
$as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
set dummy $ac_prog; ac_word=$2
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_ac_ct_DUMPBIN+set}" = set; then :
+if ${ac_cv_prog_ac_ct_DUMPBIN+:} false; then :
$as_echo_n "(cached) " >&6
else
if test -n "$ac_ct_DUMPBIN"; then
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
ac_cv_prog_ac_ct_DUMPBIN="$ac_prog"
$as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking the name lister ($NM) interface" >&5
$as_echo_n "checking the name lister ($NM) interface... " >&6; }
-if test "${lt_cv_nm_interface+set}" = set; then :
+if ${lt_cv_nm_interface+:} false; then :
$as_echo_n "(cached) " >&6
else
lt_cv_nm_interface="BSD nm"
NM=
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for BSD- or MS-compatible name lister (nm)" >&5
$as_echo_n "checking for BSD- or MS-compatible name lister (nm)... " >&6; }
-if test "${lt_cv_path_NM+set}" = set; then :
+if ${lt_cv_path_NM+:} false; then :
$as_echo_n "(cached) " >&6
else
if test -n "$NM"; then
set dummy $ac_tool_prefix$ac_prog; ac_word=$2
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_DUMPBIN+set}" = set; then :
+if ${ac_cv_prog_DUMPBIN+:} false; then :
$as_echo_n "(cached) " >&6
else
if test -n "$DUMPBIN"; then
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
ac_cv_prog_DUMPBIN="$ac_tool_prefix$ac_prog"
$as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
set dummy $ac_prog; ac_word=$2
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_ac_ct_DUMPBIN+set}" = set; then :
+if ${ac_cv_prog_ac_ct_DUMPBIN+:} false; then :
$as_echo_n "(cached) " >&6
else
if test -n "$ac_ct_DUMPBIN"; then
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
ac_cv_prog_ac_ct_DUMPBIN="$ac_prog"
$as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking the name lister ($NM) interface" >&5
$as_echo_n "checking the name lister ($NM) interface... " >&6; }
-if test "${lt_cv_nm_interface+set}" = set; then :
+if ${lt_cv_nm_interface+:} false; then :
$as_echo_n "(cached) " >&6
else
lt_cv_nm_interface="BSD nm"
set dummy ${ac_tool_prefix}as; ac_word=$2
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_AS+set}" = set; then :
+if ${ac_cv_prog_AS+:} false; then :
$as_echo_n "(cached) " >&6
else
if test -n "$AS"; then
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
ac_cv_prog_AS="${ac_tool_prefix}as"
$as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
set dummy as; ac_word=$2
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_ac_ct_AS+set}" = set; then :
+if ${ac_cv_prog_ac_ct_AS+:} false; then :
$as_echo_n "(cached) " >&6
else
if test -n "$ac_ct_AS"; then
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
ac_cv_prog_ac_ct_AS="as"
$as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
set dummy ${ac_tool_prefix}dlltool; ac_word=$2
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_DLLTOOL+set}" = set; then :
+if ${ac_cv_prog_DLLTOOL+:} false; then :
$as_echo_n "(cached) " >&6
else
if test -n "$DLLTOOL"; then
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
ac_cv_prog_DLLTOOL="${ac_tool_prefix}dlltool"
$as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
set dummy dlltool; ac_word=$2
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_ac_ct_DLLTOOL+set}" = set; then :
+if ${ac_cv_prog_ac_ct_DLLTOOL+:} false; then :
$as_echo_n "(cached) " >&6
else
if test -n "$ac_ct_DLLTOOL"; then
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
ac_cv_prog_ac_ct_DLLTOOL="dlltool"
$as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
set dummy ${ac_tool_prefix}objdump; ac_word=$2
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_OBJDUMP+set}" = set; then :
+if ${ac_cv_prog_OBJDUMP+:} false; then :
$as_echo_n "(cached) " >&6
else
if test -n "$OBJDUMP"; then
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
ac_cv_prog_OBJDUMP="${ac_tool_prefix}objdump"
$as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
set dummy objdump; ac_word=$2
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_ac_ct_OBJDUMP+set}" = set; then :
+if ${ac_cv_prog_ac_ct_OBJDUMP+:} false; then :
$as_echo_n "(cached) " >&6
else
if test -n "$ac_ct_OBJDUMP"; then
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
ac_cv_prog_ac_ct_OBJDUMP="objdump"
$as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
fi
# Don't allow both static and DLL.
if test "$enable_shared" != no && test "$enable_static" != no; then
- as_fn_error "cannot build both static and DLL, since gmp.h is different for each.
+ as_fn_error $? "cannot build both static and DLL, since gmp.h is different for each.
Use \"--disable-static --enable-shared\" to build just a DLL." "$LINENO" 5
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for a sed that does not truncate output" >&5
$as_echo_n "checking for a sed that does not truncate output... " >&6; }
-if test "${ac_cv_path_SED+set}" = set; then :
+if ${ac_cv_path_SED+:} false; then :
$as_echo_n "(cached) " >&6
else
ac_script=s/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb/
for ac_prog in sed gsed; do
for ac_exec_ext in '' $ac_executable_extensions; do
ac_path_SED="$as_dir/$ac_prog$ac_exec_ext"
- { test -f "$ac_path_SED" && $as_test_x "$ac_path_SED"; } || continue
+ as_fn_executable_p "$ac_path_SED" || continue
# Check for GNU ac_path_SED and select it if it is found.
# Check for GNU $ac_path_SED
case `"$ac_path_SED" --version 2>&1` in
done
IFS=$as_save_IFS
if test -z "$ac_cv_path_SED"; then
- as_fn_error "no acceptable sed could be found in \$PATH" "$LINENO" 5
+ as_fn_error $? "no acceptable sed could be found in \$PATH" "$LINENO" 5
fi
else
ac_cv_path_SED=$SED
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for fgrep" >&5
$as_echo_n "checking for fgrep... " >&6; }
-if test "${ac_cv_path_FGREP+set}" = set; then :
+if ${ac_cv_path_FGREP+:} false; then :
$as_echo_n "(cached) " >&6
else
if echo 'ab*c' | $GREP -F 'ab*c' >/dev/null 2>&1
for ac_prog in fgrep; do
for ac_exec_ext in '' $ac_executable_extensions; do
ac_path_FGREP="$as_dir/$ac_prog$ac_exec_ext"
- { test -f "$ac_path_FGREP" && $as_test_x "$ac_path_FGREP"; } || continue
+ as_fn_executable_p "$ac_path_FGREP" || continue
# Check for GNU ac_path_FGREP and select it if it is found.
# Check for GNU $ac_path_FGREP
case `"$ac_path_FGREP" --version 2>&1` in
done
IFS=$as_save_IFS
if test -z "$ac_cv_path_FGREP"; then
- as_fn_error "no acceptable fgrep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5
+ as_fn_error $? "no acceptable fgrep could be found in $PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5
fi
else
ac_cv_path_FGREP=$FGREP
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for non-GNU ld" >&5
$as_echo_n "checking for non-GNU ld... " >&6; }
fi
-if test "${lt_cv_path_LD+set}" = set; then :
+if ${lt_cv_path_LD+:} false; then :
$as_echo_n "(cached) " >&6
else
if test -z "$LD"; then
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
$as_echo "no" >&6; }
fi
-test -z "$LD" && as_fn_error "no acceptable ld found in \$PATH" "$LINENO" 5
+test -z "$LD" && as_fn_error $? "no acceptable ld found in \$PATH" "$LINENO" 5
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if the linker ($LD) is GNU ld" >&5
$as_echo_n "checking if the linker ($LD) is GNU ld... " >&6; }
-if test "${lt_cv_prog_gnu_ld+set}" = set; then :
+if ${lt_cv_prog_gnu_ld+:} false; then :
$as_echo_n "(cached) " >&6
else
# I'd rather use --version here, but apparently some GNU lds only accept -v.
# find the maximum length of command line arguments
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking the maximum length of command line arguments" >&5
$as_echo_n "checking the maximum length of command line arguments... " >&6; }
-if test "${lt_cv_sys_max_cmd_len+set}" = set; then :
+if ${lt_cv_sys_max_cmd_len+:} false; then :
$as_echo_n "(cached) " >&6
else
i=0
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to convert $build file names to $host format" >&5
$as_echo_n "checking how to convert $build file names to $host format... " >&6; }
-if test "${lt_cv_to_host_file_cmd+set}" = set; then :
+if ${lt_cv_to_host_file_cmd+:} false; then :
$as_echo_n "(cached) " >&6
else
case $host in
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to convert $build file names to toolchain format" >&5
$as_echo_n "checking how to convert $build file names to toolchain format... " >&6; }
-if test "${lt_cv_to_tool_file_cmd+set}" = set; then :
+if ${lt_cv_to_tool_file_cmd+:} false; then :
$as_echo_n "(cached) " >&6
else
#assume ordinary cross tools, or native build.
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $LD option to reload object files" >&5
$as_echo_n "checking for $LD option to reload object files... " >&6; }
-if test "${lt_cv_ld_reload_flag+set}" = set; then :
+if ${lt_cv_ld_reload_flag+:} false; then :
$as_echo_n "(cached) " >&6
else
lt_cv_ld_reload_flag='-r'
set dummy ${ac_tool_prefix}objdump; ac_word=$2
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_OBJDUMP+set}" = set; then :
+if ${ac_cv_prog_OBJDUMP+:} false; then :
$as_echo_n "(cached) " >&6
else
if test -n "$OBJDUMP"; then
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
ac_cv_prog_OBJDUMP="${ac_tool_prefix}objdump"
$as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
set dummy objdump; ac_word=$2
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_ac_ct_OBJDUMP+set}" = set; then :
+if ${ac_cv_prog_ac_ct_OBJDUMP+:} false; then :
$as_echo_n "(cached) " >&6
else
if test -n "$ac_ct_OBJDUMP"; then
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
ac_cv_prog_ac_ct_OBJDUMP="objdump"
$as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to recognize dependent libraries" >&5
$as_echo_n "checking how to recognize dependent libraries... " >&6; }
-if test "${lt_cv_deplibs_check_method+set}" = set; then :
+if ${lt_cv_deplibs_check_method+:} false; then :
$as_echo_n "(cached) " >&6
else
lt_cv_file_magic_cmd='$MAGIC_CMD'
set dummy ${ac_tool_prefix}dlltool; ac_word=$2
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_DLLTOOL+set}" = set; then :
+if ${ac_cv_prog_DLLTOOL+:} false; then :
$as_echo_n "(cached) " >&6
else
if test -n "$DLLTOOL"; then
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
ac_cv_prog_DLLTOOL="${ac_tool_prefix}dlltool"
$as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
set dummy dlltool; ac_word=$2
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_ac_ct_DLLTOOL+set}" = set; then :
+if ${ac_cv_prog_ac_ct_DLLTOOL+:} false; then :
$as_echo_n "(cached) " >&6
else
if test -n "$ac_ct_DLLTOOL"; then
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
ac_cv_prog_ac_ct_DLLTOOL="dlltool"
$as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to associate runtime and link libraries" >&5
$as_echo_n "checking how to associate runtime and link libraries... " >&6; }
-if test "${lt_cv_sharedlib_from_linklib_cmd+set}" = set; then :
+if ${lt_cv_sharedlib_from_linklib_cmd+:} false; then :
$as_echo_n "(cached) " >&6
else
lt_cv_sharedlib_from_linklib_cmd='unknown'
set dummy $ac_tool_prefix$ac_prog; ac_word=$2
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_AR+set}" = set; then :
+if ${ac_cv_prog_AR+:} false; then :
$as_echo_n "(cached) " >&6
else
if test -n "$AR"; then
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
ac_cv_prog_AR="$ac_tool_prefix$ac_prog"
$as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
set dummy $ac_prog; ac_word=$2
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_ac_ct_AR+set}" = set; then :
+if ${ac_cv_prog_ac_ct_AR+:} false; then :
$as_echo_n "(cached) " >&6
else
if test -n "$ac_ct_AR"; then
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
ac_cv_prog_ac_ct_AR="$ac_prog"
$as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for archiver @FILE support" >&5
$as_echo_n "checking for archiver @FILE support... " >&6; }
-if test "${lt_cv_ar_at_file+set}" = set; then :
+if ${lt_cv_ar_at_file+:} false; then :
$as_echo_n "(cached) " >&6
else
lt_cv_ar_at_file=no
set dummy ${ac_tool_prefix}strip; ac_word=$2
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_STRIP+set}" = set; then :
+if ${ac_cv_prog_STRIP+:} false; then :
$as_echo_n "(cached) " >&6
else
if test -n "$STRIP"; then
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
ac_cv_prog_STRIP="${ac_tool_prefix}strip"
$as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
set dummy strip; ac_word=$2
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_ac_ct_STRIP+set}" = set; then :
+if ${ac_cv_prog_ac_ct_STRIP+:} false; then :
$as_echo_n "(cached) " >&6
else
if test -n "$ac_ct_STRIP"; then
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
ac_cv_prog_ac_ct_STRIP="strip"
$as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
set dummy ${ac_tool_prefix}ranlib; ac_word=$2
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_RANLIB+set}" = set; then :
+if ${ac_cv_prog_RANLIB+:} false; then :
$as_echo_n "(cached) " >&6
else
if test -n "$RANLIB"; then
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
ac_cv_prog_RANLIB="${ac_tool_prefix}ranlib"
$as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
set dummy ranlib; ac_word=$2
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_ac_ct_RANLIB+set}" = set; then :
+if ${ac_cv_prog_ac_ct_RANLIB+:} false; then :
$as_echo_n "(cached) " >&6
else
if test -n "$ac_ct_RANLIB"; then
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
ac_cv_prog_ac_ct_RANLIB="ranlib"
$as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
# Check for command to grab the raw symbol name followed by C symbol from nm.
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking command to parse $NM output from $compiler object" >&5
$as_echo_n "checking command to parse $NM output from $compiler object... " >&6; }
-if test "${lt_cv_sys_global_symbol_pipe+set}" = set; then :
+if ${lt_cv_sys_global_symbol_pipe+:} false; then :
$as_echo_n "(cached) " >&6
else
*)
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: ${with_sysroot}" >&5
$as_echo "${with_sysroot}" >&6; }
- as_fn_error "The sysroot must be an absolute path." "$LINENO" 5
+ as_fn_error $? "The sysroot must be an absolute path." "$LINENO" 5
;;
esac
CFLAGS="$CFLAGS -belf"
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the C compiler needs -belf" >&5
$as_echo_n "checking whether the C compiler needs -belf... " >&6; }
-if test "${lt_cv_cc_needs_belf+set}" = set; then :
+if ${lt_cv_cc_needs_belf+:} false; then :
$as_echo_n "(cached) " >&6
else
ac_ext=c
set dummy ${ac_tool_prefix}mt; ac_word=$2
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_MANIFEST_TOOL+set}" = set; then :
+if ${ac_cv_prog_MANIFEST_TOOL+:} false; then :
$as_echo_n "(cached) " >&6
else
if test -n "$MANIFEST_TOOL"; then
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
ac_cv_prog_MANIFEST_TOOL="${ac_tool_prefix}mt"
$as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
set dummy mt; ac_word=$2
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_ac_ct_MANIFEST_TOOL+set}" = set; then :
+if ${ac_cv_prog_ac_ct_MANIFEST_TOOL+:} false; then :
$as_echo_n "(cached) " >&6
else
if test -n "$ac_ct_MANIFEST_TOOL"; then
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
ac_cv_prog_ac_ct_MANIFEST_TOOL="mt"
$as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
test -z "$MANIFEST_TOOL" && MANIFEST_TOOL=mt
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if $MANIFEST_TOOL is a manifest tool" >&5
$as_echo_n "checking if $MANIFEST_TOOL is a manifest tool... " >&6; }
-if test "${lt_cv_path_mainfest_tool+set}" = set; then :
+if ${lt_cv_path_mainfest_tool+:} false; then :
$as_echo_n "(cached) " >&6
else
lt_cv_path_mainfest_tool=no
set dummy ${ac_tool_prefix}dsymutil; ac_word=$2
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_DSYMUTIL+set}" = set; then :
+if ${ac_cv_prog_DSYMUTIL+:} false; then :
$as_echo_n "(cached) " >&6
else
if test -n "$DSYMUTIL"; then
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
ac_cv_prog_DSYMUTIL="${ac_tool_prefix}dsymutil"
$as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
set dummy dsymutil; ac_word=$2
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_ac_ct_DSYMUTIL+set}" = set; then :
+if ${ac_cv_prog_ac_ct_DSYMUTIL+:} false; then :
$as_echo_n "(cached) " >&6
else
if test -n "$ac_ct_DSYMUTIL"; then
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
ac_cv_prog_ac_ct_DSYMUTIL="dsymutil"
$as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
set dummy ${ac_tool_prefix}nmedit; ac_word=$2
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_NMEDIT+set}" = set; then :
+if ${ac_cv_prog_NMEDIT+:} false; then :
$as_echo_n "(cached) " >&6
else
if test -n "$NMEDIT"; then
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
ac_cv_prog_NMEDIT="${ac_tool_prefix}nmedit"
$as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
set dummy nmedit; ac_word=$2
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_ac_ct_NMEDIT+set}" = set; then :
+if ${ac_cv_prog_ac_ct_NMEDIT+:} false; then :
$as_echo_n "(cached) " >&6
else
if test -n "$ac_ct_NMEDIT"; then
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
ac_cv_prog_ac_ct_NMEDIT="nmedit"
$as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
set dummy ${ac_tool_prefix}lipo; ac_word=$2
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_LIPO+set}" = set; then :
+if ${ac_cv_prog_LIPO+:} false; then :
$as_echo_n "(cached) " >&6
else
if test -n "$LIPO"; then
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
ac_cv_prog_LIPO="${ac_tool_prefix}lipo"
$as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
set dummy lipo; ac_word=$2
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_ac_ct_LIPO+set}" = set; then :
+if ${ac_cv_prog_ac_ct_LIPO+:} false; then :
$as_echo_n "(cached) " >&6
else
if test -n "$ac_ct_LIPO"; then
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
ac_cv_prog_ac_ct_LIPO="lipo"
$as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
set dummy ${ac_tool_prefix}otool; ac_word=$2
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_OTOOL+set}" = set; then :
+if ${ac_cv_prog_OTOOL+:} false; then :
$as_echo_n "(cached) " >&6
else
if test -n "$OTOOL"; then
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
ac_cv_prog_OTOOL="${ac_tool_prefix}otool"
$as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
set dummy otool; ac_word=$2
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_ac_ct_OTOOL+set}" = set; then :
+if ${ac_cv_prog_ac_ct_OTOOL+:} false; then :
$as_echo_n "(cached) " >&6
else
if test -n "$ac_ct_OTOOL"; then
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
ac_cv_prog_ac_ct_OTOOL="otool"
$as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
set dummy ${ac_tool_prefix}otool64; ac_word=$2
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_OTOOL64+set}" = set; then :
+if ${ac_cv_prog_OTOOL64+:} false; then :
$as_echo_n "(cached) " >&6
else
if test -n "$OTOOL64"; then
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
ac_cv_prog_OTOOL64="${ac_tool_prefix}otool64"
$as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
set dummy otool64; ac_word=$2
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_ac_ct_OTOOL64+set}" = set; then :
+if ${ac_cv_prog_ac_ct_OTOOL64+:} false; then :
$as_echo_n "(cached) " >&6
else
if test -n "$ac_ct_OTOOL64"; then
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
ac_cv_prog_ac_ct_OTOOL64="otool64"
$as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for -single_module linker flag" >&5
$as_echo_n "checking for -single_module linker flag... " >&6; }
-if test "${lt_cv_apple_cc_single_mod+set}" = set; then :
+if ${lt_cv_apple_cc_single_mod+:} false; then :
$as_echo_n "(cached) " >&6
else
lt_cv_apple_cc_single_mod=no
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for -exported_symbols_list linker flag" >&5
$as_echo_n "checking for -exported_symbols_list linker flag... " >&6; }
-if test "${lt_cv_ld_exported_symbols_list+set}" = set; then :
+if ${lt_cv_ld_exported_symbols_list+:} false; then :
$as_echo_n "(cached) " >&6
else
lt_cv_ld_exported_symbols_list=no
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for -force_load linker flag" >&5
$as_echo_n "checking for -force_load linker flag... " >&6; }
-if test "${lt_cv_ld_force_load+set}" = set; then :
+if ${lt_cv_ld_force_load+:} false; then :
$as_echo_n "(cached) " >&6
else
lt_cv_ld_force_load=no
;;
esac
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for ANSI C header files" >&5
+$as_echo_n "checking for ANSI C header files... " >&6; }
+if ${ac_cv_header_stdc+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+#include <float.h>
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+ ac_cv_header_stdc=yes
+else
+ ac_cv_header_stdc=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
+if test $ac_cv_header_stdc = yes; then
+ # SunOS 4.x string.h does not declare mem*, contrary to ANSI.
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#include <string.h>
+
+_ACEOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+ $EGREP "memchr" >/dev/null 2>&1; then :
+
+else
+ ac_cv_header_stdc=no
+fi
+rm -f conftest*
+
+fi
+
+if test $ac_cv_header_stdc = yes; then
+ # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI.
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#include <stdlib.h>
+
+_ACEOF
+if (eval "$ac_cpp conftest.$ac_ext") 2>&5 |
+ $EGREP "free" >/dev/null 2>&1; then :
+
+else
+ ac_cv_header_stdc=no
+fi
+rm -f conftest*
+
+fi
+
+if test $ac_cv_header_stdc = yes; then
+ # /bin/cc in Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi.
+ if test "$cross_compiling" = yes; then :
+ :
+else
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+#include <ctype.h>
+#include <stdlib.h>
+#if ((' ' & 0x0FF) == 0x020)
+# define ISLOWER(c) ('a' <= (c) && (c) <= 'z')
+# define TOUPPER(c) (ISLOWER(c) ? 'A' + ((c) - 'a') : (c))
+#else
+# define ISLOWER(c) \
+ (('a' <= (c) && (c) <= 'i') \
+ || ('j' <= (c) && (c) <= 'r') \
+ || ('s' <= (c) && (c) <= 'z'))
+# define TOUPPER(c) (ISLOWER(c) ? ((c) | 0x40) : (c))
+#endif
+
+#define XOR(e, f) (((e) && !(f)) || (!(e) && (f)))
+int
+main ()
+{
+ int i;
+ for (i = 0; i < 256; i++)
+ if (XOR (islower (i), ISLOWER (i))
+ || toupper (i) != TOUPPER (i))
+ return 2;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_run "$LINENO"; then :
+
+else
+ ac_cv_header_stdc=no
+fi
+rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
+ conftest.$ac_objext conftest.beam conftest.$ac_ext
+fi
+
+fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_header_stdc" >&5
+$as_echo "$ac_cv_header_stdc" >&6; }
+if test $ac_cv_header_stdc = yes; then
+
+$as_echo "#define STDC_HEADERS 1" >>confdefs.h
+
+fi
+
+# On IRIX 5.3, sys/types and inttypes.h are conflicting.
+for ac_header in sys/types.h sys/stat.h stdlib.h string.h memory.h strings.h \
+ inttypes.h stdint.h unistd.h
+do :
+ as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh`
+ac_fn_c_check_header_compile "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default
+"
+if eval test \"x\$"$as_ac_Header"\" = x"yes"; then :
+ cat >>confdefs.h <<_ACEOF
+#define `$as_echo "HAVE_$ac_header" | $as_tr_cpp` 1
+_ACEOF
+
+fi
+
+done
+
+
for ac_header in dlfcn.h
do :
ac_fn_c_check_header_compile "$LINENO" "dlfcn.h" "ac_cv_header_dlfcn_h" "$ac_includes_default
"
-if test "x$ac_cv_header_dlfcn_h" = x""yes; then :
+if test "x$ac_cv_header_dlfcn_h" = xyes; then :
cat >>confdefs.h <<_ACEOF
#define HAVE_DLFCN_H 1
_ACEOF
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for objdir" >&5
$as_echo_n "checking for objdir... " >&6; }
-if test "${lt_cv_objdir+set}" = set; then :
+if ${lt_cv_objdir+:} false; then :
$as_echo_n "(cached) " >&6
else
rm -f .libs 2>/dev/null
if test "$file_magic_cmd" = '$MAGIC_CMD'; then
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for ${ac_tool_prefix}file" >&5
$as_echo_n "checking for ${ac_tool_prefix}file... " >&6; }
-if test "${lt_cv_path_MAGIC_CMD+set}" = set; then :
+if ${lt_cv_path_MAGIC_CMD+:} false; then :
$as_echo_n "(cached) " >&6
else
case $MAGIC_CMD in
if test -n "$ac_tool_prefix"; then
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for file" >&5
$as_echo_n "checking for file... " >&6; }
-if test "${lt_cv_path_MAGIC_CMD+set}" = set; then :
+if ${lt_cv_path_MAGIC_CMD+:} false; then :
$as_echo_n "(cached) " >&6
else
case $MAGIC_CMD in
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler supports -fno-rtti -fno-exceptions" >&5
$as_echo_n "checking if $compiler supports -fno-rtti -fno-exceptions... " >&6; }
-if test "${lt_cv_prog_compiler_rtti_exceptions+set}" = set; then :
+if ${lt_cv_prog_compiler_rtti_exceptions+:} false; then :
$as_echo_n "(cached) " >&6
else
lt_cv_prog_compiler_rtti_exceptions=no
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $compiler option to produce PIC" >&5
$as_echo_n "checking for $compiler option to produce PIC... " >&6; }
-if test "${lt_cv_prog_compiler_pic+set}" = set; then :
+if ${lt_cv_prog_compiler_pic+:} false; then :
$as_echo_n "(cached) " >&6
else
lt_cv_prog_compiler_pic=$lt_prog_compiler_pic
if test -n "$lt_prog_compiler_pic"; then
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler PIC flag $lt_prog_compiler_pic works" >&5
$as_echo_n "checking if $compiler PIC flag $lt_prog_compiler_pic works... " >&6; }
-if test "${lt_cv_prog_compiler_pic_works+set}" = set; then :
+if ${lt_cv_prog_compiler_pic_works+:} false; then :
$as_echo_n "(cached) " >&6
else
lt_cv_prog_compiler_pic_works=no
wl=$lt_prog_compiler_wl eval lt_tmp_static_flag=\"$lt_prog_compiler_static\"
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler static flag $lt_tmp_static_flag works" >&5
$as_echo_n "checking if $compiler static flag $lt_tmp_static_flag works... " >&6; }
-if test "${lt_cv_prog_compiler_static_works+set}" = set; then :
+if ${lt_cv_prog_compiler_static_works+:} false; then :
$as_echo_n "(cached) " >&6
else
lt_cv_prog_compiler_static_works=no
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler supports -c -o file.$ac_objext" >&5
$as_echo_n "checking if $compiler supports -c -o file.$ac_objext... " >&6; }
-if test "${lt_cv_prog_compiler_c_o+set}" = set; then :
+if ${lt_cv_prog_compiler_c_o+:} false; then :
$as_echo_n "(cached) " >&6
else
lt_cv_prog_compiler_c_o=no
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler supports -c -o file.$ac_objext" >&5
$as_echo_n "checking if $compiler supports -c -o file.$ac_objext... " >&6; }
-if test "${lt_cv_prog_compiler_c_o+set}" = set; then :
+if ${lt_cv_prog_compiler_c_o+:} false; then :
$as_echo_n "(cached) " >&6
else
lt_cv_prog_compiler_c_o=no
if test "${lt_cv_aix_libpath+set}" = set; then
aix_libpath=$lt_cv_aix_libpath
else
- if test "${lt_cv_aix_libpath_+set}" = set; then :
+ if ${lt_cv_aix_libpath_+:} false; then :
$as_echo_n "(cached) " >&6
else
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
if test "${lt_cv_aix_libpath+set}" = set; then
aix_libpath=$lt_cv_aix_libpath
else
- if test "${lt_cv_aix_libpath_+set}" = set; then :
+ if ${lt_cv_aix_libpath_+:} false; then :
$as_echo_n "(cached) " >&6
else
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
# (HP92453-01 A.11.01.20 doesn't, HP92453-01 B.11.X.35175-35176.GP does)
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if $CC understands -b" >&5
$as_echo_n "checking if $CC understands -b... " >&6; }
-if test "${lt_cv_prog_compiler__b+set}" = set; then :
+if ${lt_cv_prog_compiler__b+:} false; then :
$as_echo_n "(cached) " >&6
else
lt_cv_prog_compiler__b=no
# This should be the same for all languages, so no per-tag cache variable.
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the $host_os linker accepts -exported_symbol" >&5
$as_echo_n "checking whether the $host_os linker accepts -exported_symbol... " >&6; }
-if test "${lt_cv_irix_exported_symbol+set}" = set; then :
+if ${lt_cv_irix_exported_symbol+:} false; then :
$as_echo_n "(cached) " >&6
else
save_LDFLAGS="$LDFLAGS"
# to ld, don't add -lc before -lgcc.
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether -lc should be explicitly linked in" >&5
$as_echo_n "checking whether -lc should be explicitly linked in... " >&6; }
-if test "${lt_cv_archive_cmds_need_lc+set}" = set; then :
+if ${lt_cv_archive_cmds_need_lc+:} false; then :
$as_echo_n "(cached) " >&6
else
$RM conftest*
shlibpath_overrides_runpath=no
# Some binutils ld are patched to set DT_RUNPATH
- if test "${lt_cv_shlibpath_overrides_runpath+set}" = set; then :
+ if ${lt_cv_shlibpath_overrides_runpath+:} false; then :
$as_echo_n "(cached) " >&6
else
lt_cv_shlibpath_overrides_runpath=no
# if libdl is installed we need to link against it
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for dlopen in -ldl" >&5
$as_echo_n "checking for dlopen in -ldl... " >&6; }
-if test "${ac_cv_lib_dl_dlopen+set}" = set; then :
+if ${ac_cv_lib_dl_dlopen+:} false; then :
$as_echo_n "(cached) " >&6
else
ac_check_lib_save_LIBS=$LIBS
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dl_dlopen" >&5
$as_echo "$ac_cv_lib_dl_dlopen" >&6; }
-if test "x$ac_cv_lib_dl_dlopen" = x""yes; then :
+if test "x$ac_cv_lib_dl_dlopen" = xyes; then :
lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-ldl"
else
*)
ac_fn_c_check_func "$LINENO" "shl_load" "ac_cv_func_shl_load"
-if test "x$ac_cv_func_shl_load" = x""yes; then :
+if test "x$ac_cv_func_shl_load" = xyes; then :
lt_cv_dlopen="shl_load"
else
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for shl_load in -ldld" >&5
$as_echo_n "checking for shl_load in -ldld... " >&6; }
-if test "${ac_cv_lib_dld_shl_load+set}" = set; then :
+if ${ac_cv_lib_dld_shl_load+:} false; then :
$as_echo_n "(cached) " >&6
else
ac_check_lib_save_LIBS=$LIBS
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dld_shl_load" >&5
$as_echo "$ac_cv_lib_dld_shl_load" >&6; }
-if test "x$ac_cv_lib_dld_shl_load" = x""yes; then :
+if test "x$ac_cv_lib_dld_shl_load" = xyes; then :
lt_cv_dlopen="shl_load" lt_cv_dlopen_libs="-ldld"
else
ac_fn_c_check_func "$LINENO" "dlopen" "ac_cv_func_dlopen"
-if test "x$ac_cv_func_dlopen" = x""yes; then :
+if test "x$ac_cv_func_dlopen" = xyes; then :
lt_cv_dlopen="dlopen"
else
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for dlopen in -ldl" >&5
$as_echo_n "checking for dlopen in -ldl... " >&6; }
-if test "${ac_cv_lib_dl_dlopen+set}" = set; then :
+if ${ac_cv_lib_dl_dlopen+:} false; then :
$as_echo_n "(cached) " >&6
else
ac_check_lib_save_LIBS=$LIBS
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dl_dlopen" >&5
$as_echo "$ac_cv_lib_dl_dlopen" >&6; }
-if test "x$ac_cv_lib_dl_dlopen" = x""yes; then :
+if test "x$ac_cv_lib_dl_dlopen" = xyes; then :
lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-ldl"
else
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for dlopen in -lsvld" >&5
$as_echo_n "checking for dlopen in -lsvld... " >&6; }
-if test "${ac_cv_lib_svld_dlopen+set}" = set; then :
+if ${ac_cv_lib_svld_dlopen+:} false; then :
$as_echo_n "(cached) " >&6
else
ac_check_lib_save_LIBS=$LIBS
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_svld_dlopen" >&5
$as_echo "$ac_cv_lib_svld_dlopen" >&6; }
-if test "x$ac_cv_lib_svld_dlopen" = x""yes; then :
+if test "x$ac_cv_lib_svld_dlopen" = xyes; then :
lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-lsvld"
else
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for dld_link in -ldld" >&5
$as_echo_n "checking for dld_link in -ldld... " >&6; }
-if test "${ac_cv_lib_dld_dld_link+set}" = set; then :
+if ${ac_cv_lib_dld_dld_link+:} false; then :
$as_echo_n "(cached) " >&6
else
ac_check_lib_save_LIBS=$LIBS
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dld_dld_link" >&5
$as_echo "$ac_cv_lib_dld_dld_link" >&6; }
-if test "x$ac_cv_lib_dld_dld_link" = x""yes; then :
+if test "x$ac_cv_lib_dld_dld_link" = xyes; then :
lt_cv_dlopen="dld_link" lt_cv_dlopen_libs="-ldld"
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether a program can dlopen itself" >&5
$as_echo_n "checking whether a program can dlopen itself... " >&6; }
-if test "${lt_cv_dlopen_self+set}" = set; then :
+if ${lt_cv_dlopen_self+:} false; then :
$as_echo_n "(cached) " >&6
else
if test "$cross_compiling" = yes; then :
wl=$lt_prog_compiler_wl eval LDFLAGS=\"\$LDFLAGS $lt_prog_compiler_static\"
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether a statically linked program can dlopen itself" >&5
$as_echo_n "checking whether a statically linked program can dlopen itself... " >&6; }
-if test "${lt_cv_dlopen_self_static+set}" = set; then :
+if ${lt_cv_dlopen_self_static+:} false; then :
$as_echo_n "(cached) " >&6
else
if test "$cross_compiling" = yes; then :
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to run the C++ preprocessor" >&5
$as_echo_n "checking how to run the C++ preprocessor... " >&6; }
if test -z "$CXXCPP"; then
- if test "${ac_cv_prog_CXXCPP+set}" = set; then :
+ if ${ac_cv_prog_CXXCPP+:} false; then :
$as_echo_n "(cached) " >&6
else
# Double quotes because CXXCPP needs to be expanded
# Broken: fails on valid input.
continue
fi
-rm -f conftest.err conftest.$ac_ext
+rm -f conftest.err conftest.i conftest.$ac_ext
# OK, works on sane cases. Now check whether nonexistent headers
# can be detected and how.
ac_preproc_ok=:
break
fi
-rm -f conftest.err conftest.$ac_ext
+rm -f conftest.err conftest.i conftest.$ac_ext
done
# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped.
-rm -f conftest.err conftest.$ac_ext
+rm -f conftest.i conftest.err conftest.$ac_ext
if $ac_preproc_ok; then :
break
fi
# Broken: fails on valid input.
continue
fi
-rm -f conftest.err conftest.$ac_ext
+rm -f conftest.err conftest.i conftest.$ac_ext
# OK, works on sane cases. Now check whether nonexistent headers
# can be detected and how.
ac_preproc_ok=:
break
fi
-rm -f conftest.err conftest.$ac_ext
+rm -f conftest.err conftest.i conftest.$ac_ext
done
# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped.
-rm -f conftest.err conftest.$ac_ext
+rm -f conftest.i conftest.err conftest.$ac_ext
if $ac_preproc_ok; then :
else
{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error "C++ preprocessor \"$CXXCPP\" fails sanity check
-See \`config.log' for more details." "$LINENO" 5; }
+as_fn_error $? "C++ preprocessor \"$CXXCPP\" fails sanity check
+See \`config.log' for more details" "$LINENO" 5; }
fi
ac_ext=c
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for non-GNU ld" >&5
$as_echo_n "checking for non-GNU ld... " >&6; }
fi
-if test "${lt_cv_path_LD+set}" = set; then :
+if ${lt_cv_path_LD+:} false; then :
$as_echo_n "(cached) " >&6
else
if test -z "$LD"; then
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
$as_echo "no" >&6; }
fi
-test -z "$LD" && as_fn_error "no acceptable ld found in \$PATH" "$LINENO" 5
+test -z "$LD" && as_fn_error $? "no acceptable ld found in \$PATH" "$LINENO" 5
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if the linker ($LD) is GNU ld" >&5
$as_echo_n "checking if the linker ($LD) is GNU ld... " >&6; }
-if test "${lt_cv_prog_gnu_ld+set}" = set; then :
+if ${lt_cv_prog_gnu_ld+:} false; then :
$as_echo_n "(cached) " >&6
else
# I'd rather use --version here, but apparently some GNU lds only accept -v.
if test "${lt_cv_aix_libpath+set}" = set; then
aix_libpath=$lt_cv_aix_libpath
else
- if test "${lt_cv_aix_libpath__CXX+set}" = set; then :
+ if ${lt_cv_aix_libpath__CXX+:} false; then :
$as_echo_n "(cached) " >&6
else
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
if test "${lt_cv_aix_libpath+set}" = set; then
aix_libpath=$lt_cv_aix_libpath
else
- if test "${lt_cv_aix_libpath__CXX+set}" = set; then :
+ if ${lt_cv_aix_libpath__CXX+:} false; then :
$as_echo_n "(cached) " >&6
else
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $compiler option to produce PIC" >&5
$as_echo_n "checking for $compiler option to produce PIC... " >&6; }
-if test "${lt_cv_prog_compiler_pic_CXX+set}" = set; then :
+if ${lt_cv_prog_compiler_pic_CXX+:} false; then :
$as_echo_n "(cached) " >&6
else
lt_cv_prog_compiler_pic_CXX=$lt_prog_compiler_pic_CXX
if test -n "$lt_prog_compiler_pic_CXX"; then
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler PIC flag $lt_prog_compiler_pic_CXX works" >&5
$as_echo_n "checking if $compiler PIC flag $lt_prog_compiler_pic_CXX works... " >&6; }
-if test "${lt_cv_prog_compiler_pic_works_CXX+set}" = set; then :
+if ${lt_cv_prog_compiler_pic_works_CXX+:} false; then :
$as_echo_n "(cached) " >&6
else
lt_cv_prog_compiler_pic_works_CXX=no
wl=$lt_prog_compiler_wl_CXX eval lt_tmp_static_flag=\"$lt_prog_compiler_static_CXX\"
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler static flag $lt_tmp_static_flag works" >&5
$as_echo_n "checking if $compiler static flag $lt_tmp_static_flag works... " >&6; }
-if test "${lt_cv_prog_compiler_static_works_CXX+set}" = set; then :
+if ${lt_cv_prog_compiler_static_works_CXX+:} false; then :
$as_echo_n "(cached) " >&6
else
lt_cv_prog_compiler_static_works_CXX=no
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler supports -c -o file.$ac_objext" >&5
$as_echo_n "checking if $compiler supports -c -o file.$ac_objext... " >&6; }
-if test "${lt_cv_prog_compiler_c_o_CXX+set}" = set; then :
+if ${lt_cv_prog_compiler_c_o_CXX+:} false; then :
$as_echo_n "(cached) " >&6
else
lt_cv_prog_compiler_c_o_CXX=no
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler supports -c -o file.$ac_objext" >&5
$as_echo_n "checking if $compiler supports -c -o file.$ac_objext... " >&6; }
-if test "${lt_cv_prog_compiler_c_o_CXX+set}" = set; then :
+if ${lt_cv_prog_compiler_c_o_CXX+:} false; then :
$as_echo_n "(cached) " >&6
else
lt_cv_prog_compiler_c_o_CXX=no
# to ld, don't add -lc before -lgcc.
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether -lc should be explicitly linked in" >&5
$as_echo_n "checking whether -lc should be explicitly linked in... " >&6; }
-if test "${lt_cv_archive_cmds_need_lc_CXX+set}" = set; then :
+if ${lt_cv_archive_cmds_need_lc_CXX+:} false; then :
$as_echo_n "(cached) " >&6
else
$RM conftest*
shlibpath_overrides_runpath=no
# Some binutils ld are patched to set DT_RUNPATH
- if test "${lt_cv_shlibpath_overrides_runpath+set}" = set; then :
+ if ${lt_cv_shlibpath_overrides_runpath+:} false; then :
$as_echo_n "(cached) " >&6
else
lt_cv_shlibpath_overrides_runpath=no
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $compiler option to produce PIC" >&5
$as_echo_n "checking for $compiler option to produce PIC... " >&6; }
-if test "${lt_cv_prog_compiler_pic_F77+set}" = set; then :
+if ${lt_cv_prog_compiler_pic_F77+:} false; then :
$as_echo_n "(cached) " >&6
else
lt_cv_prog_compiler_pic_F77=$lt_prog_compiler_pic_F77
if test -n "$lt_prog_compiler_pic_F77"; then
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler PIC flag $lt_prog_compiler_pic_F77 works" >&5
$as_echo_n "checking if $compiler PIC flag $lt_prog_compiler_pic_F77 works... " >&6; }
-if test "${lt_cv_prog_compiler_pic_works_F77+set}" = set; then :
+if ${lt_cv_prog_compiler_pic_works_F77+:} false; then :
$as_echo_n "(cached) " >&6
else
lt_cv_prog_compiler_pic_works_F77=no
wl=$lt_prog_compiler_wl_F77 eval lt_tmp_static_flag=\"$lt_prog_compiler_static_F77\"
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler static flag $lt_tmp_static_flag works" >&5
$as_echo_n "checking if $compiler static flag $lt_tmp_static_flag works... " >&6; }
-if test "${lt_cv_prog_compiler_static_works_F77+set}" = set; then :
+if ${lt_cv_prog_compiler_static_works_F77+:} false; then :
$as_echo_n "(cached) " >&6
else
lt_cv_prog_compiler_static_works_F77=no
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler supports -c -o file.$ac_objext" >&5
$as_echo_n "checking if $compiler supports -c -o file.$ac_objext... " >&6; }
-if test "${lt_cv_prog_compiler_c_o_F77+set}" = set; then :
+if ${lt_cv_prog_compiler_c_o_F77+:} false; then :
$as_echo_n "(cached) " >&6
else
lt_cv_prog_compiler_c_o_F77=no
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if $compiler supports -c -o file.$ac_objext" >&5
$as_echo_n "checking if $compiler supports -c -o file.$ac_objext... " >&6; }
-if test "${lt_cv_prog_compiler_c_o_F77+set}" = set; then :
+if ${lt_cv_prog_compiler_c_o_F77+:} false; then :
$as_echo_n "(cached) " >&6
else
lt_cv_prog_compiler_c_o_F77=no
if test "${lt_cv_aix_libpath+set}" = set; then
aix_libpath=$lt_cv_aix_libpath
else
- if test "${lt_cv_aix_libpath__F77+set}" = set; then :
+ if ${lt_cv_aix_libpath__F77+:} false; then :
$as_echo_n "(cached) " >&6
else
cat > conftest.$ac_ext <<_ACEOF
if test "${lt_cv_aix_libpath+set}" = set; then
aix_libpath=$lt_cv_aix_libpath
else
- if test "${lt_cv_aix_libpath__F77+set}" = set; then :
+ if ${lt_cv_aix_libpath__F77+:} false; then :
$as_echo_n "(cached) " >&6
else
cat > conftest.$ac_ext <<_ACEOF
# This should be the same for all languages, so no per-tag cache variable.
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the $host_os linker accepts -exported_symbol" >&5
$as_echo_n "checking whether the $host_os linker accepts -exported_symbol... " >&6; }
-if test "${lt_cv_irix_exported_symbol+set}" = set; then :
+if ${lt_cv_irix_exported_symbol+:} false; then :
$as_echo_n "(cached) " >&6
else
save_LDFLAGS="$LDFLAGS"
# to ld, don't add -lc before -lgcc.
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether -lc should be explicitly linked in" >&5
$as_echo_n "checking whether -lc should be explicitly linked in... " >&6; }
-if test "${lt_cv_archive_cmds_need_lc_F77+set}" = set; then :
+if ${lt_cv_archive_cmds_need_lc_F77+:} false; then :
$as_echo_n "(cached) " >&6
else
$RM conftest*
shlibpath_overrides_runpath=no
# Some binutils ld are patched to set DT_RUNPATH
- if test "${lt_cv_shlibpath_overrides_runpath+set}" = set; then :
+ if ${lt_cv_shlibpath_overrides_runpath+:} false; then :
$as_echo_n "(cached) " >&6
else
lt_cv_shlibpath_overrides_runpath=no
if test "$enable_shared" = yes && test "$enable_static" = yes; then
case $library_names_spec in
*libname.a*)
- as_fn_error "cannot create both shared and static libraries on this system, --disable one of the two" "$LINENO" 5
+ as_fn_error $? "cannot create both shared and static libraries on this system, --disable one of the two" "$LINENO" 5
;;
esac
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for ANSI C header files" >&5
$as_echo_n "checking for ANSI C header files... " >&6; }
-if test "${ac_cv_header_stdc+set}" = set; then :
+if ${ac_cv_header_stdc+:} false; then :
$as_echo_n "(cached) " >&6
else
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether time.h and sys/time.h may both be included" >&5
$as_echo_n "checking whether time.h and sys/time.h may both be included... " >&6; }
-if test "${ac_cv_header_time+set}" = set; then :
+if ${ac_cv_header_time+:} false; then :
$as_echo_n "(cached) " >&6
else
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
do :
as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh`
ac_fn_c_check_header_mongrel "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default"
-eval as_val=\$$as_ac_Header
- if test "x$as_val" = x""yes; then :
+if eval test \"x\$"$as_ac_Header"\" = x"yes"; then :
cat >>confdefs.h <<_ACEOF
#define `$as_echo "HAVE_$ac_header" | $as_tr_cpp` 1
_ACEOF
# endif
#endif
"
-if test "x$ac_cv_header_sys_resource_h" = x""yes; then :
+if test "x$ac_cv_header_sys_resource_h" = xyes; then :
cat >>confdefs.h <<_ACEOF
#define HAVE_SYS_RESOURCE_H 1
_ACEOF
# include <sys/param.h>
#endif
"
-if test "x$ac_cv_header_sys_sysctl_h" = x""yes; then :
+if test "x$ac_cv_header_sys_sysctl_h" = xyes; then :
cat >>confdefs.h <<_ACEOF
#define HAVE_SYS_SYSCTL_H 1
_ACEOF
# include <sys/sysinfo.h>
#endif
"
-if test "x$ac_cv_header_machine_hal_sysinfo_h" = x""yes; then :
+if test "x$ac_cv_header_machine_hal_sysinfo_h" = xyes; then :
cat >>confdefs.h <<_ACEOF
#define HAVE_MACHINE_HAL_SYSINFO_H 1
_ACEOF
# to the man page (but aren't), in glibc they're in stdio.h.
#
ac_fn_c_check_decl "$LINENO" "fgetc" "ac_cv_have_decl_fgetc" "$ac_includes_default"
-if test "x$ac_cv_have_decl_fgetc" = x""yes; then :
+if test "x$ac_cv_have_decl_fgetc" = xyes; then :
ac_have_decl=1
else
ac_have_decl=0
#define HAVE_DECL_FGETC $ac_have_decl
_ACEOF
ac_fn_c_check_decl "$LINENO" "fscanf" "ac_cv_have_decl_fscanf" "$ac_includes_default"
-if test "x$ac_cv_have_decl_fscanf" = x""yes; then :
+if test "x$ac_cv_have_decl_fscanf" = xyes; then :
ac_have_decl=1
else
ac_have_decl=0
#define HAVE_DECL_FSCANF $ac_have_decl
_ACEOF
ac_fn_c_check_decl "$LINENO" "optarg" "ac_cv_have_decl_optarg" "$ac_includes_default"
-if test "x$ac_cv_have_decl_optarg" = x""yes; then :
+if test "x$ac_cv_have_decl_optarg" = xyes; then :
ac_have_decl=1
else
ac_have_decl=0
#define HAVE_DECL_OPTARG $ac_have_decl
_ACEOF
ac_fn_c_check_decl "$LINENO" "ungetc" "ac_cv_have_decl_ungetc" "$ac_includes_default"
-if test "x$ac_cv_have_decl_ungetc" = x""yes; then :
+if test "x$ac_cv_have_decl_ungetc" = xyes; then :
ac_have_decl=1
else
ac_have_decl=0
#define HAVE_DECL_UNGETC $ac_have_decl
_ACEOF
ac_fn_c_check_decl "$LINENO" "vfprintf" "ac_cv_have_decl_vfprintf" "$ac_includes_default"
-if test "x$ac_cv_have_decl_vfprintf" = x""yes; then :
+if test "x$ac_cv_have_decl_vfprintf" = xyes; then :
ac_have_decl=1
else
ac_have_decl=0
ac_fn_c_check_decl "$LINENO" "sys_errlist" "ac_cv_have_decl_sys_errlist" "#include <stdio.h>
#include <errno.h>
"
-if test "x$ac_cv_have_decl_sys_errlist" = x""yes; then :
+if test "x$ac_cv_have_decl_sys_errlist" = xyes; then :
ac_have_decl=1
else
ac_have_decl=0
ac_fn_c_check_decl "$LINENO" "sys_nerr" "ac_cv_have_decl_sys_nerr" "#include <stdio.h>
#include <errno.h>
"
-if test "x$ac_cv_have_decl_sys_nerr" = x""yes; then :
+if test "x$ac_cv_have_decl_sys_nerr" = xyes; then :
ac_have_decl=1
else
ac_have_decl=0
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking return type of signal handlers" >&5
$as_echo_n "checking return type of signal handlers... " >&6; }
-if test "${ac_cv_type_signal+set}" = set; then :
+if ${ac_cv_type_signal+:} false; then :
$as_echo_n "(cached) " >&6
else
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
# the default includes are sufficient for all these types
#
ac_fn_c_check_type "$LINENO" "intmax_t" "ac_cv_type_intmax_t" "$ac_includes_default"
-if test "x$ac_cv_type_intmax_t" = x""yes; then :
+if test "x$ac_cv_type_intmax_t" = xyes; then :
cat >>confdefs.h <<_ACEOF
#define HAVE_INTMAX_T 1
fi
ac_fn_c_check_type "$LINENO" "long double" "ac_cv_type_long_double" "$ac_includes_default"
-if test "x$ac_cv_type_long_double" = x""yes; then :
+if test "x$ac_cv_type_long_double" = xyes; then :
cat >>confdefs.h <<_ACEOF
#define HAVE_LONG_DOUBLE 1
fi
ac_fn_c_check_type "$LINENO" "long long" "ac_cv_type_long_long" "$ac_includes_default"
-if test "x$ac_cv_type_long_long" = x""yes; then :
+if test "x$ac_cv_type_long_long" = xyes; then :
cat >>confdefs.h <<_ACEOF
#define HAVE_LONG_LONG 1
fi
ac_fn_c_check_type "$LINENO" "ptrdiff_t" "ac_cv_type_ptrdiff_t" "$ac_includes_default"
-if test "x$ac_cv_type_ptrdiff_t" = x""yes; then :
+if test "x$ac_cv_type_ptrdiff_t" = xyes; then :
cat >>confdefs.h <<_ACEOF
#define HAVE_PTRDIFF_T 1
fi
ac_fn_c_check_type "$LINENO" "quad_t" "ac_cv_type_quad_t" "$ac_includes_default"
-if test "x$ac_cv_type_quad_t" = x""yes; then :
+if test "x$ac_cv_type_quad_t" = xyes; then :
cat >>confdefs.h <<_ACEOF
#define HAVE_QUAD_T 1
fi
ac_fn_c_check_type "$LINENO" "uint_least32_t" "ac_cv_type_uint_least32_t" "$ac_includes_default"
-if test "x$ac_cv_type_uint_least32_t" = x""yes; then :
+if test "x$ac_cv_type_uint_least32_t" = xyes; then :
cat >>confdefs.h <<_ACEOF
#define HAVE_UINT_LEAST32_T 1
fi
ac_fn_c_check_type "$LINENO" "intptr_t" "ac_cv_type_intptr_t" "$ac_includes_default"
-if test "x$ac_cv_type_intptr_t" = x""yes; then :
+if test "x$ac_cv_type_intptr_t" = xyes; then :
cat >>confdefs.h <<_ACEOF
#define HAVE_INTPTR_T 1
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for preprocessor stringizing operator" >&5
$as_echo_n "checking for preprocessor stringizing operator... " >&6; }
-if test "${ac_cv_c_stringize+set}" = set; then :
+if ${ac_cv_c_stringize+:} false; then :
$as_echo_n "(cached) " >&6
else
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
# But we don't use it in C++ currently.
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for working volatile" >&5
$as_echo_n "checking for working volatile... " >&6; }
-if test "${ac_cv_c_volatile+set}" = set; then :
+if ${ac_cv_c_volatile+:} false; then :
$as_echo_n "(cached) " >&6
else
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for C/C++ restrict keyword" >&5
$as_echo_n "checking for C/C++ restrict keyword... " >&6; }
-if test "${ac_cv_c_restrict+set}" = set; then :
+if ${ac_cv_c_restrict+:} false; then :
$as_echo_n "(cached) " >&6
else
ac_cv_c_restrict=no
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether <stdarg.h> exists and works" >&5
$as_echo_n "checking whether <stdarg.h> exists and works... " >&6; }
-if test "${gmp_cv_c_stdarg+set}" = set; then :
+if ${gmp_cv_c_stdarg+:} false; then :
$as_echo_n "(cached) " >&6
else
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether gcc __attribute__ ((const)) works" >&5
$as_echo_n "checking whether gcc __attribute__ ((const)) works... " >&6; }
-if test "${gmp_cv_c_attribute_const+set}" = set; then :
+if ${gmp_cv_c_attribute_const+:} false; then :
$as_echo_n "(cached) " >&6
else
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether gcc __attribute__ ((malloc)) works" >&5
$as_echo_n "checking whether gcc __attribute__ ((malloc)) works... " >&6; }
-if test "${gmp_cv_c_attribute_malloc+set}" = set; then :
+if ${gmp_cv_c_attribute_malloc+:} false; then :
$as_echo_n "(cached) " >&6
else
cat >conftest.c <<EOF
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether gcc __attribute__ ((mode (XX))) works" >&5
$as_echo_n "checking whether gcc __attribute__ ((mode (XX))) works... " >&6; }
-if test "${gmp_cv_c_attribute_mode+set}" = set; then :
+if ${gmp_cv_c_attribute_mode+:} false; then :
$as_echo_n "(cached) " >&6
else
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether gcc __attribute__ ((noreturn)) works" >&5
$as_echo_n "checking whether gcc __attribute__ ((noreturn)) works... " >&6; }
-if test "${gmp_cv_c_attribute_noreturn+set}" = set; then :
+if ${gmp_cv_c_attribute_noreturn+:} false; then :
$as_echo_n "(cached) " >&6
else
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for inline" >&5
$as_echo_n "checking for inline... " >&6; }
-if test "${ac_cv_c_inline+set}" = set; then :
+if ${ac_cv_c_inline+:} false; then :
$as_echo_n "(cached) " >&6
else
ac_cv_c_inline=no
*-ncr-sysv4.3*)
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for _mwvalidcheckl in -lmw" >&5
$as_echo_n "checking for _mwvalidcheckl in -lmw... " >&6; }
-if test "${ac_cv_lib_mw__mwvalidcheckl+set}" = set; then :
+if ${ac_cv_lib_mw__mwvalidcheckl+:} false; then :
$as_echo_n "(cached) " >&6
else
ac_check_lib_save_LIBS=$LIBS
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_mw__mwvalidcheckl" >&5
$as_echo "$ac_cv_lib_mw__mwvalidcheckl" >&6; }
-if test "x$ac_cv_lib_mw__mwvalidcheckl" = x""yes; then :
+if test "x$ac_cv_lib_mw__mwvalidcheckl" = xyes; then :
LIBM="-lmw"
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for cos in -lm" >&5
$as_echo_n "checking for cos in -lm... " >&6; }
-if test "${ac_cv_lib_m_cos+set}" = set; then :
+if ${ac_cv_lib_m_cos+:} false; then :
$as_echo_n "(cached) " >&6
else
ac_check_lib_save_LIBS=$LIBS
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_m_cos" >&5
$as_echo "$ac_cv_lib_m_cos" >&6; }
-if test "x$ac_cv_lib_m_cos" = x""yes; then :
+if test "x$ac_cv_lib_m_cos" = xyes; then :
LIBM="$LIBM -lm"
fi
*)
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for cos in -lm" >&5
$as_echo_n "checking for cos in -lm... " >&6; }
-if test "${ac_cv_lib_m_cos+set}" = set; then :
+if ${ac_cv_lib_m_cos+:} false; then :
$as_echo_n "(cached) " >&6
else
ac_check_lib_save_LIBS=$LIBS
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_m_cos" >&5
$as_echo "$ac_cv_lib_m_cos" >&6; }
-if test "x$ac_cv_lib_m_cos" = x""yes; then :
+if test "x$ac_cv_lib_m_cos" = xyes; then :
LIBM="-lm"
fi
# for constant arguments. Useless!
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for working alloca.h" >&5
$as_echo_n "checking for working alloca.h... " >&6; }
-if test "${gmp_cv_header_alloca+set}" = set; then :
+if ${gmp_cv_header_alloca+:} false; then :
$as_echo_n "(cached) " >&6
else
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for alloca (via gmp-impl.h)" >&5
$as_echo_n "checking for alloca (via gmp-impl.h)... " >&6; }
-if test "${gmp_cv_func_alloca+set}" = set; then :
+if ${gmp_cv_func_alloca+:} false; then :
$as_echo_n "(cached) " >&6
else
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to allocate temporary memory" >&5
$as_echo_n "checking how to allocate temporary memory... " >&6; }
-if test "${gmp_cv_option_alloca+set}" = set; then :
+if ${gmp_cv_option_alloca+:} false; then :
$as_echo_n "(cached) " >&6
else
case $enable_alloca in
case $gmp_cv_option_alloca in
alloca)
if test $gmp_cv_func_alloca = no; then
- as_fn_error "--enable-alloca=alloca specified, but alloca not available" "$LINENO" 5
+ as_fn_error $? "--enable-alloca=alloca specified, but alloca not available" "$LINENO" 5
fi
$as_echo "#define WANT_TMP_ALLOCA 1" >>confdefs.h
;;
*)
# checks at the start of configure.in should protect us
- as_fn_error "unrecognised --enable-alloca=$gmp_cv_option_alloca" "$LINENO" 5
+ as_fn_error $? "unrecognised --enable-alloca=$gmp_cv_option_alloca" "$LINENO" 5
;;
esac
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether byte ordering is bigendian" >&5
$as_echo_n "checking whether byte ordering is bigendian... " >&6; }
-if test "${ac_cv_c_bigendian+set}" = set; then :
+if ${ac_cv_c_bigendian+:} false; then :
$as_echo_n "(cached) " >&6
else
ac_cv_c_bigendian=unknown
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking format of \`double' floating point" >&5
$as_echo_n "checking format of \`double' floating point... " >&6; }
-if test "${gmp_cv_c_double_format+set}" = set; then :
+if ${gmp_cv_c_double_format+:} false; then :
$as_echo_n "(cached) " >&6
else
gmp_cv_c_double_format=unknown
# syssgi - IRIX specific
# times - not in mingw
#
-# clock_gettime is in librt on *-*-osf5.1. We could look for it
-# there, but that's not worth bothering with unless it has a decent
-# resolution (in a quick test clock_getres said only 1 millisecond).
-#
# AC_FUNC_STRNLEN is not used because we don't want the AC_LIBOBJ
# replacement setups it gives. It detects a faulty strnlen on AIX, but
# missing out on that test is ok since our only use of strnlen is in
# __gmp_replacement_vsnprintf which is not required on AIX since it has a
# vsnprintf.
#
-for ac_func in alarm attr_get clock clock_gettime cputime getpagesize getrusage gettimeofday getsysinfo localeconv memset mmap mprotect nl_langinfo obstack_vprintf popen processor_info pstat_getprocessor raise read_real_time sigaction sigaltstack sigstack syssgi strchr strerror strnlen strtol strtoul sysconf sysctl sysctlbyname times
+for ac_func in alarm attr_get clock cputime getpagesize getrusage gettimeofday getsysinfo localeconv memset mmap mprotect nl_langinfo obstack_vprintf popen processor_info pstat_getprocessor raise read_real_time sigaction sigaltstack sigstack syssgi strchr strerror strnlen strtol strtoul sysconf sysctl sysctlbyname times
do :
as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh`
ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var"
-eval as_val=\$$as_ac_var
- if test "x$as_val" = x""yes; then :
+if eval test \"x\$"$as_ac_var"\" = x"yes"; then :
cat >>confdefs.h <<_ACEOF
#define `$as_echo "HAVE_$ac_func" | $as_tr_cpp` 1
_ACEOF
done
+# clock_gettime is in librt on *-*-osf5.1 and on glibc, so add -lrt to
+# TUNE_LIBS if needed. On linux (tested on x86_32, 2.6.26),
+# clock_getres reports ns accuracy, while in a quick test on osf
+# clock_getres said only 1 millisecond.
+
+old_LIBS="$LIBS"
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for library containing clock_gettime" >&5
+$as_echo_n "checking for library containing clock_gettime... " >&6; }
+if ${ac_cv_search_clock_gettime+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ ac_func_search_save_LIBS=$LIBS
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char clock_gettime ();
+int
+main ()
+{
+return clock_gettime ();
+ ;
+ return 0;
+}
+_ACEOF
+for ac_lib in '' rt; do
+ if test -z "$ac_lib"; then
+ ac_res="none required"
+ else
+ ac_res=-l$ac_lib
+ LIBS="-l$ac_lib $ac_func_search_save_LIBS"
+ fi
+ if ac_fn_c_try_link "$LINENO"; then :
+ ac_cv_search_clock_gettime=$ac_res
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext
+ if ${ac_cv_search_clock_gettime+:} false; then :
+ break
+fi
+done
+if ${ac_cv_search_clock_gettime+:} false; then :
+
+else
+ ac_cv_search_clock_gettime=no
+fi
+rm conftest.$ac_ext
+LIBS=$ac_func_search_save_LIBS
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_clock_gettime" >&5
+$as_echo "$ac_cv_search_clock_gettime" >&6; }
+ac_res=$ac_cv_search_clock_gettime
+if test "$ac_res" != no; then :
+ test "$ac_res" = "none required" || LIBS="$ac_res $LIBS"
+
+
+$as_echo "#define HAVE_CLOCK_GETTIME 1" >>confdefs.h
+
+fi
+
+TUNE_LIBS="$LIBS"
+LIBS="$old_LIBS"
+
+
+
ac_fn_c_check_func "$LINENO" "vsnprintf" "ac_cv_func_vsnprintf"
-if test "x$ac_cv_func_vsnprintf" = x""yes; then :
+if test "x$ac_cv_func_vsnprintf" = xyes; then :
gmp_vsnprintf_exists=yes
else
gmp_vsnprintf_exists=no
else
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether vsnprintf works" >&5
$as_echo_n "checking whether vsnprintf works... " >&6; }
-if test "${gmp_cv_func_vsnprintf+set}" = set; then :
+if ${gmp_cv_func_vsnprintf+:} false; then :
$as_echo_n "(cached) " >&6
else
gmp_cv_func_vsnprintf=yes
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether sscanf needs writable input" >&5
$as_echo_n "checking whether sscanf needs writable input... " >&6; }
-if test "${gmp_cv_func_sscanf_writable_input+set}" = set; then :
+if ${gmp_cv_func_sscanf_writable_input+:} false; then :
$as_echo_n "(cached) " >&6
else
case $host in
$as_echo "#define SSCANF_WRITABLE_INPUT 1" >>confdefs.h
;;
no) ;;
- *) as_fn_error "unrecognised \$gmp_cv_func_sscanf_writable_input" "$LINENO" 5 ;;
+ *) as_fn_error $? "unrecognised \$gmp_cv_func_sscanf_writable_input" "$LINENO" 5 ;;
esac
#
ac_fn_c_check_member "$LINENO" "struct pst_processor" "psp_iticksperclktick" "ac_cv_member_struct_pst_processor_psp_iticksperclktick" "#include <sys/pstat.h>
"
-if test "x$ac_cv_member_struct_pst_processor_psp_iticksperclktick" = x""yes; then :
+if test "x$ac_cv_member_struct_pst_processor_psp_iticksperclktick" = xyes; then :
$as_echo "#define HAVE_PSP_ITICKSPERCLKTICK 1" >>confdefs.h
for ac_header in sstream
do :
ac_fn_cxx_check_header_mongrel "$LINENO" "sstream" "ac_cv_header_sstream" "$ac_includes_default"
-if test "x$ac_cv_header_sstream" = x""yes; then :
+if test "x$ac_cv_header_sstream" = xyes; then :
cat >>confdefs.h <<_ACEOF
#define HAVE_SSTREAM 1
_ACEOF
ac_fn_cxx_check_type "$LINENO" "std::locale" "ac_cv_type_std__locale" "#include <locale>
"
-if test "x$ac_cv_type_std__locale" = x""yes; then :
+if test "x$ac_cv_type_std__locale" = xyes; then :
cat >>confdefs.h <<_ACEOF
#define HAVE_STD__LOCALE 1
# divrem_1 and pre_divrem_1.
gmp_mpn_functions_optional="umul udiv \
- invert_limb sqr_diagonal \
- mul_2 mul_3 mul_4 \
+ invert_limb sqr_diagonal sqr_diag_addlsh1 \
+ mul_2 mul_3 mul_4 mul_5 mul_6 \
addmul_2 addmul_3 addmul_4 addmul_5 addmul_6 addmul_7 addmul_8 \
addlsh1_n sublsh1_n rsblsh1_n rsh1add_n rsh1sub_n \
addlsh2_n sublsh2_n rsblsh2_n \
add_n_sub_n addaddmul_1msb0"
gmp_mpn_functions="$extra_functions \
- add add_1 add_n sub sub_1 sub_n neg com mul_1 addmul_1 \
- submul_1 lshift rshift dive_1 diveby3 divis divrem divrem_1 divrem_2 \
+ add add_1 add_n sub sub_1 sub_n addcnd_n subcnd_n neg com \
+ mul_1 addmul_1 submul_1 \
+ add_err1_n add_err2_n add_err3_n sub_err1_n sub_err2_n sub_err3_n \
+ lshift rshift dive_1 diveby3 divis divrem divrem_1 divrem_2 \
fib2_ui mod_1 mod_34lsub1 mode1o pre_divrem_1 pre_mod_1 dump \
mod_1_1 mod_1_2 mod_1_3 mod_1_4 lshiftc \
mul mul_fft mul_n sqr mul_basecase sqr_basecase nussbaumer_mul \
+ mulmid_basecase toom42_mulmid mulmid_n mulmid \
random random2 pow_1 \
rootrem sqrtrem get_str set_str scan0 scan1 popcount hamdist cmp \
perfsqr perfpow \
- gcd_1 gcd gcdext_1 gcdext gcd_lehmer gcd_subdiv_step \
- gcdext_lehmer gcdext_subdiv_step \
- div_q tdiv_qr jacbase get_d \
- matrix22_mul hgcd2 hgcd mullo_n mullo_basecase \
+ gcd_1 gcd gcdext_1 gcdext gcd_subdiv_step \
+ gcdext_lehmer \
+ div_q tdiv_qr jacbase jacobi_2 jacobi get_d \
+ matrix22_mul matrix22_mul1_inverse_vector \
+ hgcd_matrix hgcd2 hgcd_step hgcd_reduce hgcd hgcd_appr \
+ hgcd2_jacobi hgcd_jacobi \
+ mullo_n mullo_basecase \
toom22_mul toom32_mul toom42_mul toom52_mul toom62_mul \
- toom33_mul toom43_mul toom53_mul toom63_mul \
+ toom33_mul toom43_mul toom53_mul toom54_mul toom63_mul \
toom44_mul \
toom6h_mul toom6_sqr toom8h_mul toom8_sqr \
toom_couple_handling \
toom2_sqr toom3_sqr toom4_sqr \
- toom_eval_dgr3_pm1 toom_eval_dgr3_pm2 \
+ toom_eval_dgr3_pm1 toom_eval_dgr3_pm2 \
toom_eval_pm1 toom_eval_pm2 toom_eval_pm2exp toom_eval_pm2rexp \
toom_interpolate_5pts toom_interpolate_6pts toom_interpolate_7pts \
toom_interpolate_8pts toom_interpolate_12pts toom_interpolate_16pts \
invertappr invert binvert mulmod_bnm1 sqrmod_bnm1 \
+ div_qr_2 div_qr_2n_pi1 div_qr_2u_pi1 \
sbpi1_div_q sbpi1_div_qr sbpi1_divappr_q \
dcpi1_div_q dcpi1_div_qr dcpi1_divappr_q \
mu_div_qr mu_divappr_q mu_div_q \
sbpi1_bdiv_q sbpi1_bdiv_qr \
dcpi1_bdiv_q dcpi1_bdiv_qr \
mu_bdiv_q mu_bdiv_qr \
- bdiv_q bdiv_qr \
- divexact bdiv_dbm1c redc_1 redc_2 redc_n powm powlo powm_sec subcnd_n \
- redc_1_sec trialdiv remove \
+ bdiv_q bdiv_qr broot brootinv bsqrt bsqrtinv \
+ divexact bdiv_dbm1c redc_1 redc_2 redc_n powm powlo powm_sec \
+ sb_div_qr_sec sb_div_r_sec sbpi1_div_qr_sec sbpi1_div_r_sec \
+ trialdiv remove \
and_n andn_n nand_n ior_n iorn_n nior_n xor_n xnor_n \
- copyi copyd zero \
+ copyi copyd zero tabselect \
+ comb_tables \
$gmp_mpn_functions_optional"
pre_divrem_1) tmp_fbase=preinv_divrem_1 ;;
mode1o) tmp_fbase=modexact_1c_odd ;;
pre_mod_1) tmp_fbase=preinv_mod_1 ;;
+ mod_1_1) tmp_fbase=mod_1_1p ;;
+ mod_1_1_cps) tmp_fbase=mod_1_1p_cps ;;
+ mod_1_2) tmp_fbase=mod_1s_2p ;;
+ mod_1_2_cps) tmp_fbase=mod_1s_2p_cps ;;
+ mod_1_3) tmp_fbase=mod_1s_3p ;;
+ mod_1_3_cps) tmp_fbase=mod_1s_3p_cps ;;
+ mod_1_4) tmp_fbase=mod_1s_4p ;;
+ mod_1_4_cps) tmp_fbase=mod_1s_4p_cps ;;
*) tmp_fbase=$tmp_fn ;;
esac
pre_divrem_1) tmp_fbase=preinv_divrem_1 ;;
mode1o) tmp_fbase=modexact_1c_odd ;;
pre_mod_1) tmp_fbase=preinv_mod_1 ;;
+ mod_1_1) tmp_fbase=mod_1_1p ;;
+ mod_1_1_cps) tmp_fbase=mod_1_1p_cps ;;
+ mod_1_2) tmp_fbase=mod_1s_2p ;;
+ mod_1_2_cps) tmp_fbase=mod_1s_2p_cps ;;
+ mod_1_3) tmp_fbase=mod_1s_3p ;;
+ mod_1_3_cps) tmp_fbase=mod_1s_3p_cps ;;
+ mod_1_4) tmp_fbase=mod_1s_4p ;;
+ mod_1_4_cps) tmp_fbase=mod_1s_4p_cps ;;
*) tmp_fbase=$tmp_fn ;;
esac
pre_divrem_1) tmp_fbase=preinv_divrem_1 ;;
mode1o) tmp_fbase=modexact_1c_odd ;;
pre_mod_1) tmp_fbase=preinv_mod_1 ;;
+ mod_1_1) tmp_fbase=mod_1_1p ;;
+ mod_1_1_cps) tmp_fbase=mod_1_1p_cps ;;
+ mod_1_2) tmp_fbase=mod_1s_2p ;;
+ mod_1_2_cps) tmp_fbase=mod_1s_2p_cps ;;
+ mod_1_3) tmp_fbase=mod_1s_3p ;;
+ mod_1_3_cps) tmp_fbase=mod_1s_3p_cps ;;
+ mod_1_4) tmp_fbase=mod_1s_4p ;;
+ mod_1_4_cps) tmp_fbase=mod_1s_4p_cps ;;
*) tmp_fbase=$tmp_fn ;;
esac
tmp_mulfunc=
case $tmp_fn in
add_n|sub_n) tmp_mulfunc="aors_n" ;;
+ add_err1_n|sub_err1_n)
+ tmp_mulfunc="aors_err1_n" ;;
+ add_err2_n|sub_err2_n)
+ tmp_mulfunc="aors_err2_n" ;;
+ add_err3_n|sub_err3_n)
+ tmp_mulfunc="aors_err3_n" ;;
+ addcnd_n|subcnd_n) tmp_mulfunc="aorscnd_n" ;;
addmul_1|submul_1) tmp_mulfunc="aorsmul_1" ;;
popcount|hamdist) tmp_mulfunc="popham" ;;
and_n|andn_n|nand_n | ior_n|iorn_n|nior_n | xor_n|xnor_n)
tmp_mulfunc="aorrlsh_n sorrlsh_n";;
rsh1add_n|rsh1sub_n)
tmp_mulfunc="rsh1aors_n";;
+ sb_div_qr_sec|sb_div_r_sec)
+ tmp_mulfunc="sb_div_sec";;
+ sbpi1_div_qr_sec|sbpi1_div_r_sec)
+ tmp_mulfunc="sbpi1_div_sec";;
esac
tmp_file=$srcdir/mpn/$tmp_dir/$tmp_base.$tmp_ext
if test -f $tmp_file; then
+ # If the host uses a non-standard ABI, check if tmp_file supports it
+ #
+ if test -n "$GMP_NONSTD_ABI" && test $tmp_ext != "c"; then
+ abi=`sed -n 's/^[ ]*ABI_SUPPORT(\(.*\))/\1/p' $tmp_file `
+ if echo "$abi" | grep -q "\\b${GMP_NONSTD_ABI}\\b"; then
+ true
+ else
+ continue
+ fi
+ fi
+
mpn_objects="$mpn_objects ${tmp_prefix}_$tmp_fn.lo"
mpn_objs_in_libgmp="$mpn_objs_in_libgmp mpn/${tmp_prefix}_$tmp_fn.lo"
pre_divrem_1) tmp_fbase=preinv_divrem_1 ;;
mode1o) tmp_fbase=modexact_1c_odd ;;
pre_mod_1) tmp_fbase=preinv_mod_1 ;;
+ mod_1_1) tmp_fbase=mod_1_1p ;;
+ mod_1_1_cps) tmp_fbase=mod_1_1p_cps ;;
+ mod_1_2) tmp_fbase=mod_1s_2p ;;
+ mod_1_2_cps) tmp_fbase=mod_1s_2p_cps ;;
+ mod_1_3) tmp_fbase=mod_1s_3p ;;
+ mod_1_3_cps) tmp_fbase=mod_1s_3p_cps ;;
+ mod_1_4) tmp_fbase=mod_1s_4p ;;
+ mod_1_4_cps) tmp_fbase=mod_1s_4p_cps ;;
*) tmp_fbase=$tmp_fn ;;
esac
define(__gmpn_$tmp_fbase, __gmpn_${tmp_fbase}_$tmp_suffix)
define(__gmpn_$tmp_fbasec,__gmpn_${tmp_fbasec}_${tmp_suffix})
define(__gmpn_preinv_${tmp_fbase},__gmpn_preinv_${tmp_fbase}_${tmp_suffix})
+define(__gmpn_${tmp_fbase}_cps,__gmpn_${tmp_fbase}_cps_${tmp_suffix})
$tmp_d_n_l For k6 and k7 gcd_1 calling their corresponding mpn_modexact_1_odd
ifdef(\`__gmpn_modexact_1_odd',,
#define __gmpn_$tmp_fbase __gmpn_${tmp_fbase}_$tmp_suffix
#define __gmpn_$tmp_fbasec __gmpn_${tmp_fbasec}_${tmp_suffix}
#define __gmpn_preinv_${tmp_fbase} __gmpn_preinv_${tmp_fbase}_${tmp_suffix}
+#define __gmpn_${tmp_fbase}_cps __gmpn_${tmp_fbase}_cps_${tmp_suffix}
#include \"$mpn_relative_top_srcdir/mpn/$tmp_dir/$tmp_base.c\"
" >mpn/${tmp_prefix}_$tmp_fn.c
if grep "^PROLOGUE(mpn_preinv_$tmp_fn)" $tmp_file >/dev/null; then
echo "DECL_preinv_$tmp_fbase (__gmpn_preinv_${tmp_fbase}_$tmp_suffix);" >>fat.h
CPUVEC_SETUP="$CPUVEC_SETUP decided_cpuvec.preinv_$tmp_fbase = __gmpn_preinv_${tmp_fbase}_${tmp_suffix}; \\
+"
+ fi
+
+ # Ditto for any mod_1...cps variant
+ if grep "^PROLOGUE(mpn_${tmp_fbase}_cps)" $tmp_file >/dev/null; then
+ echo "DECL_${tmp_fbase}_cps (__gmpn_${tmp_fbase}_cps_$tmp_suffix);" >>fat.h
+ CPUVEC_SETUP="$CPUVEC_SETUP decided_cpuvec.${tmp_fbase}_cps = __gmpn_${tmp_fbase}_cps_${tmp_suffix}; \\
"
fi
fi
tmp_mulfunc=
case $tmp_fn in
add_n|sub_n) tmp_mulfunc="aors_n" ;;
+ add_err1_n|sub_err1_n)
+ tmp_mulfunc="aors_err1_n" ;;
+ add_err2_n|sub_err2_n)
+ tmp_mulfunc="aors_err2_n" ;;
+ add_err3_n|sub_err3_n)
+ tmp_mulfunc="aors_err3_n" ;;
+ addcnd_n|subcnd_n) tmp_mulfunc="aorscnd_n" ;;
addmul_1|submul_1) tmp_mulfunc="aorsmul_1" ;;
popcount|hamdist) tmp_mulfunc="popham" ;;
and_n|andn_n|nand_n | ior_n|iorn_n|nior_n | xor_n|xnor_n)
tmp_mulfunc="aorrlsh_n sorrlsh_n";;
rsh1add_n|rsh1sub_n)
tmp_mulfunc="rsh1aors_n";;
+ sb_div_qr_sec|sb_div_r_sec)
+ tmp_mulfunc="sb_div_sec";;
+ sbpi1_div_qr_sec|sbpi1_div_r_sec)
+ tmp_mulfunc="sbpi1_div_sec";;
esac
esac
fi
+ # If the host uses a non-standard ABI, check if tmp_file supports it
+ #
+ if test -n "$GMP_NONSTD_ABI" && test $tmp_ext != "c"; then
+ abi=`sed -n 's/^[ ]*ABI_SUPPORT(\(.*\))/\1/p' $tmp_file `
+ if echo "$abi" | grep -q "\\b${GMP_NONSTD_ABI}\\b"; then
+ true
+ else
+ continue
+ fi
+ fi
+
found=yes
eval found_$tmp_ext=yes
fi
done
if test $found = no; then
- as_fn_error "no version of $tmp_fn found in path: $path" "$LINENO" 5
+ as_fn_error $? "no version of $tmp_fn found in path: $path" "$LINENO" 5
fi
fi
done
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for suitable m4" >&5
$as_echo_n "checking for suitable m4... " >&6; }
-if test "${gmp_cv_prog_m4+set}" = set; then :
+if ${gmp_cv_prog_m4+:} false; then :
$as_echo_n "(cached) " >&6
else
if test -n "$M4"; then
done
IFS="$ac_save_ifs"
if test -z "$gmp_cv_prog_m4"; then
- as_fn_error "No usable m4 in \$PATH or /usr/5bin (see config.log for reasons)." "$LINENO" 5
+ as_fn_error $? "No usable m4 in \$PATH or /usr/5bin (see config.log for reasons)." "$LINENO" 5
fi
fi
rm -f conftest.m4
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if m4wrap produces spurious output" >&5
$as_echo_n "checking if m4wrap produces spurious output... " >&6; }
-if test "${gmp_cv_m4_m4wrap_spurious+set}" = set; then :
+if ${gmp_cv_m4_m4wrap_spurious+:} false; then :
$as_echo_n "(cached) " >&6
else
# hide the d-n-l from autoconf's error checking
if test "$gmp_asm_syntax_testing" != no; then
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to switch to text section" >&5
$as_echo_n "checking how to switch to text section... " >&6; }
-if test "${gmp_cv_asm_text+set}" = set; then :
+if ${gmp_cv_asm_text+:} false; then :
$as_echo_n "(cached) " >&6
else
for i in ".text" ".code" ".csect .text[PR]"; do
done
if test -z "$gmp_cv_asm_text"; then
- as_fn_error "Cannot determine text section directive" "$LINENO" 5
+ as_fn_error $? "Cannot determine text section directive" "$LINENO" 5
fi
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to switch to data section" >&5
$as_echo_n "checking how to switch to data section... " >&6; }
-if test "${gmp_cv_asm_data+set}" = set; then :
+if ${gmp_cv_asm_data+:} false; then :
$as_echo_n "(cached) " >&6
else
case $host in
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for assembler label suffix" >&5
$as_echo_n "checking for assembler label suffix... " >&6; }
-if test "${gmp_cv_asm_label_suffix+set}" = set; then :
+if ${gmp_cv_asm_label_suffix+:} false; then :
$as_echo_n "(cached) " >&6
else
gmp_cv_asm_label_suffix=unknown
done
if test "$gmp_cv_asm_label_suffix" = "unknown"; then
- as_fn_error "Cannot determine label suffix" "$LINENO" 5
+ as_fn_error $? "Cannot determine label suffix" "$LINENO" 5
fi
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for assembler global directive" >&5
$as_echo_n "checking for assembler global directive... " >&6; }
-if test "${gmp_cv_asm_globl+set}" = set; then :
+if ${gmp_cv_asm_globl+:} false; then :
$as_echo_n "(cached) " >&6
else
case $host in
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for assembler global directive attribute" >&5
$as_echo_n "checking for assembler global directive attribute... " >&6; }
-if test "${gmp_cv_asm_globl_attr+set}" = set; then :
+if ${gmp_cv_asm_globl_attr+:} false; then :
$as_echo_n "(cached) " >&6
else
case $gmp_cv_asm_globl in
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if globals are prefixed by underscore" >&5
$as_echo_n "checking if globals are prefixed by underscore... " >&6; }
-if test "${gmp_cv_asm_underscore+set}" = set; then :
+if ${gmp_cv_asm_underscore+:} false; then :
$as_echo_n "(cached) " >&6
else
gmp_cv_asm_underscore="unknown"
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to switch to read-only data section" >&5
$as_echo_n "checking how to switch to read-only data section... " >&6; }
-if test "${gmp_cv_asm_rodata+set}" = set; then :
+if ${gmp_cv_asm_rodata+:} false; then :
$as_echo_n "(cached) " >&6
else
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for assembler .type directive" >&5
$as_echo_n "checking for assembler .type directive... " >&6; }
-if test "${gmp_cv_asm_type+set}" = set; then :
+if ${gmp_cv_asm_type+:} false; then :
$as_echo_n "(cached) " >&6
else
gmp_cv_asm_type=
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for assembler .size directive" >&5
$as_echo_n "checking for assembler .size directive... " >&6; }
-if test "${gmp_cv_asm_size+set}" = set; then :
+if ${gmp_cv_asm_size+:} false; then :
$as_echo_n "(cached) " >&6
else
gmp_cv_asm_size=
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for assembler local label prefix" >&5
$as_echo_n "checking for assembler local label prefix... " >&6; }
-if test "${gmp_cv_asm_lsym_prefix+set}" = set; then :
+if ${gmp_cv_asm_lsym_prefix+:} false; then :
$as_echo_n "(cached) " >&6
else
gmp_tmp_pre_appears=yes
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for assembler byte directive" >&5
$as_echo_n "checking for assembler byte directive... " >&6; }
-if test "${gmp_cv_asm_byte+set}" = set; then :
+if ${gmp_cv_asm_byte+:} false; then :
$as_echo_n "(cached) " >&6
else
for i in .byte data1; do
done
if test -z "$gmp_cv_asm_byte"; then
- as_fn_error "Cannot determine how to emit a data byte" "$LINENO" 5
+ as_fn_error $? "Cannot determine how to emit a data byte" "$LINENO" 5
fi
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking how to define a 32-bit word" >&5
$as_echo_n "checking how to define a 32-bit word... " >&6; }
-if test "${gmp_cv_asm_w32+set}" = set; then :
+if ${gmp_cv_asm_w32+:} false; then :
$as_echo_n "(cached) " >&6
else
case $host in
;;
esac
if test -z "$gmp_cv_asm_w32"; then
- as_fn_error "cannot determine how to define a 32-bit word" "$LINENO" 5
+ as_fn_error $? "cannot determine how to define a 32-bit word" "$LINENO" 5
fi
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if .align assembly directive is logarithmic" >&5
$as_echo_n "checking if .align assembly directive is logarithmic... " >&6; }
-if test "${gmp_cv_asm_align_log+set}" = set; then :
+if ${gmp_cv_asm_align_log+:} false; then :
$as_echo_n "(cached) " >&6
else
cat >conftest.s <<EOF
cat conftest.out >&5
echo "configure: failed program was:" >&5
cat conftest.s >&5
- as_fn_error "cannot assemble alignment test" "$LINENO" 5
+ as_fn_error $? "cannot assemble alignment test" "$LINENO" 5
fi
rm -f conftest*
ia64*-*-* | itanium-*-* | itanium2-*-*)
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether assembler .align padding is good" >&5
$as_echo_n "checking whether assembler .align padding is good... " >&6; }
-if test "${gmp_cv_asm_ia64_align_ok+set}" = set; then :
+if ${gmp_cv_asm_ia64_align_ok+:} false; then :
$as_echo_n "(cached) " >&6
else
cat >conftest.awk <<\EOF
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler instruction and register style" >&5
$as_echo_n "checking assembler instruction and register style... " >&6; }
-if test "${gmp_cv_asm_m68k_instruction+set}" = set; then :
+if ${gmp_cv_asm_m68k_instruction+:} false; then :
$as_echo_n "(cached) " >&6
else
for i in "addl %d0,%d1" "add.l %d0,%d1" "addl d0,d1" "add.l d0,d1"; do
done
if test -z "$gmp_cv_asm_m68k_instruction"; then
- as_fn_error "cannot determine assembler instruction and register style" "$LINENO" 5
+ as_fn_error $? "cannot determine assembler instruction and register style" "$LINENO" 5
fi
fi
"addl %d0,%d1") want_dot_size=no; want_register_percent=yes ;;
"add.l d0,d1") want_dot_size=yes; want_register_percent=no ;;
"add.l %d0,%d1") want_dot_size=yes; want_register_percent=yes ;;
-*) as_fn_error "oops, unrecognised instruction and register style" "$LINENO" 5 ;;
+*) as_fn_error $? "oops, unrecognised instruction and register style" "$LINENO" 5 ;;
esac
echo "define(<WANT_REGISTER_PERCENT>, <\`$want_register_percent'>)" >> $gmp_tmpconfigm4
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler addressing style" >&5
$as_echo_n "checking assembler addressing style... " >&6; }
-if test "${gmp_cv_asm_m68k_addressing+set}" = set; then :
+if ${gmp_cv_asm_m68k_addressing+:} false; then :
$as_echo_n "(cached) " >&6
else
case $gmp_cv_asm_m68k_instruction in
addl*) movel=movel ;;
add.l*) movel=move.l ;;
-*) as_fn_error "oops, unrecognised gmp_cv_asm_m68k_instruction" "$LINENO" 5 ;;
+*) as_fn_error $? "oops, unrecognised gmp_cv_asm_m68k_instruction" "$LINENO" 5 ;;
esac
case $gmp_cv_asm_m68k_instruction in
*"%d0,%d1") dreg=%d0; areg=%a0 ;;
*"d0,d1") dreg=d0; areg=a0 ;;
-*) as_fn_error "oops, unrecognised gmp_cv_asm_m68k_instruction" "$LINENO" 5 ;;
+*) as_fn_error $? "oops, unrecognised gmp_cv_asm_m68k_instruction" "$LINENO" 5 ;;
esac
cat >conftest.s <<EOF
$gmp_cv_asm_text
cat conftest.out >&5
echo "configure: failed program was:" >&5
cat conftest.s >&5
- as_fn_error "cannot determine assembler addressing style" "$LINENO" 5
+ as_fn_error $? "cannot determine assembler addressing style" "$LINENO" 5
fi
rm -f conftest*
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler shortest branches" >&5
$as_echo_n "checking assembler shortest branches... " >&6; }
-if test "${gmp_cv_asm_m68k_branches+set}" = set; then :
+if ${gmp_cv_asm_m68k_branches+:} false; then :
$as_echo_n "(cached) " >&6
else
for i in jra jbra bra; do
done
if test -z "$gmp_cv_asm_m68k_branches"; then
- as_fn_error "cannot determine assembler branching style" "$LINENO" 5
+ as_fn_error $? "cannot determine assembler branching style" "$LINENO" 5
fi
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether compiler output is PIC by default" >&5
$as_echo_n "checking whether compiler output is PIC by default... " >&6; }
-if test "${gmp_cv_asm_powerpc_pic+set}" = set; then :
+if ${gmp_cv_asm_powerpc_pic+:} false; then :
$as_echo_n "(cached) " >&6
else
gmp_cv_asm_powerpc_pic=yes
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if the assembler needs r on registers" >&5
$as_echo_n "checking if the assembler needs r on registers... " >&6; }
-if test "${gmp_cv_asm_powerpc_r_registers+set}" = set; then :
+if ${gmp_cv_asm_powerpc_r_registers+:} false; then :
$as_echo_n "(cached) " >&6
else
cat >conftest.s <<EOF
cat conftest.out >&5
echo "configure: failed program was:" >&5
cat conftest.s >&5
- as_fn_error "neither \"mtctr 6\" nor \"mtctr r6\" works" "$LINENO" 5
+ as_fn_error $? "neither \"mtctr 6\" nor \"mtctr r6\" works" "$LINENO" 5
fi
rm -f conftest*
case $host in
*-*-aix*)
case $ABI in
- 64 | aix64)
+ mode64)
echo "include_mpn(\`powerpc64/aix.m4')" >> $gmp_tmpconfigm4i
;;
*)
echo "include_mpn(\`powerpc32/aix.m4')" >> $gmp_tmpconfigm4i
;;
- sparcv9*-*-* | ultrasparc*-*-* | sparc64-*-*)
+ *sparc*-*-*)
case $ABI in
64)
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if the assembler accepts \".register\"" >&5
$as_echo_n "checking if the assembler accepts \".register\"... " >&6; }
-if test "${gmp_cv_asm_sparc_register+set}" = set; then :
+if ${gmp_cv_asm_sparc_register+:} false; then :
$as_echo_n "(cached) " >&6
else
cat >conftest.s <<EOF
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if the .align directive accepts an 0x90 fill in .text" >&5
$as_echo_n "checking if the .align directive accepts an 0x90 fill in .text... " >&6; }
-if test "${gmp_cv_asm_align_fill_0x90+set}" = set; then :
+if ${gmp_cv_asm_align_fill_0x90+:} false; then :
$as_echo_n "(cached) " >&6
else
cat >conftest.s <<EOF
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for assembler COFF type directives" >&5
$as_echo_n "checking for assembler COFF type directives... " >&6; }
-if test "${gmp_cv_asm_x86_coff_type+set}" = set; then :
+if ${gmp_cv_asm_x86_coff_type+:} false; then :
$as_echo_n "(cached) " >&6
else
cat >conftest.s <<EOF
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if _GLOBAL_OFFSET_TABLE_ is prefixed by underscore" >&5
$as_echo_n "checking if _GLOBAL_OFFSET_TABLE_ is prefixed by underscore... " >&6; }
-if test "${gmp_cv_asm_x86_got_underscore+set}" = set; then :
+if ${gmp_cv_asm_x86_got_underscore+:} false; then :
$as_echo_n "(cached) " >&6
else
gmp_cv_asm_x86_got_underscore="not applicable"
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking if the assembler takes cl with shldl" >&5
$as_echo_n "checking if the assembler takes cl with shldl... " >&6; }
-if test "${gmp_cv_asm_x86_shldl_cl+set}" = set; then :
+if ${gmp_cv_asm_x86_shldl_cl+:} false; then :
$as_echo_n "(cached) " >&6
else
cat >conftest.s <<EOF
fi
mcount_nonpic_call=`grep 'call.*mcount' conftest.s`
if test -z "$mcount_nonpic_call"; then
- as_fn_error "Cannot find mcount call for non-PIC" "$LINENO" 5
+ as_fn_error $? "Cannot find mcount call for non-PIC" "$LINENO" 5
fi
else
- as_fn_error "Cannot compile test program for non-PIC" "$LINENO" 5
+ as_fn_error $? "Cannot compile test program for non-PIC" "$LINENO" 5
fi
fi
fi
mcount_pic_call=`grep 'call.*mcount' conftest.s`
if test -z "$mcount_pic_call"; then
- as_fn_error "Cannot find mcount call for PIC" "$LINENO" 5
+ as_fn_error $? "Cannot find mcount call for PIC" "$LINENO" 5
fi
else
- as_fn_error "Cannot compile test program for PIC" "$LINENO" 5
+ as_fn_error $? "Cannot compile test program for PIC" "$LINENO" 5
fi
fi
;;
esac
;;
- 64)
+ 64|x32)
echo "include_mpn(\`x86_64/x86_64-defs.m4')" >> $gmp_tmpconfigm4i
*-*-darwin*)
echo "include_mpn(\`x86_64/darwin.m4')" >> $gmp_tmpconfigm4i
+ ;;
+ *-*-mingw* | *-*-cygwin)
+
+echo "include_mpn(\`x86_64/dos64.m4')" >> $gmp_tmpconfigm4i
;;
esac
;;
fi
done
if test -z "$gmp_mparam_source"; then
- as_fn_error "no version of gmp-mparam.h found in path: $path" "$LINENO" 5
+ as_fn_error $? "no version of gmp-mparam.h found in path: $path" "$LINENO" 5
fi
# For a helpful message from tune/tuneup.c
-# Copy any SQR_TOOM2_THRESHOLD from gmp-mparam.h to config.m4.
-# Some versions of sqr_basecase.asm use this.
+# Copy relevant parameters from gmp-mparam.h to config.m4.
+# We only do this for parameters that are used by some assembly files.
# Fat binaries do this on a per-file basis, so skip in that case.
#
if test -z "$fat_path"; then
- tmp_gmp_karatsuba_sqr_threshold=`sed -n 's/^#define SQR_TOOM2_THRESHOLD[ ]*\([0-9][0-9]*\).*$/\1/p' $gmp_mparam_source`
- if test -n "$tmp_gmp_karatsuba_sqr_threshold"; then
+ for i in SQR_TOOM2_THRESHOLD BMOD_1_TO_MOD_1_THRESHOLD SHLD_SLOW SHRD_SLOW; do
+ value=`sed -n 's/^#define '$i'[ ]*\([0-9][0-9]*\).*$/\1/p' $gmp_mparam_source`
+ if test -n "$value"; then
-echo "define(<SQR_TOOM2_THRESHOLD>,<$tmp_gmp_karatsuba_sqr_threshold>)" >> $gmp_tmpconfigm4
+echo "define(<$i>,<$value>)" >> $gmp_tmpconfigm4
- fi
+ fi
+ done
fi
# This bug is HP SR number 8606223364.
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking size of void *" >&5
$as_echo_n "checking size of void *... " >&6; }
-if test "${ac_cv_sizeof_void_p+set}" = set; then :
+if ${ac_cv_sizeof_void_p+:} false; then :
$as_echo_n "(cached) " >&6
else
if ac_fn_c_compute_int "$LINENO" "(long int) (sizeof (void *))" "ac_cv_sizeof_void_p" "$ac_includes_default"; then :
if test "$ac_cv_type_void_p" = yes; then
{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-{ as_fn_set_status 77
-as_fn_error "cannot compute sizeof (void *)
-See \`config.log' for more details." "$LINENO" 5; }; }
+as_fn_error 77 "cannot compute sizeof (void *)
+See \`config.log' for more details" "$LINENO" 5; }
else
ac_cv_sizeof_void_p=0
fi
# This bug is HP SR number 8606223364.
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking size of unsigned short" >&5
$as_echo_n "checking size of unsigned short... " >&6; }
-if test "${ac_cv_sizeof_unsigned_short+set}" = set; then :
+if ${ac_cv_sizeof_unsigned_short+:} false; then :
$as_echo_n "(cached) " >&6
else
if ac_fn_c_compute_int "$LINENO" "(long int) (sizeof (unsigned short))" "ac_cv_sizeof_unsigned_short" "$ac_includes_default"; then :
if test "$ac_cv_type_unsigned_short" = yes; then
{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-{ as_fn_set_status 77
-as_fn_error "cannot compute sizeof (unsigned short)
-See \`config.log' for more details." "$LINENO" 5; }; }
+as_fn_error 77 "cannot compute sizeof (unsigned short)
+See \`config.log' for more details" "$LINENO" 5; }
else
ac_cv_sizeof_unsigned_short=0
fi
# This bug is HP SR number 8606223364.
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking size of unsigned" >&5
$as_echo_n "checking size of unsigned... " >&6; }
-if test "${ac_cv_sizeof_unsigned+set}" = set; then :
+if ${ac_cv_sizeof_unsigned+:} false; then :
$as_echo_n "(cached) " >&6
else
if ac_fn_c_compute_int "$LINENO" "(long int) (sizeof (unsigned))" "ac_cv_sizeof_unsigned" "$ac_includes_default"; then :
if test "$ac_cv_type_unsigned" = yes; then
{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-{ as_fn_set_status 77
-as_fn_error "cannot compute sizeof (unsigned)
-See \`config.log' for more details." "$LINENO" 5; }; }
+as_fn_error 77 "cannot compute sizeof (unsigned)
+See \`config.log' for more details" "$LINENO" 5; }
else
ac_cv_sizeof_unsigned=0
fi
# This bug is HP SR number 8606223364.
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking size of unsigned long" >&5
$as_echo_n "checking size of unsigned long... " >&6; }
-if test "${ac_cv_sizeof_unsigned_long+set}" = set; then :
+if ${ac_cv_sizeof_unsigned_long+:} false; then :
$as_echo_n "(cached) " >&6
else
if ac_fn_c_compute_int "$LINENO" "(long int) (sizeof (unsigned long))" "ac_cv_sizeof_unsigned_long" "$ac_includes_default"; then :
if test "$ac_cv_type_unsigned_long" = yes; then
{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-{ as_fn_set_status 77
-as_fn_error "cannot compute sizeof (unsigned long)
-See \`config.log' for more details." "$LINENO" 5; }; }
+as_fn_error 77 "cannot compute sizeof (unsigned long)
+See \`config.log' for more details" "$LINENO" 5; }
else
ac_cv_sizeof_unsigned_long=0
fi
# This bug is HP SR number 8606223364.
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking size of mp_limb_t" >&5
$as_echo_n "checking size of mp_limb_t... " >&6; }
-if test "${ac_cv_sizeof_mp_limb_t+set}" = set; then :
+if ${ac_cv_sizeof_mp_limb_t+:} false; then :
$as_echo_n "(cached) " >&6
else
if ac_fn_c_compute_int "$LINENO" "(long int) (sizeof (mp_limb_t))" "ac_cv_sizeof_mp_limb_t" "#define __GMP_WITHIN_CONFIGURE 1 /* ignore template stuff */
if test "$ac_cv_type_mp_limb_t" = yes; then
{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-{ as_fn_set_status 77
-as_fn_error "cannot compute sizeof (mp_limb_t)
-See \`config.log' for more details." "$LINENO" 5; }; }
+as_fn_error 77 "cannot compute sizeof (mp_limb_t)
+See \`config.log' for more details" "$LINENO" 5; }
else
ac_cv_sizeof_mp_limb_t=0
fi
if test "$ac_cv_sizeof_mp_limb_t" = 0; then
- as_fn_error "Oops, mp_limb_t doesn't seem to work" "$LINENO" 5
+ as_fn_error $? "Oops, mp_limb_t doesn't seem to work" "$LINENO" 5
fi
GMP_LIMB_BITS=`expr 8 \* $ac_cv_sizeof_mp_limb_t`
mparam_bits=`sed -n 's/^#define GMP_LIMB_BITS[ ][ ]*\([0-9]*\).*$/\1/p' $gmp_mparam_source`
if test -n "$mparam_bits" && test "$mparam_bits" -ne $GMP_LIMB_BITS; then
if test "$test_CFLAGS" = set; then
- as_fn_error "Oops, mp_limb_t is $GMP_LIMB_BITS bits, but the assembler code
+ as_fn_error $? "Oops, mp_limb_t is $GMP_LIMB_BITS bits, but the assembler code
in this configuration expects $mparam_bits bits.
You appear to have set \$CFLAGS, perhaps you also need to tell GMP the
intended ABI, see \"ABI and ISA\" in the manual." "$LINENO" 5
else
- as_fn_error "Oops, mp_limb_t is $GMP_LIMB_BITS bits, but the assembler code
+ as_fn_error $? "Oops, mp_limb_t is $GMP_LIMB_BITS bits, but the assembler code
in this configuration expects $mparam_bits bits." "$LINENO" 5
fi
fi
-# Exclude the mpn random functions from mpbsd since that would drag in the
-# top-level rand things, all of which are unnecessary for libmp. There's
-# other unnecessary objects too actually, if we could be bothered figuring
-# out exactly which they are.
-#
-mpn_objs_in_libmp=
-for i in $mpn_objs_in_libgmp; do
- case $i in
- *random*) ;;
- *) mpn_objs_in_libmp="$mpn_objs_in_libmp $i" ;;
- esac
-done
-
-
ac_fn_c_check_type "$LINENO" "stack_t" "ac_cv_type_stack_t" "#include <signal.h>
"
-if test "x$ac_cv_type_stack_t" = x""yes; then :
+if test "x$ac_cv_type_stack_t" = xyes; then :
cat >>confdefs.h <<_ACEOF
#define HAVE_STACK_T 1
if test $with_readline != no; then
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for tputs in -lncurses" >&5
$as_echo_n "checking for tputs in -lncurses... " >&6; }
-if test "${ac_cv_lib_ncurses_tputs+set}" = set; then :
+if ${ac_cv_lib_ncurses_tputs+:} false; then :
$as_echo_n "(cached) " >&6
else
ac_check_lib_save_LIBS=$LIBS
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_ncurses_tputs" >&5
$as_echo "$ac_cv_lib_ncurses_tputs" >&6; }
-if test "x$ac_cv_lib_ncurses_tputs" = x""yes; then :
+if test "x$ac_cv_lib_ncurses_tputs" = xyes; then :
LIBCURSES=-lncurses
else
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for tputs in -lcurses" >&5
$as_echo_n "checking for tputs in -lcurses... " >&6; }
-if test "${ac_cv_lib_curses_tputs+set}" = set; then :
+if ${ac_cv_lib_curses_tputs+:} false; then :
$as_echo_n "(cached) " >&6
else
ac_check_lib_save_LIBS=$LIBS
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_curses_tputs" >&5
$as_echo "$ac_cv_lib_curses_tputs" >&6; }
-if test "x$ac_cv_lib_curses_tputs" = x""yes; then :
+if test "x$ac_cv_lib_curses_tputs" = xyes; then :
LIBCURSES=-lcurses
fi
use_readline=no
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for readline in -lreadline" >&5
$as_echo_n "checking for readline in -lreadline... " >&6; }
-if test "${ac_cv_lib_readline_readline+set}" = set; then :
+if ${ac_cv_lib_readline_readline+:} false; then :
$as_echo_n "(cached) " >&6
else
ac_check_lib_save_LIBS=$LIBS
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_readline_readline" >&5
$as_echo "$ac_cv_lib_readline_readline" >&6; }
-if test "x$ac_cv_lib_readline_readline" = x""yes; then :
+if test "x$ac_cv_lib_readline_readline" = xyes; then :
ac_fn_c_check_header_mongrel "$LINENO" "readline/readline.h" "ac_cv_header_readline_readline_h" "$ac_includes_default"
-if test "x$ac_cv_header_readline_readline_h" = x""yes; then :
+if test "x$ac_cv_header_readline_readline_h" = xyes; then :
ac_fn_c_check_header_mongrel "$LINENO" "readline/history.h" "ac_cv_header_readline_history_h" "$ac_includes_default"
-if test "x$ac_cv_header_readline_history_h" = x""yes; then :
+if test "x$ac_cv_header_readline_history_h" = xyes; then :
use_readline=yes
fi
set dummy $ac_prog; ac_word=$2
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_YACC+set}" = set; then :
+if ${ac_cv_prog_YACC+:} false; then :
$as_echo_n "(cached) " >&6
else
if test -n "$YACC"; then
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
ac_cv_prog_YACC="$ac_prog"
$as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
set dummy $ac_prog; ac_word=$2
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_LEX+set}" = set; then :
+if ${ac_cv_prog_LEX+:} false; then :
$as_echo_n "(cached) " >&6
else
if test -n "$LEX"; then
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
ac_cv_prog_LEX="$ac_prog"
$as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
b { REJECT; }
c { yymore (); }
d { yyless (1); }
-e { yyless (input () != 0); }
+e { /* IRIX 6.5 flex 2.5.4 underquotes its yyless argument. */
+ yyless ((input () != 0)); }
f { unput (yytext[0]); }
. { BEGIN INITIAL; }
%%
test $ac_status = 0; }
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking lex output file root" >&5
$as_echo_n "checking lex output file root... " >&6; }
-if test "${ac_cv_prog_lex_root+set}" = set; then :
+if ${ac_cv_prog_lex_root+:} false; then :
$as_echo_n "(cached) " >&6
else
elif test -f lexyy.c; then
ac_cv_prog_lex_root=lexyy
else
- as_fn_error "cannot find output from $LEX; giving up" "$LINENO" 5
+ as_fn_error $? "cannot find output from $LEX; giving up" "$LINENO" 5
fi
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_lex_root" >&5
if test -z "${LEXLIB+set}"; then
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking lex library" >&5
$as_echo_n "checking lex library... " >&6; }
-if test "${ac_cv_lib_lex+set}" = set; then :
+if ${ac_cv_lib_lex+:} false; then :
$as_echo_n "(cached) " >&6
else
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether yytext is a pointer" >&5
$as_echo_n "checking whether yytext is a pointer... " >&6; }
-if test "${ac_cv_prog_lex_yytext_pointer+set}" = set; then :
+if ${ac_cv_prog_lex_yytext_pointer+:} false; then :
$as_echo_n "(cached) " >&6
else
# POSIX says lex can declare yytext either as a pointer or an array; the
LIBS="$LEXLIB $ac_save_LIBS"
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
-#define YYTEXT_POINTER 1
+
+ #define YYTEXT_POINTER 1
`cat $LEX_OUTPUT_ROOT.c`
_ACEOF
if ac_fn_c_try_link "$LINENO"; then :
set dummy ${ac_tool_prefix}ranlib; ac_word=$2
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_RANLIB+set}" = set; then :
+if ${ac_cv_prog_RANLIB+:} false; then :
$as_echo_n "(cached) " >&6
else
if test -n "$RANLIB"; then
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
ac_cv_prog_RANLIB="${ac_tool_prefix}ranlib"
$as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
set dummy ranlib; ac_word=$2
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_prog_ac_ct_RANLIB+set}" = set; then :
+if ${ac_cv_prog_ac_ct_RANLIB+:} false; then :
$as_echo_n "(cached) " >&6
else
if test -n "$ac_ct_RANLIB"; then
IFS=$as_save_IFS
test -z "$as_dir" && as_dir=.
for ac_exec_ext in '' $ac_executable_extensions; do
- if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
+ if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then
ac_cv_prog_ac_ct_RANLIB="ranlib"
$as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
break 2
# FIXME: Upcoming version of autoconf/automake may not like broken lines.
# Right now automake isn't accepting the new AC_CONFIG_FILES scheme.
-ac_config_files="$ac_config_files Makefile mpbsd/Makefile mpf/Makefile mpn/Makefile mpq/Makefile mpz/Makefile printf/Makefile scanf/Makefile cxx/Makefile tests/Makefile tests/devel/Makefile tests/mpbsd/Makefile tests/mpf/Makefile tests/mpn/Makefile tests/mpq/Makefile tests/mpz/Makefile tests/rand/Makefile tests/misc/Makefile tests/cxx/Makefile doc/Makefile tune/Makefile demos/Makefile demos/calc/Makefile demos/expr/Makefile gmp.h:gmp-h.in mp.h:mp-h.in"
+ac_config_files="$ac_config_files Makefile mpf/Makefile mpn/Makefile mpq/Makefile mpz/Makefile printf/Makefile scanf/Makefile rand/Makefile cxx/Makefile tests/Makefile tests/devel/Makefile tests/mpf/Makefile tests/mpn/Makefile tests/mpq/Makefile tests/mpz/Makefile tests/rand/Makefile tests/misc/Makefile tests/cxx/Makefile doc/Makefile tune/Makefile demos/Makefile demos/calc/Makefile demos/expr/Makefile gmp.h:gmp-h.in"
cat >confcache <<\_ACEOF
# This file is a shell script that caches the results of configure
:end' >>confcache
if diff "$cache_file" confcache >/dev/null 2>&1; then :; else
if test -w "$cache_file"; then
- test "x$cache_file" != "x/dev/null" &&
+ if test "x$cache_file" != "x/dev/null"; then
{ $as_echo "$as_me:${as_lineno-$LINENO}: updating cache $cache_file" >&5
$as_echo "$as_me: updating cache $cache_file" >&6;}
- cat confcache >$cache_file
+ if test ! -f "$cache_file" || test -h "$cache_file"; then
+ cat confcache >"$cache_file"
+ else
+ case $cache_file in #(
+ */* | ?:*)
+ mv -f confcache "$cache_file"$$ &&
+ mv -f "$cache_file"$$ "$cache_file" ;; #(
+ *)
+ mv -f confcache "$cache_file" ;;
+ esac
+ fi
+ fi
else
{ $as_echo "$as_me:${as_lineno-$LINENO}: not updating unwritable cache $cache_file" >&5
$as_echo "$as_me: not updating unwritable cache $cache_file" >&6;}
ac_libobjs=
ac_ltlibobjs=
+U=
for ac_i in : $LIBOBJS; do test "x$ac_i" = x: && continue
# 1. Remove the extension, and $U if already installed.
ac_script='s/\$U\././;s/\.o$//;s/\.obj$//'
fi
if test -z "${MAINTAINER_MODE_TRUE}" && test -z "${MAINTAINER_MODE_FALSE}"; then
- as_fn_error "conditional \"MAINTAINER_MODE\" was never defined.
-Usually this means the macro was only invoked conditionally." "$LINENO" 5
-fi
-if test -z "${WANT_MPBSD_TRUE}" && test -z "${WANT_MPBSD_FALSE}"; then
- as_fn_error "conditional \"WANT_MPBSD\" was never defined.
+ as_fn_error $? "conditional \"MAINTAINER_MODE\" was never defined.
Usually this means the macro was only invoked conditionally." "$LINENO" 5
fi
if test -z "${WANT_CXX_TRUE}" && test -z "${WANT_CXX_FALSE}"; then
- as_fn_error "conditional \"WANT_CXX\" was never defined.
+ as_fn_error $? "conditional \"WANT_CXX\" was never defined.
Usually this means the macro was only invoked conditionally." "$LINENO" 5
fi
if test -z "${ENABLE_STATIC_TRUE}" && test -z "${ENABLE_STATIC_FALSE}"; then
- as_fn_error "conditional \"ENABLE_STATIC\" was never defined.
+ as_fn_error $? "conditional \"ENABLE_STATIC\" was never defined.
Usually this means the macro was only invoked conditionally." "$LINENO" 5
fi
-: ${CONFIG_STATUS=./config.status}
+: "${CONFIG_STATUS=./config.status}"
ac_write_fail=0
ac_clean_files_save=$ac_clean_files
ac_clean_files="$ac_clean_files $CONFIG_STATUS"
IFS=" "" $as_nl"
# Find who we are. Look in the path if we contain no directory separator.
+as_myself=
case $0 in #((
*[\\/]* ) as_myself=$0 ;;
*) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
(unset CDPATH) >/dev/null 2>&1 && unset CDPATH
-# as_fn_error ERROR [LINENO LOG_FD]
-# ---------------------------------
+# as_fn_error STATUS ERROR [LINENO LOG_FD]
+# ----------------------------------------
# Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are
# provided, also output the error to LOG_FD, referencing LINENO. Then exit the
-# script with status $?, using 1 if that was 0.
+# script with STATUS, using 1 if that was 0.
as_fn_error ()
{
- as_status=$?; test $as_status -eq 0 && as_status=1
- if test "$3"; then
- as_lineno=${as_lineno-"$2"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
- $as_echo "$as_me:${as_lineno-$LINENO}: error: $1" >&$3
+ as_status=$1; test $as_status -eq 0 && as_status=1
+ if test "$4"; then
+ as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+ $as_echo "$as_me:${as_lineno-$LINENO}: error: $2" >&$4
fi
- $as_echo "$as_me: error: $1" >&2
+ $as_echo "$as_me: error: $2" >&2
as_fn_exit $as_status
} # as_fn_error
# ... but there are two gotchas:
# 1) On MSYS, both `ln -s file dir' and `ln file dir' fail.
# 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable.
- # In both cases, we have to default to `cp -p'.
+ # In both cases, we have to default to `cp -pR'.
ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe ||
- as_ln_s='cp -p'
+ as_ln_s='cp -pR'
elif ln conf$$.file conf$$ 2>/dev/null; then
as_ln_s=ln
else
- as_ln_s='cp -p'
+ as_ln_s='cp -pR'
fi
else
- as_ln_s='cp -p'
+ as_ln_s='cp -pR'
fi
rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file
rmdir conf$$.dir 2>/dev/null
test -d "$as_dir" && break
done
test -z "$as_dirs" || eval "mkdir $as_dirs"
- } || test -d "$as_dir" || as_fn_error "cannot create directory $as_dir"
+ } || test -d "$as_dir" || as_fn_error $? "cannot create directory $as_dir"
} # as_fn_mkdir_p
as_mkdir_p=false
fi
-if test -x / >/dev/null 2>&1; then
- as_test_x='test -x'
-else
- if ls -dL / >/dev/null 2>&1; then
- as_ls_L_option=L
- else
- as_ls_L_option=
- fi
- as_test_x='
- eval sh -c '\''
- if test -d "$1"; then
- test -d "$1/.";
- else
- case $1 in #(
- -*)set "./$1";;
- esac;
- case `ls -ld'$as_ls_L_option' "$1" 2>/dev/null` in #((
- ???[sx]*):;;*)false;;esac;fi
- '\'' sh
- '
-fi
-as_executable_p=$as_test_x
+
+# as_fn_executable_p FILE
+# -----------------------
+# Test if FILE is an executable regular file.
+as_fn_executable_p ()
+{
+ test -f "$1" && test -x "$1"
+} # as_fn_executable_p
+as_test_x='test -x'
+as_executable_p=as_fn_executable_p
# Sed expression to map a string onto a valid CPP name.
as_tr_cpp="eval sed 'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'"
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
-This file was extended by GNU MP $as_me 5.0.5, which was
-generated by GNU Autoconf 2.65. Invocation command line was
+This file was extended by GNU MP $as_me 5.1.3, which was
+generated by GNU Autoconf 2.69. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
CONFIG_HEADERS = $CONFIG_HEADERS
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
-GNU MP config.status 5.0.5
-configured by $0, generated by GNU Autoconf 2.65,
+GNU MP config.status 5.1.3
+configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\"
-Copyright (C) 2009 Free Software Foundation, Inc.
+Copyright (C) 2012 Free Software Foundation, Inc.
This config.status script is free software; the Free Software Foundation
gives unlimited permission to copy, distribute and modify it."
while test $# != 0
do
case $1 in
- --*=*)
+ --*=?*)
ac_option=`expr "X$1" : 'X\([^=]*\)='`
ac_optarg=`expr "X$1" : 'X[^=]*=\(.*\)'`
ac_shift=:
;;
+ --*=)
+ ac_option=`expr "X$1" : 'X\([^=]*\)='`
+ ac_optarg=
+ ac_shift=:
+ ;;
*)
ac_option=$1
ac_optarg=$2
$ac_shift
case $ac_optarg in
*\'*) ac_optarg=`$as_echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;;
+ '') as_fn_error $? "missing file argument" ;;
esac
as_fn_append CONFIG_FILES " '$ac_optarg'"
ac_need_defaults=false;;
ac_need_defaults=false;;
--he | --h)
# Conflict between --help and --header
- as_fn_error "ambiguous option: \`$1'
+ as_fn_error $? "ambiguous option: \`$1'
Try \`$0 --help' for more information.";;
--help | --hel | -h )
$as_echo "$ac_cs_usage"; exit ;;
ac_cs_silent=: ;;
# This is an error.
- -*) as_fn_error "unrecognized option: \`$1'
+ -*) as_fn_error $? "unrecognized option: \`$1'
Try \`$0 --help' for more information." ;;
*) as_fn_append ac_config_targets " $1"
_ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
if \$ac_cs_recheck; then
- set X '$SHELL' '$0' $ac_configure_args \$ac_configure_extra_args --no-create --no-recursion
+ set X $SHELL '$0' $ac_configure_args \$ac_configure_extra_args --no-create --no-recursion
shift
\$as_echo "running CONFIG_SHELL=$SHELL \$*" >&6
CONFIG_SHELL='$SHELL'
"demos/pexpr-config.h") CONFIG_FILES="$CONFIG_FILES demos/pexpr-config.h:demos/pexpr-config-h.in" ;;
"demos/calc/calc-config.h") CONFIG_FILES="$CONFIG_FILES demos/calc/calc-config.h:demos/calc/calc-config-h.in" ;;
"Makefile") CONFIG_FILES="$CONFIG_FILES Makefile" ;;
- "mpbsd/Makefile") CONFIG_FILES="$CONFIG_FILES mpbsd/Makefile" ;;
"mpf/Makefile") CONFIG_FILES="$CONFIG_FILES mpf/Makefile" ;;
"mpn/Makefile") CONFIG_FILES="$CONFIG_FILES mpn/Makefile" ;;
"mpq/Makefile") CONFIG_FILES="$CONFIG_FILES mpq/Makefile" ;;
"mpz/Makefile") CONFIG_FILES="$CONFIG_FILES mpz/Makefile" ;;
"printf/Makefile") CONFIG_FILES="$CONFIG_FILES printf/Makefile" ;;
"scanf/Makefile") CONFIG_FILES="$CONFIG_FILES scanf/Makefile" ;;
+ "rand/Makefile") CONFIG_FILES="$CONFIG_FILES rand/Makefile" ;;
"cxx/Makefile") CONFIG_FILES="$CONFIG_FILES cxx/Makefile" ;;
"tests/Makefile") CONFIG_FILES="$CONFIG_FILES tests/Makefile" ;;
"tests/devel/Makefile") CONFIG_FILES="$CONFIG_FILES tests/devel/Makefile" ;;
- "tests/mpbsd/Makefile") CONFIG_FILES="$CONFIG_FILES tests/mpbsd/Makefile" ;;
"tests/mpf/Makefile") CONFIG_FILES="$CONFIG_FILES tests/mpf/Makefile" ;;
"tests/mpn/Makefile") CONFIG_FILES="$CONFIG_FILES tests/mpn/Makefile" ;;
"tests/mpq/Makefile") CONFIG_FILES="$CONFIG_FILES tests/mpq/Makefile" ;;
"demos/calc/Makefile") CONFIG_FILES="$CONFIG_FILES demos/calc/Makefile" ;;
"demos/expr/Makefile") CONFIG_FILES="$CONFIG_FILES demos/expr/Makefile" ;;
"gmp.h") CONFIG_FILES="$CONFIG_FILES gmp.h:gmp-h.in" ;;
- "mp.h") CONFIG_FILES="$CONFIG_FILES mp.h:mp-h.in" ;;
- *) as_fn_error "invalid argument: \`$ac_config_target'" "$LINENO" 5;;
+ *) as_fn_error $? "invalid argument: \`$ac_config_target'" "$LINENO" 5;;
esac
done
# after its creation but before its name has been assigned to `$tmp'.
$debug ||
{
- tmp=
+ tmp= ac_tmp=
trap 'exit_status=$?
- { test -z "$tmp" || test ! -d "$tmp" || rm -fr "$tmp"; } && exit $exit_status
+ : "${ac_tmp:=$tmp}"
+ { test ! -d "$ac_tmp" || rm -fr "$ac_tmp"; } && exit $exit_status
' 0
trap 'as_fn_exit 1' 1 2 13 15
}
{
tmp=`(umask 077 && mktemp -d "./confXXXXXX") 2>/dev/null` &&
- test -n "$tmp" && test -d "$tmp"
+ test -d "$tmp"
} ||
{
tmp=./conf$$-$RANDOM
(umask 077 && mkdir "$tmp")
-} || as_fn_error "cannot create a temporary directory in ." "$LINENO" 5
+} || as_fn_error $? "cannot create a temporary directory in ." "$LINENO" 5
+ac_tmp=$tmp
# Set up the scripts for CONFIG_FILES section.
# No need to generate them if there are no CONFIG_FILES.
fi
ac_cs_awk_cr=`$AWK 'BEGIN { print "a\rb" }' </dev/null 2>/dev/null`
if test "$ac_cs_awk_cr" = "a${ac_cr}b"; then
- ac_cs_awk_cr='\r'
+ ac_cs_awk_cr='\\r'
else
ac_cs_awk_cr=$ac_cr
fi
-echo 'BEGIN {' >"$tmp/subs1.awk" &&
+echo 'BEGIN {' >"$ac_tmp/subs1.awk" &&
_ACEOF
echo "$ac_subst_vars" | sed 's/.*/&!$&$ac_delim/' &&
echo "_ACEOF"
} >conf$$subs.sh ||
- as_fn_error "could not make $CONFIG_STATUS" "$LINENO" 5
-ac_delim_num=`echo "$ac_subst_vars" | grep -c '$'`
+ as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5
+ac_delim_num=`echo "$ac_subst_vars" | grep -c '^'`
ac_delim='%!_!# '
for ac_last_try in false false false false false :; do
. ./conf$$subs.sh ||
- as_fn_error "could not make $CONFIG_STATUS" "$LINENO" 5
+ as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5
ac_delim_n=`sed -n "s/.*$ac_delim\$/X/p" conf$$subs.awk | grep -c X`
if test $ac_delim_n = $ac_delim_num; then
break
elif $ac_last_try; then
- as_fn_error "could not make $CONFIG_STATUS" "$LINENO" 5
+ as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5
else
ac_delim="$ac_delim!$ac_delim _$ac_delim!! "
fi
rm -f conf$$subs.sh
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
-cat >>"\$tmp/subs1.awk" <<\\_ACAWK &&
+cat >>"\$ac_tmp/subs1.awk" <<\\_ACAWK &&
_ACEOF
sed -n '
h
rm -f conf$$subs.awk
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
_ACAWK
-cat >>"\$tmp/subs1.awk" <<_ACAWK &&
+cat >>"\$ac_tmp/subs1.awk" <<_ACAWK &&
for (key in S) S_is_set[key] = 1
FS = "\a"
sed "s/$ac_cr\$//; s/$ac_cr/$ac_cs_awk_cr/g"
else
cat
-fi < "$tmp/subs1.awk" > "$tmp/subs.awk" \
- || as_fn_error "could not setup config files machinery" "$LINENO" 5
+fi < "$ac_tmp/subs1.awk" > "$ac_tmp/subs.awk" \
+ || as_fn_error $? "could not setup config files machinery" "$LINENO" 5
_ACEOF
-# VPATH may cause trouble with some makes, so we remove $(srcdir),
-# ${srcdir} and @srcdir@ from VPATH if srcdir is ".", strip leading and
+# VPATH may cause trouble with some makes, so we remove sole $(srcdir),
+# ${srcdir} and @srcdir@ entries from VPATH if srcdir is ".", strip leading and
# trailing colons and then remove the whole line if VPATH becomes empty
# (actually we leave an empty line to preserve line numbers).
if test "x$srcdir" = x.; then
- ac_vpsub='/^[ ]*VPATH[ ]*=/{
-s/:*\$(srcdir):*/:/
-s/:*\${srcdir}:*/:/
-s/:*@srcdir@:*/:/
-s/^\([^=]*=[ ]*\):*/\1/
+ ac_vpsub='/^[ ]*VPATH[ ]*=[ ]*/{
+h
+s///
+s/^/:/
+s/[ ]*$/:/
+s/:\$(srcdir):/:/g
+s/:\${srcdir}:/:/g
+s/:@srcdir@:/:/g
+s/^:*//
s/:*$//
+x
+s/\(=[ ]*\).*/\1/
+G
+s/\n//
s/^[^=]*=[ ]*$//
}'
fi
# No need to generate them if there are no CONFIG_HEADERS.
# This happens for instance with `./config.status Makefile'.
if test -n "$CONFIG_HEADERS"; then
-cat >"$tmp/defines.awk" <<\_ACAWK ||
+cat >"$ac_tmp/defines.awk" <<\_ACAWK ||
BEGIN {
_ACEOF
# handling of long lines.
ac_delim='%!_!# '
for ac_last_try in false false :; do
- ac_t=`sed -n "/$ac_delim/p" confdefs.h`
- if test -z "$ac_t"; then
+ ac_tt=`sed -n "/$ac_delim/p" confdefs.h`
+ if test -z "$ac_tt"; then
break
elif $ac_last_try; then
- as_fn_error "could not make $CONFIG_HEADERS" "$LINENO" 5
+ as_fn_error $? "could not make $CONFIG_HEADERS" "$LINENO" 5
else
ac_delim="$ac_delim!$ac_delim _$ac_delim!! "
fi
_ACAWK
_ACEOF
cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
- as_fn_error "could not setup config headers machinery" "$LINENO" 5
+ as_fn_error $? "could not setup config headers machinery" "$LINENO" 5
fi # test -n "$CONFIG_HEADERS"
esac
case $ac_mode$ac_tag in
:[FHL]*:*);;
- :L* | :C*:*) as_fn_error "invalid tag \`$ac_tag'" "$LINENO" 5;;
+ :L* | :C*:*) as_fn_error $? "invalid tag \`$ac_tag'" "$LINENO" 5;;
:[FH]-) ac_tag=-:-;;
:[FH]*) ac_tag=$ac_tag:$ac_tag.in;;
esac
for ac_f
do
case $ac_f in
- -) ac_f="$tmp/stdin";;
+ -) ac_f="$ac_tmp/stdin";;
*) # Look for the file first in the build tree, then in the source tree
# (if the path is not absolute). The absolute path cannot be DOS-style,
# because $ac_f cannot contain `:'.
[\\/$]*) false;;
*) test -f "$srcdir/$ac_f" && ac_f="$srcdir/$ac_f";;
esac ||
- as_fn_error "cannot find input file: \`$ac_f'" "$LINENO" 5;;
+ as_fn_error 1 "cannot find input file: \`$ac_f'" "$LINENO" 5;;
esac
case $ac_f in *\'*) ac_f=`$as_echo "$ac_f" | sed "s/'/'\\\\\\\\''/g"`;; esac
as_fn_append ac_file_inputs " '$ac_f'"
esac
case $ac_tag in
- *:-:* | *:-) cat >"$tmp/stdin" \
- || as_fn_error "could not create $ac_file" "$LINENO" 5 ;;
+ *:-:* | *:-) cat >"$ac_tmp/stdin" \
+ || as_fn_error $? "could not create $ac_file" "$LINENO" 5 ;;
esac
;;
esac
s&@MKDIR_P@&$ac_MKDIR_P&;t t
$ac_datarootdir_hack
"
-eval sed \"\$ac_sed_extra\" "$ac_file_inputs" | $AWK -f "$tmp/subs.awk" >$tmp/out \
- || as_fn_error "could not create $ac_file" "$LINENO" 5
+eval sed \"\$ac_sed_extra\" "$ac_file_inputs" | $AWK -f "$ac_tmp/subs.awk" \
+ >$ac_tmp/out || as_fn_error $? "could not create $ac_file" "$LINENO" 5
test -z "$ac_datarootdir_hack$ac_datarootdir_seen" &&
- { ac_out=`sed -n '/\${datarootdir}/p' "$tmp/out"`; test -n "$ac_out"; } &&
- { ac_out=`sed -n '/^[ ]*datarootdir[ ]*:*=/p' "$tmp/out"`; test -z "$ac_out"; } &&
+ { ac_out=`sed -n '/\${datarootdir}/p' "$ac_tmp/out"`; test -n "$ac_out"; } &&
+ { ac_out=`sed -n '/^[ ]*datarootdir[ ]*:*=/p' \
+ "$ac_tmp/out"`; test -z "$ac_out"; } &&
{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file contains a reference to the variable \`datarootdir'
-which seems to be undefined. Please make sure it is defined." >&5
+which seems to be undefined. Please make sure it is defined" >&5
$as_echo "$as_me: WARNING: $ac_file contains a reference to the variable \`datarootdir'
-which seems to be undefined. Please make sure it is defined." >&2;}
+which seems to be undefined. Please make sure it is defined" >&2;}
- rm -f "$tmp/stdin"
+ rm -f "$ac_tmp/stdin"
case $ac_file in
- -) cat "$tmp/out" && rm -f "$tmp/out";;
- *) rm -f "$ac_file" && mv "$tmp/out" "$ac_file";;
+ -) cat "$ac_tmp/out" && rm -f "$ac_tmp/out";;
+ *) rm -f "$ac_file" && mv "$ac_tmp/out" "$ac_file";;
esac \
- || as_fn_error "could not create $ac_file" "$LINENO" 5
+ || as_fn_error $? "could not create $ac_file" "$LINENO" 5
;;
:H)
#
if test x"$ac_file" != x-; then
{
$as_echo "/* $configure_input */" \
- && eval '$AWK -f "$tmp/defines.awk"' "$ac_file_inputs"
- } >"$tmp/config.h" \
- || as_fn_error "could not create $ac_file" "$LINENO" 5
- if diff "$ac_file" "$tmp/config.h" >/dev/null 2>&1; then
+ && eval '$AWK -f "$ac_tmp/defines.awk"' "$ac_file_inputs"
+ } >"$ac_tmp/config.h" \
+ || as_fn_error $? "could not create $ac_file" "$LINENO" 5
+ if diff "$ac_file" "$ac_tmp/config.h" >/dev/null 2>&1; then
{ $as_echo "$as_me:${as_lineno-$LINENO}: $ac_file is unchanged" >&5
$as_echo "$as_me: $ac_file is unchanged" >&6;}
else
rm -f "$ac_file"
- mv "$tmp/config.h" "$ac_file" \
- || as_fn_error "could not create $ac_file" "$LINENO" 5
+ mv "$ac_tmp/config.h" "$ac_file" \
+ || as_fn_error $? "could not create $ac_file" "$LINENO" 5
fi
else
$as_echo "/* $configure_input */" \
- && eval '$AWK -f "$tmp/defines.awk"' "$ac_file_inputs" \
- || as_fn_error "could not create -" "$LINENO" 5
+ && eval '$AWK -f "$ac_tmp/defines.awk"' "$ac_file_inputs" \
+ || as_fn_error $? "could not create -" "$LINENO" 5
fi
# Compute "$ac_file"'s index in $config_headers.
_am_arg="$ac_file"
$as_echo "$as_me: linking $ac_source to $ac_file" >&6;}
if test ! -r "$ac_source"; then
- as_fn_error "$ac_source: file not found" "$LINENO" 5
+ as_fn_error $? "$ac_source: file not found" "$LINENO" 5
fi
rm -f "$ac_file"
# Try a relative symlink, then a hard link, then a copy.
- case $srcdir in
+ case $ac_source in
[\\/$]* | ?:[\\/]* ) ac_rel_source=$ac_source ;;
*) ac_rel_source=$ac_top_build_prefix$ac_source ;;
esac
ln -s "$ac_rel_source" "$ac_file" 2>/dev/null ||
ln "$ac_source" "$ac_file" 2>/dev/null ||
cp -p "$ac_source" "$ac_file" ||
- as_fn_error "cannot link or copy $ac_source to $ac_file" "$LINENO" 5
+ as_fn_error $? "cannot link or copy $ac_source to $ac_file" "$LINENO" 5
fi
;;
:C) { $as_echo "$as_me:${as_lineno-$LINENO}: executing $ac_file commands" >&5
ac_clean_files=$ac_clean_files_save
test $ac_write_fail = 0 ||
- as_fn_error "write failure creating $CONFIG_STATUS" "$LINENO" 5
+ as_fn_error $? "write failure creating $CONFIG_STATUS" "$LINENO" 5
# configure is writing to config.log, and then calls config.status.
exec 5>>config.log
# Use ||, not &&, to avoid exiting from the if with $? = 1, which
# would make configure fail if this is the last instruction.
- $ac_cs_success || as_fn_exit $?
+ $ac_cs_success || as_fn_exit 1
fi
if test -n "$ac_unrecognized_opts" && test "$enable_option_checking" != no; then
{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: unrecognized options: $ac_unrecognized_opts" >&5
$as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2;}
fi
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: summary of build options:
+
+ Version: ${PACKAGE_STRING}
+ Host type: ${host}
+ ABI: ${ABI}
+ Install prefix: ${prefix}
+ Compiler: ${CC}
+ Static libraries: ${enable_static}
+ Shared libraries: ${enable_shared}
+" >&5
+$as_echo "$as_me: summary of build options:
+
+ Version: ${PACKAGE_STRING}
+ Host type: ${host}
+ ABI: ${ABI}
+ Install prefix: ${prefix}
+ Compiler: ${CC}
+ Static libraries: ${enable_static}
+ Shared libraries: ${enable_shared}
+" >&6;}
+
+if test x$cross_compiling = xyes ; then
+ case "$host" in
+ *-*-mingw* | *-*-cygwin)
+ if test x$ABI = x64 ; then
+ { $as_echo "$as_me:${as_lineno-$LINENO}: If wine64 is installed, use make check TESTS_ENVIRONMENT=wine64." >&5
+$as_echo "$as_me: If wine64 is installed, use make check TESTS_ENVIRONMENT=wine64." >&6;}
+ else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: If wine is installed, use make check TESTS_ENVIRONMENT=wine." >&5
+$as_echo "$as_me: If wine is installed, use make check TESTS_ENVIRONMENT=wine." >&6;}
+ fi
+ ;;
+ esac
+fi
--- /dev/null
+dnl Process this file with autoconf to produce a configure script.
+
+
+define(GMP_COPYRIGHT,[[
+
+Copyright 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
+2007, 2008, 2009, 2010, 2011, 2012, 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published
+by the Free Software Foundation; either version 3 of the License, or (at
+your option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+]])
+
+AC_COPYRIGHT(GMP_COPYRIGHT)
+AH_TOP(/*GMP_COPYRIGHT*/)
+
+AC_REVISION($Revision$)
+AC_PREREQ(2.59)
+AC_INIT(GNU MP, GMP_VERSION, [gmp-bugs@gmplib.org, see http://gmplib.org/manual/Reporting-Bugs.html], gmp)
+AC_CONFIG_SRCDIR(gmp-impl.h)
+m4_pattern_forbid([^[ \t]*GMP_])
+m4_pattern_allow(GMP_LDFLAGS)
+m4_pattern_allow(GMP_LIMB_BITS)
+m4_pattern_allow(GMP_MPARAM_H_SUGGEST)
+m4_pattern_allow(GMP_NAIL_BITS)
+m4_pattern_allow(GMP_NUMB_BITS)
+m4_pattern_allow(GMP_NONSTD_ABI)
+
+# If --target is not used then $target_alias is empty, but if say
+# "./configure athlon-pc-freebsd3.5" is used, then all three of
+# $build_alias, $host_alias and $target_alias are set to
+# "athlon-pc-freebsd3.5".
+#
+if test -n "$target_alias" && test "$target_alias" != "$host_alias"; then
+ AC_MSG_ERROR([--target is not appropriate for GMP
+Use --build=CPU-VENDOR-OS if you need to specify your CPU and/or system
+explicitly. Use --host if cross-compiling (see "Installing GMP" in the
+manual for more on this).])
+fi
+
+GMP_INIT(config.m4)
+
+AC_CANONICAL_HOST
+
+dnl Automake "no-dependencies" is used because include file dependencies
+dnl are not useful to us. Pretty much everything depends just on gmp.h,
+dnl gmp-impl.h and longlong.h, and yet only rarely does everything need to
+dnl be rebuilt for changes to those files.
+dnl
+dnl "no-dependencies" also helps with the way we're set up to run
+dnl AC_PROG_CXX only conditionally. If dependencies are used then recent
+dnl automake (eg 1.7.2) appends an AM_CONDITIONAL to AC_PROG_CXX, and then
+dnl gets upset if it's not actually executed.
+dnl
+dnl Note that there's a copy of these options in the top-level Makefile.am,
+dnl so update there too if changing anything.
+dnl
+AM_INIT_AUTOMAKE([1.8 gnu no-dependencies])
+AC_CONFIG_HEADERS(config.h:config.in)
+AM_MAINTAINER_MODE
+
+
+AC_ARG_ENABLE(assert,
+AC_HELP_STRING([--enable-assert],[enable ASSERT checking [[default=no]]]),
+[case $enableval in
+yes|no) ;;
+*) AC_MSG_ERROR([bad value $enableval for --enable-assert, need yes or no]) ;;
+esac],
+[enable_assert=no])
+
+if test "$enable_assert" = "yes"; then
+ AC_DEFINE(WANT_ASSERT,1,
+ [Define to 1 to enable ASSERT checking, per --enable-assert])
+ want_assert_01=1
+else
+ want_assert_01=0
+fi
+GMP_DEFINE_RAW(["define(<WANT_ASSERT>,$want_assert_01)"])
+
+
+AC_ARG_ENABLE(alloca,
+AC_HELP_STRING([--enable-alloca],[how to get temp memory [[default=reentrant]]]),
+[case $enableval in
+alloca|malloc-reentrant|malloc-notreentrant) ;;
+yes|no|reentrant|notreentrant) ;;
+debug) ;;
+*)
+ AC_MSG_ERROR([bad value $enableval for --enable-alloca, need one of:
+yes no reentrant notreentrant alloca malloc-reentrant malloc-notreentrant debug]) ;;
+esac],
+[enable_alloca=reentrant])
+
+
+# IMPROVE ME: The default for C++ is disabled. The tests currently
+# performed below for a working C++ compiler are not particularly strong,
+# and in general can't be expected to get the right setup on their own. The
+# most significant problem is getting the ABI the same. Defaulting CXXFLAGS
+# to CFLAGS takes only a small step towards this. It's also probably worth
+# worrying whether the C and C++ runtimes from say gcc and a vendor C++ can
+# work together. Some rather broken C++ installations were encountered
+# during testing, and though such things clearly aren't GMP's problem, if
+# --enable-cxx=detect were to be the default then some careful checks of
+# which, if any, C++ compiler on the system is up to scratch would be
+# wanted.
+#
+AC_ARG_ENABLE(cxx,
+AC_HELP_STRING([--enable-cxx],[enable C++ support [[default=no]]]),
+[case $enableval in
+yes|no|detect) ;;
+*) AC_MSG_ERROR([bad value $enableval for --enable-cxx, need yes/no/detect]) ;;
+esac],
+[enable_cxx=no])
+
+
+AC_ARG_ENABLE(assembly,
+AC_HELP_STRING([--enable-assembly],[enable the use of assembly loops [[default=yes]]]),
+[case $enableval in
+yes|no) ;;
+*) AC_MSG_ERROR([bad value $enableval for --enable-assembly, need yes or no]) ;;
+esac],
+[enable_assembly=yes])
+
+if test "$enable_assembly" = "yes"; then
+ AC_DEFINE(WANT_ASSEMBLY,1,
+ [Defined to 1 as per --enable-assembly])
+fi
+
+
+AC_ARG_ENABLE(fft,
+AC_HELP_STRING([--enable-fft],[enable FFTs for multiplication [[default=yes]]]),
+[case $enableval in
+yes|no) ;;
+*) AC_MSG_ERROR([bad value $enableval for --enable-fft, need yes or no]) ;;
+esac],
+[enable_fft=yes])
+
+if test "$enable_fft" = "yes"; then
+ AC_DEFINE(WANT_FFT,1,
+ [Define to 1 to enable FFTs for multiplication, per --enable-fft])
+fi
+
+
+AC_ARG_ENABLE(old-fft-full,
+AC_HELP_STRING([--enable-old-fft-full],[enable old mpn_mul_fft_full for multiplication [[default=no]]]),
+[case $enableval in
+yes|no) ;;
+*) AC_MSG_ERROR([bad value $enableval for --enable-old-fft-full, need yes or no]) ;;
+esac],
+[enable_old_fft_full=no])
+
+if test "$enable_old_fft_full" = "yes"; then
+ AC_DEFINE(WANT_OLD_FFT_FULL,1,
+ [Define to 1 to enable old mpn_mul_fft_full for multiplication, per --enable-old-fft-full])
+fi
+
+
+AC_ARG_ENABLE(nails,
+AC_HELP_STRING([--enable-nails],[use nails on limbs [[default=no]]]),
+[case $enableval in
+[yes|no|[02468]|[0-9][02468]]) ;;
+[*[13579]])
+ AC_MSG_ERROR([bad value $enableval for --enable-nails, only even nail sizes supported]) ;;
+*)
+ AC_MSG_ERROR([bad value $enableval for --enable-nails, need yes/no/number]) ;;
+esac],
+[enable_nails=no])
+
+case $enable_nails in
+yes) GMP_NAIL_BITS=2 ;;
+no) GMP_NAIL_BITS=0 ;;
+*) GMP_NAIL_BITS=$enable_nails ;;
+esac
+AC_SUBST(GMP_NAIL_BITS)
+
+
+AC_ARG_ENABLE(profiling,
+AC_HELP_STRING([--enable-profiling],
+ [build with profiler support [[default=no]]]),
+[case $enableval in
+no|prof|gprof|instrument) ;;
+*) AC_MSG_ERROR([bad value $enableval for --enable-profiling, need no/prof/gprof/instrument]) ;;
+esac],
+[enable_profiling=no])
+
+case $enable_profiling in
+ prof)
+ AC_DEFINE(WANT_PROFILING_PROF, 1,
+ [Define to 1 if --enable-profiling=prof])
+ ;;
+ gprof)
+ AC_DEFINE(WANT_PROFILING_GPROF, 1,
+ [Define to 1 if --enable-profiling=gprof])
+ ;;
+ instrument)
+ AC_DEFINE(WANT_PROFILING_INSTRUMENT, 1,
+ [Define to 1 if --enable-profiling=instrument])
+ ;;
+esac
+
+GMP_DEFINE_RAW(["define(<WANT_PROFILING>,<\`$enable_profiling'>)"])
+
+# -fomit-frame-pointer is incompatible with -pg on some chips
+if test "$enable_profiling" = gprof; then
+ fomit_frame_pointer=
+else
+ fomit_frame_pointer="-fomit-frame-pointer"
+fi
+
+
+AC_ARG_WITH(readline,
+AC_HELP_STRING([--with-readline],
+ [readline support in calc demo program [[default=detect]]]),
+[case $withval in
+yes|no|detect) ;;
+*) AC_MSG_ERROR([bad value $withval for --with-readline, need yes/no/detect]) ;;
+esac],
+[with_readline=detect])
+
+
+AC_ARG_ENABLE(fat,
+AC_HELP_STRING([--enable-fat],
+ [build a fat binary on systems that support it [[default=no]]]),
+[case $enableval in
+yes|no) ;;
+*) AC_MSG_ERROR([bad value $enableval for --enable-fat, need yes or no]) ;;
+esac],
+[enable_fat=no])
+
+
+AC_ARG_ENABLE(minithres,
+AC_HELP_STRING([--enable-minithres],
+ [choose minimal thresholds for testing [[default=no]]]),
+[case $enableval in
+yes|no) ;;
+*) AC_MSG_ERROR([bad value $enableval for --enable-minithres, need yes or no]) ;;
+esac],
+[enable_minithres=no])
+
+
+if test $enable_fat = yes && test $enable_assembly = no ; then
+ AC_MSG_ERROR([when doing a fat build, disabling assembly will not work])
+fi
+
+
+tmp_host=`echo $host_cpu | sed 's/\./_/'`
+AC_DEFINE_UNQUOTED(HAVE_HOST_CPU_$tmp_host)
+GMP_DEFINE_RAW("define_not_for_expansion(\`HAVE_HOST_CPU_$tmp_host')", POST)
+
+dnl The HAVE_HOST_CPU_ list here only needs to have entries for those which
+dnl are going to be tested, not everything that can possibly be selected.
+dnl
+dnl The HAVE_HOST_CPU_FAMILY_ list similarly, and note that the AC_DEFINEs
+dnl for these are under the cpu specific setups below.
+
+AH_VERBATIM([HAVE_HOST_CPU_1],
+[/* Define one of these to 1 for the host CPU family.
+ If your CPU is not in any of these families, leave all undefined.
+ For an AMD64 chip, define "x86" in ABI=32, but not in ABI=64. */
+#undef HAVE_HOST_CPU_FAMILY_alpha
+#undef HAVE_HOST_CPU_FAMILY_m68k
+#undef HAVE_HOST_CPU_FAMILY_power
+#undef HAVE_HOST_CPU_FAMILY_powerpc
+#undef HAVE_HOST_CPU_FAMILY_x86
+#undef HAVE_HOST_CPU_FAMILY_x86_64
+
+/* Define one of the following to 1 for the host CPU, as per the output of
+ ./config.guess. If your CPU is not listed here, leave all undefined. */
+#undef HAVE_HOST_CPU_alphaev67
+#undef HAVE_HOST_CPU_alphaev68
+#undef HAVE_HOST_CPU_alphaev7
+#undef HAVE_HOST_CPU_m68020
+#undef HAVE_HOST_CPU_m68030
+#undef HAVE_HOST_CPU_m68040
+#undef HAVE_HOST_CPU_m68060
+#undef HAVE_HOST_CPU_m68360
+#undef HAVE_HOST_CPU_powerpc604
+#undef HAVE_HOST_CPU_powerpc604e
+#undef HAVE_HOST_CPU_powerpc750
+#undef HAVE_HOST_CPU_powerpc7400
+#undef HAVE_HOST_CPU_supersparc
+#undef HAVE_HOST_CPU_i386
+#undef HAVE_HOST_CPU_i586
+#undef HAVE_HOST_CPU_i686
+#undef HAVE_HOST_CPU_pentium
+#undef HAVE_HOST_CPU_pentiummmx
+#undef HAVE_HOST_CPU_pentiumpro
+#undef HAVE_HOST_CPU_pentium2
+#undef HAVE_HOST_CPU_pentium3
+#undef HAVE_HOST_CPU_s390_z900
+#undef HAVE_HOST_CPU_s390_z990
+#undef HAVE_HOST_CPU_s390_z9
+#undef HAVE_HOST_CPU_s390_z10
+#undef HAVE_HOST_CPU_s390_z196
+
+/* Define to 1 iff we have a s390 with 64-bit registers. */
+#undef HAVE_HOST_CPU_s390_zarch])
+
+
+# Table of compilers, options, and mpn paths. This code has various related
+# purposes
+#
+# - better default CC/CFLAGS selections than autoconf otherwise gives
+# - default CC/CFLAGS selections for extra CPU types specific to GMP
+# - a few tests for known bad compilers
+# - choice of ABIs on suitable systems
+# - selection of corresponding mpn search path
+#
+# After GMP specific searches and tests, the standard autoconf AC_PROG_CC is
+# called. User selections of CC etc are respected.
+#
+# Care is taken not to use macros like AC_TRY_COMPILE during the GMP
+# pre-testing, since they of course depend on AC_PROG_CC, and also some of
+# them cache their results, which is not wanted.
+#
+# The ABI selection mechanism is unique to GMP. All that reaches autoconf
+# is a different selection of CC/CFLAGS according to the best ABI the system
+# supports, and/or what the user selects. Naturally the mpn assembler code
+# selected is very dependent on the ABI.
+#
+# The closest the standard tools come to a notion of ABI is something like
+# "sparc64" which encodes a CPU and an ABI together. This doesn't seem to
+# scale well for GMP, where exact CPU types like "ultrasparc2" are wanted,
+# separate from the ABI used on them.
+#
+#
+# The variables set here are
+#
+# cclist the compiler choices
+# xx_cflags flags for compiler xx
+# xx_cflags_maybe flags for compiler xx, if they work
+# xx_cppflags cpp flags for compiler xx
+# xx_cflags_optlist list of sets of optional flags
+# xx_cflags_yyy set yyy of optional flags for compiler xx
+# xx_ldflags -Wc,-foo flags for libtool linking with compiler xx
+# ar_flags extra flags for $AR
+# nm_flags extra flags for $NM
+# limb limb size, can be "longlong"
+# path mpn search path
+# extra_functions extra mpn functions
+# fat_path fat binary mpn search path [if fat binary desired]
+# fat_functions fat functions
+# fat_thresholds fat thresholds
+#
+# Suppose xx_cflags_optlist="arch", then flags from $xx_cflags_arch are
+# tried, and the first flag that works will be used. An optlist like "arch
+# cpu optimize" can be used to get multiple independent sets of flags tried.
+# The first that works from each will be used. If no flag in a set works
+# then nothing from that set is added.
+#
+# For multiple ABIs, the scheme extends as follows.
+#
+# abilist set of ABI choices
+# cclist_aa compiler choices in ABI aa
+# xx_aa_cflags flags for xx in ABI aa
+# xx_aa_cflags_maybe flags for xx in ABI aa, if they work
+# xx_aa_cppflags cpp flags for xx in ABI aa
+# xx_aa_cflags_optlist list of sets of optional flags in ABI aa
+# xx_aa_cflags_yyy set yyy of optional flags for compiler xx in ABI aa
+# xx_aa_ldflags -Wc,-foo flags for libtool linking
+# ar_aa_flags extra flags for $AR in ABI aa
+# nm_aa_flags extra flags for $NM in ABI aa
+# limb_aa limb size in ABI aa, can be "longlong"
+# path_aa mpn search path in ABI aa
+# extra_functions_aa extra mpn functions in ABI aa
+#
+# As a convenience, the unadorned xx_cflags (etc) are used for the last ABI
+# in abilist, if an xx_aa_cflags for that ABI isn't given. For example if
+# abilist="64 32" then $cc_64_cflags will be used for the 64-bit ABI, but
+# for the 32-bit either $cc_32_cflags or $cc_cflags is used, whichever is
+# defined. This makes it easy to add some 64-bit compilers and flags to an
+# unadorned 32-bit set.
+#
+# limb=longlong (or limb_aa=longlong) applies to all compilers within that
+# ABI. It won't work to have some needing long long and some not, since a
+# single instantiated gmp.h will be used by both.
+#
+# SPEED_CYCLECOUNTER, cyclecounter_size and CALLING_CONVENTIONS_OBJS are
+# also set here, with an ABI suffix.
+#
+#
+#
+# A table-driven approach like this to mapping cpu type to good compiler
+# options is a bit of a maintenance burden, but there's not much uniformity
+# between options specifications on different compilers. Some sort of
+# separately updatable tool might be cute.
+#
+# The use of lots of variables like this, direct and indirect, tends to
+# obscure when and how various things are done, but unfortunately it's
+# pretty much the only way. If shell subroutines were portable then actual
+# code like "if this .. do that" could be written, but attempting the same
+# with full copies of GMP_PROG_CC_WORKS etc expanded at every point would
+# hugely bloat the output.
+
+
+AC_ARG_VAR(ABI, [desired ABI (for processors supporting more than one ABI)])
+
+# abilist needs to be non-empty, "standard" is just a generic name here
+abilist="standard"
+
+# FIXME: We'd like to prefer an ANSI compiler, perhaps by preferring
+# c89 over cc here. But note that on HP-UX c89 provides a castrated
+# environment, and would want to be excluded somehow. Maybe
+# AC_PROG_CC_STDC already does enough to stick cc into ANSI mode and
+# we don't need to worry.
+#
+cclist="gcc cc"
+
+gcc_cflags="-O2 -pedantic"
+gcc_64_cflags="-O2 -pedantic"
+cc_cflags="-O"
+cc_64_cflags="-O"
+
+SPEED_CYCLECOUNTER_OBJ=
+cyclecounter_size=2
+
+AC_SUBST(HAVE_HOST_CPU_FAMILY_power, 0)
+AC_SUBST(HAVE_HOST_CPU_FAMILY_powerpc,0)
+
+case $host in
+
+ a29k*-*-*)
+ path="a29k"
+ ;;
+
+
+ alpha*-*-*)
+ AC_DEFINE(HAVE_HOST_CPU_FAMILY_alpha)
+ case $host_cpu in
+ alphaev5* | alphapca5*)
+ path="alpha/ev5 alpha" ;;
+ alphaev67 | alphaev68 | alphaev7*)
+ path="alpha/ev67 alpha/ev6 alpha" ;;
+ alphaev6)
+ path="alpha/ev6 alpha" ;;
+ *)
+ path="alpha" ;;
+ esac
+ extra_functions="cntlz"
+ gcc_cflags_optlist="asm cpu oldas" # need asm ahead of cpu, see below
+ gcc_cflags_oldas="-Wa,-oldas" # see GMP_GCC_WA_OLDAS.
+
+ # gcc 2.7.2.3 doesn't know any -mcpu= for alpha, apparently.
+ # gcc 2.95 knows -mcpu= ev4, ev5, ev56, pca56, ev6.
+ # gcc 3.0 adds nothing.
+ # gcc 3.1 adds ev45, ev67 (but ev45 is the same as ev4).
+ # gcc 3.2 adds nothing.
+ #
+ # gcc version "2.9-gnupro-99r1" under "-O2 -mcpu=ev6" strikes internal
+ # compiler errors too easily and is rejected by GMP_PROG_CC_WORKS. Each
+ # -mcpu=ev6 below has a fallback to -mcpu=ev56 for this reason.
+ #
+ case $host_cpu in
+ alpha) gcc_cflags_cpu="-mcpu=ev4" ;;
+ alphaev5) gcc_cflags_cpu="-mcpu=ev5" ;;
+ alphaev56) gcc_cflags_cpu="-mcpu=ev56" ;;
+ alphapca56 | alphapca57)
+ gcc_cflags_cpu="-mcpu=pca56" ;;
+ alphaev6) gcc_cflags_cpu="-mcpu=ev6 -mcpu=ev56" ;;
+ alphaev67 | alphaev68 | alphaev7*)
+ gcc_cflags_cpu="-mcpu=ev67 -mcpu=ev6 -mcpu=ev56" ;;
+ esac
+
+ # gcc version "2.9-gnupro-99r1" on alphaev68-dec-osf5.1 has been seen
+ # accepting -mcpu=ev6, but not putting the assembler in the right mode
+ # for what it produces. We need to do this for it, and need to do it
+ # before testing the -mcpu options.
+ #
+ # On old versions of gcc, which don't know -mcpu=, we believe an
+ # explicit -Wa,-mev5 etc will be necessary to put the assembler in
+ # the right mode for our .asm files and longlong.h asm blocks.
+ #
+ # On newer versions of gcc, when -mcpu= is known, we must give a -Wa
+ # which is at least as high as the code gcc will generate. gcc
+ # establishes what it needs with a ".arch" directive, our command line
+ # option seems to override that.
+ #
+ # gas prior to 2.14 doesn't accept -mev67, but -mev6 seems enough for
+ # ctlz and cttz (in 2.10.0 at least).
+ #
+ # OSF `as' accepts ev68 but stupidly treats it as ev4. -arch only seems
+ # to affect insns like ldbu which are expanded as macros when necessary.
+ # Insns like ctlz which were never available as macros are always
+ # accepted and always generate their plain code.
+ #
+ case $host_cpu in
+ alpha) gcc_cflags_asm="-Wa,-arch,ev4 -Wa,-mev4" ;;
+ alphaev5) gcc_cflags_asm="-Wa,-arch,ev5 -Wa,-mev5" ;;
+ alphaev56) gcc_cflags_asm="-Wa,-arch,ev56 -Wa,-mev56" ;;
+ alphapca56 | alphapca57)
+ gcc_cflags_asm="-Wa,-arch,pca56 -Wa,-mpca56" ;;
+ alphaev6) gcc_cflags_asm="-Wa,-arch,ev6 -Wa,-mev6" ;;
+ alphaev67 | alphaev68 | alphaev7*)
+ gcc_cflags_asm="-Wa,-arch,ev67 -Wa,-mev67 -Wa,-arch,ev6 -Wa,-mev6" ;;
+ esac
+
+ # It might be better to ask "cc" whether it's Cray C or DEC C,
+ # instead of relying on the OS part of $host. But it's hard to
+ # imagine either of those compilers anywhere except their native
+ # systems.
+ #
+ GMP_INCLUDE_MPN(alpha/alpha-defs.m4)
+ case $host in
+ *-cray-unicos*)
+ cc_cflags="-O" # no -g, it silently disables all optimizations
+ GMP_INCLUDE_MPN(alpha/unicos.m4)
+ # Don't perform any assembly syntax tests on this beast.
+ gmp_asm_syntax_testing=no
+ ;;
+ *-*-osf*)
+ GMP_INCLUDE_MPN(alpha/default.m4)
+ cc_cflags=""
+ cc_cflags_optlist="opt cpu"
+
+ # not sure if -fast works on old versions, so make it optional
+ cc_cflags_opt="-fast -O2"
+
+ # DEC C V5.9-005 knows ev4, ev5, ev56, pca56, ev6.
+ # Compaq C V6.3-029 adds ev67.
+ #
+ case $host_cpu in
+ alpha) cc_cflags_cpu="-arch~ev4~-tune~ev4" ;;
+ alphaev5) cc_cflags_cpu="-arch~ev5~-tune~ev5" ;;
+ alphaev56) cc_cflags_cpu="-arch~ev56~-tune~ev56" ;;
+ alphapca56 | alphapca57)
+ cc_cflags_cpu="-arch~pca56~-tune~pca56" ;;
+ alphaev6) cc_cflags_cpu="-arch~ev6~-tune~ev6" ;;
+ alphaev67 | alphaev68 | alphaev7*)
+ cc_cflags_cpu="-arch~ev67~-tune~ev67 -arch~ev6~-tune~ev6" ;;
+ esac
+ ;;
+ *)
+ GMP_INCLUDE_MPN(alpha/default.m4)
+ ;;
+ esac
+
+ case $host in
+ *-*-unicos*)
+ # tune/alpha.asm assumes int==4bytes but unicos uses int==8bytes
+ ;;
+ *)
+ SPEED_CYCLECOUNTER_OBJ=alpha.lo
+ cyclecounter_size=1 ;;
+ esac
+ ;;
+
+
+ # Cray vector machines.
+ # This must come after alpha* so that we can recognize present and future
+ # vector processors with a wildcard.
+ *-cray-unicos*)
+ gmp_asm_syntax_testing=no
+ cclist="cc"
+ # We used to have -hscalar0 here as a workaround for miscompilation of
+ # mpz/import.c, but let's hope Cray fixes their bugs instead, since
+ # -hscalar0 causes disastrously poor code to be generated.
+ cc_cflags="-O3 -hnofastmd -htask0 -Wa,-B"
+ path="cray"
+ ;;
+
+
+ arm*-*-*)
+ gcc_cflags="$gcc_cflags $fomit_frame_pointer"
+ gcc_cflags_optlist="arch tune"
+ gcc_cflags_maybe="-marm"
+ gcc_testlist="gcc-arm-umodsi"
+ GMP_INCLUDE_MPN(arm/arm-defs.m4)
+ CALLING_CONVENTIONS_OBJS='arm32call.lo arm32check.lo'
+
+ case $host_cpu in
+ armsa1 | armv4*)
+ path="arm"
+ gcc_cflags_arch="-march=armv4"
+ ;;
+ armxscale | arm926 | arm946 | arm966 | arm1026 | armv5*)
+ path="arm/v5 arm"
+ gcc_cflags_arch="-march=armv5"
+ ;;
+ arm11mpcore | arm1136 | arm1176 | armv6*)
+ path="arm/v6 arm/v5 arm"
+ gcc_cflags_arch="-march=armv6"
+ ;;
+ arm1156)
+ path="arm/v6t2 arm/v6 arm/v5 arm"
+ gcc_cflags_arch="-march=armv6t2"
+ ;;
+ armcortexa9)
+ path="arm/v7a/cora9 arm/v6t2 arm/v6 arm/v5 arm"
+ gcc_cflags_arch="-march=armv7-a"
+ gcc_cflags_tune="-mtune=cortex-a9"
+ ;;
+ armcortexa15)
+ path="arm/v7a/cora15 arm/v6t2 arm/v6 arm/v5 arm"
+ gcc_cflags_arch="-march=armv7-a"
+ gcc_cflags_tune="-mtune=cortex-a15 -mtune=cortex-a9"
+ ;;
+ armcortexa5 | armcortexa8 | armv7a*)
+ path="arm/v6t2 arm/v6 arm/v5 arm"
+ gcc_cflags_arch="-march=armv7-a"
+ ;;
+ *)
+ path="arm"
+ ;;
+ esac
+ ;;
+
+
+ clipper*-*-*)
+ path="clipper"
+ ;;
+
+
+ # Fujitsu
+ [f30[01]-fujitsu-sysv*])
+ cclist="gcc vcc"
+ # FIXME: flags for vcc?
+ vcc_cflags="-g"
+ path="fujitsu"
+ ;;
+
+
+ hppa*-*-*)
+ # HP cc (the one sold separately) is K&R by default, but AM_C_PROTOTYPES
+ # will add "-Ae", or "-Aa -D_HPUX_SOURCE", to put it into ansi mode, if
+ # possible.
+ #
+ # gcc for hppa 2.0 can be built either for 2.0n (32-bit) or 2.0w
+ # (64-bit), but not both, so there's no option to choose the desired
+ # mode, we must instead detect which of the two it is. This is done by
+ # checking sizeof(long), either 4 or 8 bytes respectively. Do this in
+ # ABI=1.0 too, in case someone tries to build that with a 2.0w gcc.
+ #
+ gcc_cflags_optlist="arch"
+ gcc_testlist="sizeof-long-4"
+ SPEED_CYCLECOUNTER_OBJ=hppa.lo
+ cyclecounter_size=1
+
+ # FIXME: For hppa2.0*, path should be "pa32/hppa2_0 pa32/hppa1_1 pa32".
+ # (Can't remember why this isn't done already, have to check what .asm
+ # files are available in each and how they run on a typical 2.0 cpu.)
+ #
+ case $host_cpu in
+ hppa1.0*) path="pa32" ;;
+ hppa7000*) path="pa32/hppa1_1 pa32" ;;
+ hppa2.0* | hppa64)
+ path="pa32/hppa2_0 pa32/hppa1_1/pa7100 pa32/hppa1_1 pa32" ;;
+ *) # default to 7100
+ path="pa32/hppa1_1/pa7100 pa32/hppa1_1 pa32" ;;
+ esac
+
+ # gcc 2.7.2.3 knows -mpa-risc-1-0 and -mpa-risc-1-1
+ # gcc 2.95 adds -mpa-risc-2-0, plus synonyms -march=1.0, 1.1 and 2.0
+ #
+ # We don't use -mpa-risc-2-0 in ABI=1.0 because 64-bit registers may not
+ # be saved by the kernel on an old system. Actually gcc (as of 3.2)
+ # only adds a few float instructions with -mpa-risc-2-0, so it would
+ # probably be safe, but let's not take the chance. In any case, a
+ # configuration like --host=hppa2.0 ABI=1.0 is far from optimal.
+ #
+ case $host_cpu in
+ hppa1.0*) gcc_cflags_arch="-mpa-risc-1-0" ;;
+ *) # default to 7100
+ gcc_cflags_arch="-mpa-risc-1-1" ;;
+ esac
+
+ case $host_cpu in
+ hppa1.0*) cc_cflags="+O2" ;;
+ *) # default to 7100
+ cc_cflags="+DA1.1 +O2" ;;
+ esac
+
+ case $host in
+ hppa2.0*-*-* | hppa64-*-*)
+ cclist_20n="gcc cc"
+ abilist="2.0n 1.0"
+ path_20n="pa64"
+ limb_20n=longlong
+ any_20n_testlist="sizeof-long-4"
+ SPEED_CYCLECOUNTER_OBJ_20n=hppa2.lo
+ cyclecounter_size_20n=2
+
+ # -mpa-risc-2-0 is only an optional flag, in case an old gcc is
+ # used. Assembler support for 2.0 is essential though, for our asm
+ # files.
+ gcc_20n_cflags="$gcc_cflags"
+ gcc_20n_cflags_optlist="arch"
+ gcc_20n_cflags_arch="-mpa-risc-2-0 -mpa-risc-1-1"
+ gcc_20n_testlist="sizeof-long-4 hppa-level-2.0"
+
+ cc_20n_cflags="+DA2.0 +e +O2 -Wl,+vnocompatwarnings"
+ cc_20n_testlist="hpc-hppa-2-0"
+
+ # ABI=2.0w is available for hppa2.0w and hppa2.0, but not for
+ # hppa2.0n, on the assumption that the latter indicates a
+ # desire for ABI=2.0n.
+ case $host in
+ hppa2.0n-*-*) ;;
+ *)
+ # HPUX 10 and earlier cannot run 2.0w. Not sure about other
+ # systems (GNU/Linux for instance), but let's assume they're ok.
+ case $host in
+ [*-*-hpux[1-9] | *-*-hpux[1-9].* | *-*-hpux10 | *-*-hpux10.*]) ;;
+ [*-*-linux*]) abilist="1.0" ;; # due to linux permanent kernel bug
+ *) abilist="2.0w $abilist" ;;
+ esac
+
+ cclist_20w="gcc cc"
+ gcc_20w_cflags="$gcc_cflags -mpa-risc-2-0"
+ cc_20w_cflags="+DD64 +O2"
+ cc_20w_testlist="hpc-hppa-2-0"
+ path_20w="pa64"
+ any_20w_testlist="sizeof-long-8"
+ SPEED_CYCLECOUNTER_OBJ_20w=hppa2w.lo
+ cyclecounter_size_20w=2
+ ;;
+ esac
+ ;;
+ esac
+ ;;
+
+
+ i960*-*-*)
+ path="i960"
+ ;;
+
+
+ IA64_PATTERN)
+ abilist="64"
+ GMP_INCLUDE_MPN(ia64/ia64-defs.m4)
+ SPEED_CYCLECOUNTER_OBJ=ia64.lo
+ any_32_testlist="sizeof-long-4"
+
+ case $host_cpu in
+ itanium) path="ia64/itanium ia64" ;;
+ itanium2) path="ia64/itanium2 ia64" ;;
+ *) path="ia64" ;;
+ esac
+
+ gcc_64_cflags_optlist="tune"
+ gcc_32_cflags_optlist=$gcc_64_cflags_optlist
+
+ # gcc pre-release 3.4 adds -mtune itanium and itanium2
+ case $host_cpu in
+ itanium) gcc_cflags_tune="-mtune=itanium" ;;
+ itanium2) gcc_cflags_tune="-mtune=itanium2" ;;
+ esac
+
+ case $host in
+ *-*-linux*)
+ cclist="gcc icc"
+ icc_cflags="-no-gcc"
+ icc_cflags_optlist="opt"
+ # Don't use -O3, it is for "large data sets" and also miscompiles GMP.
+ # But icc miscompiles GMP at any optimization level, at higher levels
+ # it miscompiles more files...
+ icc_cflags_opt="-O2 -O1"
+ ;;
+
+ *-*-hpux*)
+ # HP cc sometimes gets internal errors if the optimization level is
+ # too high. GMP_PROG_CC_WORKS detects this, the "_opt" fallbacks
+ # let us use whatever seems to work.
+ #
+ abilist="32 64"
+ any_64_testlist="sizeof-long-8"
+
+ cclist_32="gcc cc"
+ path_32="ia64"
+ cc_32_cflags=""
+ cc_32_cflags_optlist="opt"
+ cc_32_cflags_opt="+O3 +O2 +O1"
+ gcc_32_cflags="$gcc_cflags -milp32"
+ limb_32=longlong
+ SPEED_CYCLECOUNTER_OBJ_32=ia64.lo
+ cyclecounter_size_32=2
+
+ # Must have +DD64 in CPPFLAGS to get the right __LP64__ for headers,
+ # but also need it in CFLAGS for linking programs, since automake
+ # only uses CFLAGS when linking, not CPPFLAGS.
+ # FIXME: Maybe should use cc_64_ldflags for this, but that would
+ # need GMP_LDFLAGS used consistently by all the programs.
+ #
+ cc_64_cflags="+DD64"
+ cc_64_cppflags="+DD64"
+ cc_64_cflags_optlist="opt"
+ cc_64_cflags_opt="+O3 +O2 +O1"
+ gcc_64_cflags="$gcc_cflags -mlp64"
+ ;;
+ esac
+ ;;
+
+
+ # Motorola 68k
+ #
+ M68K_PATTERN)
+ AC_DEFINE(HAVE_HOST_CPU_FAMILY_m68k)
+ GMP_INCLUDE_MPN(m68k/m68k-defs.m4)
+ gcc_cflags="$gcc_cflags $fomit_frame_pointer"
+ gcc_cflags_optlist="arch"
+
+ # gcc 2.7.2 knows -m68000, -m68020, -m68030, -m68040.
+ # gcc 2.95 adds -mcpu32, -m68060.
+ # FIXME: Maybe "-m68020 -mnobitfield" would suit cpu32 on 2.7.2.
+ #
+ case $host_cpu in
+ m68020) gcc_cflags_arch="-m68020" ;;
+ m68030) gcc_cflags_arch="-m68030" ;;
+ m68040) gcc_cflags_arch="-m68040" ;;
+ m68060) gcc_cflags_arch="-m68060 -m68000" ;;
+ m68360) gcc_cflags_arch="-mcpu32 -m68000" ;;
+ *) gcc_cflags_arch="-m68000" ;;
+ esac
+
+ # FIXME: m68k/mc68020 looks like it's ok for cpu32, but this wants to be
+ # tested. Will need to introduce an m68k/cpu32 if m68k/mc68020 ever uses
+ # the bitfield instructions.
+ case $host_cpu in
+ [m680[234]0 | m68360]) path="m68k/mc68020 m68k" ;;
+ *) path="m68k" ;;
+ esac
+ ;;
+
+
+ # Motorola 88k
+ m88k*-*-*)
+ path="m88k"
+ ;;
+ m88110*-*-*)
+ gcc_cflags="$gcc_cflags -m88110"
+ path="m88k/mc88110 m88k"
+ ;;
+
+
+ # National Semiconductor 32k
+ ns32k*-*-*)
+ path="ns32k"
+ ;;
+
+
+ # IRIX 5 and earlier can only run 32-bit o32.
+ #
+ # IRIX 6 and up always has a 64-bit mips CPU and can run n32 or 64. n32 is
+ # preferred over 64, but only because that's been the default in past
+ # versions of GMP. The two are equally efficient.
+ #
+ # Linux kernel 2.2.13 arch/mips/kernel/irixelf.c has a comment about not
+ # supporting n32 or 64.
+ #
+ # For reference, libtool (eg. 1.5.6) recognises the n32 ABI and knows the
+ # right options to use when linking (both cc and gcc), so no need for
+ # anything special from us.
+ #
+ mips*-*-*)
+ abilist="o32"
+ gcc_cflags_optlist="abi"
+ gcc_cflags_abi="-mabi=32"
+ gcc_testlist="gcc-mips-o32"
+ path="mips32"
+ cc_cflags="-O2 -o32" # no -g, it disables all optimizations
+ # this suits both mips32 and mips64
+ GMP_INCLUDE_MPN(mips32/mips-defs.m4)
+
+ case $host in
+ [mips64*-*-* | mips*-*-irix[6789]*])
+ abilist="n32 64 o32"
+
+ cclist_n32="gcc cc"
+ gcc_n32_cflags="$gcc_cflags -mabi=n32"
+ cc_n32_cflags="-O2 -n32" # no -g, it disables all optimizations
+ limb_n32=longlong
+ path_n32="mips64"
+
+ cclist_64="gcc cc"
+ gcc_64_cflags="$gcc_cflags -mabi=64"
+ gcc_64_ldflags="-Wc,-mabi=64"
+ cc_64_cflags="-O2 -64" # no -g, it disables all optimizations
+ cc_64_ldflags="-Wc,-64"
+ path_64="mips64"
+ ;;
+ esac
+ ;;
+
+
+ # Darwin (powerpc-apple-darwin1.3) has its hacked gcc installed as cc.
+ # Our usual "gcc in disguise" detection means gcc_cflags etc here gets
+ # used.
+ #
+ # The darwin pre-compiling preprocessor is disabled with -no-cpp-precomp
+ # since it doesn't like "__attribute__ ((mode (SI)))" etc in gmp-impl.h,
+ # and so always ends up running the plain preprocessor anyway. This could
+ # be done in CPPFLAGS rather than CFLAGS, but there are not many places
+ # preprocessing is done separately, and this is only a speedup, the normal
+ # preprocessor gets run if there are any problems.
+ #
+ # We used to use -Wa,-mppc with gcc, but can't remember exactly why.
+ # Presumably it was for old versions of gcc where -mpowerpc doesn't put
+ # the assembler in the right mode. In any case -Wa,-mppc is not good, for
+ # instance -mcpu=604 makes recent gcc use -m604 to get access to the
+ # "fsel" instruction, but a -Wa,-mppc overrides that, making code that
+ # comes out with fsel fail.
+ #
+ # (Note also that the darwin assembler doesn't accept "-mppc", so any
+ # -Wa,-mppc was used only if it worked. The right flag on darwin would be
+ # "-arch ppc" or some such, but that's already the default.)
+ #
+ [powerpc*-*-* | power[3-9]-*-*])
+ AC_DEFINE(HAVE_HOST_CPU_FAMILY_powerpc)
+ HAVE_HOST_CPU_FAMILY_powerpc=1
+ abilist="32"
+ cclist="gcc cc"
+ cc_cflags="-O2"
+ gcc_32_cflags="$gcc_cflags -mpowerpc"
+ gcc_cflags_optlist="precomp subtype asm cpu"
+ gcc_cflags_precomp="-no-cpp-precomp"
+ gcc_cflags_subtype="-force_cpusubtype_ALL" # for vmx on darwin
+ gcc_cflags_asm=""
+ gcc_cflags_cpu=""
+ vmx_path=""
+
+ # grab this object, though it's not a true cycle counter routine
+ SPEED_CYCLECOUNTER_OBJ=powerpc.lo
+ cyclecounter_size=0
+
+ case $host_cpu in
+ powerpc740 | powerpc750)
+ path="powerpc32/750 powerpc32" ;;
+ powerpc7400 | powerpc7410)
+ path="powerpc32/vmx powerpc32/750 powerpc32" ;;
+ [powerpc74[45]?])
+ path="powerpc32/vmx powerpc32" ;;
+ *)
+ path="powerpc32" ;;
+ esac
+
+ case $host_cpu in
+ powerpc401) gcc_cflags_cpu="-mcpu=401" ;;
+ powerpc403) gcc_cflags_cpu="-mcpu=403"
+ xlc_cflags_arch="-qarch=403 -qarch=ppc" ;;
+ powerpc405) gcc_cflags_cpu="-mcpu=405" ;;
+ powerpc505) gcc_cflags_cpu="-mcpu=505" ;;
+ powerpc601) gcc_cflags_cpu="-mcpu=601"
+ xlc_cflags_arch="-qarch=601 -qarch=ppc" ;;
+ powerpc602) gcc_cflags_cpu="-mcpu=602"
+ xlc_cflags_arch="-qarch=602 -qarch=ppc" ;;
+ powerpc603) gcc_cflags_cpu="-mcpu=603"
+ xlc_cflags_arch="-qarch=603 -qarch=ppc" ;;
+ powerpc603e) gcc_cflags_cpu="-mcpu=603e -mcpu=603"
+ xlc_cflags_arch="-qarch=603 -qarch=ppc" ;;
+ powerpc604) gcc_cflags_cpu="-mcpu=604"
+ xlc_cflags_arch="-qarch=604 -qarch=ppc" ;;
+ powerpc604e) gcc_cflags_cpu="-mcpu=604e -mcpu=604"
+ xlc_cflags_arch="-qarch=604 -qarch=ppc" ;;
+ powerpc620) gcc_cflags_cpu="-mcpu=620" ;;
+ powerpc630) gcc_cflags_cpu="-mcpu=630"
+ xlc_cflags_arch="-qarch=pwr3"
+ cpu_path="p3 p3-p7" ;;
+ powerpc740) gcc_cflags_cpu="-mcpu=740" ;;
+ powerpc7400 | powerpc7410)
+ gcc_cflags_asm="-Wa,-maltivec"
+ gcc_cflags_cpu="-mcpu=7400 -mcpu=750" ;;
+ [powerpc74[45]?])
+ gcc_cflags_asm="-Wa,-maltivec"
+ gcc_cflags_cpu="-mcpu=7450" ;;
+ powerpc750) gcc_cflags_cpu="-mcpu=750" ;;
+ powerpc801) gcc_cflags_cpu="-mcpu=801" ;;
+ powerpc821) gcc_cflags_cpu="-mcpu=821" ;;
+ powerpc823) gcc_cflags_cpu="-mcpu=823" ;;
+ powerpc860) gcc_cflags_cpu="-mcpu=860" ;;
+ powerpc970) gcc_cflags_cpu="-mtune=970"
+ xlc_cflags_arch="-qarch=970 -qarch=pwr3"
+ vmx_path="powerpc64/vmx"
+ cpu_path="p4 p3-p7" ;;
+ power4) gcc_cflags_cpu="-mtune=power4"
+ xlc_cflags_arch="-qarch=pwr4"
+ cpu_path="p4 p3-p7" ;;
+ power5) gcc_cflags_cpu="-mtune=power5 -mtune=power4"
+ xlc_cflags_arch="-qarch=pwr5"
+ cpu_path="p5 p4 p3-p7" ;;
+ power6) gcc_cflags_cpu="-mtune=power6"
+ xlc_cflags_arch="-qarch=pwr6"
+ cpu_path="p6 p3-p7" ;;
+ power7) gcc_cflags_cpu="-mtune=power7 -mtune=power5"
+ xlc_cflags_arch="-qarch=pwr7 -qarch=pwr5"
+ cpu_path="p7 p5 p4 p3-p7" ;;
+ esac
+
+ case $host in
+ *-*-aix*)
+ cclist="gcc xlc cc"
+ gcc_32_cflags_maybe="-maix32"
+ xlc_cflags="-O2 -qmaxmem=20000"
+ xlc_cflags_optlist="arch"
+ xlc_32_cflags_maybe="-q32"
+ ar_32_flags="-X32"
+ nm_32_flags="-X32"
+ esac
+
+ case $host in
+ POWERPC64_PATTERN)
+ case $host in
+ *-*-aix*)
+ # On AIX a true 64-bit ABI is available.
+ # Need -Wc to pass object type flags through to the linker.
+ abilist="mode64 $abilist"
+ cclist_mode64="gcc xlc"
+ gcc_mode64_cflags="$gcc_cflags -maix64 -mpowerpc64"
+ gcc_mode64_cflags_optlist="cpu"
+ gcc_mode64_ldflags="-Wc,-maix64"
+ xlc_mode64_cflags="-O2 -q64 -qmaxmem=20000"
+ xlc_mode64_cflags_optlist="arch"
+ xlc_mode64_ldflags="-Wc,-q64"
+ # Must indicate object type to ar and nm
+ ar_mode64_flags="-X64"
+ nm_mode64_flags="-X64"
+ path_mode64=""
+ p=""
+ for i in $cpu_path
+ do path_mode64="${path_mode64}powerpc64/mode64/$i "
+ path_mode64="${path_mode64}powerpc64/$i "
+ p="${p} powerpc32/$i "
+ done
+ path_mode64="${path_mode64}powerpc64/mode64 $vmx_path powerpc64"
+ path="$p $path"
+ # grab this object, though it's not a true cycle counter routine
+ SPEED_CYCLECOUNTER_OBJ_mode64=powerpc64.lo
+ cyclecounter_size_mode64=0
+ ;;
+ *-*-darwin*)
+ # On Darwin we can use 64-bit instructions with a longlong limb,
+ # but with the chip still in 32-bit mode.
+ # In theory this can be used on any OS which knows how to save
+ # 64-bit registers in a context switch.
+ #
+ # Note that we must use -mpowerpc64 with gcc, since the
+ # longlong.h macros expect limb operands in a single 64-bit
+ # register, not two 32-bit registers as would be given for a
+ # long long without -mpowerpc64. In theory we could detect and
+ # accommodate both styles, but the proper 64-bit registers will
+ # be fastest and are what we really want to use.
+ #
+ # One would think -mpowerpc64 would set the assembler in the right
+ # mode to handle 64-bit instructions. But for that, also
+ # -force_cpusubtype_ALL is needed.
+ #
+ # Do not use -fast for Darwin, it actually adds options
+ # incompatible with a shared library.
+ #
+ abilist="mode64 mode32 $abilist"
+ gcc_32_cflags_maybe="-m32"
+ gcc_cflags_opt="-O3 -O2 -O1" # will this become used?
+ cclist_mode32="gcc"
+ gcc_mode32_cflags_maybe="-m32"
+ gcc_mode32_cflags="-mpowerpc64"
+ gcc_mode32_cflags_optlist="subtype cpu opt"
+ gcc_mode32_cflags_subtype="-force_cpusubtype_ALL"
+ gcc_mode32_cflags_opt="-O3 -O2 -O1"
+ limb_mode32=longlong
+ cclist_mode64="gcc"
+ gcc_mode64_cflags="-m64"
+ gcc_mode64_cflags_optlist="cpu opt"
+ gcc_mode64_cflags_opt="-O3 -O2 -O1"
+ path_mode64=""
+ path_mode32=""
+ p=""
+ for i in $cpu_path
+ do path_mode64="${path_mode64}powerpc64/mode64/$i "
+ path_mode64="${path_mode64}powerpc64/$i "
+ path_mode32="${path_mode32}powerpc64/mode32/$i "
+ path_mode32="${path_mode32}powerpc64/$i "
+ p="${p} powerpc32/$i "
+ done
+ path_mode64="${path_mode64}powerpc64/mode64 $vmx_path powerpc64"
+ path_mode32="${path_mode32}powerpc64/mode32 $vmx_path powerpc64"
+ path="$p $path"
+ SPEED_CYCLECOUNTER_OBJ_mode64=powerpc64.lo
+ cyclecounter_size_mode64=0
+ any_mode64_testlist="sizeof-long-8"
+ ;;
+ *-*-linux* | *-*-*bsd*)
+ # On GNU/Linux, assume the processor is in 64-bit mode. Some
+ # environments have a gcc that is always in 64-bit mode, while
+ # others require -m64, hence the use of cflags_maybe. The
+ # sizeof-long-8 test checks the mode is right (for the no option
+ # case).
+ #
+ # -mpowerpc64 is not used, since it should be the default in
+ # 64-bit mode. (We need its effect for the various longlong.h
+ # asm macros to be right of course.)
+ #
+ # gcc64 was an early port of gcc to 64-bit mode, but should be
+ # obsolete before too long. We prefer plain gcc when it knows
+ # 64-bits.
+ #
+ abilist="mode64 mode32 $abilist"
+ gcc_32_cflags_maybe="-m32"
+ cclist_mode32="gcc"
+ gcc_mode32_cflags_maybe="-m32"
+ gcc_mode32_cflags="-mpowerpc64"
+ gcc_mode32_cflags_optlist="cpu opt"
+ gcc_mode32_cflags_opt="-O3 -O2 -O1"
+ limb_mode32=longlong
+ cclist_mode64="gcc gcc64"
+ gcc_mode64_cflags_maybe="-m64"
+ gcc_mode64_cflags_optlist="cpu opt"
+ gcc_mode64_cflags_opt="-O3 -O2 -O1"
+ path_mode64=""
+ path_mode32=""
+ p=""
+ for i in $cpu_path
+ do path_mode64="${path_mode64}powerpc64/mode64/$i "
+ path_mode64="${path_mode64}powerpc64/$i "
+ path_mode32="${path_mode32}powerpc64/mode32/$i "
+ path_mode32="${path_mode32}powerpc64/$i "
+ p="${p} powerpc32/$i "
+ done
+ path_mode64="${path_mode64}powerpc64/mode64 $vmx_path powerpc64"
+ path_mode32="${path_mode32}powerpc64/mode32 $vmx_path powerpc64"
+ path="$p $path"
+ SPEED_CYCLECOUNTER_OBJ_mode64=powerpc64.lo
+ cyclecounter_size_mode64=0
+ any_mode64_testlist="sizeof-long-8"
+ ;;
+ esac
+ ;;
+ esac
+ ;;
+
+
+ # POWER 32-bit
+ [power-*-* | power[12]-*-* | power2sc-*-*])
+ AC_DEFINE(HAVE_HOST_CPU_FAMILY_power)
+ HAVE_HOST_CPU_FAMILY_power=1
+ cclist="gcc"
+ extra_functions="udiv_w_sdiv"
+ path="power"
+
+ # gcc 2.7.2 knows rios1, rios2, rsc
+ #
+ # -mcpu=rios2 can tickle an AIX assembler bug (see GMP_PROG_CC_WORKS) so
+ # there needs to be a fallback to just -mpower.
+ #
+ gcc_cflags_optlist="cpu"
+ case $host in
+ power-*-*) gcc_cflags_cpu="-mcpu=power -mpower" ;;
+ power1-*-*) gcc_cflags_cpu="-mcpu=rios1 -mpower" ;;
+ power2-*-*) gcc_cflags_cpu="-mcpu=rios2 -mpower" ;;
+ power2sc-*-*) gcc_cflags_cpu="-mcpu=rsc -mpower" ;;
+ esac
+ case $host in
+ *-*-aix*)
+ cclist="gcc xlc"
+ xlc_cflags="-O2 -qarch=pwr -qmaxmem=20000"
+ ;;
+ esac
+ ;;
+
+
+ pyramid-*-*)
+ path="pyr"
+ ;;
+
+
+ # IBM System/390 and z/Architecture
+ S390_PATTERN | S390X_PATTERN)
+ abilist="32"
+ gcc_cflags="$gcc_cflags $fomit_frame_pointer"
+ gcc_cflags_optlist="arch"
+ path="s390_32"
+ extra_functions="udiv_w_sdiv"
+ gcc_32_cflags_maybe="-m31"
+
+ case $host_cpu in
+ s390)
+ ;;
+ z900 | z900esa)
+ cpu="z900"
+ gccarch="$cpu"
+ path="s390_32/esame/$cpu s390_32/esame s390_32"
+ gcc_cflags_arch="-march=$gccarch"
+ AC_DEFINE_UNQUOTED(HAVE_HOST_CPU_s390_$cpu)
+ AC_DEFINE(HAVE_HOST_CPU_s390_zarch)
+ extra_functions=""
+ ;;
+ z990 | z990esa)
+ cpu="z990"
+ gccarch="$cpu"
+ path="s390_32/esame/$cpu s390_32/esame s390_32"
+ gcc_cflags_arch="-march=$gccarch"
+ AC_DEFINE_UNQUOTED(HAVE_HOST_CPU_s390_$cpu)
+ AC_DEFINE(HAVE_HOST_CPU_s390_zarch)
+ extra_functions=""
+ ;;
+ z9 | z9esa)
+ cpu="z9"
+ gccarch="z9-109"
+ path="s390_32/esame/$cpu s390_32/esame s390_32"
+ gcc_cflags_arch="-march=$gccarch"
+ AC_DEFINE_UNQUOTED(HAVE_HOST_CPU_s390_$cpu)
+ AC_DEFINE(HAVE_HOST_CPU_s390_zarch)
+ extra_functions=""
+ ;;
+ z10 | z10esa)
+ cpu="z10"
+ gccarch="z10"
+ path="s390_32/esame/$cpu s390_32/esame s390_32"
+ gcc_cflags_arch="-march=$gccarch"
+ AC_DEFINE_UNQUOTED(HAVE_HOST_CPU_s390_$cpu)
+ AC_DEFINE(HAVE_HOST_CPU_s390_zarch)
+ extra_functions=""
+ ;;
+ z196 | z196esa)
+ cpu="z196"
+ gccarch="z196"
+ path="s390_32/esame/$cpu s390_32/esame s390_32"
+ gcc_cflags_arch="-march=$gccarch"
+ AC_DEFINE_UNQUOTED(HAVE_HOST_CPU_s390_$cpu)
+ AC_DEFINE(HAVE_HOST_CPU_s390_zarch)
+ extra_functions=""
+ ;;
+ esac
+
+ case $host in
+ S390X_PATTERN)
+ abilist="64 32"
+ cclist_64="gcc"
+ gcc_64_cflags_optlist="arch"
+ gcc_64_cflags="$gcc_cflags -m64"
+ path_64="s390_64/$host_cpu s390_64"
+ extra_functions=""
+ ;;
+ esac
+ ;;
+
+
+ sh-*-*) path="sh" ;;
+ [sh[2-4]-*-*]) path="sh/sh2 sh" ;;
+
+
+ *sparc*-*-*)
+ # sizeof(long)==4 or 8 is tested, to ensure we get the right ABI. We've
+ # had various bug reports where users have set CFLAGS for their desired
+ # mode, but not set our ABI. For some reason it's sparc where this
+ # keeps coming up, presumably users there are accustomed to driving the
+ # compiler mode that way. The effect of our testlist setting is to
+ # reject ABI=64 in favour of ABI=32 if the user has forced the flags to
+ # 32-bit mode.
+ #
+ abilist="32"
+ cclist="gcc acc cc"
+ any_testlist="sizeof-long-4"
+ GMP_INCLUDE_MPN(sparc32/sparc-defs.m4)
+
+ case $host_cpu in
+ sparcv8 | microsparc | turbosparc)
+ path="sparc32/v8 sparc32" ;;
+ supersparc)
+ path="sparc32/v8/supersparc sparc32/v8 sparc32" ;;
+ [sparc64 | sparcv9* | ultrasparc | ultrasparc[234]*])
+ path="sparc32/v9 sparc32/v8 sparc32" ;;
+ [ultrasparct[12345]])
+ path="sparc32/ultrasparct1 sparc32/v8 sparc32" ;;
+ *)
+ path="sparc32" ;;
+ esac
+
+ # gcc 2.7.2 doesn't know about v9 and doesn't pass -xarch=v8plus to the
+ # assembler. Add it explicitly since the solaris assembler won't accept
+ # our sparc32/v9 asm code without it. gas accepts -xarch=v8plus too, so
+ # it can be in the cflags unconditionally (though gas doesn't need it).
+ #
+ # gcc -m32 is needed to force 32-bit mode on a dual-ABI system, but past
+ # gcc doesn't know that flag, hence cflags_maybe. Note that -m32 cannot
+ # be done through the optlist since the plain cflags would be run first
+ # and we don't want to require the default mode (whatever it is) works.
+ #
+ # Note it's gcc_32_cflags_maybe and not gcc_cflags_maybe because the
+ # latter would be used in the 64-bit ABI on systems like "*bsd" where
+ # abilist="64" only.
+ #
+ case $host_cpu in
+ [ultrasparct[345]])
+ gcc_32_cflags="$gcc_cflags -Wa,-xarch=v8plusd" ;;
+ sparc64 | sparcv9* | ultrasparc*)
+ gcc_32_cflags="$gcc_cflags -Wa,-xarch=v8plus" ;;
+ esac
+ gcc_32_cflags_maybe="-m32"
+ gcc_cflags_optlist="cpu"
+
+ # gcc 2.7.2 knows -mcypress, -msupersparc, -mv8, -msparclite.
+ # gcc 2.95 knows -mcpu= v7, hypersparc, sparclite86x, f930, f934,
+ # sparclet, tsc701, v9, ultrasparc. A warning is given that the
+ # plain -m forms will disappear.
+ # gcc 3.0 adds nothing.
+ # gcc 3.1 adds nothing.
+ # gcc 3.2 adds nothing.
+ # gcc 3.3 adds ultrasparc3.
+ #
+ case $host_cpu in
+ supersparc) gcc_cflags_cpu="-mcpu=supersparc -msupersparc" ;;
+ sparcv8 | microsparc | turbosparc)
+ gcc_cflags_cpu="-mcpu=v8 -mv8" ;;
+ sparc64 | sparcv9*) gcc_cflags_cpu="-mcpu=v9 -mv8" ;;
+ ultrasparc3) gcc_cflags_cpu="-mcpu=ultrasparc3 -mcpu=ultrasparc -mv8" ;;
+ ultrasparc*) gcc_cflags_cpu="-mcpu=ultrasparc -mv8" ;;
+ *) gcc_cflags_cpu="-mcpu=v7 -mcypress" ;;
+ esac
+
+ # SunPRO cc and acc, and SunOS bundled cc
+ case $host in
+ *-*-solaris* | *-*-sunos*)
+ # Note no -g, it disables all optimizations.
+ cc_cflags=
+ cc_cflags_optlist="opt arch cpu"
+
+ # SunOS cc doesn't know -xO4, fallback to -O2.
+ cc_cflags_opt="-xO4 -O2"
+
+ # SunOS cc doesn't know -xarch, apparently always generating v7
+ # code, so make this optional
+ case $host_cpu in
+ sparcv8 | microsparc | supersparc | turbosparc)
+ cc_cflags_arch="-xarch=v8" ;;
+ [ultrasparct[345]]) cc_cflags_arch="-xarch=v8plusd" ;;
+ sparc64 | sparcv9* | ultrasparc*) cc_cflags_arch="-xarch=v8plus" ;;
+ *) cc_cflags_arch="-xarch=v7" ;;
+ esac
+
+ # SunOS cc doesn't know -xchip and doesn't seem to have an equivalent.
+ # SunPRO cc 5 recognises -xchip=generic, old, super, super2, micro,
+ # micro2, hyper, hyper2, powerup, ultra, ultra2, ultra2i.
+ # SunPRO cc 6 adds -xchip=ultra2e, ultra3cu.
+ #
+ # FIXME: Which of ultra, ultra2 or ultra2i is the best fallback for
+ # ultrasparc3?
+ #
+ case $host_cpu in
+ supersparc) cc_cflags_cpu="-xchip=super" ;;
+ microsparc) cc_cflags_cpu="-xchip=micro" ;;
+ turbosparc) cc_cflags_cpu="-xchip=micro2" ;;
+ ultrasparc) cc_cflags_cpu="-xchip=ultra" ;;
+ ultrasparc2) cc_cflags_cpu="-xchip=ultra2" ;;
+ ultrasparc2i) cc_cflags_cpu="-xchip=ultra2i" ;;
+ ultrasparc3) cc_cflags_cpu="-xchip=ultra3 -xchip=ultra" ;;
+ *) cc_cflags_cpu="-xchip=generic" ;;
+ esac
+ esac
+
+ case $host_cpu in
+ sparc64 | sparcv9* | ultrasparc*)
+ case $host in
+ # Solaris 6 and earlier cannot run ABI=64 since it doesn't save
+ # registers properly, so ABI=32 is left as the only choice.
+ #
+ [*-*-solaris2.[0-6] | *-*-solaris2.[0-6].*]) ;;
+
+ # BSD sparc64 ports are 64-bit-only systems, so ABI=64 is the only
+ # choice. In fact they need no special compiler flags, gcc -m64
+ # is the default, but it doesn't hurt to add it. v9 CPUs always
+ # use the sparc64 port, since the plain 32-bit sparc ports don't
+ # run on a v9.
+ #
+ *-*-*bsd*) abilist="64" ;;
+
+ # For all other systems, we try both 64 and 32.
+ #
+ # GNU/Linux sparc64 has only recently gained a 64-bit user mode.
+ # In the past sparc64 meant a v9 cpu, but there were no 64-bit
+ # operations in user mode. We assume that if "gcc -m64" works
+ # then the system is suitable. Hopefully even if someone attempts
+ # to put a new gcc and/or glibc on an old system it won't run.
+ #
+ *) abilist="64 32" ;;
+ esac
+
+ case $host_cpu in
+ ultrasparc | ultrasparc2 | ultrasparc2i)
+ path_64="sparc64/ultrasparc1234 sparc64" ;;
+ [ultrasparc[34]])
+ path_64="sparc64/ultrasparc34 sparc64/ultrasparc1234 sparc64" ;;
+ [ultrasparct[12345]])
+ path_64="sparc64/ultrasparct1 sparc64" ;;
+ *)
+ path_64="sparc64"
+ esac
+
+ cclist_64="gcc"
+ any_64_testlist="sizeof-long-8"
+
+ # gcc -mptr64 is probably implied by -m64, but we're not sure if
+ # this was always so. On Solaris in the past we always used both
+ # "-m64 -mptr64".
+ #
+ # gcc -Wa,-xarch=v9 is thought to be necessary in some cases on
+ # solaris, but it would seem likely that if gcc is going to generate
+ # 64-bit code it will have to add that option itself where needed.
+ # An extra copy of this option should be harmless though, but leave
+ # it until we're sure. (Might want -xarch=v9a or -xarch=v9b for the
+ # higher cpu types instead.)
+ #
+ gcc_64_cflags="$gcc_cflags -m64 -mptr64"
+ gcc_64_ldflags="-Wc,-m64"
+ gcc_64_cflags_optlist="cpu"
+
+ case $host in
+ *-*-solaris*)
+ # Sun cc.
+ #
+ # We used to have -fast and some fixup options here, but it
+ # recurrently caused problems with miscompilation. Of course,
+ # -fast is documented as miscompiling things for the sake of speed.
+ #
+ cclist_64="$cclist_64 cc"
+ cc_64_cflags="-xO3 -xarch=v9"
+ cc_64_cflags_optlist="cpu"
+ ;;
+ esac
+
+ # using the v9 %tick register
+ SPEED_CYCLECOUNTER_OBJ_32=sparcv9.lo
+ SPEED_CYCLECOUNTER_OBJ_64=sparcv9.lo
+ cyclecounter_size_32=2
+ cyclecounter_size_64=2
+ ;;
+ esac
+ ;;
+
+
+ # VAX
+ vax*-*-*elf*)
+ # Use elf conventions (i.e., '%' register prefix, no global prefix)
+ #
+ GMP_INCLUDE_MPN(vax/elf.m4)
+ gcc_cflags="$gcc_cflags $fomit_frame_pointer"
+ path="vax"
+ extra_functions="udiv_w_sdiv"
+ ;;
+ vax*-*-*)
+ # Default to aout conventions (i.e., no register prefix, '_' global prefix)
+ #
+ gcc_cflags="$gcc_cflags $fomit_frame_pointer"
+ path="vax"
+ extra_functions="udiv_w_sdiv"
+ ;;
+
+
+ # AMD and Intel x86 configurations, including AMD64
+ #
+ # Rumour has it gcc -O2 used to give worse register allocation than just
+ # -O, but let's assume that's no longer true.
+ #
+ # -m32 forces 32-bit mode on a bi-arch 32/64 amd64 build of gcc. -m64 is
+ # the default in such a build (we think), so -m32 is essential for ABI=32.
+ # This is, of course, done for any $host_cpu, not just x86_64, so we can
+ # get such a gcc into the right mode to cross-compile to say i486-*-*.
+ #
+ # -m32 is not available in gcc 2.95 and earlier, hence cflags_maybe to use
+ # it when it works. We check sizeof(long)==4 to ensure we get the right
+ # mode, in case -m32 has failed not because it's an old gcc, but because
+ # it's a dual 32/64-bit gcc without a 32-bit libc, or whatever.
+ #
+ X86_PATTERN | X86_64_PATTERN)
+ abilist="32"
+ cclist="gcc icc cc"
+ gcc_cflags="$gcc_cflags $fomit_frame_pointer"
+ gcc_32_cflags_maybe="-m32"
+ icc_cflags="-no-gcc"
+ icc_cflags_optlist="opt"
+ icc_cflags_opt="-O3 -O2 -O1"
+ any_32_testlist="sizeof-long-4"
+ CALLING_CONVENTIONS_OBJS='x86call.lo x86check$U.lo'
+
+ # Availability of rdtsc is checked at run-time.
+ SPEED_CYCLECOUNTER_OBJ=pentium.lo
+
+ # gcc 2.7.2 only knows i386 and i486, using -m386 or -m486. These
+ # represent -mcpu= since -m486 doesn't generate 486 specific insns.
+ # gcc 2.95 adds k6, pentium and pentiumpro, and takes -march= and -mcpu=.
+ # gcc 3.0 adds athlon.
+ # gcc 3.1 adds k6-2, k6-3, pentium-mmx, pentium2, pentium3, pentium4,
+ # athlon-tbird, athlon-4, athlon-xp, athlon-mp.
+ # gcc 3.2 adds winchip2.
+ # gcc 3.3 adds winchip-c6.
+ # gcc 3.3.1 from mandrake adds k8 and knows -mtune.
+ # gcc 3.4 adds c3, c3-2, k8, and deprecates -mcpu in favour of -mtune.
+ #
+ # In gcc 2.95.[0123], -march=pentiumpro provoked a stack slot bug in an
+ # old version of mpz/powm.c. Seems to be fine with the current code, so
+ # no need for any restrictions on that option.
+ #
+ # -march=pentiumpro can fail if the assembler doesn't know "cmov"
+ # (eg. solaris 2.8 native "as"), so always have -march=pentium after
+ # that as a fallback.
+ #
+ # -march=pentium4 and -march=k8 enable SSE2 instructions, which may or
+ # may not be supported by the assembler and/or the OS, and is bad in gcc
+ # prior to 3.3. The tests will reject these if no good, so fallbacks
+ # like "-march=pentium4 -mno-sse2" are given to try also without SSE2.
+ # Note the relevant -march types are listed in the optflags handling
+ # below, be sure to update there if adding new types emitting SSE2.
+ #
+ # -mtune is used at the start of each cpu option list to give something
+ # gcc 3.4 will use, thereby avoiding warnings from -mcpu. -mcpu forms
+ # are retained for use by prior gcc. For example pentium has
+ # "-mtune=pentium -mcpu=pentium ...", the -mtune is for 3.4 and the
+ # -mcpu for prior. If there's a brand new choice in 3.4 for a chip,
+ # like k8 for x86_64, then it can be the -mtune at the start, no need to
+ # duplicate anything.
+ #
+ gcc_cflags_optlist="cpu arch"
+ case $host_cpu in
+ i386*)
+ gcc_cflags_cpu="-mtune=i386 -mcpu=i386 -m386"
+ gcc_cflags_arch="-march=i386"
+ path="x86"
+ ;;
+ i486*)
+ gcc_cflags_cpu="-mtune=i486 -mcpu=i486 -m486"
+ gcc_cflags_arch="-march=i486"
+ path="x86/i486 x86"
+ ;;
+ i586 | pentium)
+ gcc_cflags_cpu="-mtune=pentium -mcpu=pentium -m486"
+ gcc_cflags_arch="-march=pentium"
+ path="x86/pentium x86"
+ ;;
+ pentiummmx)
+ gcc_cflags_cpu="-mtune=pentium-mmx -mcpu=pentium-mmx -mcpu=pentium -m486"
+ gcc_cflags_arch="-march=pentium-mmx -march=pentium"
+ path="x86/pentium/mmx x86/pentium x86"
+ ;;
+ i686 | pentiumpro)
+ gcc_cflags_cpu="-mtune=pentiumpro -mcpu=pentiumpro -mcpu=i486 -m486"
+ gcc_cflags_arch="-march=pentiumpro -march=pentium"
+ path="x86/p6 x86"
+ ;;
+ pentium2)
+ gcc_cflags_cpu="-mtune=pentium2 -mcpu=pentium2 -mcpu=pentiumpro -mcpu=i486 -m486"
+ gcc_cflags_arch="-march=pentium2 -march=pentiumpro -march=pentium"
+ path="x86/p6/mmx x86/p6 x86"
+ ;;
+ pentium3)
+ gcc_cflags_cpu="-mtune=pentium3 -mcpu=pentium3 -mcpu=pentiumpro -mcpu=i486 -m486"
+ gcc_cflags_arch="-march=pentium3 -march=pentiumpro -march=pentium"
+ path="x86/p6/p3mmx x86/p6/mmx x86/p6 x86"
+ ;;
+ pentiumm)
+ gcc_cflags_cpu="-mtune=pentium3 -mcpu=pentium3 -mcpu=pentiumpro -mcpu=i486 -m486"
+ gcc_cflags_arch="-march=pentium3 -march=pentiumpro -march=pentium"
+ path="x86/p6/sse2 x86/p6/p3mmx x86/p6/mmx x86/p6 x86"
+ ;;
+ k6)
+ gcc_cflags_cpu="-mtune=k6 -mcpu=k6 -mcpu=i486 -m486"
+ gcc_cflags_arch="-march=k6"
+ path="x86/k6/mmx x86/k6 x86"
+ ;;
+ k62)
+ gcc_cflags_cpu="-mtune=k6-2 -mcpu=k6-2 -mcpu=k6 -mcpu=i486 -m486"
+ gcc_cflags_arch="-march=k6-2 -march=k6"
+ path="x86/k6/k62mmx x86/k6/mmx x86/k6 x86"
+ ;;
+ k63)
+ gcc_cflags_cpu="-mtune=k6-3 -mcpu=k6-3 -mcpu=k6 -mcpu=i486 -m486"
+ gcc_cflags_arch="-march=k6-3 -march=k6"
+ path="x86/k6/k62mmx x86/k6/mmx x86/k6 x86"
+ ;;
+ geode)
+ gcc_cflags_cpu="-mtune=k6-3 -mcpu=k6-3 -mcpu=k6 -mcpu=i486 -m486"
+ gcc_cflags_arch="-march=k6-3 -march=k6"
+ path="x86/geode x86/k6/k62mmx x86/k6/mmx x86/k6 x86"
+ ;;
+ athlon)
+ # Athlon instruction costs are close to P6 (3 cycle load latency,
+ # 4-6 cycle mul, 40 cycle div, pairable adc, etc) so if gcc doesn't
+ # know athlon (eg. 2.95.2 doesn't) then fall back on pentiumpro.
+ gcc_cflags_cpu="-mtune=athlon -mcpu=athlon -mcpu=pentiumpro -mcpu=i486 -m486"
+ gcc_cflags_arch="-march=athlon -march=pentiumpro -march=pentium"
+ path="x86/k7/mmx x86/k7 x86"
+ ;;
+ i786 | pentium4)
+ # pentiumpro is the primary fallback when gcc doesn't know pentium4.
+ # This gets us cmov to eliminate branches. Maybe "athlon" would be
+ # a possibility on gcc 3.0.
+ #
+ gcc_cflags_cpu="-mtune=pentium4 -mcpu=pentium4 -mcpu=pentiumpro -mcpu=i486 -m486"
+ gcc_cflags_arch="-march=pentium4 -march=pentium4~-mno-sse2 -march=pentiumpro -march=pentium"
+ gcc_64_cflags_cpu="-mtune=nocona"
+ path="x86/pentium4/sse2 x86/pentium4/mmx x86/pentium4 x86"
+ path_64="x86_64/pentium4 x86_64"
+ ;;
+ viac32)
+ # Not sure of the best fallbacks here for -mcpu.
+ # c3-2 has sse and mmx, so pentium3 is good for -march.
+ gcc_cflags_cpu="-mtune=c3-2 -mcpu=c3-2 -mcpu=i486 -m486"
+ gcc_cflags_arch="-march=c3-2 -march=pentium3 -march=pentiumpro -march=pentium"
+ path="x86/p6/p3mmx x86/p6/mmx x86/p6 x86"
+ ;;
+ viac3*)
+ # Not sure of the best fallbacks here.
+ gcc_cflags_cpu="-mtune=c3 -mcpu=c3 -mcpu=i486 -m486"
+ gcc_cflags_arch="-march=c3 -march=pentium-mmx -march=pentium"
+ path="x86/pentium/mmx x86/pentium x86"
+ ;;
+ athlon64 | k8 | x86_64)
+ gcc_cflags_cpu="-mtune=k8 -mcpu=athlon -mcpu=pentiumpro -mcpu=i486 -m486"
+ gcc_cflags_arch="-march=k8 -march=k8~-mno-sse2 -march=athlon -march=pentiumpro -march=pentium"
+ path="x86/k8 x86/k7/mmx x86/k7 x86"
+ path_64="x86_64/k8 x86_64"
+ ;;
+ k10)
+ gcc_cflags_cpu="-mtune=amdfam10 -mtune=k8"
+ gcc_cflags_arch="-march=amdfam10 -march=k8 -march=k8~-mno-sse2"
+ path="x86/k10 x86/k8 x86/k7/mmx x86/k7 x86"
+ path_64="x86_64/k10 x86_64/k8 x86_64"
+ ;;
+ bobcat)
+ gcc_cflags_cpu="-mtune=btver1 -mtune=amdfam10 -mtune=k8"
+ gcc_cflags_arch="-march=btver1 -march=amdfam10 -march=k8 -march=k8~-mno-sse2"
+ path="x86/bobcat x86/k7/mmx x86/k7 x86"
+ path_64="x86_64/bobcat x86_64/k10 x86_64/k8 x86_64"
+ ;;
+ bulldozer | bd1)
+ gcc_cflags_cpu="-mtune=bdver1 -mtune=amdfam10 -mtune=k8"
+ gcc_cflags_arch="-march=bdver1 -march=amdfam10 -march=k8 -march=k8~-mno-sse2"
+ path="x86/bd1 x86/k7/mmx x86/k7 x86"
+ path_64="x86_64/bd1 x86_64/k10 x86_64/k8 x86_64"
+ ;;
+ core2)
+ gcc_cflags_cpu="-mtune=core2 -mtune=k8"
+ gcc_cflags_arch="-march=core2 -march=core2~-mno-sse2 -march=k8 -march=k8~-mno-sse2"
+ path="x86/core2 x86/p6/sse2 x86/p6/p3mmx x86/p6/mmx x86/p6 x86"
+ path_64="x86_64/core2 x86_64"
+ ;;
+ corei | coreinhm | coreiwsm)
+ gcc_cflags_cpu="-mtune=corei7 -mtune=core2 -mtune=k8"
+ gcc_cflags_arch="-march=corei7 -march=core2 -march=core2~-mno-sse2 -march=k8 -march=k8~-mno-sse2"
+ path="x86/coreinhm x86/p6/sse2 x86/p6/p3mmx x86/p6/mmx x86/p6 x86"
+ path_64="x86_64/coreinhm x86_64/core2 x86_64"
+ ;;
+ coreisbr | coreihwl | coreibwl)
+ gcc_cflags_cpu="-mtune=corei7 -mtune=core2 -mtune=k8"
+ gcc_cflags_arch="-march=corei7 -march=core2 -march=core2~-mno-sse2 -march=k8 -march=k8~-mno-sse2"
+ path="x86/coreisbr x86/p6/sse2 x86/p6/p3mmx x86/p6/mmx x86/p6 x86"
+ path_64="x86_64/coreisbr x86_64/coreinhm x86_64/core2 x86_64"
+ ;;
+ atom)
+ gcc_cflags_cpu="-mtune=atom -mtune=pentium3"
+ gcc_cflags_arch="-march=atom -march=pentium3"
+ path="x86/atom/sse2 x86/atom/mmx x86/atom x86"
+ path_64="x86_64/atom x86_64"
+ ;;
+ nano)
+ gcc_cflags_cpu="-mtune=nano"
+ gcc_cflags_arch="-march=nano"
+ path="x86/nano x86"
+ path_64="x86_64/nano x86_64"
+ ;;
+ *)
+ gcc_cflags_cpu="-mtune=i486 -mcpu=i486 -m486"
+ gcc_cflags_arch="-march=i486"
+ path="x86"
+ path_64="x86_64"
+ ;;
+ esac
+
+ case $host in
+ X86_64_PATTERN)
+ cclist_64="gcc"
+ gcc_64_cflags="$gcc_cflags -m64"
+ gcc_64_cflags_optlist="cpu arch"
+ CALLING_CONVENTIONS_OBJS_64='amd64call.lo amd64check$U.lo'
+ SPEED_CYCLECOUNTER_OBJ_64=x86_64.lo
+ cyclecounter_size_64=2
+
+ cclist_x32="gcc"
+ gcc_x32_cflags="$gcc_cflags -mx32"
+ gcc_x32_cflags_optlist="$gcc_64_cflags_optlist"
+ CALLING_CONVENTIONS_OBJS_x32="$CALLING_CONVENTIONS_OBJS_64"
+ SPEED_CYCLECOUNTER_OBJ_x32="$SPEED_CYCLECOUNTER_OBJ_64"
+ cyclecounter_size_x32="$cyclecounter_size_64"
+ path_x32="$path_64"
+ limb_x32=longlong
+ any_x32_testlist="sizeof-long-4"
+
+ abilist="64 x32 32"
+ if test "$enable_assembly" = "yes" ; then
+ extra_functions_64="invert_limb_table"
+ extra_functions_x32=$extra_functions_64
+ fi
+
+ case $host in
+ *-*-solaris*)
+ # Sun cc.
+ cclist_64="$cclist_64 cc"
+ cc_64_cflags="-xO3 -m64"
+ ;;
+ *-*-mingw* | *-*-cygwin)
+ limb_64=longlong
+ CALLING_CONVENTIONS_OBJS_64=""
+ AC_DEFINE(HOST_DOS64,1,[Define to 1 for Windos/64])
+ GMP_NONSTD_ABI_64=DOS64
+ ;;
+ esac
+ ;;
+ esac
+ ;;
+
+
+ # FIXME: z8kx won't get through config.sub. Could make 16 versus 32 bit
+ # limb an ABI option perhaps.
+ z8kx*-*-*)
+ path="z8000x"
+ extra_functions="udiv_w_sdiv"
+ ;;
+ z8k*-*-*)
+ path="z8000"
+ extra_functions="udiv_w_sdiv"
+ ;;
+
+
+ # Special CPU "none" was used to select generic C; this is now obsolete.
+ none-*-*)
+ enable_assembly=no
+ AC_MSG_WARN([the \"none\" host is obsolete, use --disable-assembly])
+ ;;
+
+esac
+
+# mingw can be built by the cygwin gcc if -mno-cygwin is added. For
+# convenience, add this automatically (as an optlist entry) if it works.
+# A real mingw gcc accepts -mno-cygwin too, though there it is already the
+# default. mingw only runs on x86, but match any CPU to catch "none" too.
+#
+case $host in
+ *-*-mingw*)
+ gcc_cflags_optlist="$gcc_cflags_optlist nocygwin"
+ gcc_cflags_nocygwin="-mno-cygwin"
+ ;;
+esac
+
+# Write user settings and our defaults to AC_FD_CC; "(unset)" = never set.
+CFLAGS_or_unset=${CFLAGS-'(unset)'}
+CPPFLAGS_or_unset=${CPPFLAGS-'(unset)'}
+
+cat >&AC_FD_CC <<EOF
+User:
+ABI=$ABI
+CC=$CC
+CFLAGS=$CFLAGS_or_unset
+CPPFLAGS=$CPPFLAGS_or_unset
+MPN_PATH=$MPN_PATH
+GMP:
+abilist=$abilist
+cclist=$cclist
+EOF
+
+# "set" if the user gave the variable at all, even as an empty string.
+test_CFLAGS=${CFLAGS+set}
+test_CPPFLAGS=${CPPFLAGS+set}
+# The last ABI in $abilist is the one allowed to use un-suffixed variables.
+for abi in $abilist; do
+ abi_last="$abi"
+done
+
+# A user-specified ABI must be one of $abilist, else configure stops with
+# an error. After that $abilist is restricted to just that choice.
+#
+if test -n "$ABI"; then
+ found=no
+ for abi in $abilist; do
+ if test $abi = "$ABI"; then found=yes; break; fi
+ done
+ if test $found = no; then
+ AC_MSG_ERROR([ABI=$ABI is not among the following valid choices: $abilist])
+ fi
+ abilist="$ABI"
+fi
+
+found_compiler=no
+
+for abi in $abilist; do
+
+ echo "checking ABI=$abi"
+
+ # Suppose abilist="64 32", then for abi=64, will have abi1="_64" and
+ # abi2="_64". For abi=32, will have abi1="_32" and abi2="". This is how
+ # $gcc_cflags becomes a fallback for $gcc_32_cflags (the last in the
+ # abilist), but there's no fallback for $gcc_64_cflags.
+ #
+ abi1=[`echo _$abi | sed 's/[.]//g'`]
+ if test $abi = $abi_last; then abi2=; else abi2="$abi1"; fi
+
+ # Compiler choices under this ABI
+ eval cclist_chosen=\"\$cclist$abi1\"
+ test -n "$cclist_chosen" || eval cclist_chosen=\"\$cclist$abi2\"
+
+ # If there's a user specified $CC then don't use a list for
+ # $cclist_chosen, just a single value for $ccbase.
+ #
+ if test -n "$CC"; then
+
+ # The first word of $CC, stripped of any directory. For instance
+ # CC="/usr/local/bin/gcc -pipe" will give "gcc".
+ #
+ for ccbase in $CC; do break; done
+ ccbase=`echo $ccbase | sed 's:.*/::'`
+
+ # If this $ccbase is in $cclist_chosen then it's a compiler we know and
+ # we can do flags defaulting with it. If not, then $cclist_chosen is
+ # set to "unrecognised" so no default flags are used.
+ #
+ # "unrecognised" is used to avoid bad effects with eval if $ccbase has
+ # non-symbol characters. For instance ccbase=my+cc would end up with
+ # something like cflags="$my+cc_cflags" which would give
+ # cflags="+cc_cflags" rather than the intended empty string for an
+ # unknown compiler.
+ #
+ found=unrecognised
+ for i in $cclist_chosen; do
+ if test "$ccbase" = $i; then
+ found=$ccbase
+ break
+ fi
+ done
+ cclist_chosen=$found
+ fi
+
+ for ccbase in $cclist_chosen; do
+
+ # When cross compiling, look for a compiler with the $host_alias as a
+ # prefix, the same way that AC_CHECK_TOOL does. But don't do this to a
+ # user-selected $CC.
+ #
+ # $cross_compiling will be yes/no/maybe at this point. Do the host
+ # prefixing for "maybe" as well as "yes".
+ #
+ if test "$cross_compiling" != no && test -z "$CC"; then
+ cross_compiling_prefix="${host_alias}-"
+ fi
+
+ for ccprefix in $cross_compiling_prefix ""; do
+
+ cc="$CC"
+ test -n "$cc" || cc="$ccprefix$ccbase"
+
+ # If the compiler is gcc but installed under another name, then change
+ # $ccbase so as to use the flags we know for gcc. This helps for
+ # instance when specifying CC=gcc272 on Debian GNU/Linux, or the
+ # native cc which is really gcc on NeXT or MacOS-X.
+ #
+ # FIXME: There's a slight misfeature here. If cc is actually gcc but
+ # gcc is not a known compiler under this $abi then we'll end up
+ # testing it with no flags and it'll work, but chances are it won't be
+ # in the right mode for the ABI we desire. Let's quietly hope this
+ # doesn't happen.
+ #
+ if test $ccbase != gcc; then
+ GMP_PROG_CC_IS_GNU($cc,ccbase=gcc)
+ fi
+
+ # Similarly if the compiler is IBM xlc but invoked as cc or whatever
+ # then change $ccbase and make the default xlc flags available.
+ if test $ccbase != xlc; then
+ GMP_PROG_CC_IS_XLC($cc,ccbase=xlc)
+ fi
+
+ # acc was Sun's first unbundled compiler back in the SunOS days, or
+ # something like that, but today its man page says it's not meant to
+ # be used directly (instead via /usr/ucb/cc). The options are pretty
+ # much the same as the main SunPRO cc, so share those configs.
+ #
+ case $host in
+ *sparc*-*-solaris* | *sparc*-*-sunos*)
+ if test "$ccbase" = acc; then ccbase=cc; fi ;;
+ esac
+ # Try first with the optional cflags_maybe prepended, then without it.
+ for tmp_cflags_maybe in yes no; do
+ eval cflags=\"\$${ccbase}${abi1}_cflags\"
+ test -n "$cflags" || eval cflags=\"\$${ccbase}${abi2}_cflags\"
+
+ if test "$tmp_cflags_maybe" = yes; then
+ # don't try cflags_maybe when the user set CFLAGS
+ if test "$test_CFLAGS" = set; then continue; fi
+ eval cflags_maybe=\"\$${ccbase}${abi1}_cflags_maybe\"
+ test -n "$cflags_maybe" || eval cflags_maybe=\"\$${ccbase}${abi2}_cflags_maybe\"
+ # don't try cflags_maybe if there's nothing set
+ if test -z "$cflags_maybe"; then continue; fi
+ cflags="$cflags_maybe $cflags"
+ fi
+
+ # Any user CFLAGS, even an empty string, takes precedence
+ if test "$test_CFLAGS" = set; then cflags=$CFLAGS; fi
+
+ # Any user CPPFLAGS, even an empty string, takes precedence
+ eval cppflags=\"\$${ccbase}${abi1}_cppflags\"
+ test -n "$cppflags" || eval cppflags=\"\$${ccbase}${abi2}_cppflags\"
+ if test "$test_CPPFLAGS" = set; then cppflags=$CPPFLAGS; fi
+
+ # --enable-profiling adds -p/-pg even to user-specified CFLAGS.
+ # This is convenient, but it's perhaps a bit naughty to modify user
+ # CFLAGS.
+ case "$enable_profiling" in
+ prof) cflags="$cflags -p" ;;
+ gprof) cflags="$cflags -pg" ;;
+ instrument) cflags="$cflags -finstrument-functions" ;;
+ esac
+
+ GMP_PROG_CC_WORKS($cc $cflags $cppflags,,continue)
+
+ # If we're supposed to be using a "long long" for a limb, check that
+ # it works.
+ eval limb_chosen=\"\$limb$abi1\"
+ test -n "$limb_chosen" || eval limb_chosen=\"\$limb$abi2\"
+ if test "$limb_chosen" = longlong; then
+ GMP_PROG_CC_WORKS_LONGLONG($cc $cflags $cppflags,,continue)
+ fi
+
+ # The tests to perform on this $cc, if any
+ eval testlist=\"\$${ccbase}${abi1}_testlist\"
+ test -n "$testlist" || eval testlist=\"\$${ccbase}${abi2}_testlist\"
+ test -n "$testlist" || eval testlist=\"\$any${abi1}_testlist\"
+ test -n "$testlist" || eval testlist=\"\$any${abi2}_testlist\"
+
+ testlist_pass=yes
+ for tst in $testlist; do
+ case $tst in
+ hpc-hppa-2-0) GMP_HPC_HPPA_2_0($cc,,testlist_pass=no) ;;
+ gcc-arm-umodsi) GMP_GCC_ARM_UMODSI($cc,,testlist_pass=no) ;;
+ gcc-mips-o32) GMP_GCC_MIPS_O32($cc,,testlist_pass=no) ;;
+ hppa-level-2.0) GMP_HPPA_LEVEL_20($cc $cflags,,testlist_pass=no) ;;
+ sizeof*) GMP_C_TEST_SIZEOF($cc $cflags,$tst,,testlist_pass=no) ;;
+ esac
+ if test $testlist_pass = no; then break; fi
+ done
+
+ if test $testlist_pass = yes; then
+ found_compiler=yes
+ break
+ fi
+ done
+
+ if test $found_compiler = yes; then break; fi
+ done
+
+ if test $found_compiler = yes; then break; fi
+ done
+
+ if test $found_compiler = yes; then break; fi
+done
+
+
+# If we recognised the CPU, as indicated by $path being set, then insist
+# that we have a working compiler, either from our $cclist choices or from
+# $CC. We can't let AC_PROG_CC look around for a compiler because it might
+# find one that we've rejected (for not supporting the modes our asm code
+# demands, etc).
+#
+# If we didn't recognise the CPU (and this includes host_cpu=none), then
+# fall through and let AC_PROG_CC look around for a compiler too. This is
+# mostly in the interests of following a standard autoconf setup, after all
+# we've already tested cc and gcc adequately (hopefully). As of autoconf
+# 2.50 the only thing AC_PROG_CC really adds is a check for "cl" (Microsoft
+# C on MS-DOS systems).
+#
+if test $found_compiler = no && test -n "$path"; then
+ AC_MSG_ERROR([could not find a working compiler, see config.log for details])
+fi
+
+case $host in
+ X86_PATTERN | X86_64_PATTERN)
+ # If the user asked for a fat build, override the path and flags set above
+ if test $enable_fat = yes; then
+ gcc_cflags_cpu=""
+ gcc_cflags_arch=""
+
+ fat_functions="add_n addmul_1 bdiv_dbm1c com copyd copyi dive_1 divrem_1
+ gcd_1 lshift lshiftc mod_1 mod_1_1 mod_1_1_cps mod_1_2
+ mod_1_2_cps mod_1_4 mod_1_4_cps mod_34lsub1 mode1o mul_1
+ mul_basecase mullo_basecase pre_divrem_1 pre_mod_1 redc_1
+ redc_2 rshift sqr_basecase sub_n submul_1"
+
+ if test "$abi" = 32; then
+ extra_functions="$extra_functions fat fat_entry"
+ path="x86/fat x86"
+ fat_path="x86 x86/fat x86/i486
+ x86/k6 x86/k6/mmx x86/k6/k62mmx
+ x86/k7 x86/k7/mmx
+ x86/k8 x86/k10 x86/bobcat
+ x86/pentium x86/pentium/mmx
+ x86/p6 x86/p6/mmx x86/p6/p3mmx x86/p6/sse2
+ x86/pentium4 x86/pentium4/mmx x86/pentium4/sse2
+ x86/core2 x86/coreinhm x86/coreisbr
+ x86/atom x86/atom/mmx x86/atom/sse2 x86/nano"
+ fi
+
+ if test "$abi" = 64; then
+ gcc_64_cflags=""
+ extra_functions_64="$extra_functions_64 fat fat_entry"
+ path_64="x86_64/fat x86_64"
+ fat_path="x86_64 x86_64/fat
+ x86_64/k8 x86_64/k10 x86_64/bd1 x86_64/bobcat
+ x86_64/pentium4 x86_64/core2 x86_64/coreinhm x86_64/coreisbr
+ x86_64/atom x86_64/nano"
+ fat_functions="$fat_functions addmul_2 addlsh1_n addlsh2_n sublsh1_n"
+ fi
+
+ fat_thresholds="MUL_TOOM22_THRESHOLD MUL_TOOM33_THRESHOLD
+ SQR_TOOM2_THRESHOLD SQR_TOOM3_THRESHOLD
+ BMOD_1_TO_MOD_1_THRESHOLD"
+ fi
+ ;;
+esac
+
+
+if test $found_compiler = yes; then
+
+ # If we're creating CFLAGS, then look for optional additions. If the user
+ # set CFLAGS then leave it alone.
+ #
+ if test "$test_CFLAGS" != set; then
+ eval optlist=\"\$${ccbase}${abi1}_cflags_optlist\"
+ test -n "$optlist" || eval optlist=\"\$${ccbase}${abi2}_cflags_optlist\"
+
+ for opt in $optlist; do
+ eval optflags=\"\$${ccbase}${abi1}_cflags_${opt}\"
+ test -n "$optflags" || eval optflags=\"\$${ccbase}${abi2}_cflags_${opt}\"
+ test -n "$optflags" || eval optflags=\"\$${ccbase}_cflags_${opt}\"
+
+ for flag in $optflags; do
+
+ # ~ represents a space in an option spec
+ flag=`echo "$flag" | tr '~' ' '`
+
+ case $flag in
+ -march=pentium4 | -march=k8)
+ # For -march settings which enable SSE2 we exclude certain bad
+ # gcc versions and we need an OS knowing how to save xmm regs.
+ #
+ # This is only for ABI=32, any 64-bit gcc is good and any OS
+ # knowing x86_64 will know xmm.
+ #
+ # -march=k8 was only introduced in gcc 3.3, so we shouldn't need
+ # the GMP_GCC_PENTIUM4_SSE2 check (for gcc 3.2 and prior). But
+ # it doesn't hurt to run it anyway, sharing code with the
+ # pentium4 case.
+ #
+ if test "$abi" = 32; then
+ GMP_GCC_PENTIUM4_SSE2($cc $cflags $cppflags,, continue)
+ GMP_OS_X86_XMM($cc $cflags $cppflags,, continue)
+ fi
+ ;;
+ -no-cpp-precomp)
+ # special check, avoiding a warning
+ GMP_GCC_NO_CPP_PRECOMP($ccbase,$cc,$cflags,
+ [cflags="$cflags $flag"
+ break],
+ [continue])
+ ;;
+ -Wa,-m*)
+ case $host in
+ alpha*-*-*)
+ GMP_GCC_WA_MCPU($cc $cflags, $flag, , [continue])
+ ;;
+ esac
+ ;;
+ -Wa,-oldas)
+ GMP_GCC_WA_OLDAS($cc $cflags $cppflags,
+ [cflags="$cflags $flag"
+ break],
+ [continue])
+ ;;
+ esac
+
+ GMP_PROG_CC_WORKS($cc $cflags $cppflags $flag,
+ [cflags="$cflags $flag"
+ break])
+ done
+ done
+ fi
+ # Record the selection; $abi1 (dots stripped) keeps the eval name valid.
+ ABI="$abi"
+ CC="$cc"
+ CFLAGS="$cflags"
+ CPPFLAGS="$cppflags"
+ eval GMP_NONSTD_ABI=\"\$GMP_NONSTD_ABI$abi1\"
+
+ # Could easily have this in config.h too, if desired.
+ ABI_nodots=`echo $ABI | sed 's/\./_/'`
+ GMP_DEFINE_RAW("define_not_for_expansion(\`HAVE_ABI_$ABI_nodots')", POST)
+
+
+ # GMP_LDFLAGS substitution, selected according to ABI (abi1 first, then
+ # the abi2 fallback, as for cflags_optlist and extra_functions). Needed
+ # on libgmp.la and libmp.la, but currently not on convenience libraries
+ # like tune/libspeed.la or mpz/libmpz.la.
+ eval GMP_LDFLAGS=\"\$${ccbase}${abi1}_ldflags\"
+ test -n "$GMP_LDFLAGS" || eval GMP_LDFLAGS=\"\$${ccbase}${abi2}_ldflags\"
+ AC_SUBST(GMP_LDFLAGS)
+ AC_SUBST(LIBGMP_LDFLAGS)
+ AC_SUBST(LIBGMPXX_LDFLAGS)
+
+ # extra_functions, selected according to ABI
+ eval tmp=\"\$extra_functions$abi1\"
+ test -n "$tmp" || eval tmp=\"\$extra_functions$abi2\"
+ extra_functions="$tmp"
+
+
+ # Cycle counter, selected according to ABI.
+ #
+ eval tmp=\"\$SPEED_CYCLECOUNTER_OBJ$abi1\"
+ test -n "$tmp" || eval tmp=\"\$SPEED_CYCLECOUNTER_OBJ$abi2\"
+ SPEED_CYCLECOUNTER_OBJ="$tmp"
+ eval tmp=\"\$cyclecounter_size$abi1\"
+ test -n "$tmp" || eval tmp=\"\$cyclecounter_size$abi2\"
+ cyclecounter_size="$tmp"
+
+ if test -n "$SPEED_CYCLECOUNTER_OBJ"; then
+ AC_DEFINE_UNQUOTED(HAVE_SPEED_CYCLECOUNTER, $cyclecounter_size,
+ [Tune directory speed_cyclecounter, undef=none, 1=32bits, 2=64bits)])
+ fi
+ AC_SUBST(SPEED_CYCLECOUNTER_OBJ)
+
+
+ # Calling conventions checking, selected according to ABI.
+ #
+ eval tmp=\"\$CALLING_CONVENTIONS_OBJS$abi1\"
+ test -n "$tmp" || eval tmp=\"\$CALLING_CONVENTIONS_OBJS$abi2\"
+ if test "$enable_assembly" = "yes"; then
+ CALLING_CONVENTIONS_OBJS="$tmp"
+ else
+ CALLING_CONVENTIONS_OBJS=""
+ fi
+
+ if test -n "$CALLING_CONVENTIONS_OBJS"; then
+ AC_DEFINE(HAVE_CALLING_CONVENTIONS,1,
+ [Define to 1 if tests/libtests has calling conventions checking for the CPU])
+ fi
+ AC_SUBST(CALLING_CONVENTIONS_OBJS)
+
+fi
+
+
+# If the user gave an MPN_PATH, use that verbatim, otherwise choose
+# according to the ABI and add "generic".
+#
+if test -n "$MPN_PATH"; then
+ path="$MPN_PATH"
+else
+ eval tmp=\"\$path$abi1\"
+ test -n "$tmp" || eval tmp=\"\$path$abi2\"
+ path="$tmp generic"
+fi
+
+
+# Long long limb setup for gmp.h.
+case $limb_chosen in
+longlong) DEFN_LONG_LONG_LIMB="#define _LONG_LONG_LIMB 1" ;;
+*) DEFN_LONG_LONG_LIMB="/* #undef _LONG_LONG_LIMB */" ;;
+esac
+AC_SUBST(DEFN_LONG_LONG_LIMB)
+
+
+# The C compiler and preprocessor, put into ANSI mode if possible.
+AC_PROG_CC
+AC_PROG_CC_STDC
+AC_PROG_CPP
+
+
+# The C compiler on the build system, and associated tests.
+GMP_PROG_CC_FOR_BUILD
+GMP_PROG_CPP_FOR_BUILD
+GMP_PROG_EXEEXT_FOR_BUILD
+GMP_C_FOR_BUILD_ANSI
+GMP_CHECK_LIBM_FOR_BUILD
+
+
+# How to assemble, used with CFLAGS etc, see mpn/Makeasm.am.
+# Using the compiler is a lot easier than figuring out how to invoke the
+# assembler directly.
+#
+test -n "$CCAS" || CCAS="$CC -c"
+AC_SUBST(CCAS)
+
+
+# The C++ compiler, if desired.
+want_cxx=no
+if test $enable_cxx != no; then
+ test_CXXFLAGS=${CXXFLAGS+set}
+ AC_PROG_CXX
+
+ echo "CXXFLAGS chosen by autoconf: $CXXFLAGS" >&AC_FD_CC
+ cxxflags_ac_prog_cxx=$CXXFLAGS
+ cxxflags_list=ac_prog_cxx
+
+ # If the user didn't specify $CXXFLAGS, then try $CFLAGS, with -g removed
+ # if AC_PROG_CXX thinks that doesn't work. $CFLAGS stands a good chance
+ # of working, eg. on a GNU system where CC=gcc and CXX=g++.
+ #
+ if test "$test_CXXFLAGS" != set; then
+ cxxflags_cflags=$CFLAGS
+ cxxflags_list="cflags $cxxflags_list"
+ if test "$ac_prog_cxx_g" = no; then
+ cxxflags_cflags=`echo "$cxxflags_cflags" | sed -e 's/ -g //' -e 's/^-g //' -e 's/ -g$//'`
+ fi
+ fi
+
+ # See if the C++ compiler works. If the user specified CXXFLAGS then all
+ # we're doing is checking whether AC_PROG_CXX succeeded, since it doesn't
+ # give a fatal error, just leaves CXX set to a default g++. If on the
+ # other hand the user didn't specify CXXFLAGS then we get to try here our
+ # $cxxflags_list alternatives.
+ #
+ # Automake includes $CPPFLAGS in a C++ compile, so we do the same here.
+ #
+ for cxxflags_choice in $cxxflags_list; do
+ eval CXXFLAGS=\"\$cxxflags_$cxxflags_choice\"
+ GMP_PROG_CXX_WORKS($CXX $CPPFLAGS $CXXFLAGS,
+ [want_cxx=yes
+ break])
+ done
+
+ # If --enable-cxx=yes but a C++ compiler can't be found, then abort.
+ if test $want_cxx = no && test $enable_cxx = yes; then
+ AC_MSG_ERROR([C++ compiler not available, see config.log for details])
+ fi
+fi
+
+AM_CONDITIONAL(WANT_CXX, test $want_cxx = yes)
+
+# FIXME: We're not interested in CXXCPP for ourselves, but if we don't do it
+# here then AC_PROG_LIBTOOL will AC_REQUIRE it (via _LT_AC_TAGCONFIG) and
+# hence execute it unconditionally, and that will fail if there's no C++
+# compiler (and no generic /lib/cpp).
+#
+if test $want_cxx = yes; then
+ AC_PROG_CXXCPP
+fi
+
+
+# Path setups for Cray, according to IEEE or CFP. These must come after
+# deciding the compiler.
+#
+GMP_CRAY_OPTIONS(
+ [add_path="cray/ieee"],
+ [add_path="cray/cfp"; extra_functions="mulwwc90"],
+ [add_path="cray/cfp"; extra_functions="mulwwj90"])
+
+
+if test -z "$MPN_PATH"; then
+ path="$add_path $path"
+fi
+
+# For a nail build, also look in "nails" subdirectories.
+#
+if test $GMP_NAIL_BITS != 0 && test -z "$MPN_PATH"; then
+ new_path=
+ for i in $path; do
+ case $i in
+ generic) new_path="$new_path $i" ;;
+ *) new_path="$new_path $i/nails $i" ;;
+ esac
+ done
+ path=$new_path
+fi
+
+
+# Put all directories into CPUVEC_list so as to get a full set of
+# CPUVEC_SETUP_$tmp_suffix defines into config.h, even if some of them are
+# empty because mmx and/or sse2 had to be dropped.
+#
+for i in $fat_path; do
+ GMP_FAT_SUFFIX(tmp_suffix, $i)
+ CPUVEC_list="$CPUVEC_list CPUVEC_SETUP_$tmp_suffix"
+done
+
+
+# If there's any sse2 or mmx in the path, check whether the assembler
+# supports it, and remove if not.
+#
+# We only need this in ABI=32, for ABI=64 on x86_64 we can assume a new
+# enough assembler.
+#
+case $host in
+ X86_PATTERN | X86_64_PATTERN)
+ if test "$ABI" = 32; then
+ case "$path $fat_path" in
+ *mmx*) GMP_ASM_X86_MMX( , [GMP_STRIP_PATH(*mmx*)]) ;;
+ esac
+ case "$path $fat_path" in
+ *sse2*) GMP_ASM_X86_SSE2( , [GMP_STRIP_PATH(sse2)]) ;;
+ esac
+ fi
+ ;;
+esac
+
+
+if test "$enable_assembly" = "no"; then
+ path="generic"
+ CFLAGS="$CFLAGS -DNO_ASM"
+# for abi in $abilist; do
+# eval unset "path_\$abi"
+# eval gcc_${abi}_cflags=\"\$gcc_${abi}_cflags -DNO_ASM\"
+# done
+fi
+
+
+cat >&AC_FD_CC <<EOF
+Decided:
+ABI=$ABI
+CC=$CC
+CFLAGS=$CFLAGS
+CPPFLAGS=$CPPFLAGS
+GMP_LDFLAGS=$GMP_LDFLAGS
+CXX=$CXX
+CXXFLAGS=$CXXFLAGS
+path=$path
+EOF
+echo "using ABI=\"$ABI\""
+echo " CC=\"$CC\""
+echo " CFLAGS=\"$CFLAGS\""
+echo " CPPFLAGS=\"$CPPFLAGS\""
+if test $want_cxx = yes; then
+ echo " CXX=\"$CXX\""
+ echo " CXXFLAGS=\"$CXXFLAGS\""
+fi
+echo " MPN_PATH=\"$path\""
+
+
+CL_AS_NOEXECSTACK
+
+GMP_PROG_AR
+GMP_PROG_NM
+
+case $host in
+ # FIXME: On AIX 3 and 4, $libname.a is included in libtool
+ # $library_names_spec, so libgmp.a becomes a symlink to libgmp.so, making
+ # it impossible to build shared and static libraries simultaneously.
+ # Disable shared libraries by default, but let the user override with
+ # --enable-shared --disable-static.
+ #
+ # FIXME: This $libname.a problem looks like it might apply to *-*-amigaos*
+ # and *-*-os2* too, but wait for someone to test this before worrying
+ # about it. If there is a problem then of course libtool is the right
+ # place to fix it.
+ #
+ [*-*-aix[34]*])
+ if test -z "$enable_shared"; then enable_shared=no; fi ;;
+esac
+
+
+# Configs for Windows DLLs.
+
+AC_LIBTOOL_WIN32_DLL
+
+AC_SUBST(LIBGMP_DLL,0)
+case $host in
+ *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2*)
+ # By default, build only static.
+ if test -z "$enable_shared"; then
+ enable_shared=no
+ fi
+ # Don't allow both static and DLL.
+ if test "$enable_shared" != no && test "$enable_static" != no; then
+ AC_MSG_ERROR([cannot build both static and DLL, since gmp.h is different for each.
+Use "--disable-static --enable-shared" to build just a DLL.])
+ fi
+
+ # "-no-undefined" is required when building a DLL, see documentation on
+ # AC_LIBTOOL_WIN32_DLL.
+ #
+ # "-Wl,--export-all-symbols" is a bit of a hack, it gets all libgmp and
+ # libgmpxx functions and variables exported. This is what libtool did
+ # in the past, and it's convenient for us in the test programs.
+ #
+ # Maybe it'd be prudent to check for --export-all-symbols before using
+ # it, but it seems to have been in ld since at least 2000, and there's
+ # not really any alternative we want to take up at the moment.
+ #
+ # "-Wl,--output-def" is used to get a .def file for use by MS lib to make
+ # a .lib import library, described in the manual. libgmp-3.dll.def
+ # corresponds to the libmp-3.dll.def generated by libtool (as a result
+ # of -export-symbols on that library).
+ #
+ # Incidentally, libtool does generate an import library libgmp.dll.a,
+ # but it's "ar" format and cannot be used by the MS linker. There
+ # doesn't seem to be any GNU tool for generating or converting to .lib.
+ #
+ # FIXME: The .def files produced by -Wl,--output-def include isascii,
+ # iscsym, iscsymf and toascii, apparently because mingw ctype.h doesn't
+ # inline isascii (used in gmp). It gives an extern inline for
+ # __isascii, but for some reason not the plain isascii.
+ #
+ if test "$enable_shared" = yes; then
+ GMP_LDFLAGS="$GMP_LDFLAGS -no-undefined -Wl,--export-all-symbols"
+ LIBGMP_LDFLAGS="$LIBGMP_LDFLAGS -Wl,--output-def,.libs/libgmp-3.dll.def"
+ LIBGMPXX_LDFLAGS="$LIBGMPXX_LDFLAGS -Wl,--output-def,.libs/libgmpxx-3.dll.def" # extend its own flags, not LIBGMP_LDFLAGS
+ LIBGMP_DLL=1
+ fi
+ ;;
+esac
+
+
+# Ensure that $CONFIG_SHELL is available for AC_LIBTOOL_SYS_MAX_CMD_LEN.
+# It's often set already by _LT_AC_PROG_ECHO_BACKSLASH or
+# _AS_LINENO_PREPARE, but not always.
+#
+# The symptom of CONFIG_SHELL unset is some "expr" errors during the test,
+# and an empty result. This only happens when invoked as "sh configure",
+# ie. no path, and can be seen for instance on ia64-*-hpux*.
+#
+# FIXME: Newer libtool should have its own fix for this.
+#
+if test -z "$CONFIG_SHELL"; then
+ CONFIG_SHELL=$SHELL
+fi
+
+# Enable CXX in libtool only if we want it, and never enable GCJ, nor RC on
+# mingw and cygwin. Under --disable-cxx this avoids some error messages
+# from libtool arising from the fact we didn't actually run AC_PROG_CXX.
+# Notice that any user-supplied --with-tags setting takes precedence.
+#
+# FIXME: Is this the right way to get this effect? Very possibly not, but
+# the current _LT_AC_TAGCONFIG doesn't really suggest an alternative.
+#
+if test "${with_tags+set}" != set; then
+ if test $want_cxx = yes; then
+ with_tags=CXX
+ else
+ with_tags=
+ fi
+fi
+
+# The dead hand of AC_REQUIRE makes AC_PROG_LIBTOOL expand and execute
+# AC_PROG_F77, even when F77 is not in the selected with_tags. This is
+# probably harmless, but it's unsightly and bloats our configure, so pretend
+# AC_PROG_F77 has been expanded already.
+#
+# FIXME: Rumour has it libtool will one day provide a way for a configure.in
+# to say what it wants from among supported languages etc.
+#
+AC_PROVIDE([AC_PROG_F77])
+
+AC_PROG_LIBTOOL
+
+# Generate an error here if attempting to build both shared and static when
+# $libname.a is in $library_names_spec (as mentioned above), rather than
+# wait for ar or ld to fail.
+#
+if test "$enable_shared" = yes && test "$enable_static" = yes; then
+ case $library_names_spec in
+ *libname.a*)
+ AC_MSG_ERROR([cannot create both shared and static libraries on this system, --disable one of the two])
+ ;;
+ esac
+fi
+
+AM_CONDITIONAL(ENABLE_STATIC, test "$enable_static" = yes)
+
+
+# Many of these library and header checks are for the benefit of
+# supplementary programs. libgmp doesn't use anything too weird.
+
+AC_HEADER_STDC
+AC_HEADER_TIME
+
+# Reasons for testing:
+# float.h - not in SunOS bundled cc
+# invent.h - IRIX specific
+# langinfo.h - X/Open standard only, not in djgpp for instance
+# locale.h - old systems won't have this
+# nl_types.h - X/Open standard only, not in djgpp for instance
+# (usually langinfo.h gives nl_item etc, but not on netbsd 1.4.1)
+# sys/attributes.h - IRIX specific
+# sys/iograph.h - IRIX specific
+# sys/mman.h - not in Cray Unicos
+# sys/param.h - not in mingw
+# sys/processor.h - solaris specific, though also present in macos
+# sys/pstat.h - HPUX specific
+# sys/resource.h - not in mingw
+# sys/sysctl.h - not in mingw
+# sys/sysinfo.h - OSF specific
+# sys/syssgi.h - IRIX specific
+# sys/systemcfg.h - AIX specific
+# sys/time.h - autoconf suggests testing, don't know anywhere without it
+# sys/times.h - not in mingw
+# machine/hal_sysinfo.h - OSF specific
+#
+# inttypes.h, stdint.h, unistd.h and sys/types.h are already in the autoconf
+# default tests
+#
+AC_CHECK_HEADERS(fcntl.h float.h invent.h langinfo.h locale.h nl_types.h sys/attributes.h sys/iograph.h sys/mman.h sys/param.h sys/processor.h sys/pstat.h sys/sysinfo.h sys/syssgi.h sys/systemcfg.h sys/time.h sys/times.h)
+
+# On SunOS, sys/resource.h needs sys/time.h (for struct timeval)
+AC_CHECK_HEADERS(sys/resource.h,,,
+[#if TIME_WITH_SYS_TIME
+# include <sys/time.h>
+# include <time.h>
+#else
+# if HAVE_SYS_TIME_H
+# include <sys/time.h>
+# else
+# include <time.h>
+# endif
+#endif])
+
+# On NetBSD and OpenBSD, sys/sysctl.h needs sys/param.h for various constants
+AC_CHECK_HEADERS(sys/sysctl.h,,,
+[#if HAVE_SYS_PARAM_H
+# include <sys/param.h>
+#endif])
+
+# On OSF 4.0, <machine/hal_sysinfo.h> must have <sys/sysinfo.h> for ulong_t
+AC_CHECK_HEADERS(machine/hal_sysinfo.h,,,
+[#if HAVE_SYS_SYSINFO_H
+# include <sys/sysinfo.h>
+#endif])
+
+# Reasons for testing:
+# optarg - not declared in mingw
+# fgetc, fscanf, ungetc, vfprintf - not declared in SunOS 4
+# sys_errlist, sys_nerr - not declared in SunOS 4
+#
+# optarg should be in unistd.h and the rest in stdio.h, both of which are
+# in the autoconf default includes.
+#
+# sys_errlist and sys_nerr are supposed to be in <errno.h> on SunOS according
+# to the man page (but aren't), in glibc they're in stdio.h.
+#
+AC_CHECK_DECLS([fgetc, fscanf, optarg, ungetc, vfprintf])
+AC_CHECK_DECLS([sys_errlist, sys_nerr], , ,
+[#include <stdio.h>
+#include <errno.h>])
+
+AC_TYPE_SIGNAL
+
+# Reasons for testing:
+# intmax_t - C99
+# long double - not in the HP bundled K&R cc
+# long long - only in reasonably recent compilers
+# ptrdiff_t - seems to be everywhere, maybe don't need to check this
+# quad_t - BSD specific
+# uint_least32_t - C99
+#
+# the default includes are sufficient for all these types
+#
+AC_CHECK_TYPES([intmax_t, long double, long long, ptrdiff_t, quad_t,
+ uint_least32_t, intptr_t])
+
+AC_C_STRINGIZE
+
+# FIXME: Really want #ifndef __cplusplus around the #define volatile
+# replacement autoconf gives, since volatile is always available in C++.
+# But we don't use it in C++ currently.
+AC_C_VOLATILE
+
+AC_C_RESTRICT
+
+GMP_C_STDARG
+GMP_C_ATTRIBUTE_CONST
+GMP_C_ATTRIBUTE_MALLOC
+GMP_C_ATTRIBUTE_MODE
+GMP_C_ATTRIBUTE_NORETURN
+
+GMP_H_EXTERN_INLINE
+
+# from libtool
+AC_CHECK_LIBM
+AC_SUBST(LIBM)
+
+GMP_FUNC_ALLOCA
+GMP_OPTION_ALLOCA
+
+GMP_H_HAVE_FILE
+
+AC_C_BIGENDIAN(
+ [AC_DEFINE(HAVE_LIMB_BIG_ENDIAN, 1)
+ GMP_DEFINE_RAW("define_not_for_expansion(\`HAVE_LIMB_BIG_ENDIAN')", POST)],
+ [AC_DEFINE(HAVE_LIMB_LITTLE_ENDIAN, 1)
+ GMP_DEFINE_RAW("define_not_for_expansion(\`HAVE_LIMB_LITTLE_ENDIAN')", POST)
+ ], [:])
+AH_VERBATIM([HAVE_LIMB],
+[/* Define one of these to 1 for the endianness of `mp_limb_t'.
+ If the endianness is not a simple big or little, or you don't know what
+ it is, then leave both undefined. */
+#undef HAVE_LIMB_BIG_ENDIAN
+#undef HAVE_LIMB_LITTLE_ENDIAN])
+
+GMP_C_DOUBLE_FORMAT
+
+
+# Reasons for testing:
+# alarm - not in mingw
+# attr_get - IRIX specific
+# clock_gettime - not in glibc 2.2.4, only very recent systems
+# cputime - not in glibc
+# getsysinfo - OSF specific
+# getrusage - not in mingw
+# gettimeofday - not in mingw
+# mmap - not in mingw, djgpp
+# nl_langinfo - X/Open standard only, not in djgpp for instance
+# obstack_vprintf - glibc specific
+# processor_info - solaris specific
+# pstat_getprocessor - HPUX specific (10.x and up)
+# raise - an ANSI-ism, though probably almost universal by now
+# read_real_time - AIX specific
+# sigaction - not in mingw
+# sigaltstack - not in mingw, or old AIX (reputedly)
+# sigstack - not in mingw
+# strerror - not in SunOS
+# strnlen - glibc extension (some other systems too)
+# syssgi - IRIX specific
+# times - not in mingw
+#
+# AC_FUNC_STRNLEN is not used because we don't want the AC_LIBOBJ
+# replacement setups it gives. It detects a faulty strnlen on AIX, but
+# missing out on that test is ok since our only use of strnlen is in
+# __gmp_replacement_vsnprintf which is not required on AIX since it has a
+# vsnprintf.
+#
+AC_CHECK_FUNCS(alarm attr_get clock cputime getpagesize getrusage gettimeofday getsysinfo localeconv memset mmap mprotect nl_langinfo obstack_vprintf popen processor_info pstat_getprocessor raise read_real_time sigaction sigaltstack sigstack syssgi strchr strerror strnlen strtol strtoul sysconf sysctl sysctlbyname times)
+
+# clock_gettime is in librt on *-*-osf5.1 and on glibc, so add -lrt to
+# TUNE_LIBS if needed. On linux (tested on x86_32, 2.6.26),
+# clock_getres reports ns accuracy, while in a quick test on osf
+# clock_getres said only 1 millisecond.
+
+old_LIBS="$LIBS"
+AC_SEARCH_LIBS(clock_gettime, rt, [
+ AC_DEFINE([HAVE_CLOCK_GETTIME],1,[Define to 1 if you have the `clock_gettime' function])])
+TUNE_LIBS="$LIBS"
+LIBS="$old_LIBS"
+
+AC_SUBST(TUNE_LIBS)
+
+GMP_FUNC_VSNPRINTF
+GMP_FUNC_SSCANF_WRITABLE_INPUT
+
+# Reasons for checking:
+# pst_processor psp_iticksperclktick - not in hpux 9
+#
+AC_CHECK_MEMBER(struct pst_processor.psp_iticksperclktick,
+ [AC_DEFINE(HAVE_PSP_ITICKSPERCLKTICK, 1,
+[Define to 1 if <sys/pstat.h> `struct pst_processor' exists
+and contains `psp_iticksperclktick'.])],,
+ [#include <sys/pstat.h>])
+
+# C++ tests, whenever a working C++ compiler was accepted above (want_cxx),
+# not only when enable_cxx is exactly "yes".
+if test $want_cxx = yes; then
+ AC_LANG_PUSH(C++)
+
+ # Reasons for testing:
+ # <sstream> - not in g++ 2.95.2
+ # std::locale - not in g++ 2.95.4
+ #
+ AC_CHECK_HEADERS([sstream])
+ AC_CHECK_TYPES([std::locale],,,[#include <locale>])
+
+ AC_LANG_POP(C++)
+fi
+
+
+# Pick the correct source files in $path and link them to mpn/.
+# $gmp_mpn_functions lists all functions we need.
+#
+# The rule is to find a file with the function name and a .asm, .S,
+# .s, or .c extension. Certain multi-function files with special names
+# can provide some functions too. (mpn/Makefile.am passes
+# -DOPERATION_<func> to get them to generate the right code.)
+
+# Note: $gmp_mpn_functions must have mod_1 before pre_mod_1 so the former
+# can optionally provide the latter as an extra entrypoint. Likewise
+# divrem_1 and pre_divrem_1.
+
+gmp_mpn_functions_optional="umul udiv \
+ invert_limb sqr_diagonal sqr_diag_addlsh1 \
+ mul_2 mul_3 mul_4 mul_5 mul_6 \
+ addmul_2 addmul_3 addmul_4 addmul_5 addmul_6 addmul_7 addmul_8 \
+ addlsh1_n sublsh1_n rsblsh1_n rsh1add_n rsh1sub_n \
+ addlsh2_n sublsh2_n rsblsh2_n \
+ addlsh_n sublsh_n rsblsh_n \
+ add_n_sub_n addaddmul_1msb0"
+
+gmp_mpn_functions="$extra_functions \
+ add add_1 add_n sub sub_1 sub_n addcnd_n subcnd_n neg com \
+ mul_1 addmul_1 submul_1 \
+ add_err1_n add_err2_n add_err3_n sub_err1_n sub_err2_n sub_err3_n \
+ lshift rshift dive_1 diveby3 divis divrem divrem_1 divrem_2 \
+ fib2_ui mod_1 mod_34lsub1 mode1o pre_divrem_1 pre_mod_1 dump \
+ mod_1_1 mod_1_2 mod_1_3 mod_1_4 lshiftc \
+ mul mul_fft mul_n sqr mul_basecase sqr_basecase nussbaumer_mul \
+ mulmid_basecase toom42_mulmid mulmid_n mulmid \
+ random random2 pow_1 \
+ rootrem sqrtrem get_str set_str scan0 scan1 popcount hamdist cmp \
+ perfsqr perfpow \
+ gcd_1 gcd gcdext_1 gcdext gcd_subdiv_step \
+ gcdext_lehmer \
+ div_q tdiv_qr jacbase jacobi_2 jacobi get_d \
+ matrix22_mul matrix22_mul1_inverse_vector \
+ hgcd_matrix hgcd2 hgcd_step hgcd_reduce hgcd hgcd_appr \
+ hgcd2_jacobi hgcd_jacobi \
+ mullo_n mullo_basecase \
+ toom22_mul toom32_mul toom42_mul toom52_mul toom62_mul \
+ toom33_mul toom43_mul toom53_mul toom54_mul toom63_mul \
+ toom44_mul \
+ toom6h_mul toom6_sqr toom8h_mul toom8_sqr \
+ toom_couple_handling \
+ toom2_sqr toom3_sqr toom4_sqr \
+ toom_eval_dgr3_pm1 toom_eval_dgr3_pm2 \
+ toom_eval_pm1 toom_eval_pm2 toom_eval_pm2exp toom_eval_pm2rexp \
+ toom_interpolate_5pts toom_interpolate_6pts toom_interpolate_7pts \
+ toom_interpolate_8pts toom_interpolate_12pts toom_interpolate_16pts \
+ invertappr invert binvert mulmod_bnm1 sqrmod_bnm1 \
+ div_qr_2 div_qr_2n_pi1 div_qr_2u_pi1 \
+ sbpi1_div_q sbpi1_div_qr sbpi1_divappr_q \
+ dcpi1_div_q dcpi1_div_qr dcpi1_divappr_q \
+ mu_div_qr mu_divappr_q mu_div_q \
+ bdiv_q_1 \
+ sbpi1_bdiv_q sbpi1_bdiv_qr \
+ dcpi1_bdiv_q dcpi1_bdiv_qr \
+ mu_bdiv_q mu_bdiv_qr \
+ bdiv_q bdiv_qr broot brootinv bsqrt bsqrtinv \
+ divexact bdiv_dbm1c redc_1 redc_2 redc_n powm powlo powm_sec \
+ sb_div_qr_sec sb_div_r_sec sbpi1_div_qr_sec sbpi1_div_r_sec \
+ trialdiv remove \
+ and_n andn_n nand_n ior_n iorn_n nior_n xor_n xnor_n \
+ copyi copyd zero tabselect \
+ comb_tables \
+ $gmp_mpn_functions_optional"
+
+define(GMP_MULFUNC_CHOICES,
+[# Set tmp_mulfunc to the multi-function files able to provide tmp_fn (empty if none)
+tmp_mulfunc=
+case $tmp_fn in
+ add_n|sub_n) tmp_mulfunc="aors_n" ;;
+ add_err1_n|sub_err1_n)
+ tmp_mulfunc="aors_err1_n" ;;
+ add_err2_n|sub_err2_n)
+ tmp_mulfunc="aors_err2_n" ;;
+ add_err3_n|sub_err3_n)
+ tmp_mulfunc="aors_err3_n" ;;
+ addcnd_n|subcnd_n) tmp_mulfunc="aorscnd_n" ;;
+ addmul_1|submul_1) tmp_mulfunc="aorsmul_1" ;;
+ popcount|hamdist) tmp_mulfunc="popham" ;;
+ and_n|andn_n|nand_n | ior_n|iorn_n|nior_n | xor_n|xnor_n)
+ tmp_mulfunc="logops_n" ;;
+ lshift|rshift) tmp_mulfunc="lorrshift";;
+ addlsh1_n)
+ tmp_mulfunc="aorslsh1_n aorrlsh1_n";;
+ sublsh1_n)
+ tmp_mulfunc="aorslsh1_n sorrlsh1_n";;
+ rsblsh1_n)
+ tmp_mulfunc="aorrlsh1_n sorrlsh1_n";;
+ addlsh2_n)
+ tmp_mulfunc="aorslsh2_n aorrlsh2_n";;
+ sublsh2_n)
+ tmp_mulfunc="aorslsh2_n sorrlsh2_n";;
+ rsblsh2_n)
+ tmp_mulfunc="aorrlsh2_n sorrlsh2_n";;
+ addlsh_n)
+ tmp_mulfunc="aorslsh_n aorrlsh_n";;
+ sublsh_n)
+ tmp_mulfunc="aorslsh_n sorrlsh_n";;
+ rsblsh_n)
+ tmp_mulfunc="aorrlsh_n sorrlsh_n";;
+ rsh1add_n|rsh1sub_n)
+ tmp_mulfunc="rsh1aors_n";;
+ sb_div_qr_sec|sb_div_r_sec)
+ tmp_mulfunc="sb_div_sec";;
+ sbpi1_div_qr_sec|sbpi1_div_r_sec)
+ tmp_mulfunc="sbpi1_div_sec";;
+esac
+])
+
+# the list of all object files used by mpn/Makefile.in and the
+# top-level Makefile.in, respectively
+mpn_objects=
+mpn_objs_in_libgmp=
+
+# links from the sources, to be removed by "make distclean"
+gmp_srclinks=
+
+
+# mpn_relative_top_srcdir is $top_srcdir, but for use from within the mpn
+# build directory. If $srcdir is relative then we use a relative path too,
+# so the two trees can be moved together.
+case $srcdir in
+ [[\\/]* | ?:[\\/]*]) # absolute, as per autoconf
+ mpn_relative_top_srcdir=$srcdir ;;
+ *) # relative
+ mpn_relative_top_srcdir=../$srcdir ;;
+esac
+
+
+define(MPN_SUFFIXES,[asm S s c])
+
+dnl Usage: GMP_FILE_TO_FUNCTION(func,file)
+dnl
+dnl func and file are shell variable names. Set $func to the function base
+dnl name for the file base name in $file, eg. dive_1 gives divexact_1.
+dnl
+define(GMP_FILE_TO_FUNCTION,
+[case $$2 in
+ dive_1) $1=divexact_1 ;;
+ diveby3) $1=divexact_by3c ;;
+ pre_divrem_1) $1=preinv_divrem_1 ;;
+ mode1o) $1=modexact_1c_odd ;;
+ pre_mod_1) $1=preinv_mod_1 ;;
+ mod_1_1) $1=mod_1_1p ;;
+ mod_1_1_cps) $1=mod_1_1p_cps ;;
+ mod_1_2) $1=mod_1s_2p ;;
+ mod_1_2_cps) $1=mod_1s_2p_cps ;;
+ mod_1_3) $1=mod_1s_3p ;;
+ mod_1_3_cps) $1=mod_1s_3p_cps ;;
+ mod_1_4) $1=mod_1s_4p ;;
+ mod_1_4_cps) $1=mod_1s_4p_cps ;;
+ *) $1=$$2 ;;
+esac
+])
+
+# Fat binary setups.
+#
+# We proceed through each $fat_path directory, and look for $fat_function
+# routines there. Those found are incorporated in the build by generating a
+# little mpn/<foo>.asm or mpn/<foo>.c file in the build directory, with
+# suitable function renaming, and adding that to $mpn_objects (the same as a
+# normal mpn file).
+#
+# fat.h is generated with macros to let internal calls to each $fat_function
+# go directly through __gmpn_cpuvec, plus macros and declarations helping to
+# setup that structure, on a per-directory basis ready for
+# mpn/<cpu>/fat/fat.c.
+#
+# fat.h includes thresholds listed in $fat_thresholds, extracted from
+# gmp-mparam.h in each directory. An overall maximum for each threshold is
+# established, for use in making fixed size arrays of temporary space.
+# (Eg. MUL_TOOM33_THRESHOLD_LIMIT used by mpn/generic/mul.c.)
+#
+# It'd be possible to do some of this manually, but when there's more than a
+# few functions and a few directories it becomes very tedious, and very
+# prone to having some routine accidentally omitted. On that basis it seems
+# best to automate as much as possible, even if the code to do so is a bit
+# ugly.
+#
+
+if test -n "$fat_path"; then
+ # Usually the mpn build directory is created with mpn/Makefile
+ # instantiation, but we want to write to it sooner.
+ mkdir mpn 2>/dev/null
+
+ echo "/* fat.h - setups for fat binaries." >fat.h
+ echo " Generated by configure - DO NOT EDIT. */" >>fat.h
+
+ AC_DEFINE(WANT_FAT_BINARY, 1, [Define to 1 when building a fat binary.])
+ GMP_DEFINE(WANT_FAT_BINARY, yes)
+
+ # Don't want normal copies of fat functions
+ for tmp_fn in $fat_functions; do
+ GMP_REMOVE_FROM_LIST(gmp_mpn_functions, $tmp_fn)
+ GMP_REMOVE_FROM_LIST(gmp_mpn_functions_optional, $tmp_fn)
+ done
+
+ for tmp_fn in $fat_functions; do
+ GMP_FILE_TO_FUNCTION(tmp_fbase,tmp_fn)
+ echo "
+#ifndef OPERATION_$tmp_fn
+#undef mpn_$tmp_fbase
+#define mpn_$tmp_fbase (*__gmpn_cpuvec.$tmp_fbase)
+#endif
+DECL_$tmp_fbase (__MPN(${tmp_fbase}_init));" >>fat.h
+ # encourage various macros to use fat functions
+ AC_DEFINE_UNQUOTED(HAVE_NATIVE_mpn_$tmp_fbase)
+ done
+
+ echo "" >>fat.h
+ echo "/* variable thresholds */" >>fat.h
+ for tmp_tn in $fat_thresholds; do
+ echo "#undef $tmp_tn" >>fat.h
+ echo "#define $tmp_tn CPUVEC_THRESHOLD (`echo $tmp_tn | tr [A-Z] [a-z]`)" >>fat.h
+ done
+
+ echo "
+/* Copy all fields into __gmpn_cpuvec.
+ memcpy is not used because it might operate byte-wise (depending on its
+ implementation), and we need the function pointer writes to be atomic.
+ "volatile" discourages the compiler from trying to optimize this. */
+#define CPUVEC_INSTALL(vec) \\
+ do { \\
+ volatile struct cpuvec_t *p = &__gmpn_cpuvec; \\" >>fat.h
+ for tmp_fn in $fat_functions; do
+ GMP_FILE_TO_FUNCTION(tmp_fbase,tmp_fn)
+ echo " p->$tmp_fbase = vec.$tmp_fbase; \\" >>fat.h
+ done
+ for tmp_tn in $fat_thresholds; do
+ tmp_field_name=`echo $tmp_tn | tr [[A-Z]] [[a-z]]`
+ echo " p->$tmp_field_name = vec.$tmp_field_name; \\" >>fat.h
+ done
+ echo " } while (0)" >>fat.h
+
+ echo "
+/* A helper to check all fields are filled. */
+#define ASSERT_CPUVEC(vec) \\
+ do { \\" >>fat.h
+ for tmp_fn in $fat_functions; do
+ GMP_FILE_TO_FUNCTION(tmp_fbase,tmp_fn)
+ echo " ASSERT (vec.$tmp_fbase != NULL); \\" >>fat.h
+ done
+ for tmp_tn in $fat_thresholds; do
+ tmp_field_name=`echo $tmp_tn | tr [[A-Z]] [[a-z]]`
+ echo " ASSERT (vec.$tmp_field_name != 0); \\" >>fat.h
+ done
+ echo " } while (0)" >>fat.h
+
+ echo "
+/* Call ITERATE(field) for each fat threshold field. */
+#define ITERATE_FAT_THRESHOLDS() \\
+ do { \\" >>fat.h
+ for tmp_tn in $fat_thresholds; do
+ tmp_field_name=`echo $tmp_tn | tr [[A-Z]] [[a-z]]`
+ echo " ITERATE ($tmp_tn, $tmp_field_name); \\" >>fat.h
+ done
+ echo " } while (0)" >>fat.h
+
+ for tmp_dir in $fat_path; do
+ CPUVEC_SETUP=
+ THRESH_ASM_SETUP=
+ echo "" >>fat.h
+ GMP_FAT_SUFFIX(tmp_suffix, $tmp_dir)
+
+ # In order to keep names unique on a DOS 8.3 filesystem, use a prefix
+ # (rather than a suffix) for the generated file names, and abbreviate.
+ case $tmp_suffix in
+ pentium) tmp_prefix=p ;;
+ pentium_mmx) tmp_prefix=pm ;;
+ p6_mmx) tmp_prefix=p2 ;;
+ p6_p3mmx) tmp_prefix=p3 ;;
+ pentium4) tmp_prefix=p4 ;;
+ pentium4_mmx) tmp_prefix=p4m ;;
+ pentium4_sse2) tmp_prefix=p4s ;;
+ k6_mmx) tmp_prefix=k6m ;;
+ k6_k62mmx) tmp_prefix=k62 ;;
+ k7_mmx) tmp_prefix=k7m ;;
+ *) tmp_prefix=$tmp_suffix ;;
+ esac
+
+ # Extract desired thresholds from gmp-mparam.h file in this directory,
+ # if present.
+ tmp_mparam=$srcdir/mpn/$tmp_dir/gmp-mparam.h
+ if test -f $tmp_mparam; then
+ for tmp_tn in $fat_thresholds; do
+ tmp_thresh=`sed -n "s/^#define $tmp_tn[ ]*\\([0-9][0-9]*\\).*$/\\1/p" $tmp_mparam`
+ if test -n "$tmp_thresh"; then
+ THRESH_ASM_SETUP=["${THRESH_ASM_SETUP}define($tmp_tn,$tmp_thresh)
+"]
+ CPUVEC_SETUP="$CPUVEC_SETUP decided_cpuvec.`echo $tmp_tn | tr [[A-Z]] [[a-z]]` = $tmp_thresh; \\
+"
+ eval tmp_limit=\$${tmp_tn}_LIMIT
+ if test -z "$tmp_limit"; then
+ tmp_limit=0
+ fi
+ if test $tmp_thresh -gt $tmp_limit; then
+ eval ${tmp_tn}_LIMIT=$tmp_thresh
+ fi
+ fi
+ done
+ fi
+
+ for tmp_fn in $fat_functions; do
+ GMP_MULFUNC_CHOICES
+
+ for tmp_base in $tmp_fn $tmp_mulfunc; do
+ for tmp_ext in MPN_SUFFIXES; do
+ tmp_file=$srcdir/mpn/$tmp_dir/$tmp_base.$tmp_ext
+ if test -f $tmp_file; then
+
+ # If the host uses a non-standard ABI, check if tmp_file supports it
+ #
+ if test -n "$GMP_NONSTD_ABI" && test $tmp_ext != "c"; then
+ abi=[`sed -n 's/^[ ]*ABI_SUPPORT(\(.*\))/\1/p' $tmp_file `]
+ if echo "$abi" | grep -q "\\b${GMP_NONSTD_ABI}\\b"; then
+ true
+ else
+ continue
+ fi
+ fi
+
+ mpn_objects="$mpn_objects ${tmp_prefix}_$tmp_fn.lo"
+ mpn_objs_in_libgmp="$mpn_objs_in_libgmp mpn/${tmp_prefix}_$tmp_fn.lo"
+
+ GMP_FILE_TO_FUNCTION(tmp_fbase,tmp_fn)
+
+ # carry-in variant, eg. divrem_1c or modexact_1c_odd
+ case $tmp_fbase in
+ *_1*) tmp_fbasec=`echo $tmp_fbase | sed 's/_1/_1c/'` ;;
+ *) tmp_fbasec=${tmp_fbase}c ;;
+ esac
+
+ # Create a little file doing an include from srcdir. The
+ # OPERATION and renamings aren't all needed all the time, but
+ # they don't hurt if unused.
+ #
+ # FIXME: Should generate these via config.status commands.
+ # Would need them all in one AC_CONFIG_COMMANDS though, since
+ # that macro doesn't accept a set of separate commands generated
+ # by shell code.
+ #
+ case $tmp_ext in
+ asm)
+ # hide the d-n-l from autoconf's error checking
+ tmp_d_n_l=d""nl
+ echo ["$tmp_d_n_l mpn_$tmp_fbase - from $tmp_dir directory for fat binary.
+$tmp_d_n_l Generated by configure - DO NOT EDIT.
+
+define(OPERATION_$tmp_fn)
+define(__gmpn_$tmp_fbase, __gmpn_${tmp_fbase}_$tmp_suffix)
+define(__gmpn_$tmp_fbasec,__gmpn_${tmp_fbasec}_${tmp_suffix})
+define(__gmpn_preinv_${tmp_fbase},__gmpn_preinv_${tmp_fbase}_${tmp_suffix})
+define(__gmpn_${tmp_fbase}_cps,__gmpn_${tmp_fbase}_cps_${tmp_suffix})
+
+$tmp_d_n_l For k6 and k7 gcd_1 calling their corresponding mpn_modexact_1_odd
+ifdef(\`__gmpn_modexact_1_odd',,
+\`define(__gmpn_modexact_1_odd,__gmpn_modexact_1_odd_${tmp_suffix})')
+
+$THRESH_ASM_SETUP
+include][($mpn_relative_top_srcdir/mpn/$tmp_dir/$tmp_base.asm)
+"] >mpn/${tmp_prefix}_$tmp_fn.asm
+ ;;
+ c)
+ echo ["/* mpn_$tmp_fbase - from $tmp_dir directory for fat binary.
+ Generated by configure - DO NOT EDIT. */
+
+#define OPERATION_$tmp_fn 1
+#define __gmpn_$tmp_fbase __gmpn_${tmp_fbase}_$tmp_suffix
+#define __gmpn_$tmp_fbasec __gmpn_${tmp_fbasec}_${tmp_suffix}
+#define __gmpn_preinv_${tmp_fbase} __gmpn_preinv_${tmp_fbase}_${tmp_suffix}
+#define __gmpn_${tmp_fbase}_cps __gmpn_${tmp_fbase}_cps_${tmp_suffix}
+
+#include \"$mpn_relative_top_srcdir/mpn/$tmp_dir/$tmp_base.c\"
+"] >mpn/${tmp_prefix}_$tmp_fn.c
+ ;;
+ esac
+
+ # Prototype, and append to CPUVEC_SETUP for this directory.
+ echo "DECL_$tmp_fbase (__gmpn_${tmp_fbase}_$tmp_suffix);" >>fat.h
+ CPUVEC_SETUP="$CPUVEC_SETUP decided_cpuvec.$tmp_fbase = __gmpn_${tmp_fbase}_${tmp_suffix}; \\
+"
+ # Ditto for any preinv variant (preinv_divrem_1, preinv_mod_1).
+ if grep "^PROLOGUE(mpn_preinv_$tmp_fn)" $tmp_file >/dev/null; then
+ echo "DECL_preinv_$tmp_fbase (__gmpn_preinv_${tmp_fbase}_$tmp_suffix);" >>fat.h
+ CPUVEC_SETUP="$CPUVEC_SETUP decided_cpuvec.preinv_$tmp_fbase = __gmpn_preinv_${tmp_fbase}_${tmp_suffix}; \\
+"
+ fi
+
+ # Ditto for any mod_1...cps variant
+ if grep "^PROLOGUE(mpn_${tmp_fbase}_cps)" $tmp_file >/dev/null; then
+ echo "DECL_${tmp_fbase}_cps (__gmpn_${tmp_fbase}_cps_$tmp_suffix);" >>fat.h
+ CPUVEC_SETUP="$CPUVEC_SETUP decided_cpuvec.${tmp_fbase}_cps = __gmpn_${tmp_fbase}_cps_${tmp_suffix}; \\
+"
+ fi
+ fi
+ done
+ done
+ done
+
+ # Emit CPUVEC_SETUP for this directory
+ echo "" >>fat.h
+ echo "#define CPUVEC_SETUP_$tmp_suffix \\" >>fat.h
+ echo " do { \\" >>fat.h
+ echo "$CPUVEC_SETUP } while (0)" >>fat.h
+ done
+
+ # Emit threshold limits
+ echo "" >>fat.h
+ for tmp_tn in $fat_thresholds; do
+ eval tmp_limit=\$${tmp_tn}_LIMIT
+ echo "#define ${tmp_tn}_LIMIT $tmp_limit" >>fat.h
+ done
+fi
+
+
+# Normal binary setups.
+#
+
+for tmp_ext in MPN_SUFFIXES; do
+ eval found_$tmp_ext=no
+done
+
+for tmp_fn in $gmp_mpn_functions; do
+ for tmp_ext in MPN_SUFFIXES; do
+ test "$no_create" = yes || rm -f mpn/$tmp_fn.$tmp_ext
+ done
+
+ # mpn_preinv_divrem_1 might have been provided by divrem_1.asm, likewise
+ # mpn_preinv_mod_1 by mod_1.asm.
+ case $tmp_fn in
+ pre_divrem_1)
+ if test "$HAVE_NATIVE_mpn_preinv_divrem_1" = yes; then continue; fi ;;
+ pre_mod_1)
+ if test "$HAVE_NATIVE_mpn_preinv_mod_1" = yes; then continue; fi ;;
+ esac
+
+ GMP_MULFUNC_CHOICES
+
+ found=no
+ for tmp_dir in $path; do
+ for tmp_base in $tmp_fn $tmp_mulfunc; do
+ for tmp_ext in MPN_SUFFIXES; do
+ tmp_file=$srcdir/mpn/$tmp_dir/$tmp_base.$tmp_ext
+ if test -f $tmp_file; then
+
+ # For a nails build, check if the file supports our nail bits.
+ # Generic code always supports all nails.
+ #
+ # FIXME: When a multi-function file is selected to provide one of
+ # the nails-neutral routines, like logops_n for and_n, the
+ # PROLOGUE grepping will create HAVE_NATIVE_mpn_<foo> defines for
+ # all functions in that file, even if they haven't all been
+ # nailified. Not sure what to do about this, it's only really a
+ # problem for logops_n, and it's not too terrible to insist those
+ # get nailified always.
+ #
+ if test $GMP_NAIL_BITS != 0 && test $tmp_dir != generic; then
+ case $tmp_fn in
+ and_n | ior_n | xor_n | andn_n | \
+ copyi | copyd | \
+ popcount | hamdist | \
+ udiv | udiv_w_sdiv | umul | \
+ cntlz | invert_limb)
+ # these operations are either unaffected by nails or defined
+ # to operate on full limbs
+ ;;
+ *)
+ nails=[`sed -n 's/^[ ]*NAILS_SUPPORT(\(.*\))/\1/p' $tmp_file `]
+ for n in $nails; do
+ case $n in
+ *-*)
+ n_start=`echo "$n" | sed -n 's/\(.*\)-.*/\1/p'`
+ n_end=`echo "$n" | sed -n 's/.*-\(.*\)/\1/p'`
+ ;;
+ *)
+ n_start=$n
+ n_end=$n
+ ;;
+ esac
+ if test $GMP_NAIL_BITS -ge $n_start && test $GMP_NAIL_BITS -le $n_end; then
+ found=yes
+ break
+ fi
+ done
+ if test $found != yes; then
+ continue
+ fi
+ ;;
+ esac
+ fi
+
+ # If the host uses a non-standard ABI, check if tmp_file supports it
+ #
+ if test -n "$GMP_NONSTD_ABI" && test $tmp_ext != "c"; then
+ abi=[`sed -n 's/^[ ]*ABI_SUPPORT(\(.*\))/\1/p' $tmp_file `]
+ # Skip this file unless an ABI_SUPPORT line names the ABI. grep -q is
+ # avoided as not every grep has it; output is discarded by redirection.
+ if echo "$abi" | grep "\\b${GMP_NONSTD_ABI}\\b" >/dev/null 2>&1; then :
+ else continue
+ fi
+ fi
+
+ found=yes
+ eval found_$tmp_ext=yes
+
+ if test $tmp_ext = c; then
+ tmp_u='$U'
+ else
+ tmp_u=
+ fi
+
+ mpn_objects="$mpn_objects $tmp_fn$tmp_u.lo"
+ mpn_objs_in_libgmp="$mpn_objs_in_libgmp mpn/$tmp_fn$tmp_u.lo"
+ AC_CONFIG_LINKS(mpn/$tmp_fn.$tmp_ext:mpn/$tmp_dir/$tmp_base.$tmp_ext)
+ gmp_srclinks="$gmp_srclinks mpn/$tmp_fn.$tmp_ext"
+
+ # Duplicate AC_DEFINEs are harmless, so it doesn't matter
+ # that multi-function files get grepped here repeatedly.
+ # The PROLOGUE pattern excludes the optional second parameter.
+ gmp_ep=[`
+ sed -n 's/^[ ]*MULFUNC_PROLOGUE(\(.*\))/\1/p' $tmp_file ;
+ sed -n 's/^[ ]*PROLOGUE(\([^,]*\).*)/\1/p' $tmp_file
+ `]
+ for gmp_tmp in $gmp_ep; do
+ AC_DEFINE_UNQUOTED(HAVE_NATIVE_$gmp_tmp)
+ eval HAVE_NATIVE_$gmp_tmp=yes
+ done
+
+ case $tmp_fn in
+ sqr_basecase) sqr_basecase_source=$tmp_file ;;
+ esac
+
+ break
+ fi
+ done
+ if test $found = yes; then break ; fi
+ done
+ if test $found = yes; then break ; fi
+ done
+
+ if test $found = no; then
+ for tmp_optional in $gmp_mpn_functions_optional; do
+ if test $tmp_optional = $tmp_fn; then
+ found=yes
+ fi
+ done
+ if test $found = no; then
+ AC_MSG_ERROR([no version of $tmp_fn found in path: $path])
+ fi
+ fi
+done
+
+# All cycle counters are .asm files currently
+if test -n "$SPEED_CYCLECOUNTER_OBJ"; then
+ found_asm=yes
+fi
+
+dnl The following list only needs to have templates for those defines which
+dnl are going to be tested by the code; there's no need to have every
+dnl possible mpn routine.
+
+AH_VERBATIM([HAVE_NATIVE],
+[/* Define to 1 each of the following for which a native (ie. CPU specific)
+ implementation of the corresponding routine exists. */
+#undef HAVE_NATIVE_mpn_add_n
+#undef HAVE_NATIVE_mpn_add_n_sub_n
+#undef HAVE_NATIVE_mpn_add_nc
+#undef HAVE_NATIVE_mpn_addaddmul_1msb0
+#undef HAVE_NATIVE_mpn_addcnd_n
+#undef HAVE_NATIVE_mpn_addlsh1_n
+#undef HAVE_NATIVE_mpn_addlsh2_n
+#undef HAVE_NATIVE_mpn_addlsh_n
+#undef HAVE_NATIVE_mpn_addlsh1_nc
+#undef HAVE_NATIVE_mpn_addlsh2_nc
+#undef HAVE_NATIVE_mpn_addlsh_nc
+#undef HAVE_NATIVE_mpn_addlsh1_n_ip1
+#undef HAVE_NATIVE_mpn_addlsh2_n_ip1
+#undef HAVE_NATIVE_mpn_addlsh_n_ip1
+#undef HAVE_NATIVE_mpn_addlsh1_nc_ip1
+#undef HAVE_NATIVE_mpn_addlsh2_nc_ip1
+#undef HAVE_NATIVE_mpn_addlsh_nc_ip1
+#undef HAVE_NATIVE_mpn_addlsh1_n_ip2
+#undef HAVE_NATIVE_mpn_addlsh2_n_ip2
+#undef HAVE_NATIVE_mpn_addlsh_n_ip2
+#undef HAVE_NATIVE_mpn_addlsh1_nc_ip2
+#undef HAVE_NATIVE_mpn_addlsh2_nc_ip2
+#undef HAVE_NATIVE_mpn_addlsh_nc_ip2
+#undef HAVE_NATIVE_mpn_addmul_1c
+#undef HAVE_NATIVE_mpn_addmul_2
+#undef HAVE_NATIVE_mpn_addmul_3
+#undef HAVE_NATIVE_mpn_addmul_4
+#undef HAVE_NATIVE_mpn_addmul_5
+#undef HAVE_NATIVE_mpn_addmul_6
+#undef HAVE_NATIVE_mpn_addmul_7
+#undef HAVE_NATIVE_mpn_addmul_8
+#undef HAVE_NATIVE_mpn_addmul_2s
+#undef HAVE_NATIVE_mpn_and_n
+#undef HAVE_NATIVE_mpn_andn_n
+#undef HAVE_NATIVE_mpn_bdiv_dbm1c
+#undef HAVE_NATIVE_mpn_bdiv_q_1
+#undef HAVE_NATIVE_mpn_pi1_bdiv_q_1
+#undef HAVE_NATIVE_mpn_com
+#undef HAVE_NATIVE_mpn_copyd
+#undef HAVE_NATIVE_mpn_copyi
+#undef HAVE_NATIVE_mpn_div_qr_2
+#undef HAVE_NATIVE_mpn_divexact_1
+#undef HAVE_NATIVE_mpn_divexact_by3c
+#undef HAVE_NATIVE_mpn_divrem_1
+#undef HAVE_NATIVE_mpn_divrem_1c
+#undef HAVE_NATIVE_mpn_divrem_2
+#undef HAVE_NATIVE_mpn_gcd_1
+#undef HAVE_NATIVE_mpn_hamdist
+#undef HAVE_NATIVE_mpn_invert_limb
+#undef HAVE_NATIVE_mpn_ior_n
+#undef HAVE_NATIVE_mpn_iorn_n
+#undef HAVE_NATIVE_mpn_lshift
+#undef HAVE_NATIVE_mpn_lshiftc
+#undef HAVE_NATIVE_mpn_lshsub_n
+#undef HAVE_NATIVE_mpn_mod_1
+#undef HAVE_NATIVE_mpn_mod_1_1p
+#undef HAVE_NATIVE_mpn_mod_1c
+#undef HAVE_NATIVE_mpn_mod_1s_2p
+#undef HAVE_NATIVE_mpn_mod_1s_4p
+#undef HAVE_NATIVE_mpn_mod_34lsub1
+#undef HAVE_NATIVE_mpn_modexact_1_odd
+#undef HAVE_NATIVE_mpn_modexact_1c_odd
+#undef HAVE_NATIVE_mpn_mul_1
+#undef HAVE_NATIVE_mpn_mul_1c
+#undef HAVE_NATIVE_mpn_mul_2
+#undef HAVE_NATIVE_mpn_mul_3
+#undef HAVE_NATIVE_mpn_mul_4
+#undef HAVE_NATIVE_mpn_mul_5
+#undef HAVE_NATIVE_mpn_mul_6
+#undef HAVE_NATIVE_mpn_mul_basecase
+#undef HAVE_NATIVE_mpn_nand_n
+#undef HAVE_NATIVE_mpn_nior_n
+#undef HAVE_NATIVE_mpn_popcount
+#undef HAVE_NATIVE_mpn_preinv_divrem_1
+#undef HAVE_NATIVE_mpn_preinv_mod_1
+#undef HAVE_NATIVE_mpn_redc_1
+#undef HAVE_NATIVE_mpn_redc_2
+#undef HAVE_NATIVE_mpn_rsblsh1_n
+#undef HAVE_NATIVE_mpn_rsblsh2_n
+#undef HAVE_NATIVE_mpn_rsblsh_n
+#undef HAVE_NATIVE_mpn_rsblsh1_nc
+#undef HAVE_NATIVE_mpn_rsblsh2_nc
+#undef HAVE_NATIVE_mpn_rsblsh_nc
+#undef HAVE_NATIVE_mpn_rsh1add_n
+#undef HAVE_NATIVE_mpn_rsh1add_nc
+#undef HAVE_NATIVE_mpn_rsh1sub_n
+#undef HAVE_NATIVE_mpn_rsh1sub_nc
+#undef HAVE_NATIVE_mpn_rshift
+#undef HAVE_NATIVE_mpn_sqr_basecase
+#undef HAVE_NATIVE_mpn_sqr_diagonal
+#undef HAVE_NATIVE_mpn_sqr_diag_addlsh1
+#undef HAVE_NATIVE_mpn_sub_n
+#undef HAVE_NATIVE_mpn_sub_nc
+#undef HAVE_NATIVE_mpn_subcnd_n
+#undef HAVE_NATIVE_mpn_sublsh1_n
+#undef HAVE_NATIVE_mpn_sublsh2_n
+#undef HAVE_NATIVE_mpn_sublsh_n
+#undef HAVE_NATIVE_mpn_sublsh1_nc
+#undef HAVE_NATIVE_mpn_sublsh2_nc
+#undef HAVE_NATIVE_mpn_sublsh_nc
+#undef HAVE_NATIVE_mpn_sublsh1_n_ip1
+#undef HAVE_NATIVE_mpn_sublsh2_n_ip1
+#undef HAVE_NATIVE_mpn_sublsh_n_ip1
+#undef HAVE_NATIVE_mpn_sublsh1_nc_ip1
+#undef HAVE_NATIVE_mpn_sublsh2_nc_ip1
+#undef HAVE_NATIVE_mpn_sublsh_nc_ip1
+#undef HAVE_NATIVE_mpn_submul_1c
+#undef HAVE_NATIVE_mpn_tabselect
+#undef HAVE_NATIVE_mpn_udiv_qrnnd
+#undef HAVE_NATIVE_mpn_udiv_qrnnd_r
+#undef HAVE_NATIVE_mpn_umul_ppmm
+#undef HAVE_NATIVE_mpn_umul_ppmm_r
+#undef HAVE_NATIVE_mpn_xor_n
+#undef HAVE_NATIVE_mpn_xnor_n])
+
+
+# Don't demand an m4 unless it's actually needed.
+if test $found_asm = yes; then
+ GMP_PROG_M4
+ GMP_M4_M4WRAP_SPURIOUS
+# else
+# It's unclear why this m4-not-needed stuff was ever done.
+# if test -z "$M4" ; then
+# M4=m4-not-needed
+# fi
+fi
+
+# Only do the GMP_ASM checks if there's a .S or .asm wanting them.
+if test $found_asm = no && test $found_S = no; then
+ gmp_asm_syntax_testing=no
+fi
+
+if test "$gmp_asm_syntax_testing" != no; then
+ GMP_ASM_TEXT
+ GMP_ASM_DATA
+ GMP_ASM_LABEL_SUFFIX
+ GMP_ASM_GLOBL
+ GMP_ASM_GLOBL_ATTR
+ GMP_ASM_UNDERSCORE
+ GMP_ASM_RODATA
+ GMP_ASM_TYPE
+ GMP_ASM_SIZE
+ GMP_ASM_LSYM_PREFIX
+ GMP_ASM_W32
+ GMP_ASM_ALIGN_LOG
+
+ case $host in
+ hppa*-*-*)
+ # for both pa32 and pa64
+ GMP_INCLUDE_MPN(pa32/pa-defs.m4)
+ ;;
+ IA64_PATTERN)
+ GMP_ASM_IA64_ALIGN_OK
+ ;;
+ M68K_PATTERN)
+ GMP_ASM_M68K_INSTRUCTION
+ GMP_ASM_M68K_ADDRESSING
+ GMP_ASM_M68K_BRANCHES
+ ;;
+ [powerpc*-*-* | power[3-9]-*-*])
+ GMP_ASM_POWERPC_PIC_ALWAYS
+ GMP_ASM_POWERPC_R_REGISTERS
+ GMP_INCLUDE_MPN(powerpc32/powerpc-defs.m4)
+ case $host in
+ *-*-aix*)
+ case $ABI in
+ mode64) GMP_INCLUDE_MPN(powerpc64/aix.m4) ;;
+ *) GMP_INCLUDE_MPN(powerpc32/aix.m4) ;;
+ esac
+ ;;
+ *-*-linux* | *-*-*bsd*)
+ case $ABI in
+ mode64) GMP_INCLUDE_MPN(powerpc64/elf.m4) ;;
+ mode32 | 32) GMP_INCLUDE_MPN(powerpc32/elf.m4) ;;
+ esac
+ ;;
+ *-*-darwin*)
+ case $ABI in
+ mode64) GMP_INCLUDE_MPN(powerpc64/darwin.m4) ;;
+ mode32 | 32) GMP_INCLUDE_MPN(powerpc32/darwin.m4) ;;
+ esac
+ ;;
+ *)
+ # Assume unrecognized operating system is the powerpc eABI
+ GMP_INCLUDE_MPN(powerpc32/eabi.m4)
+ ;;
+ esac
+ ;;
+ power*-*-aix*)
+ GMP_INCLUDE_MPN(powerpc32/aix.m4)
+ ;;
+ *sparc*-*-*)
+ case $ABI in
+ 64)
+ GMP_ASM_SPARC_REGISTER
+ ;;
+ esac
+ ;;
+ X86_PATTERN | X86_64_PATTERN)
+ GMP_ASM_ALIGN_FILL_0x90
+ case $ABI in
+ 32)
+ GMP_INCLUDE_MPN(x86/x86-defs.m4)
+ AC_DEFINE(HAVE_HOST_CPU_FAMILY_x86)
+ GMP_ASM_COFF_TYPE
+ GMP_ASM_X86_GOT_UNDERSCORE
+ GMP_ASM_X86_SHLDL_CL
+ case $enable_profiling in
+ prof | gprof) GMP_ASM_X86_MCOUNT ;;
+ esac
+ case $host in
+ *-*-darwin*)
+ GMP_INCLUDE_MPN(x86/darwin.m4) ;;
+ esac
+ ;;
+ 64|x32)
+ GMP_INCLUDE_MPN(x86_64/x86_64-defs.m4)
+ AC_DEFINE(HAVE_HOST_CPU_FAMILY_x86_64)
+ case $host in
+ *-*-darwin*)
+ GMP_INCLUDE_MPN(x86_64/darwin.m4) ;;
+ *-*-mingw* | *-*-cygwin)
+ GMP_INCLUDE_MPN(x86_64/dos64.m4) ;;
+ esac
+ ;;
+ esac
+ ;;
+ esac
+fi
+
+# For --enable-minithres, prepend "minithres" to path so that its special
+# gmp-mparam.h will be used.
+if test $enable_minithres = yes; then
+ path="minithres $path"
+fi
+
+# Create link for gmp-mparam.h from the first directory in $path that has one.
+gmp_mparam_source=
+for gmp_mparam_dir in $path; do
+ test "$no_create" = yes || rm -f gmp-mparam.h
+ tmp_file=$srcdir/mpn/$gmp_mparam_dir/gmp-mparam.h
+ if test -f $tmp_file; then
+ AC_CONFIG_LINKS(gmp-mparam.h:mpn/$gmp_mparam_dir/gmp-mparam.h)
+ gmp_srclinks="$gmp_srclinks gmp-mparam.h"
+ gmp_mparam_source=$tmp_file
+ break
+ fi
+done
+if test -z "$gmp_mparam_source"; then
+ AC_MSG_ERROR([no version of gmp-mparam.h found in path: $path])
+fi
+
+# For a helpful message from tune/tuneup.c. Suggest the gmp-mparam.h in use,
+# or, when only the generic one was found although the first directory in
+# $path is not generic, suggest creating a new file in that directory.
+gmp_mparam_suggest=$gmp_mparam_source
+if test "$gmp_mparam_dir" = generic; then
+ for i in $path; do break; done
+ test "$i" = generic || gmp_mparam_suggest="new file $srcdir/mpn/$i/gmp-mparam.h"
+fi
+AC_DEFINE_UNQUOTED(GMP_MPARAM_H_SUGGEST, "$gmp_mparam_suggest",
+[The gmp-mparam.h file (a string) the tune program should suggest updating.])
+
+
+# Copy relevant parameters from gmp-mparam.h to config.m4.
+# We only do this for parameters that are used by some assembly files.
+# Fat binaries do this on a per-file basis, so skip in that case.
+#
+if test -z "$fat_path"; then
+ for i in SQR_TOOM2_THRESHOLD BMOD_1_TO_MOD_1_THRESHOLD SHLD_SLOW SHRD_SLOW; do
+ value=`sed -n 's/^#define '$i'[ ]*\([0-9][0-9]*\).*$/\1/p' $gmp_mparam_source`
+ if test -n "$value"; then
+ GMP_DEFINE_RAW(["define(<$i>,<$value>)"])
+ fi
+ done
+fi
+
+
+# Sizes of some types, needed at preprocessing time.
+#
+# FIXME: The assumption that GMP_LIMB_BITS is 8*sizeof(mp_limb_t) might
+# be slightly rash, but it's true everywhere we know of and ought to be true
+# of any sensible system. In a generic C build, grepping LONG_BIT out of
+# <limits.h> might be an alternative, for maximum portability.
+#
+AC_CHECK_SIZEOF(void *)
+AC_CHECK_SIZEOF(unsigned short)
+AC_CHECK_SIZEOF(unsigned)
+AC_CHECK_SIZEOF(unsigned long)
+AC_CHECK_SIZEOF(mp_limb_t, , GMP_INCLUDE_GMP_H)
+if test "$ac_cv_sizeof_mp_limb_t" = 0; then
+ AC_MSG_ERROR([Oops, mp_limb_t doesn't seem to work])
+fi
+AC_SUBST(GMP_LIMB_BITS, `expr 8 \* $ac_cv_sizeof_mp_limb_t`)
+GMP_DEFINE_RAW(["define(<SIZEOF_UNSIGNED>,<$ac_cv_sizeof_unsigned>)"])
+
+# Check compiler limb size matches gmp-mparam.h
+#
+# FIXME: Some of the cycle counter objects in the tune directory depend on
+# the size of ulong. It'd be possible to check that here, though a mismatch
+# probably shouldn't be fatal, since none of the libgmp assembler code
+# depends on ulong.
+#
+mparam_bits=[`sed -n 's/^#define GMP_LIMB_BITS[ ][ ]*\([0-9]*\).*$/\1/p' $gmp_mparam_source`]
+if test -n "$mparam_bits" && test "$mparam_bits" -ne $GMP_LIMB_BITS; then
+ if test "$test_CFLAGS" = set; then
+ AC_MSG_ERROR([Oops, mp_limb_t is $GMP_LIMB_BITS bits, but the assembler code
+in this configuration expects $mparam_bits bits.
+You appear to have set \$CFLAGS, perhaps you also need to tell GMP the
+intended ABI, see "ABI and ISA" in the manual.])
+ else
+ AC_MSG_ERROR([Oops, mp_limb_t is $GMP_LIMB_BITS bits, but the assembler code
+in this configuration expects $mparam_bits bits.])
+ fi
+fi
+
+GMP_DEFINE_RAW(["define(<GMP_LIMB_BITS>,$GMP_LIMB_BITS)"])
+GMP_DEFINE_RAW(["define(<GMP_NAIL_BITS>,$GMP_NAIL_BITS)"])
+GMP_DEFINE_RAW(["define(<GMP_NUMB_BITS>,eval(GMP_LIMB_BITS-GMP_NAIL_BITS))"])
+
+
+AC_SUBST(mpn_objects)
+AC_SUBST(mpn_objs_in_libgmp)
+AC_SUBST(gmp_srclinks)
+
+
+# A recompiled sqr_basecase for use in the tune program, if necessary.
+TUNE_SQR_OBJ=
+test -d tune || mkdir tune
+case $sqr_basecase_source in
+ *.asm)
+ sqr_max=[`sed -n 's/^def...(SQR_TOOM2_THRESHOLD_MAX, *\([0-9]*\))/\1/p' $sqr_basecase_source`]
+ if test -n "$sqr_max"; then
+ TUNE_SQR_OBJ=sqr_asm.o
+ AC_DEFINE_UNQUOTED(TUNE_SQR_TOOM2_MAX,$sqr_max,
+ [Maximum size the tune program can test for SQR_TOOM2_THRESHOLD])
+ fi
+ cat >tune/sqr_basecase.c <<EOF
+/* not sure that an empty file can compile, so put in a dummy */
+int sqr_basecase_dummy;
+EOF
+ ;;
+ *.c)
+ TUNE_SQR_OBJ=
+ AC_DEFINE(TUNE_SQR_TOOM2_MAX,SQR_TOOM2_MAX_GENERIC)
+ cat >tune/sqr_basecase.c <<EOF
+#define TUNE_PROGRAM_BUILD 1
+#define TUNE_PROGRAM_BUILD_SQR 1
+#include "mpn/sqr_basecase.c"
+EOF
+ ;;
+esac
+AC_SUBST(TUNE_SQR_OBJ)
+
+
+# Configs for demos/pexpr.c.
+#
+AC_CONFIG_FILES(demos/pexpr-config.h:demos/pexpr-config-h.in)
+GMP_SUBST_CHECK_FUNCS(clock, cputime, getrusage, gettimeofday, sigaction, sigaltstack, sigstack)
+GMP_SUBST_CHECK_HEADERS(sys/resource.h)
+AC_CHECK_TYPES([stack_t], HAVE_STACK_T_01=1, HAVE_STACK_T_01=0,
+ [#include <signal.h>])
+AC_SUBST(HAVE_STACK_T_01)
+
+# Configs for demos/calc directory
+#
+# AC_SUBST+AC_CONFIG_FILES is used for calc-config.h, rather than AC_DEFINE+
+# AC_CONFIG_HEADERS, since with the latter automake (1.8) will then put the
+# directory (ie. demos/calc) into $(DEFAULT_INCLUDES) for every Makefile.in,
+# which would look very strange.
+#
+# -lcurses is required by libreadline. On a typical SVR4 style system this
+# normally doesn't have to be given explicitly, since libreadline.so will
+# have a NEEDED record for it. But if someone for some reason is using only
+# a static libreadline.a then we must give -lcurses. Readline (as of
+# version 4.3) doesn't use libtool, so we can't rely on a .la to cover
+# necessary dependencies.
+#
+# On a couple of systems we've seen libreadline available, but the headers
+# not in the default include path, so check for readline/readline.h. We've
+# also seen readline/history.h missing, not sure if that's just a broken
+# install or a very old version, but check that too.
+#
+AC_CONFIG_FILES(demos/calc/calc-config.h:demos/calc/calc-config-h.in)
+LIBCURSES=
+if test $with_readline != no; then
+ AC_CHECK_LIB(ncurses, tputs, [LIBCURSES=-lncurses],
+ [AC_CHECK_LIB(curses, tputs, [LIBCURSES=-lcurses])])
+fi
+AC_SUBST(LIBCURSES)
+use_readline=$with_readline
+if test $with_readline = detect; then
+ use_readline=no
+ AC_CHECK_LIB(readline, readline,
+ [AC_CHECK_HEADER(readline/readline.h,
+ [AC_CHECK_HEADER(readline/history.h, use_readline=yes)])],
+ , $LIBCURSES)
+ AC_MSG_CHECKING(readline detected)
+ AC_MSG_RESULT($use_readline)
+fi
+if test $use_readline = yes; then
+ AC_SUBST(WITH_READLINE_01, 1)
+ AC_SUBST(LIBREADLINE, -lreadline)
+else
+ WITH_READLINE_01=0
+fi
+AC_PROG_YACC
+AM_PROG_LEX
+
+# Configs for demos/expr directory
+#
+# Libtool already runs an AC_CHECK_TOOL for ranlib, but we give
+# AC_PROG_RANLIB anyway since automake is supposed to complain if it's not
+# called. (Automake 1.8.4 doesn't, at least not when the only library is in
+# an EXTRA_LIBRARIES.)
+#
+AC_PROG_RANLIB
+
+
+# Create config.m4.
+GMP_FINISH
+
+# Create Makefiles
+# FIXME: Upcoming version of autoconf/automake may not like broken lines.
+# Right now automake isn't accepting the new AC_CONFIG_FILES scheme.
+
+AC_OUTPUT(Makefile \
+ mpf/Makefile mpn/Makefile mpq/Makefile \
+ mpz/Makefile printf/Makefile scanf/Makefile rand/Makefile cxx/Makefile \
+ tests/Makefile tests/devel/Makefile \
+ tests/mpf/Makefile tests/mpn/Makefile tests/mpq/Makefile \
+ tests/mpz/Makefile tests/rand/Makefile tests/misc/Makefile \
+ tests/cxx/Makefile \
+ doc/Makefile tune/Makefile \
+ demos/Makefile demos/calc/Makefile demos/expr/Makefile \
+ gmp.h:gmp-h.in)
+
+AC_MSG_NOTICE([summary of build options:
+
+ Version: ${PACKAGE_STRING}
+ Host type: ${host}
+ ABI: ${ABI}
+ Install prefix: ${prefix}
+ Compiler: ${CC}
+ Static libraries: ${enable_static}
+ Shared libraries: ${enable_shared}
+])
+
+if test x$cross_compiling = xyes ; then
+ case "$host" in
+ *-*-mingw* | *-*-cygwin)
+ if test x$ABI = x64 ; then
+ AC_MSG_NOTICE([If wine64 is installed, use make check TESTS_ENVIRONMENT=wine64.])
+ else
+ AC_MSG_NOTICE([If wine is installed, use make check TESTS_ENVIRONMENT=wine.])
+ fi
+ ;;
+ esac
+fi
+++ /dev/null
-dnl Process this file with autoconf to produce a configure script.
-
-
-define(GMP_COPYRIGHT,[[
-
-Copyright 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
-2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published
-by the Free Software Foundation; either version 3 of the License, or (at
-your option) any later version.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
-]])
-
-AC_COPYRIGHT(GMP_COPYRIGHT)
-AH_TOP(/*GMP_COPYRIGHT*/)
-
-AC_REVISION($Revision$)
-AC_PREREQ(2.59)
-AC_INIT(GNU MP, GMP_VERSION, [gmp-bugs@gmplib.org, see http://gmplib.org/manual/Reporting-Bugs.html], gmp)
-AC_CONFIG_SRCDIR(gmp-impl.h)
-m4_pattern_forbid([^[ \t]*GMP_])
-m4_pattern_allow(GMP_LDFLAGS)
-m4_pattern_allow(GMP_LIMB_BITS)
-m4_pattern_allow(GMP_MPARAM_H_SUGGEST)
-m4_pattern_allow(GMP_NAIL_BITS)
-m4_pattern_allow(GMP_NUMB_BITS)
-
-# If --target is not used then $target_alias is empty, but if say
-# "./configure athlon-pc-freebsd3.5" is used, then all three of
-# $build_alias, $host_alias and $target_alias are set to
-# "athlon-pc-freebsd3.5".
-#
-if test -n "$target_alias" && test "$target_alias" != "$host_alias"; then
- AC_MSG_ERROR([--target is not appropriate for GMP
-Use --build=CPU-VENDOR-OS if you need to specify your CPU and/or system
-explicitly. Use --host if cross-compiling (see "Installing GMP" in the
-manual for more on this).])
-fi
-
-GMP_INIT(config.m4)
-
-AC_CANONICAL_HOST
-
-dnl Automake "no-dependencies" is used because include file dependencies
-dnl are not useful to us. Pretty much everything depends just on gmp.h,
-dnl gmp-impl.h and longlong.h, and yet only rarely does everything need to
-dnl be rebuilt for changes to those files.
-dnl
-dnl "no-dependencies" also helps with the way we're setup to run
-dnl AC_PROG_CXX only conditionally. If dependencies are used then recent
-dnl automake (eg 1.7.2) appends an AM_CONDITIONAL to AC_PROG_CXX, and then
-dnl gets upset if it's not actually executed.
-dnl
-dnl Note that there's a copy of these options in the top-level Makefile.am,
-dnl so update there too if changing anything.
-dnl
-AM_INIT_AUTOMAKE([1.8 gnu no-dependencies $(top_builddir)/ansi2knr])
-AM_CONFIG_HEADER(config.h:config.in)
-AM_MAINTAINER_MODE
-
-
-AC_ARG_ENABLE(assert,
-AC_HELP_STRING([--enable-assert],[enable ASSERT checking [[default=no]]]),
-[case $enableval in
-yes|no) ;;
-*) AC_MSG_ERROR([bad value $enableval for --enable-assert, need yes or no]) ;;
-esac],
-[enable_assert=no])
-
-if test "$enable_assert" = "yes"; then
- AC_DEFINE(WANT_ASSERT,1,
- [Define to 1 to enable ASSERT checking, per --enable-assert])
- want_assert_01=1
-else
- want_assert_01=0
-fi
-GMP_DEFINE_RAW(["define(<WANT_ASSERT>,$want_assert_01)"])
-
-
-AC_ARG_ENABLE(alloca,
-AC_HELP_STRING([--enable-alloca],[how to get temp memory [[default=reentrant]]]),
-[case $enableval in
-alloca|malloc-reentrant|malloc-notreentrant) ;;
-yes|no|reentrant|notreentrant) ;;
-debug) ;;
-*)
- AC_MSG_ERROR([bad value $enableval for --enable-alloca, need one of:
-yes no reentrant notreentrant alloca malloc-reentrant malloc-notreentrant debug]) ;;
-esac],
-[enable_alloca=reentrant])
-
-
-# IMPROVE ME: The default for C++ is disabled. The tests currently
-# performed below for a working C++ compiler are not particularly strong,
-# and in general can't be expected to get the right setup on their own. The
-# most significant problem is getting the ABI the same. Defaulting CXXFLAGS
-# to CFLAGS takes only a small step towards this. It's also probably worth
-# worrying whether the C and C++ runtimes from say gcc and a vendor C++ can
-# work together. Some rather broken C++ installations were encountered
-# during testing, and though such things clearly aren't GMP's problem, if
-# --enable-cxx=detect were to be the default then some careful checks of
-# which, if any, C++ compiler on the system is up to scratch would be
-# wanted.
-#
-AC_ARG_ENABLE(cxx,
-AC_HELP_STRING([--enable-cxx],[enable C++ support [[default=no]]]),
-[case $enableval in
-yes|no|detect) ;;
-*) AC_MSG_ERROR([bad value $enableval for --enable-cxx, need yes/no/detect]) ;;
-esac],
-[enable_cxx=no])
-
-
-AC_ARG_ENABLE(fft,
-AC_HELP_STRING([--enable-fft],[enable FFTs for multiplication [[default=yes]]]),
-[case $enableval in
-yes|no) ;;
-*) AC_MSG_ERROR([bad value $enableval for --enable-fft, need yes or no]) ;;
-esac],
-[enable_fft=yes])
-
-if test "$enable_fft" = "yes"; then
- AC_DEFINE(WANT_FFT,1,
- [Define to 1 to enable FFTs for multiplication, per --enable-fft])
-fi
-
-
-AC_ARG_ENABLE(old-fft-full,
-AC_HELP_STRING([--enable-old-fft-full],[enable old mpn_mul_fft_full for multiplication [[default=no]]]),
-[case $enableval in
-yes|no) ;;
-*) AC_MSG_ERROR([bad value $enableval for --enable-old-fft-full, need yes or no]) ;;
-esac],
-[enable_old_fft_full=no])
-
-if test "$enable_old_fft_full" = "yes"; then
- AC_DEFINE(WANT_OLD_FFT_FULL,1,
- [Define to 1 to enable old mpn_mul_fft_full for multiplication, per --enable-old-fft-full])
-fi
-
-
-AC_ARG_ENABLE(mpbsd,
-AC_HELP_STRING([--enable-mpbsd],
- [build Berkeley MP compatibility library [[default=no]]]),
-[case $enableval in
-yes|no) ;;
-*) AC_MSG_ERROR([bad value $enableval for --enable-mpbsd, need yes or no]) ;;
-esac],
-[enable_mpbsd=no])
-AM_CONDITIONAL(WANT_MPBSD, test "$enable_mpbsd" = "yes")
-
-
-AC_ARG_ENABLE(nails,
-AC_HELP_STRING([--enable-nails],[use nails on limbs [[default=no]]]),
-[case $enableval in
-[yes|no|[02468]|[0-9][02468]]) ;;
-[*[13579]])
- AC_MSG_ERROR([bad value $enableval for --enable-nails, only even nail sizes supported]) ;;
-*)
- AC_MSG_ERROR([bad value $enableval for --enable-nails, need yes/no/number]) ;;
-esac],
-[enable_nails=no])
-
-case $enable_nails in
-yes) GMP_NAIL_BITS=2 ;;
-no) GMP_NAIL_BITS=0 ;;
-*) GMP_NAIL_BITS=$enable_nails ;;
-esac
-AC_SUBST(GMP_NAIL_BITS)
-
-
-AC_ARG_ENABLE(profiling,
-AC_HELP_STRING([--enable-profiling],
- [build with profiler support [[default=no]]]),
-[case $enableval in
-no|prof|gprof|instrument) ;;
-*) AC_MSG_ERROR([bad value $enableval for --enable-profiling, need no/prof/gprof/instrument]) ;;
-esac],
-[enable_profiling=no])
-
-case $enable_profiling in
- prof)
- AC_DEFINE(WANT_PROFILING_PROF, 1,
- [Define to 1 if --enable-profiling=prof])
- ;;
- gprof)
- AC_DEFINE(WANT_PROFILING_GPROF, 1,
- [Define to 1 if --enable-profiling=gprof])
- ;;
- instrument)
- AC_DEFINE(WANT_PROFILING_INSTRUMENT, 1,
- [Define to 1 if --enable-profiling=instrument])
- ;;
-esac
-
-GMP_DEFINE_RAW(["define(<WANT_PROFILING>,<\`$enable_profiling'>)"])
-
-# -fomit-frame-pointer is incompatible with -pg on some chips
-if test "$enable_profiling" = gprof; then
- fomit_frame_pointer=
-else
- fomit_frame_pointer="-fomit-frame-pointer"
-fi
-
-
-AC_ARG_WITH(readline,
-AC_HELP_STRING([--with-readline],
- [readline support in calc demo program [[default=detect]]]),
-[case $withval in
-yes|no|detect) ;;
-*) AC_MSG_ERROR([bad value $withval for --with-readline, need yes/no/detect]) ;;
-esac],
-[with_readline=detect])
-
-
-AC_ARG_ENABLE(fat,
-AC_HELP_STRING([--enable-fat],
- [build a fat binary on systems that support it [[default=no]]]),
-[case $enableval in
-yes|no) ;;
-*) AC_MSG_ERROR([bad value $enableval for --enable-fat, need yes or no]) ;;
-esac],
-[enable_fat=no])
-
-
-AC_ARG_ENABLE(minithres,
-AC_HELP_STRING([--enable-minithres],
- [choose minimal thresholds for testing [[default=no]]]),
-[case $enableval in
-yes|no) ;;
-*) AC_MSG_ERROR([bad value $enableval for --enable-minithres, need yes or no]) ;;
-esac],
-[enable_minithres=no])
-
-
-
-tmp_host=`echo $host_cpu | sed 's/\./_/'`
-AC_DEFINE_UNQUOTED(HAVE_HOST_CPU_$tmp_host)
-GMP_DEFINE_RAW("define_not_for_expansion(\`HAVE_HOST_CPU_$tmp_host')", POST)
-
-dnl The HAVE_HOST_CPU_ list here only needs to have entries for those which
-dnl are going to be tested, not everything that can possibly be selected.
-dnl
-dnl The HAVE_HOST_CPU_FAMILY_ list similarly, and note that the AC_DEFINEs
-dnl for these are under the cpu specific setups below.
-
-AH_VERBATIM([HAVE_HOST_CPU_1],
-[/* Define one of these to 1 for the host CPU family.
- If your CPU is not in any of these families, leave all undefined.
- For an AMD64 chip, define "x86" in ABI=32, but not in ABI=64. */
-#undef HAVE_HOST_CPU_FAMILY_alpha
-#undef HAVE_HOST_CPU_FAMILY_m68k
-#undef HAVE_HOST_CPU_FAMILY_power
-#undef HAVE_HOST_CPU_FAMILY_powerpc
-#undef HAVE_HOST_CPU_FAMILY_x86
-#undef HAVE_HOST_CPU_FAMILY_x86_64
-
-/* Define one of the following to 1 for the host CPU, as per the output of
- ./config.guess. If your CPU is not listed here, leave all undefined. */
-#undef HAVE_HOST_CPU_alphaev67
-#undef HAVE_HOST_CPU_alphaev68
-#undef HAVE_HOST_CPU_alphaev7
-#undef HAVE_HOST_CPU_m68020
-#undef HAVE_HOST_CPU_m68030
-#undef HAVE_HOST_CPU_m68040
-#undef HAVE_HOST_CPU_m68060
-#undef HAVE_HOST_CPU_m68360
-#undef HAVE_HOST_CPU_powerpc604
-#undef HAVE_HOST_CPU_powerpc604e
-#undef HAVE_HOST_CPU_powerpc750
-#undef HAVE_HOST_CPU_powerpc7400
-#undef HAVE_HOST_CPU_supersparc
-#undef HAVE_HOST_CPU_i386
-#undef HAVE_HOST_CPU_i586
-#undef HAVE_HOST_CPU_i686
-#undef HAVE_HOST_CPU_pentium
-#undef HAVE_HOST_CPU_pentiummmx
-#undef HAVE_HOST_CPU_pentiumpro
-#undef HAVE_HOST_CPU_pentium2
-#undef HAVE_HOST_CPU_pentium3
-#undef HAVE_HOST_CPU_s390_z900
-#undef HAVE_HOST_CPU_s390_z990
-#undef HAVE_HOST_CPU_s390_z9
-#undef HAVE_HOST_CPU_s390_z10
-#undef HAVE_HOST_CPU_s390_z196
-
-/* Define to 1 iff we have a s390 with 64-bit registers. */
-#undef HAVE_HOST_CPU_s390_zarch])
-
-
-# Table of compilers, options, and mpn paths. This code has various related
-# purposes
-#
-# - better default CC/CFLAGS selections than autoconf otherwise gives
-# - default CC/CFLAGS selections for extra CPU types specific to GMP
-# - a few tests for known bad compilers
-# - choice of ABIs on suitable systems
-# - selection of corresponding mpn search path
-#
-# After GMP specific searches and tests, the standard autoconf AC_PROG_CC is
-# called. User selections of CC etc are respected.
-#
-# Care is taken not to use macros like AC_TRY_COMPILE during the GMP
-# pre-testing, since they of course depend on AC_PROG_CC, and also some of
-# them cache their results, which is not wanted.
-#
-# The ABI selection mechanism is unique to GMP. All that reaches autoconf
-# is a different selection of CC/CFLAGS according to the best ABI the system
-# supports, and/or what the user selects. Naturally the mpn assembler code
-# selected is very dependent on the ABI.
-#
-# The closest the standard tools come to a notion of ABI is something like
-# "sparc64" which encodes a CPU and an ABI together. This doesn't seem to
-# scale well for GMP, where exact CPU types like "ultrasparc2" are wanted,
-# separate from the ABI used on them.
-#
-#
-# The variables set here are
-#
-# cclist the compiler choices
-# xx_cflags flags for compiler xx
-# xx_cflags_maybe flags for compiler xx, if they work
-# xx_cppflags cpp flags for compiler xx
-# xx_cflags_optlist list of sets of optional flags
-# xx_cflags_yyy set yyy of optional flags for compiler xx
-# xx_ldflags -Wc,-foo flags for libtool linking with compiler xx
-# ar_flags extra flags for $AR
-# nm_flags extra flags for $NM
-# limb limb size, can be "longlong"
-# path mpn search path
-# extra_functions extra mpn functions
-# fat_path fat binary mpn search path [if fat binary desired]
-# fat_functions fat functions
-# fat_thresholds fat thresholds
-#
-# Suppose xx_cflags_optlist="arch", then flags from $xx_cflags_arch are
-# tried, and the first flag that works will be used. An optlist like "arch
-# cpu optimize" can be used to get multiple independent sets of flags tried.
-# The first that works from each will be used. If no flag in a set works
-# then nothing from that set is added.
-#
-# For multiple ABIs, the scheme extends as follows.
-#
-# abilist set of ABI choices
-# cclist_aa compiler choices in ABI aa
-# xx_aa_cflags flags for xx in ABI aa
-# xx_aa_cflags_maybe flags for xx in ABI aa, if they work
-# xx_aa_cppflags cpp flags for xx in ABI aa
-# xx_aa_cflags_optlist list of sets of optional flags in ABI aa
-# xx_aa_cflags_yyy set yyy of optional flags for compiler xx in ABI aa
-# xx_aa_ldflags -Wc,-foo flags for libtool linking
-# ar_aa_flags extra flags for $AR in ABI aa
-# nm_aa_flags extra flags for $NM in ABI aa
-# limb_aa limb size in ABI aa, can be "longlong"
-# path_aa mpn search path in ABI aa
-# extra_functions_aa extra mpn functions in ABI aa
-#
-# As a convenience, the unadorned xx_cflags (etc) are used for the last ABI
-# in ablist, if an xx_aa_cflags for that ABI isn't given. For example if
-# abilist="64 32" then $cc_64_cflags will be used for the 64-bit ABI, but
-# for the 32-bit either $cc_32_cflags or $cc_cflags is used, whichever is
-# defined. This makes it easy to add some 64-bit compilers and flags to an
-# unadorned 32-bit set.
-#
-# limb=longlong (or limb_aa=longlong) applies to all compilers within that
-# ABI. It won't work to have some needing long long and some not, since a
-# single instantiated gmp.h will be used by both.
-#
-# SPEED_CYCLECOUNTER, cyclecounter_size and CALLING_CONVENTIONS_OBJS are
-# also set here, with an ABI suffix.
-#
-#
-#
-# A table-driven approach like this to mapping cpu type to good compiler
-# options is a bit of a maintenance burden, but there's not much uniformity
-# between options specifications on different compilers. Some sort of
-# separately updatable tool might be cute.
-#
-# The use of lots of variables like this, direct and indirect, tends to
-# obscure when and how various things are done, but unfortunately it's
-# pretty much the only way. If shell subroutines were portable then actual
-# code like "if this .. do that" could be written, but attempting the same
-# with full copies of GMP_PROG_CC_WORKS etc expanded at every point would
-# hugely bloat the output.
-
-
-AC_ARG_VAR(ABI, [desired ABI (for processors supporting more than one ABI)])
-
-# abilist needs to be non-empty, "standard" is just a generic name here
-abilist="standard"
-
-# FIXME: We'd like to prefer an ANSI compiler, perhaps by preferring
-# c89 over cc here. But note that on HP-UX c89 provides a castrated
-# environment, and would want to be excluded somehow. Maybe
-# AC_PROG_CC_STDC already does enough to stick cc into ANSI mode and
-# we don't need to worry.
-#
-cclist="gcc cc"
-
-gcc_cflags="-O2 -pedantic"
-gcc_64_cflags="-O2 -pedantic"
-cc_cflags="-O"
-cc_64_cflags="-O"
-
-SPEED_CYCLECOUNTER_OBJ=
-cyclecounter_size=2
-
-AC_SUBST(HAVE_HOST_CPU_FAMILY_power, 0)
-AC_SUBST(HAVE_HOST_CPU_FAMILY_powerpc,0)
-
-case $host in
-
- a29k*-*-*)
- path="a29k"
- ;;
-
-
- alpha*-*-*)
- AC_DEFINE(HAVE_HOST_CPU_FAMILY_alpha)
- case $host_cpu in
- alphaev5* | alphapca5*)
- path="alpha/ev5 alpha" ;;
- alphaev67 | alphaev68 | alphaev7*)
- path="alpha/ev67 alpha/ev6 alpha" ;;
- alphaev6)
- path="alpha/ev6 alpha" ;;
- *)
- path="alpha" ;;
- esac
- extra_functions="cntlz"
- gcc_cflags_optlist="asm cpu oldas" # need asm ahead of cpu, see below
- gcc_cflags_oldas="-Wa,-oldas" # see GMP_GCC_WA_OLDAS.
-
- # gcc 2.7.2.3 doesn't know any -mcpu= for alpha, apparently.
- # gcc 2.95 knows -mcpu= ev4, ev5, ev56, pca56, ev6.
- # gcc 3.0 adds nothing.
- # gcc 3.1 adds ev45, ev67 (but ev45 is the same as ev4).
- # gcc 3.2 adds nothing.
- #
- # gcc version "2.9-gnupro-99r1" under "-O2 -mcpu=ev6" strikes internal
- # compiler errors too easily and is rejected by GMP_PROG_CC_WORKS. Each
- # -mcpu=ev6 below has a fallback to -mcpu=ev56 for this reason.
- #
- case $host_cpu in
- alpha) gcc_cflags_cpu="-mcpu=ev4" ;;
- alphaev5) gcc_cflags_cpu="-mcpu=ev5" ;;
- alphaev56) gcc_cflags_cpu="-mcpu=ev56" ;;
- alphapca56 | alphapca57)
- gcc_cflags_cpu="-mcpu=pca56" ;;
- alphaev6) gcc_cflags_cpu="-mcpu=ev6 -mcpu=ev56" ;;
- alphaev67 | alphaev68 | alphaev7*)
- gcc_cflags_cpu="-mcpu=ev67 -mcpu=ev6 -mcpu=ev56" ;;
- esac
-
- # gcc version "2.9-gnupro-99r1" on alphaev68-dec-osf5.1 has been seen
- # accepting -mcpu=ev6, but not putting the assembler in the right mode
- # for what it produces. We need to do this for it, and need to do it
- # before testing the -mcpu options.
- #
- # On old versions of gcc, which don't know -mcpu=, we believe an
- # explicit -Wa,-mev5 etc will be necessary to put the assembler in
- # the right mode for our .asm files and longlong.h asm blocks.
- #
- # On newer versions of gcc, when -mcpu= is known, we must give a -Wa
- # which is at least as high as the code gcc will generate. gcc
- # establishes what it needs with a ".arch" directive, our command line
- # option seems to override that.
- #
- # gas prior to 2.14 doesn't accept -mev67, but -mev6 seems enough for
- # ctlz and cttz (in 2.10.0 at least).
- #
- # OSF `as' accepts ev68 but stupidly treats it as ev4. -arch only seems
- # to affect insns like ldbu which are expanded as macros when necessary.
- # Insns like ctlz which were never available as macros are always
- # accepted and always generate their plain code.
- #
- case $host_cpu in
- alpha) gcc_cflags_asm="-Wa,-arch,ev4 -Wa,-mev4" ;;
- alphaev5) gcc_cflags_asm="-Wa,-arch,ev5 -Wa,-mev5" ;;
- alphaev56) gcc_cflags_asm="-Wa,-arch,ev56 -Wa,-mev56" ;;
- alphapca56 | alphapca57)
- gcc_cflags_asm="-Wa,-arch,pca56 -Wa,-mpca56" ;;
- alphaev6) gcc_cflags_asm="-Wa,-arch,ev6 -Wa,-mev6" ;;
- alphaev67 | alphaev68 | alphaev7*)
- gcc_cflags_asm="-Wa,-arch,ev67 -Wa,-mev67 -Wa,-arch,ev6 -Wa,-mev6" ;;
- esac
-
- # It might be better to ask "cc" whether it's Cray C or DEC C,
- # instead of relying on the OS part of $host. But it's hard to
- # imagine either of those compilers anywhere except their native
- # systems.
- #
- GMP_INCLUDE_MPN(alpha/alpha-defs.m4)
- case $host in
- *-cray-unicos*)
- cc_cflags="-O" # no -g, it silently disables all optimizations
- GMP_INCLUDE_MPN(alpha/unicos.m4)
- # Don't perform any assembly syntax tests on this beast.
- gmp_asm_syntax_testing=no
- ;;
- *-*-osf*)
- GMP_INCLUDE_MPN(alpha/default.m4)
- cc_cflags=""
- cc_cflags_optlist="opt cpu"
-
- # not sure if -fast works on old versions, so make it optional
- cc_cflags_opt="-fast -O2"
-
- # DEC C V5.9-005 knows ev4, ev5, ev56, pca56, ev6.
- # Compaq C V6.3-029 adds ev67.
- #
- case $host_cpu in
- alpha) cc_cflags_cpu="-arch~ev4~-tune~ev4" ;;
- alphaev5) cc_cflags_cpu="-arch~ev5~-tune~ev5" ;;
- alphaev56) cc_cflags_cpu="-arch~ev56~-tune~ev56" ;;
- alphapca56 | alphapca57)
- cc_cflags_cpu="-arch~pca56~-tune~pca56" ;;
- alphaev6) cc_cflags_cpu="-arch~ev6~-tune~ev6" ;;
- alphaev67 | alphaev68 | alphaev7*)
- cc_cflags_cpu="-arch~ev67~-tune~ev67 -arch~ev6~-tune~ev6" ;;
- esac
- ;;
- *)
- GMP_INCLUDE_MPN(alpha/default.m4)
- ;;
- esac
-
- case $host in
- *-*-unicos*)
- # tune/alpha.asm assumes int==4bytes but unicos uses int==8bytes
- ;;
- *)
- SPEED_CYCLECOUNTER_OBJ=alpha.lo
- cyclecounter_size=1 ;;
- esac
- ;;
-
-
- # Cray vector machines.
- # This must come after alpha* so that we can recognize present and future
- # vector processors with a wildcard.
- *-cray-unicos*)
- gmp_asm_syntax_testing=no
- cclist="cc"
- # We used to have -hscalar0 here as a workaround for miscompilation of
- # mpz/import.c, but let's hope Cray fixes their bugs instead, since
- # -hscalar0 causes disastrously poor code to be generated.
- cc_cflags="-O3 -hnofastmd -htask0 -Wa,-B"
- path="cray"
- ;;
-
-
- arm*-*-*)
- path="arm"
- gcc_cflags="$gcc_cflags $fomit_frame_pointer"
- gcc_testlist="gcc-arm-umodsi"
- GMP_INCLUDE_MPN(arm/arm-defs.m4)
- ;;
-
-
- clipper*-*-*)
- path="clipper"
- ;;
-
-
- # Fujitsu
- [f30[01]-fujitsu-sysv*])
- cclist="gcc vcc"
- # FIXME: flags for vcc?
- vcc_cflags="-g"
- path="fujitsu"
- ;;
-
-
- hppa*-*-*)
- # HP cc (the one sold separately) is K&R by default, but AM_C_PROTOTYPES
- # will add "-Ae", or "-Aa -D_HPUX_SOURCE", to put it into ansi mode, if
- # possible.
- #
- # gcc for hppa 2.0 can be built either for 2.0n (32-bit) or 2.0w
- # (64-bit), but not both, so there's no option to choose the desired
- # mode, we must instead detect which of the two it is. This is done by
- # checking sizeof(long), either 4 or 8 bytes respectively. Do this in
- # ABI=1.0 too, in case someone tries to build that with a 2.0w gcc.
- #
- gcc_cflags_optlist="arch"
- gcc_testlist="sizeof-long-4"
- SPEED_CYCLECOUNTER_OBJ=hppa.lo
- cyclecounter_size=1
-
- # FIXME: For hppa2.0*, path should be "pa32/hppa2_0 pa32/hppa1_1 pa32".
- # (Can't remember why this isn't done already, have to check what .asm
- # files are available in each and how they run on a typical 2.0 cpu.)
- #
- case $host_cpu in
- hppa1.0*) path="pa32" ;;
- hppa7000*) path="pa32/hppa1_1 pa32" ;;
- hppa2.0* | hppa64)
- path="pa32/hppa2_0 pa32/hppa1_1/pa7100 pa32/hppa1_1 pa32" ;;
- *) # default to 7100
- path="pa32/hppa1_1/pa7100 pa32/hppa1_1 pa32" ;;
- esac
-
- # gcc 2.7.2.3 knows -mpa-risc-1-0 and -mpa-risc-1-1
- # gcc 2.95 adds -mpa-risc-2-0, plus synonyms -march=1.0, 1.1 and 2.0
- #
- # We don't use -mpa-risc-2-0 in ABI=1.0 because 64-bit registers may not
- # be saved by the kernel on an old system. Actually gcc (as of 3.2)
- # only adds a few float instructions with -mpa-risc-2-0, so it would
- # probably be safe, but let's not take the chance. In any case, a
- # configuration like --host=hppa2.0 ABI=1.0 is far from optimal.
- #
- case $host_cpu in
- hppa1.0*) gcc_cflags_arch="-mpa-risc-1-0" ;;
- *) # default to 7100
- gcc_cflags_arch="-mpa-risc-1-1" ;;
- esac
-
- case $host_cpu in
- hppa1.0*) cc_cflags="+O2" ;;
- *) # default to 7100
- cc_cflags="+DA1.1 +O2" ;;
- esac
-
- case $host in
- hppa2.0*-*-* | hppa64-*-*)
- cclist_20n="gcc cc"
- abilist="2.0n 1.0"
- path_20n="pa64"
- limb_20n=longlong
- any_20n_testlist="sizeof-long-4"
- SPEED_CYCLECOUNTER_OBJ_20n=hppa2.lo
- cyclecounter_size_20n=2
-
- # -mpa-risc-2-0 is only an optional flag, in case an old gcc is
- # used. Assembler support for 2.0 is essential though, for our asm
- # files.
- gcc_20n_cflags="-O2"
- gcc_20n_cflags_optlist="arch"
- gcc_20n_cflags_arch="-mpa-risc-2-0 -mpa-risc-1-1"
- gcc_20n_testlist="sizeof-long-4 hppa-level-2.0"
-
- cc_20n_cflags="+DA2.0 +e +O2 -Wl,+vnocompatwarnings"
- cc_20n_testlist="hpc-hppa-2-0"
-
- # ABI=2.0w is available for hppa2.0w and hppa2.0, but not for
- # hppa2.0n, on the assumption that that the latter indicates a
- # desire for ABI=2.0n.
- case $host in
- hppa2.0n-*-*) ;;
- *)
- # HPUX 10 and earlier cannot run 2.0w. Not sure about other
- # systems (GNU/Linux for instance), but lets assume they're ok.
- case $host in
- [*-*-hpux[1-9] | *-*-hpux[1-9].* | *-*-hpux10 | *-*-hpux10.*]) ;;
- [*-*-linux*]) abilist="1.0" ;; # due to linux permanent kernel bug
- *) abilist="2.0w $abilist" ;;
- esac
-
- cclist_20w="gcc cc"
- gcc_20w_cflags="-O2 -mpa-risc-2-0"
- cc_20w_cflags="+DD64 +O2"
- cc_20w_testlist="hpc-hppa-2-0"
- path_20w="pa64"
- any_20w_testlist="sizeof-long-8"
- SPEED_CYCLECOUNTER_OBJ_20w=hppa2w.lo
- cyclecounter_size_20w=2
- ;;
- esac
- ;;
- esac
- ;;
-
-
- i960*-*-*)
- path="i960"
- ;;
-
-
- IA64_PATTERN)
- abilist="64"
- GMP_INCLUDE_MPN(ia64/ia64-defs.m4)
- SPEED_CYCLECOUNTER_OBJ=ia64.lo
-
- case $host_cpu in
- itanium) path="ia64/itanium ia64" ;;
- itanium2) path="ia64/itanium2 ia64" ;;
- *) path="ia64" ;;
- esac
-
- gcc_64_cflags_optlist="tune"
- gcc_32_cflags_optlist=$gcc_64_cflags_optlist
-
- # gcc pre-release 3.4 adds -mtune itanium and itanium2
- case $host_cpu in
- itanium) gcc_cflags_tune="-mtune=itanium" ;;
- itanium2) gcc_cflags_tune="-mtune=itanium2" ;;
- esac
-
- case $host in
- *-*-linux*)
- cclist="gcc icc"
- icc_cflags="-no-gcc"
- icc_cflags_optlist="opt"
- # Don't use -O3, it is for "large data sets" and also miscompiles GMP.
- # But icc miscompiles GMP at any optimization level, at higher levels
- # it miscompiles more files...
- icc_cflags_opt="-O2 -O1"
- ;;
-
- *-*-hpux*)
- # HP cc sometimes gets internal errors if the optimization level is
- # too high. GMP_PROG_CC_WORKS detects this, the "_opt" fallbacks
- # let us use whatever seems to work.
- #
- abilist="32 64"
-
- cclist_32="gcc cc"
- path_32="ia64"
- cc_32_cflags=""
- cc_32_cflags_optlist="opt"
- cc_32_cflags_opt="+O3 +O2 +O1"
- gcc_32_cflags="-milp32 -O2"
- limb_32=longlong
- SPEED_CYCLECOUNTER_OBJ_32=ia64.lo
- cyclecounter_size_32=2
-
- # Must have +DD64 in CPPFLAGS to get the right __LP64__ for headers,
- # but also need it in CFLAGS for linking programs, since automake
- # only uses CFLAGS when linking, not CPPFLAGS.
- # FIXME: Maybe should use cc_64_ldflags for this, but that would
- # need GMP_LDFLAGS used consistently by all the programs.
- #
- cc_64_cflags="+DD64"
- cc_64_cppflags="+DD64"
- cc_64_cflags_optlist="opt"
- cc_64_cflags_opt="+O3 +O2 +O1"
- gcc_64_cflags="$gcc_64_cflags -mlp64"
- ;;
- esac
- ;;
-
-
- # Motorola 68k
- #
- M68K_PATTERN)
- AC_DEFINE(HAVE_HOST_CPU_FAMILY_m68k)
- GMP_INCLUDE_MPN(m68k/m68k-defs.m4)
- gcc_cflags="$gcc_cflags $fomit_frame_pointer"
- gcc_cflags_optlist="arch"
-
- # gcc 2.7.2 knows -m68000, -m68020, -m68030, -m68040.
- # gcc 2.95 adds -mcpu32, -m68060.
- # FIXME: Maybe "-m68020 -mnobitfield" would suit cpu32 on 2.7.2.
- #
- case $host_cpu in
- m68020) gcc_cflags_arch="-m68020" ;;
- m68030) gcc_cflags_arch="-m68030" ;;
- m68040) gcc_cflags_arch="-m68040" ;;
- m68060) gcc_cflags_arch="-m68060 -m68000" ;;
- m68360) gcc_cflags_arch="-mcpu32 -m68000" ;;
- *) gcc_cflags_arch="-m68000" ;;
- esac
-
- # FIXME: m68k/mc68020 looks like it's ok for cpu32, but this wants to be
- # tested. Will need to introduce an m68k/cpu32 if m68k/mc68020 ever uses
- # the bitfield instructions.
- case $host_cpu in
- [m680[234]0 | m68360]) path="m68k/mc68020 m68k" ;;
- *) path="m68k" ;;
- esac
- ;;
-
-
- # Motorola 88k
- m88k*-*-*)
- path="m88k"
- ;;
- m88110*-*-*)
- gcc_cflags="$gcc_cflags -m88110"
- path="m88k/mc88110 m88k"
- ;;
-
-
- # National Semiconductor 32k
- ns32k*-*-*)
- path="ns32k"
- ;;
-
-
- # IRIX 5 and earlier can only run 32-bit o32.
- #
- # IRIX 6 and up always has a 64-bit mips CPU can run n32 or 64. n32 is
- # preferred over 64, but only because that's been the default in past
- # versions of GMP. The two are equally efficient.
- #
- # Linux kernel 2.2.13 arch/mips/kernel/irixelf.c has a comment about not
- # supporting n32 or 64.
- #
- # For reference, libtool (eg. 1.5.6) recognises the n32 ABI and knows the
- # right options to use when linking (both cc and gcc), so no need for
- # anything special from us.
- #
- mips*-*-*)
- abilist="o32"
- gcc_cflags_optlist="abi"
- gcc_cflags_abi="-mabi=32"
- gcc_testlist="gcc-mips-o32"
- path="mips32"
- cc_cflags="-O2 -o32" # no -g, it disables all optimizations
- # this suits both mips32 and mips64
- GMP_INCLUDE_MPN(mips32/mips-defs.m4)
-
- case $host in
- [mips64*-*-* | mips*-*-irix[6789]*])
- abilist="n32 64 o32"
-
- cclist_n32="gcc cc"
- gcc_n32_cflags="-O2 -mabi=n32"
- cc_n32_cflags="-O2 -n32" # no -g, it disables all optimizations
- limb_n32=longlong
- path_n32="mips64"
-
- cclist_64="gcc cc"
- gcc_64_cflags="$gcc_64_cflags -mabi=64"
- gcc_64_ldflags="-Wc,-mabi=64"
- cc_64_cflags="-O2 -64" # no -g, it disables all optimizations
- cc_64_ldflags="-Wc,-64"
- path_64="mips64"
- ;;
- esac
- ;;
-
-
- # Darwin (powerpc-apple-darwin1.3) has it's hacked gcc installed as cc.
- # Our usual "gcc in disguise" detection means gcc_cflags etc here gets
- # used.
- #
- # The darwin pre-compiling preprocessor is disabled with -no-cpp-precomp
- # since it doesn't like "__attribute__ ((mode (SI)))" etc in gmp-impl.h,
- # and so always ends up running the plain preprocessor anyway. This could
- # be done in CPPFLAGS rather than CFLAGS, but there's not many places
- # preprocessing is done separately, and this is only a speedup, the normal
- # preprocessor gets run if there's any problems.
- #
- # We used to use -Wa,-mppc with gcc, but can't remember exactly why.
- # Presumably it was for old versions of gcc where -mpowerpc doesn't put
- # the assembler in the right mode. In any case -Wa,-mppc is not good, for
- # instance -mcpu=604 makes recent gcc use -m604 to get access to the
- # "fsel" instruction, but a -Wa,-mppc overrides that, making code that
- # comes out with fsel fail.
- #
- # (Note also that the darwin assembler doesn't accept "-mppc", so any
- # -Wa,-mppc was used only if it worked. The right flag on darwin would be
- # "-arch ppc" or some such, but that's already the default.)
- #
- [powerpc*-*-* | power[3-9]-*-*])
- AC_DEFINE(HAVE_HOST_CPU_FAMILY_powerpc)
- HAVE_HOST_CPU_FAMILY_powerpc=1
- abilist="32"
- cclist="gcc cc"
- cc_cflags="-O2"
- gcc_32_cflags="$gcc_cflags -mpowerpc"
- gcc_cflags_optlist="precomp subtype asm cpu"
- gcc_cflags_precomp="-no-cpp-precomp"
- gcc_cflags_subtype="-force_cpusubtype_ALL" # for vmx on darwin
- gcc_cflags_asm=""
- gcc_cflags_cpu=""
- vmx_path=""
-
- # grab this object, though it's not a true cycle counter routine
- SPEED_CYCLECOUNTER_OBJ=powerpc.lo
- cyclecounter_size=0
-
- case $host_cpu in
- powerpc740 | powerpc750)
- path="powerpc32/750 powerpc32" ;;
- powerpc7400 | powerpc7410)
- path="powerpc32/vmx powerpc32/750 powerpc32" ;;
- [powerpc74[45]?])
- path="powerpc32/vmx powerpc32" ;;
- *)
- path="powerpc32" ;;
- esac
-
- case $host_cpu in
- powerpc401) gcc_cflags_cpu="-mcpu=401" ;;
- powerpc403) gcc_cflags_cpu="-mcpu=403"
- xlc_cflags_arch="-qarch=403 -qarch=ppc" ;;
- powerpc405) gcc_cflags_cpu="-mcpu=405" ;;
- powerpc505) gcc_cflags_cpu="-mcpu=505" ;;
- powerpc601) gcc_cflags_cpu="-mcpu=601"
- xlc_cflags_arch="-qarch=601 -qarch=ppc" ;;
- powerpc602) gcc_cflags_cpu="-mcpu=602"
- xlc_cflags_arch="-qarch=602 -qarch=ppc" ;;
- powerpc603) gcc_cflags_cpu="-mcpu=603"
- xlc_cflags_arch="-qarch=603 -qarch=ppc" ;;
- powerpc603e) gcc_cflags_cpu="-mcpu=603e -mcpu=603"
- xlc_cflags_arch="-qarch=603 -qarch=ppc" ;;
- powerpc604) gcc_cflags_cpu="-mcpu=604"
- xlc_cflags_arch="-qarch=604 -qarch=ppc" ;;
- powerpc604e) gcc_cflags_cpu="-mcpu=604e -mcpu=604"
- xlc_cflags_arch="-qarch=604 -qarch=ppc" ;;
- powerpc620) gcc_cflags_cpu="-mcpu=620" ;;
- powerpc630) gcc_cflags_cpu="-mcpu=630"
- xlc_cflags_arch="-qarch=pwr3"
- cpu_path="p3 p3-p7" ;;
- powerpc740) gcc_cflags_cpu="-mcpu=740" ;;
- powerpc7400 | powerpc7410)
- gcc_cflags_asm="-Wa,-maltivec"
- gcc_cflags_cpu="-mcpu=7400 -mcpu=750" ;;
- [powerpc74[45]?])
- gcc_cflags_asm="-Wa,-maltivec"
- gcc_cflags_cpu="-mcpu=7450" ;;
- powerpc750) gcc_cflags_cpu="-mcpu=750" ;;
- powerpc801) gcc_cflags_cpu="-mcpu=801" ;;
- powerpc821) gcc_cflags_cpu="-mcpu=821" ;;
- powerpc823) gcc_cflags_cpu="-mcpu=823" ;;
- powerpc860) gcc_cflags_cpu="-mcpu=860" ;;
- powerpc970) gcc_cflags_cpu="-mtune=970"
- xlc_cflags_arch="-qarch=970 -qarch=pwr3"
- vmx_path="powerpc64/vmx"
- cpu_path="p4 p3-p7" ;;
- power4) gcc_cflags_cpu="-mtune=power4"
- xlc_cflags_arch="-qarch=pwr4"
- cpu_path="p4 p3-p7" ;;
- power5) gcc_cflags_cpu="-mtune=power5 -mtune=power4"
- xlc_cflags_arch="-qarch=pwr5"
- cpu_path="p5 p4 p3-p7" ;;
- power6) gcc_cflags_cpu="-mtune=power6"
- xlc_cflags_arch="-qarch=pwr6"
- cpu_path="p6 p3-p7" ;;
- power7) gcc_cflags_cpu="-mtune=power7 -mtune=power5"
- xlc_cflags_arch="-qarch=pwr7 -qarch=pwr5"
- cpu_path="p7 p5 p4 p3-p7" ;;
- esac
-
- case $host in
- *-*-aix*)
- cclist="gcc xlc cc"
- gcc_32_cflags_maybe="-maix32"
- xlc_cflags="-O2 -qmaxmem=20000"
- xlc_cflags_optlist="arch"
- xlc_32_cflags_maybe="-q32"
- ar_32_flags="-X32"
- nm_32_flags="-X32"
- esac
-
- case $host in
- POWERPC64_PATTERN)
- case $host in
- *-*-aix*)
- # On AIX a true 64-bit ABI is available.
- # Need -Wc to pass object type flags through to the linker.
- abilist="aix64 $abilist"
- cclist_aix64="gcc xlc"
- gcc_aix64_cflags="-O2 -maix64 -mpowerpc64"
- gcc_aix64_cflags_optlist="cpu"
- gcc_aix64_ldflags="-Wc,-maix64"
- xlc_aix64_cflags="-O2 -q64 -qmaxmem=20000"
- xlc_aix64_cflags_optlist="arch"
- xlc_aix64_ldflags="-Wc,-q64"
- # Must indicate object type to ar and nm
- ar_aix64_flags="-X64"
- nm_aix64_flags="-X64"
- path_aix64=""
- for i in $cpu_path; do path_aix64="${path_aix64}powerpc64/mode64/$i "; done
- path_aix64="${path_aix64}powerpc64/mode64 $vmx_path powerpc64"
- # grab this object, though it's not a true cycle counter routine
- SPEED_CYCLECOUNTER_OBJ_aix64=powerpc64.lo
- cyclecounter_size_aix64=0
- ;;
- *-*-darwin*)
- # On Darwin we can use 64-bit instructions with a longlong limb,
- # but the chip still in 32-bit mode.
- # In theory this can be used on any OS which knows how to save
- # 64-bit registers in a context switch.
- #
- # Note that we must use -mpowerpc64 with gcc, since the
- # longlong.h macros expect limb operands in a single 64-bit
- # register, not two 32-bit registers as would be given for a
- # long long without -mpowerpc64. In theory we could detect and
- # accommodate both styles, but the proper 64-bit registers will
- # be fastest and are what we really want to use.
- #
- # One would think -mpowerpc64 would set the assembler in the right
- # mode to handle 64-bit instructions. But for that, also
- # -force_cpusubtype_ALL is needed.
- #
- # Do not use -fast for Darwin, it actually adds options
- # incompatible with a shared library.
- #
- abilist="mode64 mode32 $abilist"
- gcc_32_cflags_maybe="-m32"
- gcc_cflags_opt="-O3 -O2 -O1" # will this become used?
- cclist_mode32="gcc"
- gcc_mode32_cflags_maybe="-m32"
- gcc_mode32_cflags="-mpowerpc64"
- gcc_mode32_cflags_optlist="subtype cpu opt"
- gcc_mode32_cflags_subtype="-force_cpusubtype_ALL"
- gcc_mode32_cflags_opt="-O3 -O2 -O1"
- path_mode32="powerpc64/mode32 $vmx_path powerpc64"
- limb_mode32=longlong
- cclist_mode64="gcc"
- gcc_mode64_cflags="-m64"
- gcc_mode64_cflags_optlist="cpu opt"
- gcc_mode64_cflags_opt="-O3 -O2 -O1"
- path_mode64=""
- for i in $cpu_path; do path_mode64="${path_mode64}powerpc64/mode64/$i "; done
- path_mode64="${path_mode64}powerpc64/mode64 $vmx_path powerpc64"
- SPEED_CYCLECOUNTER_OBJ_mode64=powerpc64.lo
- cyclecounter_size_mode64=0
- any_mode64_testlist="sizeof-long-8"
- ;;
- *-*-linux* | *-*-*bsd*)
- # On GNU/Linux, assume the processor is in 64-bit mode. Some
- # environments have a gcc that is always in 64-bit mode, while
- # others require -m64, hence the use of cflags_maybe. The
- # sizeof-long-8 test checks the mode is right (for the no option
- # case).
- #
- # -mpowerpc64 is not used, since it should be the default in
- # 64-bit mode. (We need its effect for the various longlong.h
- # asm macros to be right of course.)
- #
- # gcc64 was an early port of gcc to 64-bit mode, but should be
- # obsolete before too long. We prefer plain gcc when it knows
- # 64-bits.
- #
- abilist="mode64 mode32 $abilist"
- gcc_32_cflags_maybe="-m32"
- cclist_mode32="gcc"
- gcc_mode32_cflags_maybe="-m32"
- gcc_mode32_cflags="-mpowerpc64"
- gcc_mode32_cflags_optlist="cpu opt"
- gcc_mode32_cflags_opt="-O3 -O2 -O1"
- path_mode32="powerpc64/mode32 $vmx_path powerpc64"
- limb_mode32=longlong
- cclist_mode64="gcc gcc64"
- gcc_mode64_cflags_maybe="-m64"
- gcc_mode64_cflags_optlist="cpu opt"
- gcc_mode64_cflags_opt="-O3 -O2 -O1"
- path_mode64=""
- for i in $cpu_path; do path_mode64="${path_mode64}powerpc64/mode64/$i "; done
- path_mode64="${path_mode64}powerpc64/mode64 $vmx_path powerpc64"
- SPEED_CYCLECOUNTER_OBJ_mode64=powerpc64.lo
- cyclecounter_size_mode64=0
- any_mode64_testlist="sizeof-long-8"
- ;;
- esac
- ;;
- esac
- ;;
-
-
- # POWER 32-bit
- [power-*-* | power[12]-*-* | power2sc-*-*])
- AC_DEFINE(HAVE_HOST_CPU_FAMILY_power)
- HAVE_HOST_CPU_FAMILY_power=1
- cclist="gcc"
- extra_functions="udiv_w_sdiv"
- path="power"
-
- # gcc 2.7.2 knows rios1, rios2, rsc
- #
- # -mcpu=rios2 can tickle an AIX assembler bug (see GMP_PROG_CC_WORKS) so
- # there needs to be a fallback to just -mpower.
- #
- gcc_cflags_optlist="cpu"
- case $host in
- power-*-*) gcc_cflags_cpu="-mcpu=power -mpower" ;;
- power1-*-*) gcc_cflags_cpu="-mcpu=rios1 -mpower" ;;
- power2-*-*) gcc_cflags_cpu="-mcpu=rios2 -mpower" ;;
- power2sc-*-*) gcc_cflags_cpu="-mcpu=rsc -mpower" ;;
- esac
- case $host in
- *-*-aix*)
- cclist="gcc xlc"
- xlc_cflags="-O2 -qarch=pwr -qmaxmem=20000"
- ;;
- esac
- ;;
-
-
- pyramid-*-*)
- path="pyr"
- ;;
-
-
- # IBM System/390 and z/Architecture
- S390_PATTERN | S390X_PATTERN)
- abilist="32"
- gcc_cflags="$gcc_cflags $fomit_frame_pointer"
- gcc_cflags_optlist="arch"
- path="s390_32"
- extra_functions="udiv_w_sdiv"
- gcc_32_cflags_maybe="-m31"
-
- case $host_cpu in
- s390)
- ;;
- z900 | z900esa)
- cpu="z900"
- gccarch="$cpu"
- path="s390_32/esame/$cpu s390_32/esame s390_32"
- gcc_cflags_arch="-march=$gccarch"
- AC_DEFINE_UNQUOTED(HAVE_HOST_CPU_s390_$cpu)
- AC_DEFINE(HAVE_HOST_CPU_s390_zarch)
- extra_functions=""
- ;;
- z990 | z990esa)
- cpu="z990"
- gccarch="$cpu"
- path="s390_32/esame/$cpu s390_32/esame s390_32"
- gcc_cflags_arch="-march=$gccarch"
- AC_DEFINE_UNQUOTED(HAVE_HOST_CPU_s390_$cpu)
- AC_DEFINE(HAVE_HOST_CPU_s390_zarch)
- extra_functions=""
- ;;
- z9 | z9esa)
- cpu="z9"
- gccarch="z9-109"
- path="s390_32/esame/$cpu s390_32/esame s390_32"
- gcc_cflags_arch="-march=$gccarch"
- AC_DEFINE_UNQUOTED(HAVE_HOST_CPU_s390_$cpu)
- AC_DEFINE(HAVE_HOST_CPU_s390_zarch)
- extra_functions=""
- ;;
- z10 | z10esa)
- cpu="z10"
- gccarch="z10"
- path="s390_32/esame/$cpu s390_32/esame s390_32"
- gcc_cflags_arch="-march=$gccarch"
- AC_DEFINE_UNQUOTED(HAVE_HOST_CPU_s390_$cpu)
- AC_DEFINE(HAVE_HOST_CPU_s390_zarch)
- extra_functions=""
- ;;
- z196 | z196esa)
- cpu="z196"
- gccarch="z196"
- path="s390_32/esame/$cpu s390_32/esame s390_32"
- gcc_cflags_arch="-march=$gccarch"
- AC_DEFINE_UNQUOTED(HAVE_HOST_CPU_s390_$cpu)
- AC_DEFINE(HAVE_HOST_CPU_s390_zarch)
- extra_functions=""
- ;;
- esac
-
- case $host in
- S390X_PATTERN)
- abilist="64 32"
- cclist_64="gcc"
- gcc_64_cflags_optlist="arch"
- gcc_64_cflags="$gcc_cflags -m64"
- path_64="s390_64/$host_cpu s390_64"
- extra_functions=""
- ;;
- esac
- ;;
-
-
- sh-*-*) path="sh" ;;
- [sh[2-4]-*-*]) path="sh/sh2 sh" ;;
-
-
- *sparc*-*-*)
- # sizeof(long)==4 or 8 is tested, to ensure we get the right ABI. We've
- # had various bug reports where users have set CFLAGS for their desired
- # mode, but not set our ABI. For some reason it's sparc where this
- # keeps coming up, presumably users there are accustomed to driving the
- # compiler mode that way. The effect of our testlist setting is to
- # reject ABI=64 in favour of ABI=32 if the user has forced the flags to
- # 32-bit mode.
- #
- abilist="32"
- cclist="gcc acc cc"
- any_testlist="sizeof-long-4"
- GMP_INCLUDE_MPN(sparc32/sparc-defs.m4)
-
- case $host_cpu in
- sparcv8 | microsparc | turbosparc)
- path="sparc32/v8 sparc32" ;;
- supersparc)
- path="sparc32/v8/supersparc sparc32/v8 sparc32" ;;
- sparc64 | sparcv9* | ultrasparc*)
- path="sparc32/v9 sparc32/v8 sparc32" ;;
- *)
- path="sparc32" ;;
- esac
-
- # gcc 2.7.2 doesn't know about v9 and doesn't pass -xarch=v8plus to the
- # assembler. Add it explicitly since the solaris assembler won't accept
- # our sparc32/v9 asm code without it. gas accepts -xarch=v8plus too, so
- # it can be in the cflags unconditionally (though gas doesn't need it).
- #
- # gcc -m32 is needed to force 32-bit mode on a dual-ABI system, but past
- # gcc doesn't know that flag, hence cflags_maybe. Note that -m32 cannot
- # be done through the optlist since the plain cflags would be run first
- # and we don't want to require the default mode (whatever it is) works.
- #
- # Note it's gcc_32_cflags_maybe and not gcc_cflags_maybe because the
- # latter would be used in the 64-bit ABI on systems like "*bsd" where
- # abilist="64" only.
- #
- case $host_cpu in
- sparc64 | sparcv9* | ultrasparc*)
- gcc_cflags="$gcc_cflags -Wa,-xarch=v8plus" ;;
- *)
- gcc_cflags="$gcc_cflags" ;;
- esac
- gcc_32_cflags_maybe="-m32"
- gcc_cflags_optlist="cpu"
-
- # gcc 2.7.2 knows -mcypress, -msupersparc, -mv8, -msparclite.
- # gcc 2.95 knows -mcpu= v7, hypersparc, sparclite86x, f930, f934,
- # sparclet, tsc701, v9, ultrasparc. A warning is given that the
- # plain -m forms will disappear.
- # gcc 3.0 adds nothing.
- # gcc 3.1 adds nothing.
- # gcc 3.2 adds nothing.
- # gcc 3.3 adds ultrasparc3.
- #
- case $host_cpu in
- supersparc) gcc_cflags_cpu="-mcpu=supersparc -msupersparc" ;;
- sparcv8 | microsparc | turbosparc)
- gcc_cflags_cpu="-mcpu=v8 -mv8" ;;
- sparc64 | sparcv9*) gcc_cflags_cpu="-mcpu=v9 -mv8" ;;
- ultrasparc3) gcc_cflags_cpu="-mcpu=ultrasparc3 -mcpu=ultrasparc -mv8" ;;
- ultrasparc*) gcc_cflags_cpu="-mcpu=ultrasparc -mv8" ;;
- *) gcc_cflags_cpu="-mcpu=v7 -mcypress" ;;
- esac
-
- # SunPRO cc and acc, and SunOS bundled cc
- case $host in
- *-*-solaris* | *-*-sunos*)
- # Note no -g, it disables all optimizations.
- cc_cflags=
- cc_cflags_optlist="opt arch cpu"
-
- # SunOS cc doesn't know -xO4, fallback to -O2.
- cc_cflags_opt="-xO4 -O2"
-
- # SunOS cc doesn't know -xarch, apparently always generating v7
- # code, so make this optional
- case $host_cpu in
- sparcv8 | microsparc | supersparc | turbosparc)
- cc_cflags_arch="-xarch=v8" ;;
- sparc64 | sparcv9* | ultrasparc*) cc_cflags_arch="-xarch=v8plus" ;;
- *) cc_cflags_arch="-xarch=v7" ;;
- esac
-
- # SunOS cc doesn't know -xchip and doesn't seem to have an equivalent.
- # SunPRO cc 5 recognises -xchip=generic, old, super, super2, micro,
- # micro2, hyper, hyper2, powerup, ultra, ultra2, ultra2i.
- # SunPRO cc 6 adds -xchip=ultra2e, ultra3cu.
- #
- # FIXME: Which of ultra, ultra2 or ultra2i is the best fallback for
- # ultrasparc3?
- #
- case $host_cpu in
- supersparc) cc_cflags_cpu="-xchip=super" ;;
- microsparc) cc_cflags_cpu="-xchip=micro" ;;
- turbosparc) cc_cflags_cpu="-xchip=micro2" ;;
- ultrasparc) cc_cflags_cpu="-xchip=ultra" ;;
- ultrasparc2) cc_cflags_cpu="-xchip=ultra2" ;;
- ultrasparc2i) cc_cflags_cpu="-xchip=ultra2i" ;;
- ultrasparc3) cc_cflags_cpu="-xchip=ultra3 -xchip=ultra" ;;
- *) cc_cflags_cpu="-xchip=generic" ;;
- esac
- esac
-
- case $host_cpu in
- sparc64 | sparcv9* | ultrasparc*)
- case $host in
- # Solaris 6 and earlier cannot run ABI=64 since it doesn't save
- # registers properly, so ABI=32 is left as the only choice.
- #
- [*-*-solaris2.[0-6] | *-*-solaris2.[0-6].*]) ;;
-
- # BSD sparc64 ports are 64-bit-only systems, so ABI=64 is the only
- # choice. In fact they need no special compiler flags, gcc -m64
- # is the default, but it doesn't hurt to add it. v9 CPUs always
- # use the sparc64 port, since the plain 32-bit sparc ports don't
- # run on a v9.
- #
- *-*-*bsd*) abilist="64" ;;
-
- # For all other systems, we try both 64 and 32.
- #
- # GNU/Linux sparc64 has only recently gained a 64-bit user mode.
- # In the past sparc64 meant a v9 cpu, but there were no 64-bit
- # operations in user mode. We assume that if "gcc -m64" works
- # then the system is suitable. Hopefully even if someone attempts
- # to put a new gcc and/or glibc on an old system it won't run.
- #
- *) abilist="64 32" ;;
- esac
-
- case $host_cpu in
- ultrasparc | ultrasparc2 | ultrasparc2i)
- path_64="sparc64/ultrasparc12 sparc64" ;;
- [ultrasparc[34]])
- path_64="sparc64/ultrasparc34 sparc64/ultrasparc1234 sparc64" ;;
- [ultrasparct[1234]])
- path_64="sparc64" ;;
- *)
- path_64="sparc64"
- esac
-
- cclist_64="gcc"
- any_64_testlist="sizeof-long-8"
-
- # gcc -mptr64 is probably implied by -m64, but we're not sure if
- # this was always so. On Solaris in the past we always used both
- # "-m64 -mptr64".
- #
- # gcc -Wa,-xarch=v9 is thought to be necessary in some cases on
- # solaris, but it would seem likely that if gcc is going to generate
- # 64-bit code it will have to add that option itself where needed.
- # An extra copy of this option should be harmless though, but leave
- # it until we're sure. (Might want -xarch=v9a or -xarch=v9b for the
- # higher cpu types instead.)
- #
- gcc_64_cflags="$gcc_64_cflags -m64 -mptr64"
- gcc_64_ldflags="-Wc,-m64"
- gcc_64_cflags_optlist="cpu"
-
- case $host in
- *-*-solaris*)
- # Sun cc.
- #
- # We used to have -fast and some fixup options here, but it
- # recurrently caused problems with miscompilation. Of course,
- # -fast is documented as miscompiling things for the sake of speed.
- #
- cclist_64="$cclist_64 cc"
- cc_64_cflags="-xO3 -xarch=v9"
- cc_64_cflags_optlist="cpu"
- ;;
- esac
-
- # using the v9 %tick register
- SPEED_CYCLECOUNTER_OBJ_32=sparcv9.lo
- SPEED_CYCLECOUNTER_OBJ_64=sparcv9.lo
- cyclecounter_size_32=2
- cyclecounter_size_64=2
- ;;
- esac
- ;;
-
-
- # VAX
- vax*-*-*)
- # Currently gcc (version 3.0) on vax always uses a frame pointer
- # (config/vax/vax.h FRAME_POINTER_REQUIRED=1), so -fomit-frame-pointer
- # will be ignored.
- #
- gcc_cflags="$gcc_cflags $fomit_frame_pointer"
- path="vax"
- extra_functions="udiv_w_sdiv"
- ;;
-
-
- # AMD and Intel x86 configurations, including AMD64
- #
- # Rumour has it gcc -O2 used to give worse register allocation than just
- # -O, but lets assume that's no longer true.
- #
- # -m32 forces 32-bit mode on a bi-arch 32/64 amd64 build of gcc. -m64 is
- # the default in such a build (we think), so -m32 is essential for ABI=32.
- # This is, of course, done for any $host_cpu, not just x86_64, so we can
- # get such a gcc into the right mode to cross-compile to say i486-*-*.
- #
- # -m32 is not available in gcc 2.95 and earlier, hence cflags_maybe to use
- # it when it works. We check sizeof(long)==4 to ensure we get the right
- # mode, in case -m32 has failed not because it's an old gcc, but because
- # it's a dual 32/64-bit gcc without a 32-bit libc, or whatever.
- #
- X86_PATTERN | X86_64_PATTERN)
- abilist="32"
- cclist="gcc icc cc"
- gcc_cflags="$gcc_cflags $fomit_frame_pointer"
- gcc_32_cflags_maybe="-m32"
- icc_cflags="-no-gcc"
- icc_cflags_optlist="opt"
- icc_cflags_opt="-O3 -O2 -O1"
- any_32_testlist="sizeof-long-4"
- CALLING_CONVENTIONS_OBJS='x86call.lo x86check$U.lo'
-
- # Availability of rdtsc is checked at run-time.
- SPEED_CYCLECOUNTER_OBJ=pentium.lo
-
- # gcc 2.7.2 only knows i386 and i486, using -m386 or -m486. These
- # represent -mcpu= since -m486 doesn't generate 486 specific insns.
- # gcc 2.95 adds k6, pentium and pentiumpro, and takes -march= and -mcpu=.
- # gcc 3.0 adds athlon.
- # gcc 3.1 adds k6-2, k6-3, pentium-mmx, pentium2, pentium3, pentium4,
- # athlon-tbird, athlon-4, athlon-xp, athlon-mp.
- # gcc 3.2 adds winchip2.
- # gcc 3.3 adds winchip-c6.
- # gcc 3.3.1 from mandrake adds k8 and knows -mtune.
- # gcc 3.4 adds c3, c3-2, k8, and deprecates -mcpu in favour of -mtune.
- #
- # In gcc 2.95.[0123], -march=pentiumpro provoked a stack slot bug in an
- # old version of mpz/powm.c. Seems to be fine with the current code, so
- # no need for any restrictions on that option.
- #
- # -march=pentiumpro can fail if the assembler doesn't know "cmov"
- # (eg. solaris 2.8 native "as"), so always have -march=pentium after
- # that as a fallback.
- #
- # -march=pentium4 and -march=k8 enable SSE2 instructions, which may or
- # may not be supported by the assembler and/or the OS, and is bad in gcc
- # prior to 3.3. The tests will reject these if no good, so fallbacks
- # like "-march=pentium4 -mno-sse2" are given to try also without SSE2.
- # Note the relevant -march types are listed in the optflags handling
- # below, be sure to update there if adding new types emitting SSE2.
- #
- # -mtune is used at the start of each cpu option list to give something
- # gcc 3.4 will use, thereby avoiding warnings from -mcpu. -mcpu forms
- # are retained for use by prior gcc. For example pentium has
- # "-mtune=pentium -mcpu=pentium ...", the -mtune is for 3.4 and the
- # -mcpu for prior. If there's a brand new choice in 3.4 for a chip,
- # like k8 for x86_64, then it can be the -mtune at the start, no need to
- # duplicate anything.
- #
- gcc_cflags_optlist="cpu arch"
- case $host_cpu in
- i386*)
- gcc_cflags_cpu="-mtune=i386 -mcpu=i386 -m386"
- gcc_cflags_arch="-march=i386"
- path="x86"
- ;;
- i486*)
- gcc_cflags_cpu="-mtune=i486 -mcpu=i486 -m486"
- gcc_cflags_arch="-march=i486"
- path="x86/i486 x86"
- ;;
- i586 | pentium)
- gcc_cflags_cpu="-mtune=pentium -mcpu=pentium -m486"
- gcc_cflags_arch="-march=pentium"
- path="x86/pentium x86"
- ;;
- pentiummmx)
- gcc_cflags_cpu="-mtune=pentium-mmx -mcpu=pentium-mmx -mcpu=pentium -m486"
- gcc_cflags_arch="-march=pentium-mmx -march=pentium"
- path="x86/pentium/mmx x86/pentium x86"
- ;;
- i686 | pentiumpro)
- gcc_cflags_cpu="-mtune=pentiumpro -mcpu=pentiumpro -mcpu=i486 -m486"
- gcc_cflags_arch="-march=pentiumpro -march=pentium"
- path="x86/p6 x86"
- ;;
- pentium2)
- gcc_cflags_cpu="-mtune=pentium2 -mcpu=pentium2 -mcpu=pentiumpro -mcpu=i486 -m486"
- gcc_cflags_arch="-march=pentium2 -march=pentiumpro -march=pentium"
- path="x86/p6/mmx x86/p6 x86"
- ;;
- pentium3)
- gcc_cflags_cpu="-mtune=pentium3 -mcpu=pentium3 -mcpu=pentiumpro -mcpu=i486 -m486"
- gcc_cflags_arch="-march=pentium3 -march=pentiumpro -march=pentium"
- path="x86/p6/p3mmx x86/p6/mmx x86/p6 x86"
- ;;
- pentiumm)
- gcc_cflags_cpu="-mtune=pentium3 -mcpu=pentium3 -mcpu=pentiumpro -mcpu=i486 -m486"
- gcc_cflags_arch="-march=pentium3 -march=pentiumpro -march=pentium"
- path="x86/p6/sse2 x86/p6/p3mmx x86/p6/mmx x86/p6 x86"
- ;;
- k6)
- gcc_cflags_cpu="-mtune=k6 -mcpu=k6 -mcpu=i486 -m486"
- gcc_cflags_arch="-march=k6"
- path="x86/k6/mmx x86/k6 x86"
- ;;
- k62)
- gcc_cflags_cpu="-mtune=k6-2 -mcpu=k6-2 -mcpu=k6 -mcpu=i486 -m486"
- gcc_cflags_arch="-march=k6-2 -march=k6"
- path="x86/k6/k62mmx x86/k6/mmx x86/k6 x86"
- ;;
- k63)
- gcc_cflags_cpu="-mtune=k6-3 -mcpu=k6-3 -mcpu=k6 -mcpu=i486 -m486"
- gcc_cflags_arch="-march=k6-3 -march=k6"
- path="x86/k6/k62mmx x86/k6/mmx x86/k6 x86"
- ;;
- geode)
- gcc_cflags_cpu="-mtune=k6-3 -mcpu=k6-3 -mcpu=k6 -mcpu=i486 -m486"
- gcc_cflags_arch="-march=k6-3 -march=k6"
- path="x86/geode x86/k6/k62mmx x86/k6/mmx x86/k6 x86"
- ;;
- athlon)
- # Athlon instruction costs are close to P6 (3 cycle load latency,
- # 4-6 cycle mul, 40 cycle div, pairable adc, etc) so if gcc doesn't
- # know athlon (eg. 2.95.2 doesn't) then fall back on pentiumpro.
- gcc_cflags_cpu="-mtune=athlon -mcpu=athlon -mcpu=pentiumpro -mcpu=i486 -m486"
- gcc_cflags_arch="-march=athlon -march=pentiumpro -march=pentium"
- path="x86/k7/mmx x86/k7 x86"
- ;;
- i786 | pentium4)
- # pentiumpro is the primary fallback when gcc doesn't know pentium4.
- # This gets us cmov to eliminate branches. Maybe "athlon" would be
- # a possibility on gcc 3.0.
- #
- gcc_cflags_cpu="-mtune=pentium4 -mcpu=pentium4 -mcpu=pentiumpro -mcpu=i486 -m486"
- gcc_cflags_arch="-march=pentium4 -march=pentium4~-mno-sse2 -march=pentiumpro -march=pentium"
- gcc_64_cflags_cpu="-mtune=nocona"
- path="x86/pentium4/sse2 x86/pentium4/mmx x86/pentium4 x86"
- path_64="x86_64/pentium4 x86_64"
- ;;
- viac32)
- # Not sure of the best fallbacks here for -mcpu.
- # c3-2 has sse and mmx, so pentium3 is good for -march.
- gcc_cflags_cpu="-mtune=c3-2 -mcpu=c3-2 -mcpu=i486 -m486"
- gcc_cflags_arch="-march=c3-2 -march=pentium3 -march=pentiumpro -march=pentium"
- path="x86/p6/p3mmx x86/p6/mmx x86/p6 x86"
- ;;
- viac3*)
- # Not sure of the best fallbacks here.
- gcc_cflags_cpu="-mtune=c3 -mcpu=c3 -mcpu=i486 -m486"
- gcc_cflags_arch="-march=c3 -march=pentium-mmx -march=pentium"
- path="x86/pentium/mmx x86/pentium x86"
- ;;
- athlon64 | k8 | x86_64)
- gcc_cflags_cpu="-mtune=k8 -mcpu=athlon -mcpu=pentiumpro -mcpu=i486 -m486"
- gcc_cflags_arch="-march=k8 -march=k8~-mno-sse2 -march=athlon -march=pentiumpro -march=pentium"
- path="x86/k8 x86/k7/mmx x86/k7 x86"
- path_64="x86_64/k8 x86_64"
- ;;
- k10)
- gcc_cflags_cpu="-mtune=amdfam10 -mtune=k8"
- gcc_cflags_arch="-march=amdfam10 -march=k8 -march=k8~-mno-sse2"
- path="x86/k10 x86/k8 x86/k7/mmx x86/k7 x86"
- path_64="x86_64/k10 x86_64/k8 x86_64"
- ;;
- bobcat)
- gcc_cflags_cpu="-mtune=btver1 -mtune=amdfam10 -mtune=k8"
- gcc_cflags_arch="-march=btver1 -march=amdfam10 -march=k8 -march=k8~-mno-sse2"
- path="x86/bobcat x86/k7/mmx x86/k7 x86"
- path_64="x86_64/bobcat x86_64/k10 x86_64/k8 x86_64"
- ;;
- bulldozer | bd1)
- gcc_cflags_cpu="-mtune=bdver1 -mtune=amdfam10 -mtune=k8"
- gcc_cflags_arch="-march=bdver1 -march=amdfam10 -march=k8 -march=k8~-mno-sse2"
- path="x86/bd1 x86/k7/mmx x86/k7 x86"
- path_64="x86_64/bd1 x86_64"
- ;;
- core2)
- gcc_cflags_cpu="-mtune=core2 -mtune=k8"
- gcc_cflags_arch="-march=core2 -march=core2~-mno-sse2 -march=k8 -march=k8~-mno-sse2"
- path="x86/core2 x86/p6/sse2 x86/p6/p3mmx x86/p6/mmx x86/p6 x86"
- path_64="x86_64/core2 x86_64"
- ;;
- corei | coreinhm | coreiwsm)
- gcc_cflags_cpu="-mtune=corei7 -mtune=core2 -mtune=k8"
- gcc_cflags_arch="-march=corei7 -march=core2 -march=core2~-mno-sse2 -march=k8 -march=k8~-mno-sse2"
- path="x86/coreinhm x86/p6/sse2 x86/p6/p3mmx x86/p6/mmx x86/p6 x86"
- path_64="x86_64/coreinhm x86_64/core2 x86_64"
- ;;
- coreisbr)
- gcc_cflags_cpu="-mtune=corei7 -mtune=core2 -mtune=k8"
- gcc_cflags_arch="-march=corei7 -march=core2 -march=core2~-mno-sse2 -march=k8 -march=k8~-mno-sse2"
- path="x86/coreisbr x86/p6/sse2 x86/p6/p3mmx x86/p6/mmx x86/p6 x86"
- path_64="x86_64/coreisbr x86_64/coreinhm x86_64/core2 x86_64"
- ;;
- atom)
- gcc_cflags_cpu="-mtune=atom -mtune=pentium3"
- gcc_cflags_arch="-march=atom -march=pentium3"
- path="x86/atom/sse2 x86/atom/mmx x86/atom x86"
- path_64="x86_64/atom x86_64"
- ;;
- nano)
- gcc_cflags_cpu="-mtune=nano"
- gcc_cflags_arch="-march=nano"
- path="x86/nano x86"
- path_64="x86_64/nano x86_64"
- ;;
- *)
- gcc_cflags_cpu="-mtune=i486 -mcpu=i486 -m486"
- gcc_cflags_arch="-march=i486"
- path="x86"
- path_64="x86_64"
- ;;
- esac
-
- case $host in
- X86_64_PATTERN)
- cclist_64="gcc"
- gcc_64_cflags="$gcc_64_cflags -m64"
- gcc_64_cflags_optlist="cpu arch"
- CALLING_CONVENTIONS_OBJS_64='amd64call.lo amd64check$U.lo'
- SPEED_CYCLECOUNTER_OBJ_64=x86_64.lo
- cyclecounter_size_64=2
- abilist="64 32"
-
- case $host in
- *-*-solaris*)
- # Sun cc.
- cclist_64="$cclist_64 cc"
- cc_64_cflags="-xO3 -m64"
- ;;
- *-*-mingw* | *-*-cygwin)
- limb_64=longlong
- path_64="" # Windows amd64 calling conventions are *different*
- # Silence many pedantic warnings for w64. FIXME.
- gcc_64_cflags="$gcc_64_cflags -std=gnu99"
- ;;
- esac
- ;;
- esac
- ;;
-
-
- # FIXME: z8kx won't get through config.sub. Could make 16 versus 32 bit
- # limb an ABI option perhaps.
- z8kx*-*-*)
- path="z8000x"
- extra_functions="udiv_w_sdiv"
- ;;
- z8k*-*-*)
- path="z8000"
- extra_functions="udiv_w_sdiv"
- ;;
-
-
- # Special CPU "none" selects generic C. -DNO_ASM is used to disable gcc
- # asm blocks in longlong.h (since they're driven by cpp pre-defined
- # symbols like __alpha rather than the configured $host_cpu).
- #
- none-*-*)
- abilist="long longlong"
- cclist_long=$cclist
- gcc_long_cflags=$gcc_cflags
- gcc_long_cppflags="-DNO_ASM"
- cc_long_cflags=$cc_cflags
- cclist_longlong=$cclist
- gcc_longlong_cflags=$gcc_cflags
- gcc_longlong_cppflags="-DNO_ASM"
- cc_longlong_cflags=$cc_cflags
- limb_longlong=longlong
- ;;
-
-esac
-
-# mingw can be built by the cygwin gcc if -mno-cygwin is added. For
-# convenience add this automatically if it works. Actual mingw gcc accepts
-# -mno-cygwin too, but of course is the default. mingw only runs on the
-# x86s, but allow any CPU here so as to catch "none" too.
-#
-case $host in
- *-*-mingw*)
- gcc_cflags_optlist="$gcc_cflags_optlist nocygwin"
- gcc_cflags_nocygwin="-mno-cygwin"
- ;;
-esac
-
-
-CFLAGS_or_unset=${CFLAGS-'(unset)'}
-CPPFLAGS_or_unset=${CPPFLAGS-'(unset)'}
-
-cat >&AC_FD_CC <<EOF
-User:
-ABI=$ABI
-CC=$CC
-CFLAGS=$CFLAGS_or_unset
-CPPFLAGS=$CPPFLAGS_or_unset
-MPN_PATH=$MPN_PATH
-GMP:
-abilist=$abilist
-cclist=$cclist
-EOF
-
-
-test_CFLAGS=${CFLAGS+set}
-test_CPPFLAGS=${CPPFLAGS+set}
-
-for abi in $abilist; do
- abi_last="$abi"
-done
-
-# If the user specifies an ABI then it must be in $abilist, after that
-# $abilist is restricted to just that choice.
-#
-if test -n "$ABI"; then
- found=no
- for abi in $abilist; do
- if test $abi = "$ABI"; then found=yes; break; fi
- done
- if test $found = no; then
- AC_MSG_ERROR([ABI=$ABI is not among the following valid choices: $abilist])
- fi
- abilist="$ABI"
-fi
-
-found_compiler=no
-
-for abi in $abilist; do
-
- echo "checking ABI=$abi"
-
- # Suppose abilist="64 32", then for abi=64, will have abi1="_64" and
- # abi2="_64". For abi=32, will have abi1="_32" and abi2="". This is how
- # $gcc_cflags becomes a fallback for $gcc_32_cflags (the last in the
- # abilist), but there's no fallback for $gcc_64_cflags.
- #
- abi1=[`echo _$abi | sed 's/[.]//g'`]
- if test $abi = $abi_last; then abi2=; else abi2="$abi1"; fi
-
- # Compiler choices under this ABI
- eval cclist_chosen=\"\$cclist$abi1\"
- test -n "$cclist_chosen" || eval cclist_chosen=\"\$cclist$abi2\"
-
- # If there's a user specified $CC then don't use a list for
- # $cclist_chosen, just a single value for $ccbase.
- #
- if test -n "$CC"; then
-
- # The first word of $CC, stripped of any directory. For instance
- # CC="/usr/local/bin/gcc -pipe" will give "gcc".
- #
- for ccbase in $CC; do break; done
- ccbase=`echo $ccbase | sed 's:.*/::'`
-
- # If this $ccbase is in $cclist_chosen then it's a compiler we know and
- # we can do flags defaulting with it. If not, then $cclist_chosen is
- # set to "unrecognised" so no default flags are used.
- #
- # "unrecognised" is used to avoid bad effects with eval if $ccbase has
- # non-symbol characters. For instance ccbase=my+cc would end up with
- # something like cflags="$my+cc_cflags" which would give
- # cflags="+cc_cflags" rather than the intended empty string for an
- # unknown compiler.
- #
- found=unrecognised
- for i in $cclist_chosen; do
- if test "$ccbase" = $i; then
- found=$ccbase
- break
- fi
- done
- cclist_chosen=$found
- fi
-
- for ccbase in $cclist_chosen; do
-
- # When cross compiling, look for a compiler with the $host_alias as a
- # prefix, the same way that AC_CHECK_TOOL does. But don't do this to a
- # user-selected $CC.
- #
- # $cross_compiling will be yes/no/maybe at this point. Do the host
- # prefixing for "maybe" as well as "yes".
- #
- if test "$cross_compiling" != no && test -z "$CC"; then
- cross_compiling_prefix="${host_alias}-"
- fi
-
- for ccprefix in $cross_compiling_prefix ""; do
-
- cc="$CC"
- test -n "$cc" || cc="$ccprefix$ccbase"
-
- # If the compiler is gcc but installed under another name, then change
- # $ccbase so as to use the flags we know for gcc. This helps for
- # instance when specifying CC=gcc272 on Debian GNU/Linux, or the
- # native cc which is really gcc on NeXT or MacOS-X.
- #
- # FIXME: There's a slight misfeature here. If cc is actually gcc but
- # gcc is not a known compiler under this $abi then we'll end up
- # testing it with no flags and it'll work, but chances are it won't be
- # in the right mode for the ABI we desire. Let's quietly hope this
- # doesn't happen.
- #
- if test $ccbase != gcc; then
- GMP_PROG_CC_IS_GNU($cc,ccbase=gcc)
- fi
-
- # Similarly if the compiler is IBM xlc but invoked as cc or whatever
- # then change $ccbase and make the default xlc flags available.
- if test $ccbase != xlc; then
- GMP_PROG_CC_IS_XLC($cc,ccbase=xlc)
- fi
-
- # acc was Sun's first unbundled compiler back in the SunOS days, or
- # something like that, but today its man page says it's not meant to
- # be used directly (instead via /usr/ucb/cc). The options are pretty
- # much the same as the main SunPRO cc, so share those configs.
- #
- case $host in
- *sparc*-*-solaris* | *sparc*-*-sunos*)
- if test "$ccbase" = acc; then ccbase=cc; fi ;;
- esac
-
- for tmp_cflags_maybe in yes no; do
- eval cflags=\"\$${ccbase}${abi1}_cflags\"
- test -n "$cflags" || eval cflags=\"\$${ccbase}${abi2}_cflags\"
-
- if test "$tmp_cflags_maybe" = yes; then
- # don't try cflags_maybe when the user set CFLAGS
- if test "$test_CFLAGS" = set; then continue; fi
- eval cflags_maybe=\"\$${ccbase}${abi1}_cflags_maybe\"
- test -n "$cflags_maybe" || eval cflags_maybe=\"\$${ccbase}${abi2}_cflags_maybe\"
- # don't try cflags_maybe if there's nothing set
- if test -z "$cflags_maybe"; then continue; fi
- cflags="$cflags_maybe $cflags"
- fi
-
- # Any user CFLAGS, even an empty string, takes precedence
- if test "$test_CFLAGS" = set; then cflags=$CFLAGS; fi
-
- # Any user CPPFLAGS, even an empty string, takes precedence
- eval cppflags=\"\$${ccbase}${abi1}_cppflags\"
- test -n "$cppflags" || eval cppflags=\"\$${ccbase}${abi2}_cppflags\"
- if test "$test_CPPFLAGS" = set; then cppflags=$CPPFLAGS; fi
-
- # --enable-profiling adds -p/-pg even to user-specified CFLAGS.
- # This is convenient, but it's perhaps a bit naughty to modify user
- # CFLAGS.
- case "$enable_profiling" in
- prof) cflags="$cflags -p" ;;
- gprof) cflags="$cflags -pg" ;;
- instrument) cflags="$cflags -finstrument-functions" ;;
- esac
-
- GMP_PROG_CC_WORKS($cc $cflags $cppflags,,continue)
-
- # If we're supposed to be using a "long long" for a limb, check that
- # it works.
- eval limb_chosen=\"\$limb$abi1\"
- test -n "$limb_chosen" || eval limb_chosen=\"\$limb$abi2\"
- if test "$limb_chosen" = longlong; then
- GMP_PROG_CC_WORKS_LONGLONG($cc $cflags $cppflags,,continue)
- fi
-
- # The tests to perform on this $cc, if any
- eval testlist=\"\$${ccbase}${abi1}_testlist\"
- test -n "$testlist" || eval testlist=\"\$${ccbase}${abi2}_testlist\"
- test -n "$testlist" || eval testlist=\"\$any${abi1}_testlist\"
- test -n "$testlist" || eval testlist=\"\$any${abi2}_testlist\"
-
- testlist_pass=yes
- for tst in $testlist; do
- case $tst in
- hpc-hppa-2-0) GMP_HPC_HPPA_2_0($cc,,testlist_pass=no) ;;
- gcc-arm-umodsi) GMP_GCC_ARM_UMODSI($cc,,testlist_pass=no) ;;
- gcc-mips-o32) GMP_GCC_MIPS_O32($cc,,testlist_pass=no) ;;
- hppa-level-2.0) GMP_HPPA_LEVEL_20($cc $cflags,,testlist_pass=no) ;;
- sizeof*) GMP_C_TEST_SIZEOF($cc $cflags,$tst,,testlist_pass=no) ;;
- esac
- if test $testlist_pass = no; then break; fi
- done
-
- if test $testlist_pass = yes; then
- found_compiler=yes
- break
- fi
- done
-
- if test $found_compiler = yes; then break; fi
- done
-
- if test $found_compiler = yes; then break; fi
- done
-
- if test $found_compiler = yes; then break; fi
-done
-
-
-# If we recognised the CPU, as indicated by $path being set, then insist
-# that we have a working compiler, either from our $cclist choices or from
-# $CC. We can't let AC_PROG_CC look around for a compiler because it might
-# find one that we've rejected (for not supporting the modes our asm code
-# demands, etc).
-#
-# If we didn't recognise the CPU (and this includes host_cpu=none), then
-# fall through and let AC_PROG_CC look around for a compiler too. This is
-# mostly in the interests of following a standard autoconf setup, after all
-# we've already tested cc and gcc adequately (hopefully). As of autoconf
-# 2.50 the only thing AC_PROG_CC really adds is a check for "cl" (Microsoft
-# C on MS-DOS systems).
-#
-if test $found_compiler = no && test -n "$path"; then
- AC_MSG_ERROR([could not find a working compiler, see config.log for details])
-fi
-
-case $host in
- X86_PATTERN | X86_64_PATTERN)
- # If the user asked for a fat build, override the path and flags set above
- if test $enable_fat = yes; then
- gcc_cflags_cpu=""
- gcc_cflags_arch=""
-
- if test "$abi" = 32; then
- extra_functions="$extra_functions fat fat_entry"
- path="x86/fat x86"
- fat_path="x86 x86/fat x86/i486
- x86/k6 x86/k6/mmx x86/k6/k62mmx
- x86/k7 x86/k7/mmx
- x86/pentium x86/pentium/mmx
- x86/p6 x86/p6/mmx x86/p6/p3mmx x86/p6/sse2
- x86/pentium4 x86/pentium4/mmx x86/pentium4/sse2"
- fi
-
- if test "$abi" = 64; then
- gcc_64_cflags=""
- extra_functions_64="$extra_functions_64 fat fat_entry"
- path_64="x86_64/fat x86_64"
- fat_path="x86_64 x86_64/fat x86_64/pentium4 x86_64/core2 x86_64/coreinhm x86_64/coreisbr x86_64/atom x86_64/nano"
- fi
-
- fat_functions="add_n addmul_1 copyd copyi
- dive_1 diveby3 divrem_1 gcd_1 lshift
- mod_1 mod_34lsub1 mode1o mul_1 mul_basecase
- pre_divrem_1 pre_mod_1 rshift
- sqr_basecase sub_n submul_1"
- fat_thresholds="MUL_TOOM22_THRESHOLD MUL_TOOM33_THRESHOLD
- SQR_TOOM2_THRESHOLD SQR_TOOM3_THRESHOLD"
- fi
- ;;
-esac
-
-
-if test $found_compiler = yes; then
-
- # If we're creating CFLAGS, then look for optional additions. If the user
- # set CFLAGS then leave it alone.
- #
- if test "$test_CFLAGS" != set; then
- eval optlist=\"\$${ccbase}${abi1}_cflags_optlist\"
- test -n "$optlist" || eval optlist=\"\$${ccbase}${abi2}_cflags_optlist\"
-
- for opt in $optlist; do
- eval optflags=\"\$${ccbase}${abi1}_cflags_${opt}\"
- test -n "$optflags" || eval optflags=\"\$${ccbase}${abi2}_cflags_${opt}\"
- test -n "$optflags" || eval optflags=\"\$${ccbase}_cflags_${opt}\"
-
- for flag in $optflags; do
-
- # ~ represents a space in an option spec
- flag=`echo "$flag" | tr '~' ' '`
-
- case $flag in
- -march=pentium4 | -march=k8)
- # For -march settings which enable SSE2 we exclude certain bad
- # gcc versions and we need an OS knowing how to save xmm regs.
- #
- # This is only for ABI=32, any 64-bit gcc is good and any OS
- # knowing x86_64 will know xmm.
- #
- # -march=k8 was only introduced in gcc 3.3, so we shouldn't need
- # the GMP_GCC_PENTIUM4_SSE2 check (for gcc 3.2 and prior). But
- # it doesn't hurt to run it anyway, sharing code with the
- # pentium4 case.
- #
- if test "$abi" = 32; then
- GMP_GCC_PENTIUM4_SSE2($cc $cflags $cppflags,, continue)
- GMP_OS_X86_XMM($cc $cflags $cppflags,, continue)
- fi
- ;;
- -no-cpp-precomp)
- # special check, avoiding a warning
- GMP_GCC_NO_CPP_PRECOMP($ccbase,$cc,$cflags,
- [cflags="$cflags $flag"
- break],
- [continue])
- ;;
- -Wa,-m*)
- case $host in
- alpha*-*-*)
- GMP_GCC_WA_MCPU($cc $cflags, $flag, , [continue])
- ;;
- esac
- ;;
- -Wa,-oldas)
- GMP_GCC_WA_OLDAS($cc $cflags $cppflags,
- [cflags="$cflags $flag"
- break],
- [continue])
- ;;
- esac
-
- GMP_PROG_CC_WORKS($cc $cflags $cppflags $flag,
- [cflags="$cflags $flag"
- break])
- done
- done
- fi
-
- ABI="$abi"
- CC="$cc"
- CFLAGS="$cflags"
- CPPFLAGS="$cppflags"
-
-
- # Could easily have this in config.h too, if desired.
- ABI_nodots=`echo $ABI | sed 's/\./_/'`
- GMP_DEFINE_RAW("define_not_for_expansion(\`HAVE_ABI_$ABI_nodots')", POST)
-
-
- # GMP_LDFLAGS substitution, selected according to ABI.
- # These are needed on libgmp.la and libmp.la, but currently not on
- # convenience libraries like tune/libspeed.la or mpz/libmpz.la.
- #
- eval GMP_LDFLAGS=\"\$${ccbase}${abi1}_ldflags\"
- test -n "$GMP_LDFLAGS" || eval GMP_LDFLAGS=\"\$${ccbase}${abi1}_ldflags\"
- AC_SUBST(GMP_LDFLAGS)
- AC_SUBST(LIBGMP_LDFLAGS)
- AC_SUBST(LIBGMPXX_LDFLAGS)
-
- # extra_functions, selected according to ABI
- eval tmp=\"\$extra_functions$abi1\"
- test -n "$tmp" || eval tmp=\"\$extra_functions$abi2\"
- extra_functions="$tmp"
-
-
- # Cycle counter, selected according to ABI.
- #
- eval tmp=\"\$SPEED_CYCLECOUNTER_OBJ$abi1\"
- test -n "$tmp" || eval tmp=\"\$SPEED_CYCLECOUNTER_OBJ$abi2\"
- SPEED_CYCLECOUNTER_OBJ="$tmp"
- eval tmp=\"\$cyclecounter_size$abi1\"
- test -n "$tmp" || eval tmp=\"\$cyclecounter_size$abi2\"
- cyclecounter_size="$tmp"
-
- if test -n "$SPEED_CYCLECOUNTER_OBJ"; then
- AC_DEFINE_UNQUOTED(HAVE_SPEED_CYCLECOUNTER, $cyclecounter_size,
- [Tune directory speed_cyclecounter, undef=none, 1=32bits, 2=64bits)])
- fi
- AC_SUBST(SPEED_CYCLECOUNTER_OBJ)
-
-
- # Calling conventions checking, selected according to ABI.
- #
- eval tmp=\"\$CALLING_CONVENTIONS_OBJS$abi1\"
- test -n "$tmp" || eval tmp=\"\$CALLING_CONVENTIONS_OBJS$abi2\"
- CALLING_CONVENTIONS_OBJS="$tmp"
-
- if test -n "$CALLING_CONVENTIONS_OBJS"; then
- AC_DEFINE(HAVE_CALLING_CONVENTIONS,1,
- [Define to 1 if tests/libtests has calling conventions checking for the CPU])
- fi
- AC_SUBST(CALLING_CONVENTIONS_OBJS)
-
-fi
-
-
-# If the user gave an MPN_PATH, use that verbatim, otherwise choose
-# according to the ABI and add "generic".
-#
-if test -n "$MPN_PATH"; then
- path="$MPN_PATH"
-else
- eval tmp=\"\$path$abi1\"
- test -n "$tmp" || eval tmp=\"\$path$abi2\"
- path="$tmp generic"
-fi
-
-
-# Long long limb setup for gmp.h.
-case $limb_chosen in
-longlong) DEFN_LONG_LONG_LIMB="#define _LONG_LONG_LIMB 1" ;;
-*) DEFN_LONG_LONG_LIMB="/* #undef _LONG_LONG_LIMB */" ;;
-esac
-AC_SUBST(DEFN_LONG_LONG_LIMB)
-
-
-# The C compiler and preprocessor, put into ANSI mode if possible.
-AC_PROG_CC
-AC_PROG_CC_STDC
-AC_PROG_CPP
-GMP_H_ANSI
-
-
-# The C compiler on the build system, and associated tests.
-GMP_PROG_CC_FOR_BUILD
-GMP_PROG_CPP_FOR_BUILD
-GMP_PROG_EXEEXT_FOR_BUILD
-GMP_C_FOR_BUILD_ANSI
-GMP_CHECK_LIBM_FOR_BUILD
-
-
-# How to assemble, used with CFLAGS etc, see mpn/Makeasm.am.
-# Using the compiler is a lot easier than figuring out how to invoke the
-# assembler directly.
-#
-test -n "$CCAS" || CCAS="$CC -c"
-AC_SUBST(CCAS)
-
-
-# The C++ compiler, if desired.
-want_cxx=no
-if test $enable_cxx != no; then
- test_CXXFLAGS=${CXXFLAGS+set}
- AC_PROG_CXX
-
- echo "CXXFLAGS chosen by autoconf: $CXXFLAGS" >&AC_FD_CC
- cxxflags_ac_prog_cxx=$CXXFLAGS
- cxxflags_list=ac_prog_cxx
-
- # If the user didn't specify $CXXFLAGS, then try $CFLAGS, with -g removed
- # if AC_PROG_CXX thinks that doesn't work. $CFLAGS stands a good chance
- # of working, eg. on a GNU system where CC=gcc and CXX=g++.
- #
- if test "$test_CXXFLAGS" != set; then
- cxxflags_cflags=$CFLAGS
- cxxflags_list="cflags $cxxflags_list"
- if test "$ac_prog_cxx_g" = no; then
- cxxflags_cflags=`echo "$cxxflags_cflags" | sed -e 's/ -g //' -e 's/^-g //' -e 's/ -g$//'`
- fi
- fi
-
- # See if the C++ compiler works. If the user specified CXXFLAGS then all
- # we're doing is checking whether AC_PROG_CXX succeeded, since it doesn't
- # give a fatal error, just leaves CXX set to a default g++. If on the
- # other hand the user didn't specify CXXFLAGS then we get to try here our
- # $cxxflags_list alternatives.
- #
- # Automake includes $CPPFLAGS in a C++ compile, so we do the same here.
- #
- for cxxflags_choice in $cxxflags_list; do
- eval CXXFLAGS=\"\$cxxflags_$cxxflags_choice\"
- GMP_PROG_CXX_WORKS($CXX $CPPFLAGS $CXXFLAGS,
- [want_cxx=yes
- break])
- done
-
- # If --enable-cxx=yes but a C++ compiler can't be found, then abort.
- if test $want_cxx = no && test $enable_cxx = yes; then
- AC_MSG_ERROR([C++ compiler not available, see config.log for details])
- fi
-fi
-
-AM_CONDITIONAL(WANT_CXX, test $want_cxx = yes)
-
-# FIXME: We're not interested in CXXCPP for ourselves, but if we don't do it
-# here then AC_PROG_LIBTOOL will AC_REQUIRE it (via _LT_AC_TAGCONFIG) and
-# hence execute it unconditionally, and that will fail if there's no C++
-# compiler (and no generic /lib/cpp).
-#
-if test $want_cxx = yes; then
- AC_PROG_CXXCPP
-fi
-
-
-# Path setups for Cray, according to IEEE or CFP. These must come after
-# deciding the compiler.
-#
-GMP_CRAY_OPTIONS(
- [add_path="cray/ieee"],
- [add_path="cray/cfp"; extra_functions="mulwwc90"],
- [add_path="cray/cfp"; extra_functions="mulwwj90"])
-
-
-if test -z "$MPN_PATH"; then
- path="$add_path $path"
-fi
-
-# For a nail build, also look in "nails" subdirectories.
-#
-if test $GMP_NAIL_BITS != 0 && test -z "$MPN_PATH"; then
- new_path=
- for i in $path; do
- case $i in
- generic) new_path="$new_path $i" ;;
- *) new_path="$new_path $i/nails $i" ;;
- esac
- done
- path=$new_path
-fi
-
-
-# Put all directories into CPUVEC_list so as to get a full set of
-# CPUVEC_SETUP_$tmp_suffix defines into config.h, even if some of them are
-# empty because mmx and/or sse2 had to be dropped.
-#
-for i in $fat_path; do
- GMP_FAT_SUFFIX(tmp_suffix, $i)
- CPUVEC_list="$CPUVEC_list CPUVEC_SETUP_$tmp_suffix"
-done
-
-
-# If there's any sse2 or mmx in the path, check whether the assembler
-# supports it, and remove if not.
-#
-# We only need this in ABI=32, for ABI=64 on x86_64 we can assume a new
-# enough assembler.
-#
-case $host in
- X86_PATTERN | X86_64_PATTERN)
- if test "$ABI" = 32; then
- case "$path $fat_path" in
- *mmx*) GMP_ASM_X86_MMX( , [GMP_STRIP_PATH(*mmx*)]) ;;
- esac
- case "$path $fat_path" in
- *sse2*) GMP_ASM_X86_SSE2( , [GMP_STRIP_PATH(sse2)]) ;;
- esac
- fi
- ;;
-esac
-
-
-cat >&AC_FD_CC <<EOF
-Decided:
-ABI=$ABI
-CC=$CC
-CFLAGS=$CFLAGS
-CPPFLAGS=$CPPFLAGS
-GMP_LDFLAGS=$GMP_LDFLAGS
-CXX=$CXX
-CXXFLAGS=$CXXFLAGS
-path=$path
-EOF
-echo "using ABI=\"$ABI\""
-echo " CC=\"$CC\""
-echo " CFLAGS=\"$CFLAGS\""
-echo " CPPFLAGS=\"$CPPFLAGS\""
-if test $want_cxx = yes; then
- echo " CXX=\"$CXX\""
- echo " CXXFLAGS=\"$CXXFLAGS\""
-fi
-echo " MPN_PATH=\"$path\""
-
-
-# Automake ansi2knr support.
-AM_C_PROTOTYPES
-
-CL_AS_NOEXECSTACK
-
-GMP_PROG_AR
-GMP_PROG_NM
-
-case $host in
- # FIXME: On AIX 3 and 4, $libname.a is included in libtool
- # $library_names_spec, so libgmp.a becomes a symlink to libgmp.so, making
- # it impossible to build shared and static libraries simultaneously.
- # Disable shared libraries by default, but let the user override with
- # --enable-shared --disable-static.
- #
- # FIXME: This $libname.a problem looks like it might apply to *-*-amigaos*
- # and *-*-os2* too, but wait for someone to test this before worrying
- # about it. If there is a problem then of course libtool is the right
- # place to fix it.
- #
- [*-*-aix[34]*])
- if test -z "$enable_shared"; then enable_shared=no; fi ;;
-esac
-
-
-# Configs for Windows DLLs.
-
-AC_LIBTOOL_WIN32_DLL
-
-AC_SUBST(LIBGMP_DLL,0)
-case $host in
- *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2*)
- # By default, build only static.
- if test -z "$enable_shared"; then
- enable_shared=no
- fi
- # Don't allow both static and DLL.
- if test "$enable_shared" != no && test "$enable_static" != no; then
- AC_MSG_ERROR([cannot build both static and DLL, since gmp.h is different for each.
-Use "--disable-static --enable-shared" to build just a DLL.])
- fi
-
- # "-no-undefined" is required when building a DLL, see documentation on
- # AC_LIBTOOL_WIN32_DLL.
- #
- # "-Wl,--export-all-symbols" is a bit of a hack, it gets all libgmp and
- # libgmpxx functions and variables exported. This is what libtool did
- # in the past, and it's convenient for us in the test programs.
- #
- # Maybe it'd be prudent to check for --export-all-symbols before using
- # it, but it seems to have been in ld since at least 2000, and there's
- # not really any alternative we want to take up at the moment.
- #
- # "-Wl,output-def" is used to get a .def file for use by MS lib to make
- # a .lib import library, described in the manual. libgmp-3.dll.def
- # corresponds to the libmp-3.dll.def generated by libtool (as a result
- # of -export-symbols on that library).
- #
- # Incidentally, libtool does generate an import library libgmp.dll.a,
- # but it's "ar" format and cannot be used by the MS linker. There
- # doesn't seem to be any GNU tool for generating or converting to .lib.
- #
- # FIXME: The .def files produced by -Wl,output-def include isascii,
- # iscsym, iscsymf and toascii, apparently because mingw ctype.h doesn't
- # inline isascii (used in gmp). It gives an extern inline for
- # __isascii, but for some reason not the plain isascii.
- #
- if test "$enable_shared" = yes; then
- GMP_LDFLAGS="$GMP_LDFLAGS -no-undefined -Wl,--export-all-symbols"
- LIBGMP_LDFLAGS="$LIBGMP_LDFLAGS -Wl,--output-def,.libs/libgmp-3.dll.def"
- LIBGMPXX_LDFLAGS="$LIBGMP_LDFLAGS -Wl,--output-def,.libs/libgmpxx-3.dll.def"
- LIBGMP_DLL=1
- fi
- ;;
-esac
-
-
-# Ensure that $CONFIG_SHELL is available for AC_LIBTOOL_SYS_MAX_CMD_LEN.
-# It's often set already by _LT_AC_PROG_ECHO_BACKSLASH or
-# _AS_LINENO_PREPARE, but not always.
-#
-# The symptom of CONFIG_SHELL unset is some "expr" errors during the test,
-# and an empty result. This only happens when invoked as "sh configure",
-# ie. no path, and can be seen for instance on ia64-*-hpux*.
-#
-# FIXME: Newer libtool should have it's own fix for this.
-#
-if test -z "$CONFIG_SHELL"; then
- CONFIG_SHELL=$SHELL
-fi
-
-# Enable CXX in libtool only if we want it, and never enable GCJ, nor RC on
-# mingw and cygwin. Under --disable-cxx this avoids some error messages
-# from libtool arising from the fact we didn't actually run AC_PROG_CXX.
-# Notice that any user-supplied --with-tags setting takes precedence.
-#
-# FIXME: Is this the right way to get this effect? Very possibly not, but
-# the current _LT_AC_TAGCONFIG doesn't really suggest an alternative.
-#
-if test "${with_tags+set}" != set; then
- if test $want_cxx = yes; then
- with_tags=CXX
- else
- with_tags=
- fi
-fi
-
-# The dead hand of AC_REQUIRE makes AC_PROG_LIBTOOL expand and execute
-# AC_PROG_F77, even when F77 is not in the selected with_tags. This is
-# probably harmless, but it's unsightly and bloats our configure, so pretend
-# AC_PROG_F77 has been expanded already.
-#
-# FIXME: Rumour has it libtool will one day provide a way for a configure.in
-# to say what it wants from among supported languages etc.
-#
-AC_PROVIDE([AC_PROG_F77])
-
-AC_PROG_LIBTOOL
-
-# Generate an error here if attempting to build both shared and static when
-# $libname.a is in $library_names_spec (as mentioned above), rather than
-# wait for ar or ld to fail.
-#
-if test "$enable_shared" = yes && test "$enable_static" = yes; then
- case $library_names_spec in
- *libname.a*)
- AC_MSG_ERROR([cannot create both shared and static libraries on this system, --disable one of the two])
- ;;
- esac
-fi
-
-AM_CONDITIONAL(ENABLE_STATIC, test "$enable_static" = yes)
-
-
-# Many of these library and header checks are for the benefit of
-# supplementary programs. libgmp doesn't use anything too weird.
-
-AC_HEADER_STDC
-AC_HEADER_TIME
-
-# Reasons for testing:
-# float.h - not in SunOS bundled cc
-# invent.h - IRIX specific
-# langinfo.h - X/Open standard only, not in djgpp for instance
-# locale.h - old systems won't have this
-# nl_types.h - X/Open standard only, not in djgpp for instance
-# (usually langinfo.h gives nl_item etc, but not on netbsd 1.4.1)
-# sys/attributes.h - IRIX specific
-# sys/iograph.h - IRIX specific
-# sys/mman.h - not in Cray Unicos
-# sys/param.h - not in mingw
-# sys/processor.h - solaris specific, though also present in macos
-# sys/pstat.h - HPUX specific
-# sys/resource.h - not in mingw
-# sys/sysctl.h - not in mingw
-# sys/sysinfo.h - OSF specific
-# sys/syssgi.h - IRIX specific
-# sys/systemcfg.h - AIX specific
-# sys/time.h - autoconf suggests testing, don't know anywhere without it
-# sys/times.h - not in mingw
-# machine/hal_sysinfo.h - OSF specific
-#
-# inttypes.h, stdint.h, unistd.h and sys/types.h are already in the autoconf
-# default tests
-#
-AC_CHECK_HEADERS(fcntl.h float.h invent.h langinfo.h locale.h nl_types.h sys/attributes.h sys/iograph.h sys/mman.h sys/param.h sys/processor.h sys/pstat.h sys/sysinfo.h sys/syssgi.h sys/systemcfg.h sys/time.h sys/times.h)
-
-# On SunOS, sys/resource.h needs sys/time.h (for struct timeval)
-AC_CHECK_HEADERS(sys/resource.h,,,
-[#if TIME_WITH_SYS_TIME
-# include <sys/time.h>
-# include <time.h>
-#else
-# if HAVE_SYS_TIME_H
-# include <sys/time.h>
-# else
-# include <time.h>
-# endif
-#endif])
-
-# On NetBSD and OpenBSD, sys/sysctl.h needs sys/param.h for various constants
-AC_CHECK_HEADERS(sys/sysctl.h,,,
-[#if HAVE_SYS_PARAM_H
-# include <sys/param.h>
-#endif])
-
-# On OSF 4.0, <machine/hal_sysinfo.h> must have <sys/sysinfo.h> for ulong_t
-AC_CHECK_HEADERS(machine/hal_sysinfo.h,,,
-[#if HAVE_SYS_SYSINFO_H
-# include <sys/sysinfo.h>
-#endif])
-
-# Reasons for testing:
-# optarg - not declared in mingw
-# fgetc, fscanf, ungetc, vfprintf - not declared in SunOS 4
-# sys_errlist, sys_nerr - not declared in SunOS 4
-#
-# optarg should be in unistd.h and the rest in stdio.h, both of which are
-# in the autoconf default includes.
-#
-# sys_errlist and sys_nerr are supposed to be in <errno.h> on SunOS according
-# to the man page (but aren't), in glibc they're in stdio.h.
-#
-AC_CHECK_DECLS([fgetc, fscanf, optarg, ungetc, vfprintf])
-AC_CHECK_DECLS([sys_errlist, sys_nerr], , ,
-[#include <stdio.h>
-#include <errno.h>])
-
-AC_TYPE_SIGNAL
-
-# Reasons for testing:
-# intmax_t - C99
-# long double - not in the HP bundled K&R cc
-# long long - only in reasonably recent compilers
-# ptrdiff_t - seems to be everywhere, maybe don't need to check this
-# quad_t - BSD specific
-# uint_least32_t - C99
-#
-# the default includes are sufficient for all these types
-#
-AC_CHECK_TYPES([intmax_t, long double, long long, ptrdiff_t, quad_t,
- uint_least32_t, intptr_t])
-
-AC_C_STRINGIZE
-
-# FIXME: Really want #ifndef __cplusplus around the #define volatile
-# replacement autoconf gives, since volatile is always available in C++.
-# But we don't use it in C++ currently.
-AC_C_VOLATILE
-
-AC_C_RESTRICT
-
-GMP_C_STDARG
-GMP_C_ATTRIBUTE_CONST
-GMP_C_ATTRIBUTE_MALLOC
-GMP_C_ATTRIBUTE_MODE
-GMP_C_ATTRIBUTE_NORETURN
-
-GMP_H_EXTERN_INLINE
-
-# from libtool
-AC_CHECK_LIBM
-AC_SUBST(LIBM)
-
-GMP_FUNC_ALLOCA
-GMP_OPTION_ALLOCA
-
-GMP_H_HAVE_FILE
-
-AC_C_BIGENDIAN(
- [AC_DEFINE(HAVE_LIMB_BIG_ENDIAN, 1)
- GMP_DEFINE_RAW("define_not_for_expansion(\`HAVE_LIMB_BIG_ENDIAN')", POST)],
- [AC_DEFINE(HAVE_LIMB_LITTLE_ENDIAN, 1)
- GMP_DEFINE_RAW("define_not_for_expansion(\`HAVE_LIMB_LITTLE_ENDIAN')", POST)
- ], [:])
-AH_VERBATIM([HAVE_LIMB],
-[/* Define one of these to 1 for the endianness of `mp_limb_t'.
- If the endianness is not a simple big or little, or you don't know what
- it is, then leave both undefined. */
-#undef HAVE_LIMB_BIG_ENDIAN
-#undef HAVE_LIMB_LITTLE_ENDIAN])
-
-GMP_C_DOUBLE_FORMAT
-
-
-# Reasons for testing:
-# alarm - not in mingw
-# attr_get - IRIX specific
-# clock_gettime - not in glibc 2.2.4, only very recent systems
-# cputime - not in glibc
-# getsysinfo - OSF specific
-# getrusage - not in mingw
-# gettimeofday - not in mingw
-# mmap - not in mingw, djgpp
-# nl_langinfo - X/Open standard only, not in djgpp for instance
-# obstack_vprintf - glibc specific
-# processor_info - solaris specific
-# pstat_getprocessor - HPUX specific (10.x and up)
-# raise - an ANSI-ism, though probably almost universal by now
-# read_real_time - AIX specific
-# sigaction - not in mingw
-# sigaltstack - not in mingw, or old AIX (reputedly)
-# sigstack - not in mingw
-# strerror - not in SunOS
-# strnlen - glibc extension (some other systems too)
-# syssgi - IRIX specific
-# times - not in mingw
-#
-# clock_gettime is in librt on *-*-osf5.1. We could look for it
-# there, but that's not worth bothering with unless it has a decent
-# resolution (in a quick test clock_getres said only 1 millisecond).
-#
-# AC_FUNC_STRNLEN is not used because we don't want the AC_LIBOBJ
-# replacement setups it gives. It detects a faulty strnlen on AIX, but
-# missing out on that test is ok since our only use of strnlen is in
-# __gmp_replacement_vsnprintf which is not required on AIX since it has a
-# vsnprintf.
-#
-AC_CHECK_FUNCS(alarm attr_get clock clock_gettime cputime getpagesize getrusage gettimeofday getsysinfo localeconv memset mmap mprotect nl_langinfo obstack_vprintf popen processor_info pstat_getprocessor raise read_real_time sigaction sigaltstack sigstack syssgi strchr strerror strnlen strtol strtoul sysconf sysctl sysctlbyname times)
-
-GMP_FUNC_VSNPRINTF
-GMP_FUNC_SSCANF_WRITABLE_INPUT
-
-# Reasons for checking:
-# pst_processor psp_iticksperclktick - not in hpux 9
-#
-AC_CHECK_MEMBER(struct pst_processor.psp_iticksperclktick,
- [AC_DEFINE(HAVE_PSP_ITICKSPERCLKTICK, 1,
-[Define to 1 if <sys/pstat.h> `struct pst_processor' exists
-and contains `psp_iticksperclktick'.])],,
- [#include <sys/pstat.h>])
-
-# C++ tests, when required
-#
-if test $enable_cxx = yes; then
- AC_LANG_PUSH(C++)
-
- # Reasons for testing:
- # <sstream> - not in g++ 2.95.2
- # std::locale - not in g++ 2.95.4
- #
- AC_CHECK_HEADERS([sstream])
- AC_CHECK_TYPES([std::locale],,,[#include <locale>])
-
- AC_LANG_POP(C++)
-fi
-
-
-# Pick the correct source files in $path and link them to mpn/.
-# $gmp_mpn_functions lists all functions we need.
-#
-# The rule is to find a file with the function name and a .asm, .S,
-# .s, or .c extension. Certain multi-function files with special names
-# can provide some functions too. (mpn/Makefile.am passes
-# -DOPERATION_<func> to get them to generate the right code.)
-
-# Note: $gmp_mpn_functions must have mod_1 before pre_mod_1 so the former
-# can optionally provide the latter as an extra entrypoint. Likewise
-# divrem_1 and pre_divrem_1.
-
-gmp_mpn_functions_optional="umul udiv \
- invert_limb sqr_diagonal \
- mul_2 mul_3 mul_4 \
- addmul_2 addmul_3 addmul_4 addmul_5 addmul_6 addmul_7 addmul_8 \
- addlsh1_n sublsh1_n rsblsh1_n rsh1add_n rsh1sub_n \
- addlsh2_n sublsh2_n rsblsh2_n \
- addlsh_n sublsh_n rsblsh_n \
- add_n_sub_n addaddmul_1msb0"
-
-gmp_mpn_functions="$extra_functions \
- add add_1 add_n sub sub_1 sub_n neg com mul_1 addmul_1 \
- submul_1 lshift rshift dive_1 diveby3 divis divrem divrem_1 divrem_2 \
- fib2_ui mod_1 mod_34lsub1 mode1o pre_divrem_1 pre_mod_1 dump \
- mod_1_1 mod_1_2 mod_1_3 mod_1_4 lshiftc \
- mul mul_fft mul_n sqr mul_basecase sqr_basecase nussbaumer_mul \
- random random2 pow_1 \
- rootrem sqrtrem get_str set_str scan0 scan1 popcount hamdist cmp \
- perfsqr perfpow \
- gcd_1 gcd gcdext_1 gcdext gcd_lehmer gcd_subdiv_step \
- gcdext_lehmer gcdext_subdiv_step \
- div_q tdiv_qr jacbase get_d \
- matrix22_mul hgcd2 hgcd mullo_n mullo_basecase \
- toom22_mul toom32_mul toom42_mul toom52_mul toom62_mul \
- toom33_mul toom43_mul toom53_mul toom63_mul \
- toom44_mul \
- toom6h_mul toom6_sqr toom8h_mul toom8_sqr \
- toom_couple_handling \
- toom2_sqr toom3_sqr toom4_sqr \
- toom_eval_dgr3_pm1 toom_eval_dgr3_pm2 \
- toom_eval_pm1 toom_eval_pm2 toom_eval_pm2exp toom_eval_pm2rexp \
- toom_interpolate_5pts toom_interpolate_6pts toom_interpolate_7pts \
- toom_interpolate_8pts toom_interpolate_12pts toom_interpolate_16pts \
- invertappr invert binvert mulmod_bnm1 sqrmod_bnm1 \
- sbpi1_div_q sbpi1_div_qr sbpi1_divappr_q \
- dcpi1_div_q dcpi1_div_qr dcpi1_divappr_q \
- mu_div_qr mu_divappr_q mu_div_q \
- bdiv_q_1 \
- sbpi1_bdiv_q sbpi1_bdiv_qr \
- dcpi1_bdiv_q dcpi1_bdiv_qr \
- mu_bdiv_q mu_bdiv_qr \
- bdiv_q bdiv_qr \
- divexact bdiv_dbm1c redc_1 redc_2 redc_n powm powlo powm_sec subcnd_n \
- redc_1_sec trialdiv remove \
- and_n andn_n nand_n ior_n iorn_n nior_n xor_n xnor_n \
- copyi copyd zero \
- $gmp_mpn_functions_optional"
-
-define(GMP_MULFUNC_CHOICES,
-[# functions that can be provided by multi-function files
-tmp_mulfunc=
-case $tmp_fn in
- add_n|sub_n) tmp_mulfunc="aors_n" ;;
- addmul_1|submul_1) tmp_mulfunc="aorsmul_1" ;;
- popcount|hamdist) tmp_mulfunc="popham" ;;
- and_n|andn_n|nand_n | ior_n|iorn_n|nior_n | xor_n|xnor_n)
- tmp_mulfunc="logops_n" ;;
- lshift|rshift) tmp_mulfunc="lorrshift";;
- addlsh1_n)
- tmp_mulfunc="aorslsh1_n aorrlsh1_n";;
- sublsh1_n)
- tmp_mulfunc="aorslsh1_n sorrlsh1_n";;
- rsblsh1_n)
- tmp_mulfunc="aorrlsh1_n sorrlsh1_n";;
- addlsh2_n)
- tmp_mulfunc="aorslsh2_n aorrlsh2_n";;
- sublsh2_n)
- tmp_mulfunc="aorslsh2_n sorrlsh2_n";;
- rsblsh2_n)
- tmp_mulfunc="aorrlsh2_n sorrlsh2_n";;
- addlsh_n)
- tmp_mulfunc="aorslsh_n aorrlsh_n";;
- sublsh_n)
- tmp_mulfunc="aorslsh_n sorrlsh_n";;
- rsblsh_n)
- tmp_mulfunc="aorrlsh_n sorrlsh_n";;
- rsh1add_n|rsh1sub_n)
- tmp_mulfunc="rsh1aors_n";;
-esac
-])
-
-# the list of all object files used by mpn/Makefile.in and the
-# top-level Makefile.in, respectively
-mpn_objects=
-mpn_objs_in_libgmp=
-
-# links from the sources, to be removed by "make distclean"
-gmp_srclinks=
-
-
-# mpn_relative_top_srcdir is $top_srcdir, but for use from within the mpn
-# build directory. If $srcdir is relative then we use a relative path too,
-# so the two trees can be moved together.
-case $srcdir in
- [[\\/]* | ?:[\\/]*]) # absolute, as per autoconf
- mpn_relative_top_srcdir=$srcdir ;;
- *) # relative
- mpn_relative_top_srcdir=../$srcdir ;;
-esac
-
-
-define(MPN_SUFFIXES,[asm S s c])
-
-dnl Usage: GMP_FILE_TO_FUNCTION(func,file)
-dnl
-dnl Set $func to the function base name for $file, eg. dive_1 gives
-dnl divexact_1.
-dnl
-define(GMP_FILE_TO_FUNCTION,
-[case $$2 in
- dive_1) $1=divexact_1 ;;
- diveby3) $1=divexact_by3c ;;
- pre_divrem_1) $1=preinv_divrem_1 ;;
- mode1o) $1=modexact_1c_odd ;;
- pre_mod_1) $1=preinv_mod_1 ;;
- *) $1=$$2 ;;
-esac
-])
-
-# Fat binary setups.
-#
-# We proceed through each $fat_path directory, and look for $fat_function
-# routines there. Those found are incorporated in the build by generating a
-# little mpn/<foo>.asm or mpn/<foo>.c file in the build directory, with
-# suitable function renaming, and adding that to $mpn_objects (the same as a
-# normal mpn file).
-#
-# fat.h is generated with macros to let internal calls to each $fat_function
-# go directly through __gmpn_cpuvec, plus macros and declarations helping to
-# setup that structure, on a per-directory basis ready for
-# mpn/<cpu>/fat/fat.c.
-#
-# fat.h includes thresholds listed in $fat_thresholds, extracted from
-# gmp-mparam.h in each directory. An overall maximum for each threshold is
-# established, for use in making fixed size arrays of temporary space.
-# (Eg. MUL_TOOM33_THRESHOLD_LIMIT used by mpn/generic/mul.c.)
-#
-# It'd be possible to do some of this manually, but when there's more than a
-# few functions and a few directories it becomes very tedious, and very
-# prone to having some routine accidentally omitted. On that basis it seems
-# best to automate as much as possible, even if the code to do so is a bit
-# ugly.
-#
-
-if test -n "$fat_path"; then
- # Usually the mpn build directory is created with mpn/Makefile
- # instantiation, but we want to write to it sooner.
- mkdir mpn 2>/dev/null
-
- echo "/* fat.h - setups for fat binaries." >fat.h
- echo " Generated by configure - DO NOT EDIT. */" >>fat.h
-
- AC_DEFINE(WANT_FAT_BINARY, 1, [Define to 1 when building a fat binary.])
- GMP_DEFINE(WANT_FAT_BINARY, yes)
-
- # Don't want normal copies of fat functions
- for tmp_fn in $fat_functions; do
- GMP_REMOVE_FROM_LIST(gmp_mpn_functions, $tmp_fn)
- GMP_REMOVE_FROM_LIST(gmp_mpn_functions_optional, $tmp_fn)
- done
-
- for tmp_fn in $fat_functions; do
- GMP_FILE_TO_FUNCTION(tmp_fbase,tmp_fn)
- echo "
-#ifndef OPERATION_$tmp_fn
-#undef mpn_$tmp_fbase
-#define mpn_$tmp_fbase (*__gmpn_cpuvec.$tmp_fbase)
-#endif
-DECL_$tmp_fbase (__MPN(${tmp_fbase}_init));" >>fat.h
- # encourage various macros to use fat functions
- AC_DEFINE_UNQUOTED(HAVE_NATIVE_mpn_$tmp_fbase)
- done
-
- # Redirect each compile-time threshold macro to the corresponding field of
- # the run-time cpuvec; the field name is the threshold name in lower case.
- echo "" >>fat.h
- echo "/* variable thresholds */" >>fat.h
- for tmp_tn in $fat_thresholds; do
- # Doubled brackets: m4 strips one level, so tr is run with [A-Z] [a-z],
- # the same form used by the other threshold loops in this section.
- tmp_field_name=`echo $tmp_tn | tr [[A-Z]] [[a-z]]`
- echo "#undef $tmp_tn" >>fat.h
- echo "#define $tmp_tn CPUVEC_THRESHOLD ($tmp_field_name)" >>fat.h
- done
-
- echo "
-/* Copy all fields into __gmpn_cpuvec.
- memcpy is not used because it might operate byte-wise (depending on its
- implementation), and we need the function pointer writes to be atomic.
- "volatile" discourages the compiler from trying to optimize this. */
-#define CPUVEC_INSTALL(vec) \\
- do { \\
- volatile struct cpuvec_t *p = &__gmpn_cpuvec; \\" >>fat.h
- for tmp_fn in $fat_functions; do
- GMP_FILE_TO_FUNCTION(tmp_fbase,tmp_fn)
- echo " p->$tmp_fbase = vec.$tmp_fbase; \\" >>fat.h
- done
- for tmp_tn in $fat_thresholds; do
- tmp_field_name=`echo $tmp_tn | tr [[A-Z]] [[a-z]]`
- echo " p->$tmp_field_name = vec.$tmp_field_name; \\" >>fat.h
- done
- echo " } while (0)" >>fat.h
-
- echo "
-/* A helper to check all fields are filled. */
-#define ASSERT_CPUVEC(vec) \\
- do { \\" >>fat.h
- for tmp_fn in $fat_functions; do
- GMP_FILE_TO_FUNCTION(tmp_fbase,tmp_fn)
- echo " ASSERT (vec.$tmp_fbase != NULL); \\" >>fat.h
- done
- for tmp_tn in $fat_thresholds; do
- tmp_field_name=`echo $tmp_tn | tr [[A-Z]] [[a-z]]`
- echo " ASSERT (vec.$tmp_field_name != 0); \\" >>fat.h
- done
- echo " } while (0)" >>fat.h
-
- echo "
-/* Call ITERATE(field) for each fat threshold field. */
-#define ITERATE_FAT_THRESHOLDS() \\
- do { \\" >>fat.h
- for tmp_tn in $fat_thresholds; do
- tmp_field_name=`echo $tmp_tn | tr [[A-Z]] [[a-z]]`
- echo " ITERATE ($tmp_tn, $tmp_field_name); \\" >>fat.h
- done
- echo " } while (0)" >>fat.h
-
- for tmp_dir in $fat_path; do
- CPUVEC_SETUP=
- THRESH_ASM_SETUP=
- echo "" >>fat.h
- GMP_FAT_SUFFIX(tmp_suffix, $tmp_dir)
-
- # In order to keep names unique on a DOS 8.3 filesystem, use a prefix
- # (rather than a suffix) for the generated file names, and abbreviate.
- case $tmp_suffix in
- pentium) tmp_prefix=p ;;
- pentium_mmx) tmp_prefix=pm ;;
- p6_mmx) tmp_prefix=p2 ;;
- p6_p3mmx) tmp_prefix=p3 ;;
- pentium4) tmp_prefix=p4 ;;
- pentium4_mmx) tmp_prefix=p4m ;;
- pentium4_sse2) tmp_prefix=p4s ;;
- k6_mmx) tmp_prefix=k6m ;;
- k6_k62mmx) tmp_prefix=k62 ;;
- k7_mmx) tmp_prefix=k7m ;;
- *) tmp_prefix=$tmp_suffix ;;
- esac
-
- # Extract desired thresholds from gmp-mparam.h file in this directory,
- # if present.
- tmp_mparam=$srcdir/mpn/$tmp_dir/gmp-mparam.h
- if test -f $tmp_mparam; then
- for tmp_tn in $fat_thresholds; do
- tmp_thresh=`sed -n "s/^#define $tmp_tn[ ]*\\([0-9][0-9]*\\).*$/\\1/p" $tmp_mparam`
- if test -n "$tmp_thresh"; then
- THRESH_ASM_SETUP=["${THRESH_ASM_SETUP}define($tmp_tn,$tmp_thresh)
-"]
- CPUVEC_SETUP="$CPUVEC_SETUP decided_cpuvec.`echo $tmp_tn | tr [[A-Z]] [[a-z]]` = $tmp_thresh; \\
-"
- eval tmp_limit=\$${tmp_tn}_LIMIT
- if test -z "$tmp_limit"; then
- tmp_limit=0
- fi
- if test $tmp_thresh -gt $tmp_limit; then
- eval ${tmp_tn}_LIMIT=$tmp_thresh
- fi
- fi
- done
- fi
-
- for tmp_fn in $fat_functions; do
- GMP_MULFUNC_CHOICES
-
- for tmp_base in $tmp_fn $tmp_mulfunc; do
- for tmp_ext in MPN_SUFFIXES; do
- tmp_file=$srcdir/mpn/$tmp_dir/$tmp_base.$tmp_ext
- if test -f $tmp_file; then
-
- mpn_objects="$mpn_objects ${tmp_prefix}_$tmp_fn.lo"
- mpn_objs_in_libgmp="$mpn_objs_in_libgmp mpn/${tmp_prefix}_$tmp_fn.lo"
-
- GMP_FILE_TO_FUNCTION(tmp_fbase,tmp_fn)
-
- # carry-in variant, eg. divrem_1c or modexact_1c_odd
- case $tmp_fbase in
- *_1*) tmp_fbasec=`echo $tmp_fbase | sed 's/_1/_1c/'` ;;
- *) tmp_fbasec=${tmp_fbase}c ;;
- esac
-
- # Create a little file doing an include from srcdir. The
- # OPERATION and renamings aren't all needed all the time, but
- # they don't hurt if unused.
- #
- # FIXME: Should generate these via config.status commands.
- # Would need them all in one AC_CONFIG_COMMANDS though, since
- # that macro doesn't accept a set of separate commands generated
- # by shell code.
- #
- case $tmp_ext in
- asm)
- # hide the d-n-l from autoconf's error checking
- tmp_d_n_l=d""nl
- echo ["$tmp_d_n_l mpn_$tmp_fbase - from $tmp_dir directory for fat binary.
-$tmp_d_n_l Generated by configure - DO NOT EDIT.
-
-define(OPERATION_$tmp_fn)
-define(__gmpn_$tmp_fbase, __gmpn_${tmp_fbase}_$tmp_suffix)
-define(__gmpn_$tmp_fbasec,__gmpn_${tmp_fbasec}_${tmp_suffix})
-define(__gmpn_preinv_${tmp_fbase},__gmpn_preinv_${tmp_fbase}_${tmp_suffix})
-
-$tmp_d_n_l For k6 and k7 gcd_1 calling their corresponding mpn_modexact_1_odd
-ifdef(\`__gmpn_modexact_1_odd',,
-\`define(__gmpn_modexact_1_odd,__gmpn_modexact_1_odd_${tmp_suffix})')
-
-$THRESH_ASM_SETUP
-include][($mpn_relative_top_srcdir/mpn/$tmp_dir/$tmp_base.asm)
-"] >mpn/${tmp_prefix}_$tmp_fn.asm
- ;;
- c)
- echo ["/* mpn_$tmp_fbase - from $tmp_dir directory for fat binary.
- Generated by configure - DO NOT EDIT. */
-
-#define OPERATION_$tmp_fn 1
-#define __gmpn_$tmp_fbase __gmpn_${tmp_fbase}_$tmp_suffix
-#define __gmpn_$tmp_fbasec __gmpn_${tmp_fbasec}_${tmp_suffix}
-#define __gmpn_preinv_${tmp_fbase} __gmpn_preinv_${tmp_fbase}_${tmp_suffix}
-
-#include \"$mpn_relative_top_srcdir/mpn/$tmp_dir/$tmp_base.c\"
-"] >mpn/${tmp_prefix}_$tmp_fn.c
- ;;
- esac
-
- # Prototype, and append to CPUVEC_SETUP for this directory.
- echo "DECL_$tmp_fbase (__gmpn_${tmp_fbase}_$tmp_suffix);" >>fat.h
- CPUVEC_SETUP="$CPUVEC_SETUP decided_cpuvec.$tmp_fbase = __gmpn_${tmp_fbase}_${tmp_suffix}; \\
-"
- # Ditto for any preinv variant (preinv_divrem_1, preinv_mod_1).
- if grep "^PROLOGUE(mpn_preinv_$tmp_fn)" $tmp_file >/dev/null; then
- echo "DECL_preinv_$tmp_fbase (__gmpn_preinv_${tmp_fbase}_$tmp_suffix);" >>fat.h
- CPUVEC_SETUP="$CPUVEC_SETUP decided_cpuvec.preinv_$tmp_fbase = __gmpn_preinv_${tmp_fbase}_${tmp_suffix}; \\
-"
- fi
- fi
- done
- done
- done
-
- # Emit CPUVEC_SETUP for this directory
- echo "" >>fat.h
- echo "#define CPUVEC_SETUP_$tmp_suffix \\" >>fat.h
- echo " do { \\" >>fat.h
- echo "$CPUVEC_SETUP } while (0)" >>fat.h
- done
-
- # Emit threshold limits
- echo "" >>fat.h
- for tmp_tn in $fat_thresholds; do
- eval tmp_limit=\$${tmp_tn}_LIMIT
- echo "#define ${tmp_tn}_LIMIT $tmp_limit" >>fat.h
- done
-fi
-
-
-# Normal binary setups.
-#
-
-for tmp_ext in MPN_SUFFIXES; do
- eval found_$tmp_ext=no
-done
-
-for tmp_fn in $gmp_mpn_functions; do
- for tmp_ext in MPN_SUFFIXES; do
- test "$no_create" = yes || rm -f mpn/$tmp_fn.$tmp_ext
- done
-
- # mpn_preinv_divrem_1 might have been provided by divrem_1.asm, likewise
- # mpn_preinv_mod_1 by mod_1.asm.
- case $tmp_fn in
- pre_divrem_1)
- if test "$HAVE_NATIVE_mpn_preinv_divrem_1" = yes; then continue; fi ;;
- pre_mod_1)
- if test "$HAVE_NATIVE_mpn_preinv_mod_1" = yes; then continue; fi ;;
- esac
-
- GMP_MULFUNC_CHOICES
-
- found=no
- for tmp_dir in $path; do
- for tmp_base in $tmp_fn $tmp_mulfunc; do
- for tmp_ext in MPN_SUFFIXES; do
- tmp_file=$srcdir/mpn/$tmp_dir/$tmp_base.$tmp_ext
- if test -f $tmp_file; then
-
- # For a nails build, check if the file supports our nail bits.
- # Generic code always supports all nails.
- #
- # FIXME: When a multi-function file is selected to provide one of
- # the nails-neutral routines, like logops_n for and_n, the
- # PROLOGUE grepping will create HAVE_NATIVE_mpn_<foo> defines for
- # all functions in that file, even if they haven't all been
- # nailified. Not sure what to do about this, it's only really a
- # problem for logops_n, and it's not too terrible to insist those
- # get nailified always.
- #
- if test $GMP_NAIL_BITS != 0 && test $tmp_dir != generic; then
- case $tmp_fn in
- and_n | ior_n | xor_n | andn_n | \
- copyi | copyd | \
- popcount | hamdist | \
- udiv | udiv_w_sdiv | umul | \
- cntlz | invert_limb)
- # these operations are either unaffected by nails or defined
- # to operate on full limbs
- ;;
- *)
- nails=[`sed -n 's/^[ ]*NAILS_SUPPORT(\(.*\))/\1/p' $tmp_file `]
- for n in $nails; do
- case $n in
- *-*)
- n_start=`echo "$n" | sed -n 's/\(.*\)-.*/\1/p'`
- n_end=`echo "$n" | sed -n 's/.*-\(.*\)/\1/p'`
- ;;
- *)
- n_start=$n
- n_end=$n
- ;;
- esac
- if test $GMP_NAIL_BITS -ge $n_start && test $GMP_NAIL_BITS -le $n_end; then
- found=yes
- break
- fi
- done
- if test $found != yes; then
- continue
- fi
- ;;
- esac
- fi
-
- found=yes
- eval found_$tmp_ext=yes
-
- if test $tmp_ext = c; then
- tmp_u='$U'
- else
- tmp_u=
- fi
-
- mpn_objects="$mpn_objects $tmp_fn$tmp_u.lo"
- mpn_objs_in_libgmp="$mpn_objs_in_libgmp mpn/$tmp_fn$tmp_u.lo"
- AC_CONFIG_LINKS(mpn/$tmp_fn.$tmp_ext:mpn/$tmp_dir/$tmp_base.$tmp_ext)
- gmp_srclinks="$gmp_srclinks mpn/$tmp_fn.$tmp_ext"
-
- # Duplicate AC_DEFINEs are harmless, so it doesn't matter
- # that multi-function files get grepped here repeatedly.
- # The PROLOGUE pattern excludes the optional second parameter.
- gmp_ep=[`
- sed -n 's/^[ ]*MULFUNC_PROLOGUE(\(.*\))/\1/p' $tmp_file ;
- sed -n 's/^[ ]*PROLOGUE(\([^,]*\).*)/\1/p' $tmp_file
- `]
- for gmp_tmp in $gmp_ep; do
- AC_DEFINE_UNQUOTED(HAVE_NATIVE_$gmp_tmp)
- eval HAVE_NATIVE_$gmp_tmp=yes
- done
-
- case $tmp_fn in
- sqr_basecase) sqr_basecase_source=$tmp_file ;;
- esac
-
- break
- fi
- done
- if test $found = yes; then break ; fi
- done
- if test $found = yes; then break ; fi
- done
-
- if test $found = no; then
- for tmp_optional in $gmp_mpn_functions_optional; do
- if test $tmp_optional = $tmp_fn; then
- found=yes
- fi
- done
- if test $found = no; then
- AC_MSG_ERROR([no version of $tmp_fn found in path: $path])
- fi
- fi
-done
-
-# All cycle counters are .asm files currently
-if test -n "$SPEED_CYCLECOUNTER_OBJ"; then
- found_asm=yes
-fi
-
-dnl The following list only needs to have templates for those defines which
-dnl are going to be tested by the code, there's no need to have every
-dnl possible mpn routine.
-
-AH_VERBATIM([HAVE_NATIVE],
-[/* Define to 1 each of the following for which a native (ie. CPU specific)
- implementation of the corresponding routine exists. */
-#undef HAVE_NATIVE_mpn_add_n
-#undef HAVE_NATIVE_mpn_add_n_sub_n
-#undef HAVE_NATIVE_mpn_add_nc
-#undef HAVE_NATIVE_mpn_addaddmul_1msb0
-#undef HAVE_NATIVE_mpn_addlsh1_n
-#undef HAVE_NATIVE_mpn_addlsh2_n
-#undef HAVE_NATIVE_mpn_addlsh_n
-#undef HAVE_NATIVE_mpn_addmul_1c
-#undef HAVE_NATIVE_mpn_addmul_2
-#undef HAVE_NATIVE_mpn_addmul_3
-#undef HAVE_NATIVE_mpn_addmul_4
-#undef HAVE_NATIVE_mpn_addmul_5
-#undef HAVE_NATIVE_mpn_addmul_6
-#undef HAVE_NATIVE_mpn_addmul_7
-#undef HAVE_NATIVE_mpn_addmul_8
-#undef HAVE_NATIVE_mpn_and_n
-#undef HAVE_NATIVE_mpn_andn_n
-#undef HAVE_NATIVE_mpn_bdiv_dbm1c
-#undef HAVE_NATIVE_mpn_bdiv_q_1
-#undef HAVE_NATIVE_mpn_pi1_bdiv_q_1
-#undef HAVE_NATIVE_mpn_com
-#undef HAVE_NATIVE_mpn_copyd
-#undef HAVE_NATIVE_mpn_copyi
-#undef HAVE_NATIVE_mpn_divexact_1
-#undef HAVE_NATIVE_mpn_divexact_by3c
-#undef HAVE_NATIVE_mpn_divrem_1
-#undef HAVE_NATIVE_mpn_divrem_1c
-#undef HAVE_NATIVE_mpn_divrem_2
-#undef HAVE_NATIVE_mpn_gcd_1
-#undef HAVE_NATIVE_mpn_hamdist
-#undef HAVE_NATIVE_mpn_invert_limb
-#undef HAVE_NATIVE_mpn_ior_n
-#undef HAVE_NATIVE_mpn_iorn_n
-#undef HAVE_NATIVE_mpn_lshift
-#undef HAVE_NATIVE_mpn_lshiftc
-#undef HAVE_NATIVE_mpn_lshsub_n
-#undef HAVE_NATIVE_mpn_mod_1
-#undef HAVE_NATIVE_mpn_mod_1_1p
-#undef HAVE_NATIVE_mpn_mod_1c
-#undef HAVE_NATIVE_mpn_mod_1s_2p
-#undef HAVE_NATIVE_mpn_mod_1s_4p
-#undef HAVE_NATIVE_mpn_mod_34lsub1
-#undef HAVE_NATIVE_mpn_modexact_1_odd
-#undef HAVE_NATIVE_mpn_modexact_1c_odd
-#undef HAVE_NATIVE_mpn_mul_1
-#undef HAVE_NATIVE_mpn_mul_1c
-#undef HAVE_NATIVE_mpn_mul_2
-#undef HAVE_NATIVE_mpn_mul_3
-#undef HAVE_NATIVE_mpn_mul_4
-#undef HAVE_NATIVE_mpn_mul_basecase
-#undef HAVE_NATIVE_mpn_nand_n
-#undef HAVE_NATIVE_mpn_nior_n
-#undef HAVE_NATIVE_mpn_popcount
-#undef HAVE_NATIVE_mpn_preinv_divrem_1
-#undef HAVE_NATIVE_mpn_preinv_mod_1
-#undef HAVE_NATIVE_mpn_redc_1
-#undef HAVE_NATIVE_mpn_redc_2
-#undef HAVE_NATIVE_mpn_rsblsh1_n
-#undef HAVE_NATIVE_mpn_rsblsh2_n
-#undef HAVE_NATIVE_mpn_rsblsh_n
-#undef HAVE_NATIVE_mpn_rsh1add_n
-#undef HAVE_NATIVE_mpn_rsh1add_nc
-#undef HAVE_NATIVE_mpn_rsh1sub_n
-#undef HAVE_NATIVE_mpn_rsh1sub_nc
-#undef HAVE_NATIVE_mpn_rshift
-#undef HAVE_NATIVE_mpn_sqr_basecase
-#undef HAVE_NATIVE_mpn_sqr_diagonal
-#undef HAVE_NATIVE_mpn_sub_n
-#undef HAVE_NATIVE_mpn_sub_nc
-#undef HAVE_NATIVE_mpn_sublsh1_n
-#undef HAVE_NATIVE_mpn_sublsh2_n
-#undef HAVE_NATIVE_mpn_sublsh_n
-#undef HAVE_NATIVE_mpn_submul_1c
-#undef HAVE_NATIVE_mpn_udiv_qrnnd
-#undef HAVE_NATIVE_mpn_udiv_qrnnd_r
-#undef HAVE_NATIVE_mpn_umul_ppmm
-#undef HAVE_NATIVE_mpn_umul_ppmm_r
-#undef HAVE_NATIVE_mpn_xor_n
-#undef HAVE_NATIVE_mpn_xnor_n])
-
-
-# Don't demand an m4 unless it's actually needed.
-if test $found_asm = yes; then
- GMP_PROG_M4
- GMP_M4_M4WRAP_SPURIOUS
-# else
-# It's unclear why this m4-not-needed stuff was ever done.
-# if test -z "$M4" ; then
-# M4=m4-not-needed
-# fi
-fi
-
-# Only do the GMP_ASM checks if there's a .S or .asm wanting them.
-if test $found_asm = no && test $found_S = no; then
- gmp_asm_syntax_testing=no
-fi
-
-if test "$gmp_asm_syntax_testing" != no; then
- GMP_ASM_TEXT
- GMP_ASM_DATA
- GMP_ASM_LABEL_SUFFIX
- GMP_ASM_GLOBL
- GMP_ASM_GLOBL_ATTR
- GMP_ASM_UNDERSCORE
- GMP_ASM_RODATA
- GMP_ASM_TYPE
- GMP_ASM_SIZE
- GMP_ASM_LSYM_PREFIX
- GMP_ASM_W32
- GMP_ASM_ALIGN_LOG
-
- case $host in
- hppa*-*-*)
- # for both pa32 and pa64
- GMP_INCLUDE_MPN(pa32/pa-defs.m4)
- ;;
- IA64_PATTERN)
- GMP_ASM_IA64_ALIGN_OK
- ;;
- M68K_PATTERN)
- GMP_ASM_M68K_INSTRUCTION
- GMP_ASM_M68K_ADDRESSING
- GMP_ASM_M68K_BRANCHES
- ;;
- [powerpc*-*-* | power[3-9]-*-*])
- GMP_ASM_POWERPC_PIC_ALWAYS
- GMP_ASM_POWERPC_R_REGISTERS
- GMP_INCLUDE_MPN(powerpc32/powerpc-defs.m4)
- case $host in
- *-*-aix*)
- case $ABI in
- 64 | aix64) GMP_INCLUDE_MPN(powerpc64/aix.m4) ;;
- *) GMP_INCLUDE_MPN(powerpc32/aix.m4) ;;
- esac
- ;;
- *-*-linux* | *-*-*bsd*)
- case $ABI in
- mode64) GMP_INCLUDE_MPN(powerpc64/elf.m4) ;;
- mode32 | 32) GMP_INCLUDE_MPN(powerpc32/elf.m4) ;;
- esac
- ;;
- *-*-darwin*)
- case $ABI in
- mode64) GMP_INCLUDE_MPN(powerpc64/darwin.m4) ;;
- mode32 | 32) GMP_INCLUDE_MPN(powerpc32/darwin.m4) ;;
- esac
- ;;
- *)
- # Assume unrecognized operating system is the powerpc eABI
- GMP_INCLUDE_MPN(powerpc32/eabi.m4)
- ;;
- esac
- ;;
- power*-*-aix*)
- GMP_INCLUDE_MPN(powerpc32/aix.m4)
- ;;
- sparcv9*-*-* | ultrasparc*-*-* | sparc64-*-*)
- case $ABI in
- 64)
- GMP_ASM_SPARC_REGISTER
- ;;
- esac
- ;;
- X86_PATTERN | X86_64_PATTERN)
- GMP_ASM_ALIGN_FILL_0x90
- case $ABI in
- 32)
- GMP_INCLUDE_MPN(x86/x86-defs.m4)
- AC_DEFINE(HAVE_HOST_CPU_FAMILY_x86)
- GMP_ASM_COFF_TYPE
- GMP_ASM_X86_GOT_UNDERSCORE
- GMP_ASM_X86_SHLDL_CL
- case $enable_profiling in
- prof | gprof) GMP_ASM_X86_MCOUNT ;;
- esac
- case $host in
- *-*-darwin*)
- GMP_INCLUDE_MPN(x86/darwin.m4) ;;
- esac
- ;;
- 64)
- GMP_INCLUDE_MPN(x86_64/x86_64-defs.m4)
- AC_DEFINE(HAVE_HOST_CPU_FAMILY_x86_64)
- case $host in
- *-*-darwin*)
- GMP_INCLUDE_MPN(x86_64/darwin.m4) ;;
- esac
- ;;
- esac
- ;;
- esac
-fi
-
-# For --enable-minithres, prepend "minithres" to path so that its special
-# gmp-mparam.h will be used.
-if test $enable_minithres = yes; then
- path="minithres $path"
-fi
-
-# Create link for gmp-mparam.h.
-gmp_mparam_source=
-for gmp_mparam_dir in $path; do
- test "$no_create" = yes || rm -f gmp-mparam.h
- tmp_file=$srcdir/mpn/$gmp_mparam_dir/gmp-mparam.h
- if test -f $tmp_file; then
- AC_CONFIG_LINKS(gmp-mparam.h:mpn/$gmp_mparam_dir/gmp-mparam.h)
- gmp_srclinks="$gmp_srclinks gmp-mparam.h"
- gmp_mparam_source=$tmp_file
- break
- fi
-done
-if test -z "$gmp_mparam_source"; then
- AC_MSG_ERROR([no version of gmp-mparam.h found in path: $path])
-fi
-
-# For a helpful message from tune/tuneup.c.
-#
-# By default suggest the gmp-mparam.h that was selected above. If that is
-# the generic one although the first directory in $path is not generic,
-# suggest creating a new gmp-mparam.h in that first directory instead.
-#
-gmp_mparam_suggest=$gmp_mparam_source
-if test "$gmp_mparam_dir" = generic; then
- # set $i to the first element of $path
- for i in $path; do break; done
- if test "$i" != generic; then
- gmp_mparam_suggest="new file $srcdir/mpn/$i/gmp-mparam.h"
- fi
-fi
-AC_DEFINE_UNQUOTED(GMP_MPARAM_H_SUGGEST, "$gmp_mparam_suggest",
-[The gmp-mparam.h file (a string) the tune program should suggest updating.])
-
-
-# Copy any SQR_TOOM2_THRESHOLD from gmp-mparam.h to config.m4.
-# Some versions of sqr_basecase.asm use this.
-# Fat binaries do this on a per-file basis, so skip in that case.
-#
-if test -z "$fat_path"; then
- tmp_gmp_karatsuba_sqr_threshold=`sed -n 's/^#define SQR_TOOM2_THRESHOLD[ ]*\([0-9][0-9]*\).*$/\1/p' $gmp_mparam_source`
- if test -n "$tmp_gmp_karatsuba_sqr_threshold"; then
- GMP_DEFINE_RAW(["define(<SQR_TOOM2_THRESHOLD>,<$tmp_gmp_karatsuba_sqr_threshold>)"])
- fi
-fi
-
-
-# Sizes of some types, needed at preprocessing time.
-#
-# FIXME: The assumption that GMP_LIMB_BITS is 8*sizeof(mp_limb_t) might
-# be slightly rash, but it's true everywhere we know of and ought to be true
-# of any sensible system. In a generic C build, grepping LONG_BIT out of
-# <limits.h> might be an alternative, for maximum portability.
-#
-AC_CHECK_SIZEOF(void *)
-AC_CHECK_SIZEOF(unsigned short)
-AC_CHECK_SIZEOF(unsigned)
-AC_CHECK_SIZEOF(unsigned long)
-AC_CHECK_SIZEOF(mp_limb_t, , GMP_INCLUDE_GMP_H)
-if test "$ac_cv_sizeof_mp_limb_t" = 0; then
- AC_MSG_ERROR([Oops, mp_limb_t doesn't seem to work])
-fi
-AC_SUBST(GMP_LIMB_BITS, `expr 8 \* $ac_cv_sizeof_mp_limb_t`)
-GMP_DEFINE_RAW(["define(<SIZEOF_UNSIGNED>,<$ac_cv_sizeof_unsigned>)"])
-
-# Check compiler limb size matches gmp-mparam.h
-#
-# FIXME: Some of the cycle counter objects in the tune directory depend on
-# the size of ulong, it'd be possible to check that here, though a mismatch
-# probably wouldn't want to be fatal, none of the libgmp assembler code
-# depends on ulong.
-#
-mparam_bits=[`sed -n 's/^#define GMP_LIMB_BITS[ ][ ]*\([0-9]*\).*$/\1/p' $gmp_mparam_source`]
-if test -n "$mparam_bits" && test "$mparam_bits" -ne $GMP_LIMB_BITS; then
- if test "$test_CFLAGS" = set; then
- AC_MSG_ERROR([Oops, mp_limb_t is $GMP_LIMB_BITS bits, but the assembler code
-in this configuration expects $mparam_bits bits.
-You appear to have set \$CFLAGS, perhaps you also need to tell GMP the
-intended ABI, see "ABI and ISA" in the manual.])
- else
- AC_MSG_ERROR([Oops, mp_limb_t is $GMP_LIMB_BITS bits, but the assembler code
-in this configuration expects $mparam_bits bits.])
- fi
-fi
-
-GMP_DEFINE_RAW(["define(<GMP_LIMB_BITS>,$GMP_LIMB_BITS)"])
-GMP_DEFINE_RAW(["define(<GMP_NAIL_BITS>,$GMP_NAIL_BITS)"])
-GMP_DEFINE_RAW(["define(<GMP_NUMB_BITS>,eval(GMP_LIMB_BITS-GMP_NAIL_BITS))"])
-
-
-# Exclude the mpn random functions from mpbsd since that would drag in the
-# top-level rand things, all of which are unnecessary for libmp. There's
-# other unnecessary objects too actually, if we could be bothered figuring
-# out exactly which they are.
-#
-mpn_objs_in_libmp=
-for i in $mpn_objs_in_libgmp; do
- case $i in
- *random*) ;;
- *) mpn_objs_in_libmp="$mpn_objs_in_libmp $i" ;;
- esac
-done
-AC_SUBST(mpn_objs_in_libmp)
-
-AC_SUBST(mpn_objects)
-AC_SUBST(mpn_objs_in_libgmp)
-AC_SUBST(gmp_srclinks)
-
-
-# A recompiled sqr_basecase for use in the tune program, if necessary.
-TUNE_SQR_OBJ=
-test -d tune || mkdir tune
-case $sqr_basecase_source in
- *.asm)
- sqr_max=[`sed -n 's/^def...(SQR_TOOM2_THRESHOLD_MAX, *\([0-9]*\))/\1/p' $sqr_basecase_source`]
- if test -n "$sqr_max"; then
- TUNE_SQR_OBJ=sqr_asm.o
- AC_DEFINE_UNQUOTED(TUNE_SQR_TOOM2_MAX,$sqr_max,
- [Maximum size the tune program can test for SQR_TOOM2_THRESHOLD])
- fi
- cat >tune/sqr_basecase.c <<EOF
-/* not sure that an empty file can compile, so put in a dummy */
-int sqr_basecase_dummy;
-EOF
- ;;
- *.c)
- TUNE_SQR_OBJ=
- AC_DEFINE(TUNE_SQR_TOOM2_MAX,SQR_TOOM2_MAX_GENERIC)
- cat >tune/sqr_basecase.c <<EOF
-#define TUNE_PROGRAM_BUILD 1
-#define TUNE_PROGRAM_BUILD_SQR 1
-#include "mpn/sqr_basecase.c"
-EOF
- ;;
-esac
-AC_SUBST(TUNE_SQR_OBJ)
-
-
-# Configs for demos/pexpr.c.
-#
-AC_CONFIG_FILES(demos/pexpr-config.h:demos/pexpr-config-h.in)
-GMP_SUBST_CHECK_FUNCS(clock, cputime, getrusage, gettimeofday, sigaction, sigaltstack, sigstack)
-GMP_SUBST_CHECK_HEADERS(sys/resource.h)
-AC_CHECK_TYPES([stack_t], HAVE_STACK_T_01=1, HAVE_STACK_T_01=0,
- [#include <signal.h>])
-AC_SUBST(HAVE_STACK_T_01)
-
-# Configs for demos/calc directory
-#
-# AC_SUBST+AC_CONFIG_FILES is used for calc-config.h, rather than AC_DEFINE+
-# AC_CONFIG_HEADERS, since with the latter automake (1.8) will then put the
-# directory (ie. demos/calc) into $(DEFAULT_INCLUDES) for every Makefile.in,
-# which would look very strange.
-#
-# -lcurses is required by libreadline. On a typical SVR4 style system this
-# normally doesn't have to be given explicitly, since libreadline.so will
-# have a NEEDED record for it. But if someone for some reason is using only
-# a static libreadline.a then we must give -lcurses. Readline (as of
-# version 4.3) doesn't use libtool, so we can't rely on a .la to cover
-# necessary dependencies.
-#
-# On a couple of systems we've seen libreadline available, but the headers
-# not in the default include path, so check for readline/readline.h. We've
-# also seen readline/history.h missing, not sure if that's just a broken
-# install or a very old version, but check that too.
-#
-AC_CONFIG_FILES(demos/calc/calc-config.h:demos/calc/calc-config-h.in)
-LIBCURSES=
-if test $with_readline != no; then
- AC_CHECK_LIB(ncurses, tputs, [LIBCURSES=-lncurses],
- [AC_CHECK_LIB(curses, tputs, [LIBCURSES=-lcurses])])
-fi
-AC_SUBST(LIBCURSES)
-use_readline=$with_readline
-if test $with_readline = detect; then
- use_readline=no
- AC_CHECK_LIB(readline, readline,
- [AC_CHECK_HEADER(readline/readline.h,
- [AC_CHECK_HEADER(readline/history.h, use_readline=yes)])],
- , $LIBCURSES)
- AC_MSG_CHECKING(readline detected)
- AC_MSG_RESULT($use_readline)
-fi
-if test $use_readline = yes; then
- AC_SUBST(WITH_READLINE_01, 1)
- AC_SUBST(LIBREADLINE, -lreadline)
-else
- WITH_READLINE_01=0
-fi
-AC_PROG_YACC
-AM_PROG_LEX
-
-# Configs for demos/expr directory
-#
-# Libtool already runs an AC_CHECK_TOOL for ranlib, but we give
-# AC_PROG_RANLIB anyway since automake is supposed to complain if it's not
-# called. (Automake 1.8.4 doesn't, at least not when the only library is in
-# an EXTRA_LIBRARIES.)
-#
-AC_PROG_RANLIB
-
-
-# Create config.m4.
-GMP_FINISH
-
-# Create Makefiles
-# FIXME: Upcoming version of autoconf/automake may not like broken lines.
-# Right now automake isn't accepting the new AC_CONFIG_FILES scheme.
-
-AC_OUTPUT(Makefile \
- mpbsd/Makefile mpf/Makefile mpn/Makefile mpq/Makefile \
- mpz/Makefile printf/Makefile scanf/Makefile cxx/Makefile \
- tests/Makefile tests/devel/Makefile tests/mpbsd/Makefile \
- tests/mpf/Makefile tests/mpn/Makefile tests/mpq/Makefile \
- tests/mpz/Makefile tests/rand/Makefile tests/misc/Makefile \
- tests/cxx/Makefile \
- doc/Makefile tune/Makefile \
- demos/Makefile demos/calc/Makefile demos/expr/Makefile \
- gmp.h:gmp-h.in mp.h:mp-h.in)
## Process this file with automake to generate Makefile.in
-# Copyright 2001, 2002, 2003 Free Software Foundation, Inc.
+# Copyright 2001, 2002, 2003, 2012 Free Software Foundation, Inc.
#
# This file is part of the GNU MP Library.
#
endif
libcxx_la_SOURCES = \
- isfuns.cc ismpf.cc ismpq.cc ismpz.cc ismpznw.cc \
+ isfuns.cc ismpf.cc ismpq.cc ismpz.cc ismpznw.cc limits.cc \
osdoprnti.cc osfuns.cc osmpf.cc osmpq.cc osmpz.cc
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.6 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@SET_MAKE@
-# Copyright 2001, 2002, 2003 Free Software Foundation, Inc.
+# Copyright 2001, 2002, 2003, 2012 Free Software Foundation, Inc.
#
# This file is part of the GNU MP Library.
#
# along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
VPATH = @srcdir@
+am__make_dryrun = \
+ { \
+ am__dry=no; \
+ case $$MAKEFLAGS in \
+ *\\[\ \ ]*) \
+ echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \
+ | grep '^AM OK$$' >/dev/null || am__dry=yes;; \
+ *) \
+ for am__flg in $$MAKEFLAGS; do \
+ case $$am__flg in \
+ *=*|--*) ;; \
+ *n*) am__dry=yes; break;; \
+ esac; \
+ done;; \
+ esac; \
+ test $$am__dry = yes; \
+ }
pkgdatadir = $(datadir)/@PACKAGE@
pkgincludedir = $(includedir)/@PACKAGE@
pkglibdir = $(libdir)/@PACKAGE@
POST_UNINSTALL = :
build_triplet = @build@
host_triplet = @host@
-ANSI2KNR = $(top_builddir)/ansi2knr
subdir = cxx
DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
- $(top_srcdir)/configure.in
+ $(top_srcdir)/configure.ac
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
$(ACLOCAL_M4)
mkinstalldirs = $(install_sh) -d
LTLIBRARIES = $(noinst_LTLIBRARIES)
libcxx_la_LIBADD =
am_libcxx_la_OBJECTS = isfuns.lo ismpf.lo ismpq.lo ismpz.lo ismpznw.lo \
- osdoprnti.lo osfuns.lo osmpf.lo osmpq.lo osmpz.lo
+ limits.lo osdoprnti.lo osfuns.lo osmpf.lo osmpq.lo osmpz.lo
libcxx_la_OBJECTS = $(am_libcxx_la_OBJECTS)
@WANT_CXX_TRUE@am_libcxx_la_rpath =
DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
$(LDFLAGS) -o $@
SOURCES = $(libcxx_la_SOURCES)
DIST_SOURCES = $(libcxx_la_SOURCES)
+am__can_run_installinfo = \
+ case $$AM_UPDATE_INFO_DIR in \
+ n|no|NO) false;; \
+ *) (install-info --version) >/dev/null 2>&1;; \
+ esac
ETAGS = etags
CTAGS = ctags
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
SPEED_CYCLECOUNTER_OBJ = @SPEED_CYCLECOUNTER_OBJ@
STRIP = @STRIP@
TAL_OBJECT = @TAL_OBJECT@
+TUNE_LIBS = @TUNE_LIBS@
TUNE_SQR_OBJ = @TUNE_SQR_OBJ@
-U = @U@
U_FOR_BUILD = @U_FOR_BUILD@
VERSION = @VERSION@
WITH_READLINE_01 = @WITH_READLINE_01@
mkdir_p = @mkdir_p@
mpn_objects = @mpn_objects@
mpn_objs_in_libgmp = @mpn_objs_in_libgmp@
-mpn_objs_in_libmp = @mpn_objs_in_libmp@
oldincludedir = @oldincludedir@
pdfdir = @pdfdir@
prefix = @prefix@
INCLUDES = -D__GMP_WITHIN_GMPXX -I$(top_srcdir)
@WANT_CXX_TRUE@noinst_LTLIBRARIES = libcxx.la
libcxx_la_SOURCES = \
- isfuns.cc ismpf.cc ismpq.cc ismpz.cc ismpznw.cc \
+ isfuns.cc ismpf.cc ismpq.cc ismpz.cc ismpznw.cc limits.cc \
osdoprnti.cc osfuns.cc osmpf.cc osmpq.cc osmpz.cc
all: all-am
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-libcxx.la: $(libcxx_la_OBJECTS) $(libcxx_la_DEPENDENCIES)
+libcxx.la: $(libcxx_la_OBJECTS) $(libcxx_la_DEPENDENCIES) $(EXTRA_libcxx_la_DEPENDENCIES)
$(CXXLINK) $(am_libcxx_la_rpath) $(libcxx_la_OBJECTS) $(libcxx_la_LIBADD) $(LIBS)
mostlyclean-compile:
distclean-compile:
-rm -f *.tab.c
-$(top_builddir)/ansi2knr:
- $(am__cd) $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) ./ansi2knr
-
-mostlyclean-kr:
- -test "$U" = "" || rm -f *_.c
.cc.o:
$(CXXCOMPILE) -c -o $@ $<
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
mostlyclean: mostlyclean-am
-mostlyclean-am: mostlyclean-compile mostlyclean-generic mostlyclean-kr \
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
mostlyclean-libtool
pdf: pdf-am
uninstall-am:
-.MAKE: $(top_builddir)/ansi2knr install-am install-strip
+.MAKE: install-am install-strip
.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
clean-libtool clean-noinstLTLIBRARIES ctags distclean \
install-pdf install-pdf-am install-ps install-ps-am \
install-strip installcheck installcheck-am installdirs \
maintainer-clean maintainer-clean-generic mostlyclean \
- mostlyclean-compile mostlyclean-generic mostlyclean-kr \
- mostlyclean-libtool pdf pdf-am ps ps-am tags uninstall \
- uninstall-am
+ mostlyclean-compile mostlyclean-generic mostlyclean-libtool \
+ pdf pdf-am ps ps-am tags uninstall uninstall-am
# Tell versions [3.59,3.63) of GNU make to not export all variables.
if (i.good()) // last character read was non-numeric
i.putback(c);
else if (i.eof() && ok) // stopped just before eof
- i.clear();
+ i.clear(ios::eofbit);
if (ok)
ASSERT_NOCARRY (mpf_set_str(f, s.c_str(), base)); // extract the number
if (i.good())
i.putback(c);
else if (i.eof())
- i.clear();
+ i.clear(ios::eofbit);
}
return i;
if (i.good()) // last character read was non-numeric
i.putback(c);
else if (i.eof() && (ok || zero)) // stopped just before eof
- i.clear();
+ i.clear(ios::eofbit);
if (ok)
ASSERT_NOCARRY (mpz_set_str (z, s.c_str(), base)); // extract the number
--- /dev/null
+/* Static data member definitions for the numeric_limits specializations.
+
+Copyright 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include "gmpxx.h"
+
+namespace std { /* GMPXX_INSTANTIATE_LIMITS(T) expands to one namespace-scope
+   definition for each static const data member of numeric_limits<T> named
+   in the list below (is_specialized through round_style).  The final
+   entry deliberately has no trailing semicolon; every use of the macro
+   supplies it.  NOTE(review): presumably the members are declared, with
+   their values, in the numeric_limits specializations in gmpxx.h --
+   confirm there before adding or removing an entry here.  */
+#define GMPXX_INSTANTIATE_LIMITS(T) \
+ const bool numeric_limits<T>::is_specialized; \
+ const int numeric_limits<T>::digits; \
+ const int numeric_limits<T>::digits10; \
+ const int numeric_limits<T>::max_digits10; \
+ const bool numeric_limits<T>::is_signed; \
+ const bool numeric_limits<T>::is_integer; \
+ const bool numeric_limits<T>::is_exact; \
+ const int numeric_limits<T>::radix; \
+ const int numeric_limits<T>::min_exponent; \
+ const int numeric_limits<T>::min_exponent10; \
+ const int numeric_limits<T>::max_exponent; \
+ const int numeric_limits<T>::max_exponent10; \
+ const bool numeric_limits<T>::has_infinity; \
+ const bool numeric_limits<T>::has_quiet_NaN; \
+ const bool numeric_limits<T>::has_signaling_NaN; \
+ const float_denorm_style numeric_limits<T>::has_denorm; \
+ const bool numeric_limits<T>::has_denorm_loss; \
+ const bool numeric_limits<T>::is_iec559; \
+ const bool numeric_limits<T>::is_bounded; \
+ const bool numeric_limits<T>::is_modulo; \
+ const bool numeric_limits<T>::traps; \
+ const bool numeric_limits<T>::tinyness_before; \
+ const float_round_style numeric_limits<T>::round_style
+
+ GMPXX_INSTANTIATE_LIMITS(mpz_class); /* the ';' ends the last definition */
+ GMPXX_INSTANTIATE_LIMITS(mpq_class);
+ GMPXX_INSTANTIATE_LIMITS(mpf_class);
+} /* namespace std */
along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include <iostream>
-#include <cstdarg> /* for va_list and hence doprnt_funs_t */
-#include <cstring> /* for strlen */
+#include <stdarg.h> /* for va_list and hence doprnt_funs_t */
+#include <string.h> /* for strlen */
#include "gmp.h"
#include "gmp-impl.h"
## Process this file with automake to generate Makefile.in
-# Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
+# Copyright 2000, 2001, 2002, 2012 Free Software Foundation, Inc.
#
# This file is part of the GNU MP Library.
#
SUBDIRS = calc expr
-EXTRA_DIST = perl
+EXTRA_DIST = perl primes.h
INCLUDES = -I$(top_srcdir)
LDADD = $(top_builddir)/libgmp.la
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.6 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@SET_MAKE@
-# Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
+# Copyright 2000, 2001, 2002, 2012 Free Software Foundation, Inc.
#
# This file is part of the GNU MP Library.
#
# You should have received a copy of the GNU Lesser General Public License
# along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
VPATH = @srcdir@
+am__make_dryrun = \
+ { \
+ am__dry=no; \
+ case $$MAKEFLAGS in \
+ *\\[\ \ ]*) \
+ echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \
+ | grep '^AM OK$$' >/dev/null || am__dry=yes;; \
+ *) \
+ for am__flg in $$MAKEFLAGS; do \
+ case $$am__flg in \
+ *=*|--*) ;; \
+ *n*) am__dry=yes; break;; \
+ esac; \
+ done;; \
+ esac; \
+ test $$am__dry = yes; \
+ }
pkgdatadir = $(datadir)/@PACKAGE@
pkgincludedir = $(includedir)/@PACKAGE@
pkglibdir = $(libdir)/@PACKAGE@
POST_UNINSTALL = :
build_triplet = @build@
host_triplet = @host@
-ANSI2KNR = $(top_builddir)/ansi2knr
EXTRA_PROGRAMS = factorize$(EXEEXT) isprime$(EXEEXT) pexpr$(EXEEXT) \
primes$(EXEEXT) qcn$(EXEEXT)
subdir = demos
$(srcdir)/pexpr-config-h.in
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
- $(top_srcdir)/configure.in
+ $(top_srcdir)/configure.ac
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
$(ACLOCAL_M4)
mkinstalldirs = $(install_sh) -d
CONFIG_CLEAN_FILES = pexpr-config.h
CONFIG_CLEAN_VPATH_FILES =
factorize_SOURCES = factorize.c
-factorize_OBJECTS = factorize$U.$(OBJEXT)
+factorize_OBJECTS = factorize.$(OBJEXT)
factorize_LDADD = $(LDADD)
factorize_DEPENDENCIES = $(top_builddir)/libgmp.la
isprime_SOURCES = isprime.c
-isprime_OBJECTS = isprime$U.$(OBJEXT)
+isprime_OBJECTS = isprime.$(OBJEXT)
isprime_LDADD = $(LDADD)
isprime_DEPENDENCIES = $(top_builddir)/libgmp.la
pexpr_SOURCES = pexpr.c
-pexpr_OBJECTS = pexpr$U.$(OBJEXT)
+pexpr_OBJECTS = pexpr.$(OBJEXT)
pexpr_LDADD = $(LDADD)
pexpr_DEPENDENCIES = $(top_builddir)/libgmp.la
primes_SOURCES = primes.c
-primes_OBJECTS = primes$U.$(OBJEXT)
+primes_OBJECTS = primes.$(OBJEXT)
am__DEPENDENCIES_1 =
primes_DEPENDENCIES = $(LDADD) $(am__DEPENDENCIES_1)
qcn_SOURCES = qcn.c
-qcn_OBJECTS = qcn$U.$(OBJEXT)
+qcn_OBJECTS = qcn.$(OBJEXT)
qcn_DEPENDENCIES = $(LDADD) $(am__DEPENDENCIES_1)
DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
depcomp =
install-pdf-recursive install-ps-recursive install-recursive \
installcheck-recursive installdirs-recursive pdf-recursive \
ps-recursive uninstall-recursive
+am__can_run_installinfo = \
+ case $$AM_UPDATE_INFO_DIR in \
+ n|no|NO) false;; \
+ *) (install-info --version) >/dev/null 2>&1;; \
+ esac
RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \
distclean-recursive maintainer-clean-recursive
AM_RECURSIVE_TARGETS = $(RECURSIVE_TARGETS:-recursive=) \
SPEED_CYCLECOUNTER_OBJ = @SPEED_CYCLECOUNTER_OBJ@
STRIP = @STRIP@
TAL_OBJECT = @TAL_OBJECT@
+TUNE_LIBS = @TUNE_LIBS@
TUNE_SQR_OBJ = @TUNE_SQR_OBJ@
-U = @U@
U_FOR_BUILD = @U_FOR_BUILD@
VERSION = @VERSION@
WITH_READLINE_01 = @WITH_READLINE_01@
mkdir_p = @mkdir_p@
mpn_objects = @mpn_objects@
mpn_objs_in_libgmp = @mpn_objs_in_libgmp@
-mpn_objs_in_libmp = @mpn_objs_in_libmp@
oldincludedir = @oldincludedir@
pdfdir = @pdfdir@
prefix = @prefix@
top_builddir = @top_builddir@
top_srcdir = @top_srcdir@
SUBDIRS = calc expr
-EXTRA_DIST = perl
+EXTRA_DIST = perl primes.h
INCLUDES = -I$(top_srcdir)
LDADD = $(top_builddir)/libgmp.la
qcn_LDADD = $(LDADD) $(LIBM)
$(am__aclocal_m4_deps):
pexpr-config.h: $(top_builddir)/config.status $(srcdir)/pexpr-config-h.in
cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@
-factorize$(EXEEXT): $(factorize_OBJECTS) $(factorize_DEPENDENCIES)
+factorize$(EXEEXT): $(factorize_OBJECTS) $(factorize_DEPENDENCIES) $(EXTRA_factorize_DEPENDENCIES)
@rm -f factorize$(EXEEXT)
$(LINK) $(factorize_OBJECTS) $(factorize_LDADD) $(LIBS)
-isprime$(EXEEXT): $(isprime_OBJECTS) $(isprime_DEPENDENCIES)
+isprime$(EXEEXT): $(isprime_OBJECTS) $(isprime_DEPENDENCIES) $(EXTRA_isprime_DEPENDENCIES)
@rm -f isprime$(EXEEXT)
$(LINK) $(isprime_OBJECTS) $(isprime_LDADD) $(LIBS)
-pexpr$(EXEEXT): $(pexpr_OBJECTS) $(pexpr_DEPENDENCIES)
+pexpr$(EXEEXT): $(pexpr_OBJECTS) $(pexpr_DEPENDENCIES) $(EXTRA_pexpr_DEPENDENCIES)
@rm -f pexpr$(EXEEXT)
$(LINK) $(pexpr_OBJECTS) $(pexpr_LDADD) $(LIBS)
-primes$(EXEEXT): $(primes_OBJECTS) $(primes_DEPENDENCIES)
+primes$(EXEEXT): $(primes_OBJECTS) $(primes_DEPENDENCIES) $(EXTRA_primes_DEPENDENCIES)
@rm -f primes$(EXEEXT)
$(LINK) $(primes_OBJECTS) $(primes_LDADD) $(LIBS)
-qcn$(EXEEXT): $(qcn_OBJECTS) $(qcn_DEPENDENCIES)
+qcn$(EXEEXT): $(qcn_OBJECTS) $(qcn_DEPENDENCIES) $(EXTRA_qcn_DEPENDENCIES)
@rm -f qcn$(EXEEXT)
$(LINK) $(qcn_OBJECTS) $(qcn_LDADD) $(LIBS)
distclean-compile:
-rm -f *.tab.c
-$(top_builddir)/ansi2knr:
- $(am__cd) $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) ./ansi2knr
-
-mostlyclean-kr:
- -test "$U" = "" || rm -f *_.c
.c.o:
$(COMPILE) -c $<
.c.lo:
$(LTCOMPILE) -c -o $@ $<
-factorize_.c: factorize.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/factorize.c; then echo $(srcdir)/factorize.c; else echo factorize.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-isprime_.c: isprime.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/isprime.c; then echo $(srcdir)/isprime.c; else echo isprime.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-pexpr_.c: pexpr.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/pexpr.c; then echo $(srcdir)/pexpr.c; else echo pexpr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-primes_.c: primes.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/primes.c; then echo $(srcdir)/primes.c; else echo primes.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-qcn_.c: qcn.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/qcn.c; then echo $(srcdir)/qcn.c; else echo qcn.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-factorize_.$(OBJEXT) factorize_.lo isprime_.$(OBJEXT) isprime_.lo \
-pexpr_.$(OBJEXT) pexpr_.lo primes_.$(OBJEXT) primes_.lo qcn_.$(OBJEXT) \
-qcn_.lo : $(ANSI2KNR)
mostlyclean-libtool:
-rm -f *.lo
done
@list='$(DIST_SUBDIRS)'; for subdir in $$list; do \
if test "$$subdir" = .; then :; else \
- test -d "$(distdir)/$$subdir" \
- || $(MKDIR_P) "$(distdir)/$$subdir" \
- || exit 1; \
- fi; \
- done
- @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \
- if test "$$subdir" = .; then :; else \
+ $(am__make_dryrun) \
+ || test -d "$(distdir)/$$subdir" \
+ || $(MKDIR_P) "$(distdir)/$$subdir" \
+ || exit 1; \
dir1=$$subdir; dir2="$(distdir)/$$subdir"; \
$(am__relativize); \
new_distdir=$$reldir; \
installcheck: installcheck-recursive
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
mostlyclean: mostlyclean-recursive
-mostlyclean-am: mostlyclean-compile mostlyclean-generic mostlyclean-kr \
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
mostlyclean-libtool
pdf: pdf-recursive
uninstall-am:
-.MAKE: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) \
- $(top_builddir)/ansi2knr ctags-recursive install-am \
- install-strip tags-recursive
+.MAKE: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) ctags-recursive \
+ install-am install-strip tags-recursive
.PHONY: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) CTAGS GTAGS \
all all-am check check-am clean clean-generic clean-libtool \
install-pdf-am install-ps install-ps-am install-strip \
installcheck installcheck-am installdirs installdirs-am \
maintainer-clean maintainer-clean-generic mostlyclean \
- mostlyclean-compile mostlyclean-generic mostlyclean-kr \
- mostlyclean-libtool pdf pdf-am ps ps-am tags tags-recursive \
- uninstall uninstall-am
+ mostlyclean-compile mostlyclean-generic mostlyclean-libtool \
+ pdf pdf-am ps ps-am tags tags-recursive uninstall uninstall-am
allprogs: $(EXTRA_PROGRAMS)
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.6 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
# You should have received a copy of the GNU Lesser General Public License
# along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
VPATH = @srcdir@
+am__make_dryrun = \
+ { \
+ am__dry=no; \
+ case $$MAKEFLAGS in \
+ *\\[\ \ ]*) \
+ echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \
+ | grep '^AM OK$$' >/dev/null || am__dry=yes;; \
+ *) \
+ for am__flg in $$MAKEFLAGS; do \
+ case $$am__flg in \
+ *=*|--*) ;; \
+ *n*) am__dry=yes; break;; \
+ esac; \
+ done;; \
+ esac; \
+ test $$am__dry = yes; \
+ }
pkgdatadir = $(datadir)/@PACKAGE@
pkgincludedir = $(includedir)/@PACKAGE@
pkglibdir = $(libdir)/@PACKAGE@
POST_UNINSTALL = :
build_triplet = @build@
host_triplet = @host@
-ANSI2KNR = $(top_builddir)/ansi2knr
EXTRA_PROGRAMS = calc$(EXEEXT)
subdir = demos/calc
DIST_COMMON = README $(srcdir)/Makefile.am $(srcdir)/Makefile.in \
$(srcdir)/calc-config-h.in calc.c calc.h calclex.c
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
- $(top_srcdir)/configure.in
+ $(top_srcdir)/configure.ac
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
$(ACLOCAL_M4)
mkinstalldirs = $(install_sh) -d
CONFIG_HEADER = $(top_builddir)/config.h
CONFIG_CLEAN_FILES = calc-config.h
CONFIG_CLEAN_VPATH_FILES =
-am_calc_OBJECTS = calc$U.$(OBJEXT) calclex$U.$(OBJEXT) \
- calcread$U.$(OBJEXT)
+am_calc_OBJECTS = calc.$(OBJEXT) calclex.$(OBJEXT) calcread.$(OBJEXT)
calc_OBJECTS = $(am_calc_OBJECTS)
calc_LDADD = $(LDADD)
am__DEPENDENCIES_1 =
--mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \
$(LDFLAGS) -o $@
@MAINTAINER_MODE_FALSE@am__skiplex = test -f $@ ||
-LEXCOMPILE = $(LEX) $(LFLAGS) $(AM_LFLAGS)
+LEXCOMPILE = $(LEX) $(AM_LFLAGS) $(LFLAGS)
LTLEXCOMPILE = $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
- --mode=compile $(LEX) $(LFLAGS) $(AM_LFLAGS)
+ --mode=compile $(LEX) $(AM_LFLAGS) $(LFLAGS)
YLWRAP = $(top_srcdir)/ylwrap
@MAINTAINER_MODE_FALSE@am__skipyacc = test -f $@ ||
-YACCCOMPILE = $(YACC) $(YFLAGS) $(AM_YFLAGS)
+YACCCOMPILE = $(YACC) $(AM_YFLAGS) $(YFLAGS)
LTYACCCOMPILE = $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
- --mode=compile $(YACC) $(YFLAGS) $(AM_YFLAGS)
+ --mode=compile $(YACC) $(AM_YFLAGS) $(YFLAGS)
SOURCES = $(calc_SOURCES)
DIST_SOURCES = $(calc_SOURCES)
+am__can_run_installinfo = \
+ case $$AM_UPDATE_INFO_DIR in \
+ n|no|NO) false;; \
+ *) (install-info --version) >/dev/null 2>&1;; \
+ esac
ETAGS = etags
CTAGS = ctags
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
SPEED_CYCLECOUNTER_OBJ = @SPEED_CYCLECOUNTER_OBJ@
STRIP = @STRIP@
TAL_OBJECT = @TAL_OBJECT@
+TUNE_LIBS = @TUNE_LIBS@
TUNE_SQR_OBJ = @TUNE_SQR_OBJ@
-U = @U@
U_FOR_BUILD = @U_FOR_BUILD@
VERSION = @VERSION@
WITH_READLINE_01 = @WITH_READLINE_01@
mkdir_p = @mkdir_p@
mpn_objects = @mpn_objects@
mpn_objs_in_libgmp = @mpn_objs_in_libgmp@
-mpn_objs_in_libmp = @mpn_objs_in_libmp@
oldincludedir = @oldincludedir@
pdfdir = @pdfdir@
prefix = @prefix@
calc-config.h: $(top_builddir)/config.status $(srcdir)/calc-config-h.in
cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@
calc.h: calc.c
- @if test ! -f $@; then \
- rm -f calc.c; \
- $(MAKE) $(AM_MAKEFLAGS) calc.c; \
- else :; fi
-calc$(EXEEXT): $(calc_OBJECTS) $(calc_DEPENDENCIES)
+ @if test ! -f $@; then rm -f calc.c; else :; fi
+ @if test ! -f $@; then $(MAKE) $(AM_MAKEFLAGS) calc.c; else :; fi
+calc$(EXEEXT): $(calc_OBJECTS) $(calc_DEPENDENCIES) $(EXTRA_calc_DEPENDENCIES)
@rm -f calc$(EXEEXT)
$(LINK) $(calc_OBJECTS) $(calc_LDADD) $(LIBS)
distclean-compile:
-rm -f *.tab.c
-$(top_builddir)/ansi2knr:
- $(am__cd) $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) ./ansi2knr
-
-mostlyclean-kr:
- -test "$U" = "" || rm -f *_.c
.c.o:
$(COMPILE) -c $<
.c.lo:
$(LTCOMPILE) -c -o $@ $<
-calc_.c: calc.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/calc.c; then echo $(srcdir)/calc.c; else echo calc.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-calclex_.c: calclex.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/calclex.c; then echo $(srcdir)/calclex.c; else echo calclex.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-calcread_.c: calcread.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/calcread.c; then echo $(srcdir)/calcread.c; else echo calcread.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-calc_.$(OBJEXT) calc_.lo calclex_.$(OBJEXT) calclex_.lo \
-calcread_.$(OBJEXT) calcread_.lo : $(ANSI2KNR)
.l.c:
$(am__skiplex) $(SHELL) $(YLWRAP) $< $(LEX_OUTPUT_ROOT).c $@ -- $(LEXCOMPILE)
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
mostlyclean: mostlyclean-am
-mostlyclean-am: mostlyclean-compile mostlyclean-generic mostlyclean-kr \
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
mostlyclean-libtool
pdf: pdf-am
uninstall-am:
-.MAKE: $(top_builddir)/ansi2knr all check install install-am \
- install-strip
+.MAKE: all check install install-am install-strip
.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
clean-libtool ctags distclean distclean-compile \
install-pdf-am install-ps install-ps-am install-strip \
installcheck installcheck-am installdirs maintainer-clean \
maintainer-clean-generic mostlyclean mostlyclean-compile \
- mostlyclean-generic mostlyclean-kr mostlyclean-libtool pdf \
- pdf-am ps ps-am tags uninstall uninstall-am
+ mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
+ tags uninstall uninstall-am
allprogs: $(EXTRA_PROGRAMS)
-/* A Bison parser, made by GNU Bison 2.5. */
+/* A Bison parser, made by GNU Bison 2.7.12-4996. */
/* Bison implementation for Yacc-like parsers in C
- Copyright (C) 1984, 1989-1990, 2000-2011 Free Software Foundation, Inc.
+ Copyright (C) 1984, 1989-1990, 2000-2013 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
#define YYBISON 1
/* Bison version. */
-#define YYBISON_VERSION "2.5"
+#define YYBISON_VERSION "2.7.12-4996"
/* Skeleton name. */
#define YYSKELETON_NAME "yacc.c"
/* Pull parsers. */
#define YYPULL 1
-/* Using locations. */
-#define YYLSP_NEEDED 0
/* Copy the first part of user declarations. */
-
-/* Line 268 of yacc.c */
+/* Line 371 of yacc.c */
#line 1 "calc.y"
/* A simple integer desk calculator using yacc and gmp.
}
+/* Line 371 of yacc.c */
+#line 209 "calc.c"
-/* Line 268 of yacc.c */
-#line 213 "calc.c"
-
-/* Enabling traces. */
-#ifndef YYDEBUG
-# define YYDEBUG 0
-#endif
+# ifndef YY_NULL
+# if defined __cplusplus && 201103L <= __cplusplus
+# define YY_NULL nullptr
+# else
+# define YY_NULL 0
+# endif
+# endif
/* Enabling verbose error messages. */
#ifdef YYERROR_VERBOSE
# define YYERROR_VERBOSE 0
#endif
-/* Enabling the token table. */
-#ifndef YYTOKEN_TABLE
-# define YYTOKEN_TABLE 0
+/* In a future release of Bison, this section will be replaced
+ by #include "y.tab.h". */
+#ifndef YY_YY_Y_TAB_H_INCLUDED
+# define YY_YY_Y_TAB_H_INCLUDED
+/* Enabling traces. */
+#ifndef YYDEBUG
+# define YYDEBUG 0
+#endif
+#if YYDEBUG
+extern int yydebug;
#endif
-
/* Tokens. */
#ifndef YYTOKENTYPE
-
#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
typedef union YYSTYPE
{
-
-/* Line 293 of yacc.c */
+/* Line 387 of yacc.c */
#line 142 "calc.y"
char *str;
int var;
-
-/* Line 293 of yacc.c */
-#line 316 "calc.c"
+/* Line 387 of yacc.c */
+#line 318 "calc.c"
} YYSTYPE;
# define YYSTYPE_IS_TRIVIAL 1
# define yystype YYSTYPE /* obsolescent; will be withdrawn */
# define YYSTYPE_IS_DECLARED 1
#endif
+extern YYSTYPE yylval;
-/* Copy the second part of user declarations. */
+#ifdef YYPARSE_PARAM
+#if defined __STDC__ || defined __cplusplus
+int yyparse (void *YYPARSE_PARAM);
+#else
+int yyparse ();
+#endif
+#else /* ! YYPARSE_PARAM */
+#if defined __STDC__ || defined __cplusplus
+int yyparse (void);
+#else
+int yyparse ();
+#endif
+#endif /* ! YYPARSE_PARAM */
+
+#endif /* !YY_YY_Y_TAB_H_INCLUDED */
+/* Copy the second part of user declarations. */
-/* Line 343 of yacc.c */
-#line 328 "calc.c"
+/* Line 390 of yacc.c */
+#line 346 "calc.c"
#ifdef short
# undef short
# if defined YYENABLE_NLS && YYENABLE_NLS
# if ENABLE_NLS
# include <libintl.h> /* INFRINGES ON USER NAME SPACE */
-# define YY_(msgid) dgettext ("bison-runtime", msgid)
+# define YY_(Msgid) dgettext ("bison-runtime", Msgid)
# endif
# endif
# ifndef YY_
-# define YY_(msgid) msgid
+# define YY_(Msgid) Msgid
+# endif
+#endif
+
+#ifndef __attribute__
+/* This feature is available in gcc versions 2.5 and later. */
+# if (! defined __GNUC__ || __GNUC__ < 2 \
+ || (__GNUC__ == 2 && __GNUC_MINOR__ < 5))
+# define __attribute__(Spec) /* empty */
# endif
#endif
/* Suppress unused-variable warnings by "using" E. */
#if ! defined lint || defined __GNUC__
-# define YYUSE(e) ((void) (e))
+# define YYUSE(E) ((void) (E))
#else
-# define YYUSE(e) /* empty */
+# define YYUSE(E) /* empty */
#endif
+
/* Identity function, used to suppress warnings about constant conditions. */
#ifndef lint
-# define YYID(n) (n)
+# define YYID(N) (N)
#else
#if (defined __STDC__ || defined __C99__FUNC__ \
|| defined __cplusplus || defined _MSC_VER)
# if ! defined _ALLOCA_H && ! defined EXIT_SUCCESS && (defined __STDC__ || defined __C99__FUNC__ \
|| defined __cplusplus || defined _MSC_VER)
# include <stdlib.h> /* INFRINGES ON USER NAME SPACE */
+ /* Use EXIT_SUCCESS as a witness for stdlib.h. */
# ifndef EXIT_SUCCESS
# define EXIT_SUCCESS 0
# endif
#endif
#if defined YYCOPY_NEEDED && YYCOPY_NEEDED
-/* Copy COUNT objects from FROM to TO. The source and destination do
+/* Copy COUNT objects from SRC to DST. The source and destination do
not overlap. */
# ifndef YYCOPY
# if defined __GNUC__ && 1 < __GNUC__
-# define YYCOPY(To, From, Count) \
- __builtin_memcpy (To, From, (Count) * sizeof (*(From)))
+# define YYCOPY(Dst, Src, Count) \
+ __builtin_memcpy (Dst, Src, (Count) * sizeof (*(Src)))
# else
-# define YYCOPY(To, From, Count) \
- do \
- { \
- YYSIZE_T yyi; \
- for (yyi = 0; yyi < (Count); yyi++) \
- (To)[yyi] = (From)[yyi]; \
- } \
+# define YYCOPY(Dst, Src, Count) \
+ do \
+ { \
+ YYSIZE_T yyi; \
+ for (yyi = 0; yyi < (Count); yyi++) \
+ (Dst)[yyi] = (Src)[yyi]; \
+ } \
while (YYID (0))
# endif
# endif
};
#endif
-#if YYDEBUG || YYERROR_VERBOSE || YYTOKEN_TABLE
+#if YYDEBUG || YYERROR_VERBOSE || 0
/* YYTNAME[SYMBOL-NUM] -- String name of the symbol SYMBOL-NUM.
First, the terminals, then, starting at YYNTOKENS, nonterminals. */
static const char *const yytname[] =
"POWM", "ROOT", "SQRT", "NUMBER", "VARIABLE", "LOR", "LAND", "'<'",
"'>'", "GE", "LE", "NE", "EQ", "RSHIFT", "LSHIFT", "'+'", "'-'", "'*'",
"'/'", "'%'", "UMINUS", "'^'", "'!'", "'='", "'('", "')'", "','",
- "$accept", "top", "statements", "statement", "e", "gcdlist", "lcmlist", 0
+ "$accept", "top", "statements", "statement", "e", "gcdlist", "lcmlist", YY_NULL
};
#endif
0, 59, 60
};
-#define yypact_value_is_default(yystate) \
- ((yystate) == (-39))
+#define yypact_value_is_default(Yystate) \
+ (!!((Yystate) == (-39)))
-#define yytable_value_is_error(yytable_value) \
- ((yytable_value) == (-8))
+#define yytable_value_is_error(Yytable_value) \
+ (!!((Yytable_value) == (-8)))
static const yytype_int8 yycheck[] =
{
#define YYRECOVERING() (!!yyerrstatus)
-#define YYBACKUP(Token, Value) \
-do \
- if (yychar == YYEMPTY && yylen == 1) \
- { \
- yychar = (Token); \
- yylval = (Value); \
- YYPOPSTACK (1); \
- goto yybackup; \
- } \
- else \
- { \
+#define YYBACKUP(Token, Value) \
+do \
+ if (yychar == YYEMPTY) \
+ { \
+ yychar = (Token); \
+ yylval = (Value); \
+ YYPOPSTACK (yylen); \
+ yystate = *yyssp; \
+ goto yybackup; \
+ } \
+ else \
+ { \
yyerror (YY_("syntax error: cannot back up")); \
YYERROR; \
} \
while (YYID (0))
-
+/* Error token number */
#define YYTERROR 1
#define YYERRCODE 256
-/* YYLLOC_DEFAULT -- Set CURRENT to span from RHS[1] to RHS[N].
- If N is 0, then set CURRENT to the empty location which ends
- the previous symbol: RHS[0] (always defined). */
-
-#define YYRHSLOC(Rhs, K) ((Rhs)[K])
-#ifndef YYLLOC_DEFAULT
-# define YYLLOC_DEFAULT(Current, Rhs, N) \
- do \
- if (YYID (N)) \
- { \
- (Current).first_line = YYRHSLOC (Rhs, 1).first_line; \
- (Current).first_column = YYRHSLOC (Rhs, 1).first_column; \
- (Current).last_line = YYRHSLOC (Rhs, N).last_line; \
- (Current).last_column = YYRHSLOC (Rhs, N).last_column; \
- } \
- else \
- { \
- (Current).first_line = (Current).last_line = \
- YYRHSLOC (Rhs, 0).last_line; \
- (Current).first_column = (Current).last_column = \
- YYRHSLOC (Rhs, 0).last_column; \
- } \
- while (YYID (0))
-#endif
-
-
/* This macro is provided for backward compatibility. */
-
#ifndef YY_LOCATION_PRINT
# define YY_LOCATION_PRINT(File, Loc) ((void) 0)
#endif
/* YYLEX -- calling `yylex' with the right arguments. */
-
#ifdef YYLEX_PARAM
# define YYLEX yylex (YYLEX_PARAM)
#else
YYSTYPE const * const yyvaluep;
#endif
{
+ FILE *yyo = yyoutput;
+ YYUSE (yyo);
if (!yyvaluep)
return;
# ifdef YYPRINT
# else
YYUSE (yyoutput);
# endif
- switch (yytype)
- {
- default:
- break;
- }
+ YYUSE (yytype);
}
yysyntax_error (YYSIZE_T *yymsg_alloc, char **yymsg,
yytype_int16 *yyssp, int yytoken)
{
- YYSIZE_T yysize0 = yytnamerr (0, yytname[yytoken]);
+ YYSIZE_T yysize0 = yytnamerr (YY_NULL, yytname[yytoken]);
YYSIZE_T yysize = yysize0;
- YYSIZE_T yysize1;
enum { YYERROR_VERBOSE_ARGS_MAXIMUM = 5 };
/* Internationalized format string. */
- const char *yyformat = 0;
+ const char *yyformat = YY_NULL;
/* Arguments of yyformat. */
char const *yyarg[YYERROR_VERBOSE_ARGS_MAXIMUM];
/* Number of reported tokens (one for the "unexpected", one per
break;
}
yyarg[yycount++] = yytname[yyx];
- yysize1 = yysize + yytnamerr (0, yytname[yyx]);
- if (! (yysize <= yysize1
- && yysize1 <= YYSTACK_ALLOC_MAXIMUM))
- return 2;
- yysize = yysize1;
+ {
+ YYSIZE_T yysize1 = yysize + yytnamerr (YY_NULL, yytname[yyx]);
+ if (! (yysize <= yysize1
+ && yysize1 <= YYSTACK_ALLOC_MAXIMUM))
+ return 2;
+ yysize = yysize1;
+ }
}
}
}
# undef YYCASE_
}
- yysize1 = yysize + yystrlen (yyformat);
- if (! (yysize <= yysize1 && yysize1 <= YYSTACK_ALLOC_MAXIMUM))
- return 2;
- yysize = yysize1;
+ {
+ YYSIZE_T yysize1 = yysize + yystrlen (yyformat);
+ if (! (yysize <= yysize1 && yysize1 <= YYSTACK_ALLOC_MAXIMUM))
+ return 2;
+ yysize = yysize1;
+ }
if (*yymsg_alloc < yysize)
{
yymsg = "Deleting";
YY_SYMBOL_PRINT (yymsg, yytype, yyvaluep, yylocationp);
- switch (yytype)
- {
-
- default:
- break;
- }
+ YYUSE (yytype);
}
-/* Prevent warnings from -Wmissing-prototypes. */
-#ifdef YYPARSE_PARAM
-#if defined __STDC__ || defined __cplusplus
-int yyparse (void *YYPARSE_PARAM);
-#else
-int yyparse ();
-#endif
-#else /* ! YYPARSE_PARAM */
-#if defined __STDC__ || defined __cplusplus
-int yyparse (void);
-#else
-int yyparse ();
-#endif
-#endif /* ! YYPARSE_PARAM */
/* The lookahead symbol. */
int yychar;
+
+#ifndef YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN
+# define YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN
+# define YY_IGNORE_MAYBE_UNINITIALIZED_END
+#endif
+#ifndef YY_INITIAL_VALUE
+# define YY_INITIAL_VALUE(Value) /* Nothing. */
+#endif
+
/* The semantic value of the lookahead symbol. */
-YYSTYPE yylval;
+YYSTYPE yylval YY_INITIAL_VALUE(yyval_default);
/* Number of syntax errors so far. */
int yynerrs;
`yyss': related to states.
`yyvs': related to semantic values.
- Refer to the stacks thru separate pointers, to allow yyoverflow
+ Refer to the stacks through separate pointers, to allow yyoverflow
to reallocate them elsewhere. */
/* The state stack. */
int yyn;
int yyresult;
/* Lookahead token as an internal (translated) token number. */
- int yytoken;
+ int yytoken = 0;
/* The variables used to return semantic value and location from the
action routines. */
YYSTYPE yyval;
Keep to zero when no symbol should be popped. */
int yylen = 0;
- yytoken = 0;
- yyss = yyssa;
- yyvs = yyvsa;
+ yyssp = yyss = yyssa;
+ yyvsp = yyvs = yyvsa;
yystacksize = YYINITDEPTH;
YYDPRINTF ((stderr, "Starting parse\n"));
yyerrstatus = 0;
yynerrs = 0;
yychar = YYEMPTY; /* Cause a token to be read. */
-
- /* Initialize stack pointers.
- Waste one element of value and location stack
- so that they stay on the same level as the state stack.
- The wasted elements are never initialized. */
- yyssp = yyss;
- yyvsp = yyvs;
-
goto yysetstate;
/*------------------------------------------------------------.
yychar = YYEMPTY;
yystate = yyn;
+ YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN
*++yyvsp = yylval;
+ YY_IGNORE_MAYBE_UNINITIALIZED_END
goto yynewstate;
switch (yyn)
{
case 6:
-
-/* Line 1806 of yacc.c */
+/* Line 1787 of yacc.c */
#line 173 "calc.y"
{ sp = stack[0]; yyerrok; }
break;
case 8:
-
-/* Line 1806 of yacc.c */
+/* Line 1787 of yacc.c */
#line 177 "calc.y"
{
mpz_out_str (stdout, obase, sp); putchar ('\n');
break;
case 9:
-
-/* Line 1806 of yacc.c */
+/* Line 1787 of yacc.c */
#line 182 "calc.y"
{
CHECK_VARIABLE ((yyvsp[(1) - (3)].var));
break;
case 10:
-
-/* Line 1806 of yacc.c */
+/* Line 1787 of yacc.c */
#line 188 "calc.y"
{ calc_help (); }
break;
case 11:
-
-/* Line 1806 of yacc.c */
+/* Line 1787 of yacc.c */
#line 189 "calc.y"
{ ibase = 16; obase = -16; }
break;
case 12:
-
-/* Line 1806 of yacc.c */
+/* Line 1787 of yacc.c */
#line 190 "calc.y"
{ ibase = 0; obase = 10; }
break;
case 13:
-
-/* Line 1806 of yacc.c */
+/* Line 1787 of yacc.c */
#line 191 "calc.y"
{ exit (0); }
break;
case 15:
-
-/* Line 1806 of yacc.c */
+/* Line 1787 of yacc.c */
#line 198 "calc.y"
{ sp--; mpz_add (sp, sp, sp+1); }
break;
case 16:
-
-/* Line 1806 of yacc.c */
+/* Line 1787 of yacc.c */
#line 199 "calc.y"
{ sp--; mpz_sub (sp, sp, sp+1); }
break;
case 17:
-
-/* Line 1806 of yacc.c */
+/* Line 1787 of yacc.c */
#line 200 "calc.y"
{ sp--; mpz_mul (sp, sp, sp+1); }
break;
case 18:
-
-/* Line 1806 of yacc.c */
+/* Line 1787 of yacc.c */
#line 201 "calc.y"
{ sp--; mpz_fdiv_q (sp, sp, sp+1); }
break;
case 19:
-
-/* Line 1806 of yacc.c */
+/* Line 1787 of yacc.c */
#line 202 "calc.y"
{ sp--; mpz_fdiv_r (sp, sp, sp+1); }
break;
case 20:
-
-/* Line 1806 of yacc.c */
+/* Line 1787 of yacc.c */
#line 203 "calc.y"
{ CHECK_UI ("Exponent", sp);
sp--; mpz_pow_ui (sp, sp, mpz_get_ui (sp+1)); }
break;
case 21:
-
-/* Line 1806 of yacc.c */
+/* Line 1787 of yacc.c */
#line 205 "calc.y"
{ CHECK_UI ("Shift count", sp);
sp--; mpz_mul_2exp (sp, sp, mpz_get_ui (sp+1)); }
break;
case 22:
-
-/* Line 1806 of yacc.c */
+/* Line 1787 of yacc.c */
#line 207 "calc.y"
{ CHECK_UI ("Shift count", sp);
sp--; mpz_fdiv_q_2exp (sp, sp, mpz_get_ui (sp+1)); }
break;
case 23:
-
-/* Line 1806 of yacc.c */
+/* Line 1787 of yacc.c */
#line 209 "calc.y"
{ CHECK_UI ("Factorial", sp);
mpz_fac_ui (sp, mpz_get_ui (sp)); }
break;
case 24:
-
-/* Line 1806 of yacc.c */
+/* Line 1787 of yacc.c */
#line 211 "calc.y"
{ mpz_neg (sp, sp); }
break;
case 25:
-
-/* Line 1806 of yacc.c */
+/* Line 1787 of yacc.c */
#line 213 "calc.y"
{ sp--; mpz_set_ui (sp, mpz_cmp (sp, sp+1) < 0); }
break;
case 26:
-
-/* Line 1806 of yacc.c */
+/* Line 1787 of yacc.c */
#line 214 "calc.y"
{ sp--; mpz_set_ui (sp, mpz_cmp (sp, sp+1) <= 0); }
break;
case 27:
-
-/* Line 1806 of yacc.c */
+/* Line 1787 of yacc.c */
#line 215 "calc.y"
{ sp--; mpz_set_ui (sp, mpz_cmp (sp, sp+1) == 0); }
break;
case 28:
-
-/* Line 1806 of yacc.c */
+/* Line 1787 of yacc.c */
#line 216 "calc.y"
{ sp--; mpz_set_ui (sp, mpz_cmp (sp, sp+1) != 0); }
break;
case 29:
-
-/* Line 1806 of yacc.c */
+/* Line 1787 of yacc.c */
#line 217 "calc.y"
{ sp--; mpz_set_ui (sp, mpz_cmp (sp, sp+1) >= 0); }
break;
case 30:
-
-/* Line 1806 of yacc.c */
+/* Line 1787 of yacc.c */
#line 218 "calc.y"
{ sp--; mpz_set_ui (sp, mpz_cmp (sp, sp+1) > 0); }
break;
case 31:
-
-/* Line 1806 of yacc.c */
+/* Line 1787 of yacc.c */
#line 220 "calc.y"
{ sp--; mpz_set_ui (sp, mpz_sgn (sp) && mpz_sgn (sp+1)); }
break;
case 32:
-
-/* Line 1806 of yacc.c */
+/* Line 1787 of yacc.c */
#line 221 "calc.y"
{ sp--; mpz_set_ui (sp, mpz_sgn (sp) || mpz_sgn (sp+1)); }
break;
case 33:
-
-/* Line 1806 of yacc.c */
+/* Line 1787 of yacc.c */
#line 223 "calc.y"
{ mpz_abs (sp, sp); }
break;
case 34:
-
-/* Line 1806 of yacc.c */
+/* Line 1787 of yacc.c */
#line 224 "calc.y"
{ sp--; CHECK_UI ("Binomial base", sp+1);
mpz_bin_ui (sp, sp, mpz_get_ui (sp+1)); }
break;
case 35:
-
-/* Line 1806 of yacc.c */
+/* Line 1787 of yacc.c */
#line 226 "calc.y"
{ CHECK_UI ("Fibonacci", sp);
mpz_fib_ui (sp, mpz_get_ui (sp)); }
break;
case 37:
-
-/* Line 1806 of yacc.c */
+/* Line 1787 of yacc.c */
#line 229 "calc.y"
{ sp--; mpz_set_si (sp,
mpz_kronecker (sp, sp+1)); }
break;
case 39:
-
-/* Line 1806 of yacc.c */
+/* Line 1787 of yacc.c */
#line 232 "calc.y"
{ CHECK_UI ("Lucas number", sp);
mpz_lucnum_ui (sp, mpz_get_ui (sp)); }
break;
case 40:
-
-/* Line 1806 of yacc.c */
+/* Line 1787 of yacc.c */
#line 234 "calc.y"
{ mpz_nextprime (sp, sp); }
break;
case 41:
-
-/* Line 1806 of yacc.c */
+/* Line 1787 of yacc.c */
#line 235 "calc.y"
{ sp -= 2; mpz_powm (sp, sp, sp+1, sp+2); }
break;
case 42:
-
-/* Line 1806 of yacc.c */
+/* Line 1787 of yacc.c */
#line 236 "calc.y"
{ sp--; CHECK_UI ("Nth-root", sp+1);
mpz_root (sp, sp, mpz_get_ui (sp+1)); }
break;
case 43:
-
-/* Line 1806 of yacc.c */
+/* Line 1787 of yacc.c */
#line 238 "calc.y"
{ mpz_sqrt (sp, sp); }
break;
case 44:
-
-/* Line 1806 of yacc.c */
+/* Line 1787 of yacc.c */
#line 240 "calc.y"
{
sp++;
break;
case 45:
-
-/* Line 1806 of yacc.c */
+/* Line 1787 of yacc.c */
#line 246 "calc.y"
{
sp++;
break;
case 47:
-
-/* Line 1806 of yacc.c */
+/* Line 1787 of yacc.c */
#line 258 "calc.y"
{ sp--; mpz_gcd (sp, sp, sp+1); }
break;
case 49:
-
-/* Line 1806 of yacc.c */
+/* Line 1787 of yacc.c */
#line 262 "calc.y"
{ sp--; mpz_lcm (sp, sp, sp+1); }
break;
-
-/* Line 1806 of yacc.c */
-#line 2022 "calc.c"
+/* Line 1787 of yacc.c */
+#line 1968 "calc.c"
default: break;
}
/* User semantic actions sometimes alter yychar, and that requires
YY_STACK_PRINT (yyss, yyssp);
}
+ YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN
*++yyvsp = yylval;
+ YY_IGNORE_MAYBE_UNINITIALIZED_END
/* Shift the error token. */
yyresult = 1;
goto yyreturn;
-#if !defined(yyoverflow) || YYERROR_VERBOSE
+#if !defined yyoverflow || YYERROR_VERBOSE
/*-------------------------------------------------.
| yyexhaustedlab -- memory exhaustion comes here. |
`-------------------------------------------------*/
}
-
-/* Line 2067 of yacc.c */
+/* Line 2050 of yacc.c */
#line 264 "calc.y"
return yyparse ();
}
-
-/* A Bison parser, made by GNU Bison 2.5. */
+/* A Bison parser, made by GNU Bison 2.7.12-4996. */
/* Bison interface for Yacc-like parsers in C
- Copyright (C) 1984, 1989-1990, 2000-2011 Free Software Foundation, Inc.
+ Copyright (C) 1984, 1989-1990, 2000-2013 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
This special exception was added by the Free Software Foundation in
version 2.2 of Bison. */
+#ifndef YY_YY_CALC_H_INCLUDED
+# define YY_YY_CALC_H_INCLUDED
+/* Enabling traces. */
+#ifndef YYDEBUG
+# define YYDEBUG 0
+#endif
+#if YYDEBUG
+extern int yydebug;
+#endif
/* Tokens. */
#ifndef YYTOKENTYPE
-
#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
typedef union YYSTYPE
{
-
-/* Line 2068 of yacc.c */
+/* Line 2053 of yacc.c */
#line 142 "calc.y"
char *str;
int var;
-
-/* Line 2068 of yacc.c */
-#line 117 "calc.h"
+/* Line 2053 of yacc.c */
+#line 123 "calc.h"
} YYSTYPE;
# define YYSTYPE_IS_TRIVIAL 1
# define yystype YYSTYPE /* obsolescent; will be withdrawn */
extern YYSTYPE yylval;
+#ifdef YYPARSE_PARAM
+#if defined __STDC__ || defined __cplusplus
+int yyparse (void *YYPARSE_PARAM);
+#else
+int yyparse ();
+#endif
+#else /* ! YYPARSE_PARAM */
+#if defined __STDC__ || defined __cplusplus
+int yyparse (void);
+#else
+int yyparse ();
+#endif
+#endif /* ! YYPARSE_PARAM */
+#endif /* !YY_YY_CALC_H_INCLUDED */
#define FLEX_SCANNER
#define YY_FLEX_MAJOR_VERSION 2
#define YY_FLEX_MINOR_VERSION 5
-#define YY_FLEX_SUBMINOR_VERSION 35
+#define YY_FLEX_SUBMINOR_VERSION 37
#if YY_FLEX_SUBMINOR_VERSION > 0
#define FLEX_BETA
#endif
typedef unsigned char flex_uint8_t;
typedef unsigned short int flex_uint16_t;
typedef unsigned int flex_uint32_t;
-#endif /* ! C99 */
/* Limits of integral types. */
#ifndef INT8_MIN
#define UINT32_MAX (4294967295U)
#endif
+#endif /* ! C99 */
+
#endif /* ! FLEXINT_H */
#ifdef __cplusplus
typedef struct yy_buffer_state *YY_BUFFER_STATE;
#endif
-extern int yyleng;
+#ifndef YY_TYPEDEF_YY_SIZE_T
+#define YY_TYPEDEF_YY_SIZE_T
+typedef size_t yy_size_t;
+#endif
+
+extern yy_size_t yyleng;
extern FILE *yyin, *yyout;
#define unput(c) yyunput( c, (yytext_ptr) )
-#ifndef YY_TYPEDEF_YY_SIZE_T
-#define YY_TYPEDEF_YY_SIZE_T
-typedef size_t yy_size_t;
-#endif
-
#ifndef YY_STRUCT_YY_BUFFER_STATE
#define YY_STRUCT_YY_BUFFER_STATE
struct yy_buffer_state
/* Number of characters read into yy_ch_buf, not including EOB
* characters.
*/
- int yy_n_chars;
+ yy_size_t yy_n_chars;
/* Whether we "own" the buffer - i.e., we know we created it,
* and can realloc() it to grow it, and should free() it to
/* yy_hold_char holds the character lost when yytext is formed. */
static char yy_hold_char;
-static int yy_n_chars; /* number of characters read into yy_ch_buf */
-int yyleng;
+static yy_size_t yy_n_chars; /* number of characters read into yy_ch_buf */
+yy_size_t yyleng;
/* Points to current character in buffer. */
static char *yy_c_buf_p = (char *) 0;
YY_BUFFER_STATE yy_scan_buffer (char *base,yy_size_t size );
YY_BUFFER_STATE yy_scan_string (yyconst char *yy_str );
-YY_BUFFER_STATE yy_scan_bytes (yyconst char *bytes,int len );
+YY_BUFFER_STATE yy_scan_bytes (yyconst char *bytes,yy_size_t len );
void *yyalloc (yy_size_t );
void *yyrealloc (void *,yy_size_t );
{ "sqrt", SQRT },
{ NULL }
};
-#line 526 "calclex.c"
+#line 527 "calclex.c"
#define INITIAL 0
void yyset_out (FILE * out_str );
-int yyget_leng (void );
+yy_size_t yyget_leng (void );
char *yyget_text (void );
/* This used to be an fputs(), but since the string might contain NUL's,
* we now use fwrite().
*/
-#define ECHO fwrite( yytext, yyleng, 1, yyout )
+#define ECHO do { if (fwrite( yytext, yyleng, 1, yyout )) {} } while (0)
#endif
/* Gets input and stuffs it into "buf". number of characters read, or YY_NULL,
if ( YY_CURRENT_BUFFER_LVALUE->yy_is_interactive ) \
{ \
int c = '*'; \
- int n; \
+ size_t n; \
for ( n = 0; n < max_size && \
(c = getc( yyin )) != EOF && c != '\n'; ++n ) \
buf[n] = (char) c; \
#line 57 "calclex.l"
-#line 711 "calclex.c"
+#line 712 "calclex.c"
if ( !(yy_init) )
{
#line 107 "calclex.l"
ECHO;
YY_BREAK
-#line 914 "calclex.c"
+#line 915 "calclex.c"
case YY_STATE_EOF(INITIAL):
yyterminate();
else
{
- int num_to_read =
+ yy_size_t num_to_read =
YY_CURRENT_BUFFER_LVALUE->yy_buf_size - number_to_move - 1;
while ( num_to_read <= 0 )
{ /* Not enough room in the buffer - grow it. */
/* just a shorter name for the current buffer */
- YY_BUFFER_STATE b = YY_CURRENT_BUFFER;
+ YY_BUFFER_STATE b = YY_CURRENT_BUFFER_LVALUE;
int yy_c_buf_p_offset =
(int) ((yy_c_buf_p) - b->yy_ch_buf);
if ( b->yy_is_our_buffer )
{
- int new_size = b->yy_buf_size * 2;
+ yy_size_t new_size = b->yy_buf_size * 2;
if ( new_size <= 0 )
b->yy_buf_size += b->yy_buf_size / 8;
/* Read in more data. */
YY_INPUT( (&YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[number_to_move]),
- (yy_n_chars), (size_t) num_to_read );
+ (yy_n_chars), num_to_read );
YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars);
}
yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c];
yy_is_jam = (yy_current_state == 38);
- return yy_is_jam ? 0 : yy_current_state;
+ return yy_is_jam ? 0 : yy_current_state;
}
static void yyunput (int c, register char * yy_bp )
if ( yy_cp < YY_CURRENT_BUFFER_LVALUE->yy_ch_buf + 2 )
{ /* need to shift things up to make room */
/* +2 for EOB chars. */
- register int number_to_move = (yy_n_chars) + 2;
+ register yy_size_t number_to_move = (yy_n_chars) + 2;
register char *dest = &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[
YY_CURRENT_BUFFER_LVALUE->yy_buf_size + 2];
register char *source =
else
{ /* need more input */
- int offset = (yy_c_buf_p) - (yytext_ptr);
+ yy_size_t offset = (yy_c_buf_p) - (yytext_ptr);
++(yy_c_buf_p);
switch ( yy_get_next_buffer( ) )
yyfree((void *) b );
}
-#ifndef __cplusplus
-extern int isatty (int );
-#endif /* __cplusplus */
-
/* Initializes or reinitializes a buffer.
* This function is sometimes called more than once on the same buffer,
* such as during a yyrestart() or at EOF.
*/
static void yyensure_buffer_stack (void)
{
- int num_to_alloc;
+ yy_size_t num_to_alloc;
if (!(yy_buffer_stack)) {
/** Setup the input buffer state to scan the given bytes. The next call to yylex() will
* scan from a @e copy of @a bytes.
- * @param bytes the byte buffer to scan
- * @param len the number of bytes in the buffer pointed to by @a bytes.
+ * @param yybytes the byte buffer to scan
+ * @param _yybytes_len the number of bytes in the buffer pointed to by @a bytes.
*
* @return the newly allocated buffer state object.
*/
-YY_BUFFER_STATE yy_scan_bytes (yyconst char * yybytes, int _yybytes_len )
+YY_BUFFER_STATE yy_scan_bytes (yyconst char * yybytes, yy_size_t _yybytes_len )
{
YY_BUFFER_STATE b;
char *buf;
/** Get the length of the current token.
*
*/
-int yyget_leng (void)
+yy_size_t yyget_leng (void)
{
return yyleng;
}
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.6 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
# You should have received a copy of the GNU Lesser General Public License
# along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
VPATH = @srcdir@
+am__make_dryrun = \
+ { \
+ am__dry=no; \
+ case $$MAKEFLAGS in \
+ *\\[\ \ ]*) \
+ echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \
+ | grep '^AM OK$$' >/dev/null || am__dry=yes;; \
+ *) \
+ for am__flg in $$MAKEFLAGS; do \
+ case $$am__flg in \
+ *=*|--*) ;; \
+ *n*) am__dry=yes; break;; \
+ esac; \
+ done;; \
+ esac; \
+ test $$am__dry = yes; \
+ }
pkgdatadir = $(datadir)/@PACKAGE@
pkgincludedir = $(includedir)/@PACKAGE@
pkglibdir = $(libdir)/@PACKAGE@
POST_UNINSTALL = :
build_triplet = @build@
host_triplet = @host@
-ANSI2KNR = $(top_builddir)/ansi2knr
EXTRA_PROGRAMS = run-expr$(EXEEXT) t-expr$(EXEEXT)
subdir = demos/expr
DIST_COMMON = README $(srcdir)/Makefile.am $(srcdir)/Makefile.in
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
- $(top_srcdir)/configure.in
+ $(top_srcdir)/configure.ac
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
$(ACLOCAL_M4)
mkinstalldirs = $(install_sh) -d
CONFIG_CLEAN_VPATH_FILES =
libexpr_a_AR = $(AR) $(ARFLAGS)
libexpr_a_LIBADD =
-am_libexpr_a_OBJECTS = expr$U.$(OBJEXT) exprv$U.$(OBJEXT) \
- exprz$U.$(OBJEXT) exprza$U.$(OBJEXT) exprq$U.$(OBJEXT) \
- exprqa$U.$(OBJEXT) exprf$U.$(OBJEXT) exprfa$U.$(OBJEXT)
+am_libexpr_a_OBJECTS = expr.$(OBJEXT) exprv.$(OBJEXT) exprz.$(OBJEXT) \
+ exprza.$(OBJEXT) exprq.$(OBJEXT) exprqa.$(OBJEXT) \
+ exprf.$(OBJEXT) exprfa.$(OBJEXT)
libexpr_a_OBJECTS = $(am_libexpr_a_OBJECTS)
run_expr_SOURCES = run-expr.c
-run_expr_OBJECTS = run-expr$U.$(OBJEXT)
+run_expr_OBJECTS = run-expr.$(OBJEXT)
run_expr_LDADD = $(LDADD)
run_expr_DEPENDENCIES = libexpr.a $(top_builddir)/libgmp.la
t_expr_SOURCES = t-expr.c
-t_expr_OBJECTS = t-expr$U.$(OBJEXT)
+t_expr_OBJECTS = t-expr.$(OBJEXT)
t_expr_DEPENDENCIES = $(top_builddir)/tests/libtests.la $(LDADD)
DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
depcomp =
$(LDFLAGS) -o $@
SOURCES = $(libexpr_a_SOURCES) run-expr.c t-expr.c
DIST_SOURCES = $(libexpr_a_SOURCES) run-expr.c t-expr.c
+am__can_run_installinfo = \
+ case $$AM_UPDATE_INFO_DIR in \
+ n|no|NO) false;; \
+ *) (install-info --version) >/dev/null 2>&1;; \
+ esac
ETAGS = etags
CTAGS = ctags
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
SPEED_CYCLECOUNTER_OBJ = @SPEED_CYCLECOUNTER_OBJ@
STRIP = @STRIP@
TAL_OBJECT = @TAL_OBJECT@
+TUNE_LIBS = @TUNE_LIBS@
TUNE_SQR_OBJ = @TUNE_SQR_OBJ@
-U = @U@
U_FOR_BUILD = @U_FOR_BUILD@
VERSION = @VERSION@
WITH_READLINE_01 = @WITH_READLINE_01@
mkdir_p = @mkdir_p@
mpn_objects = @mpn_objects@
mpn_objs_in_libgmp = @mpn_objs_in_libgmp@
-mpn_objs_in_libmp = @mpn_objs_in_libmp@
oldincludedir = @oldincludedir@
pdfdir = @pdfdir@
prefix = @prefix@
$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(am__aclocal_m4_deps):
-libexpr.a: $(libexpr_a_OBJECTS) $(libexpr_a_DEPENDENCIES)
+libexpr.a: $(libexpr_a_OBJECTS) $(libexpr_a_DEPENDENCIES) $(EXTRA_libexpr_a_DEPENDENCIES)
-rm -f libexpr.a
$(libexpr_a_AR) libexpr.a $(libexpr_a_OBJECTS) $(libexpr_a_LIBADD)
$(RANLIB) libexpr.a
-run-expr$(EXEEXT): $(run_expr_OBJECTS) $(run_expr_DEPENDENCIES)
+run-expr$(EXEEXT): $(run_expr_OBJECTS) $(run_expr_DEPENDENCIES) $(EXTRA_run_expr_DEPENDENCIES)
@rm -f run-expr$(EXEEXT)
$(LINK) $(run_expr_OBJECTS) $(run_expr_LDADD) $(LIBS)
-t-expr$(EXEEXT): $(t_expr_OBJECTS) $(t_expr_DEPENDENCIES)
+t-expr$(EXEEXT): $(t_expr_OBJECTS) $(t_expr_DEPENDENCIES) $(EXTRA_t_expr_DEPENDENCIES)
@rm -f t-expr$(EXEEXT)
$(LINK) $(t_expr_OBJECTS) $(t_expr_LDADD) $(LIBS)
distclean-compile:
-rm -f *.tab.c
-$(top_builddir)/ansi2knr:
- $(am__cd) $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) ./ansi2knr
-
-mostlyclean-kr:
- -test "$U" = "" || rm -f *_.c
.c.o:
$(COMPILE) -c $<
.c.lo:
$(LTCOMPILE) -c -o $@ $<
-expr_.c: expr.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/expr.c; then echo $(srcdir)/expr.c; else echo expr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-exprf_.c: exprf.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/exprf.c; then echo $(srcdir)/exprf.c; else echo exprf.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-exprfa_.c: exprfa.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/exprfa.c; then echo $(srcdir)/exprfa.c; else echo exprfa.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-exprq_.c: exprq.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/exprq.c; then echo $(srcdir)/exprq.c; else echo exprq.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-exprqa_.c: exprqa.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/exprqa.c; then echo $(srcdir)/exprqa.c; else echo exprqa.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-exprv_.c: exprv.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/exprv.c; then echo $(srcdir)/exprv.c; else echo exprv.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-exprz_.c: exprz.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/exprz.c; then echo $(srcdir)/exprz.c; else echo exprz.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-exprza_.c: exprza.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/exprza.c; then echo $(srcdir)/exprza.c; else echo exprza.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-run-expr_.c: run-expr.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/run-expr.c; then echo $(srcdir)/run-expr.c; else echo run-expr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-expr_.c: t-expr.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-expr.c; then echo $(srcdir)/t-expr.c; else echo t-expr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-expr_.$(OBJEXT) expr_.lo exprf_.$(OBJEXT) exprf_.lo exprfa_.$(OBJEXT) \
-exprfa_.lo exprq_.$(OBJEXT) exprq_.lo exprqa_.$(OBJEXT) exprqa_.lo \
-exprv_.$(OBJEXT) exprv_.lo exprz_.$(OBJEXT) exprz_.lo \
-exprza_.$(OBJEXT) exprza_.lo run-expr_.$(OBJEXT) run-expr_.lo \
-t-expr_.$(OBJEXT) t-expr_.lo : $(ANSI2KNR)
mostlyclean-libtool:
-rm -f *.lo
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
mostlyclean: mostlyclean-am
-mostlyclean-am: mostlyclean-compile mostlyclean-generic mostlyclean-kr \
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
mostlyclean-libtool
pdf: pdf-am
uninstall-am:
-.MAKE: $(top_builddir)/ansi2knr install-am install-strip
+.MAKE: install-am install-strip
.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
clean-libtool ctags distclean distclean-compile \
install-pdf-am install-ps install-ps-am install-strip \
installcheck installcheck-am installdirs maintainer-clean \
maintainer-clean-generic mostlyclean mostlyclean-compile \
- mostlyclean-generic mostlyclean-kr mostlyclean-libtool pdf \
- pdf-am ps ps-am tags uninstall uninstall-am
+ mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
+ tags uninstall uninstall-am
allprogs: $(EXTRA_PROGRAMS)
};
typedef union mpX_t *mpX_ptr;
-typedef __gmp_const union mpX_t *mpX_srcptr;
-
-typedef void (*mpexpr_fun_one_t) __GMP_PROTO ((mpX_ptr));
-typedef unsigned long (*mpexpr_fun_ui_one_t) __GMP_PROTO ((mpX_ptr));
-
-typedef void (*mpexpr_fun_0ary_t) __GMP_PROTO ((mpX_ptr));
-typedef int (*mpexpr_fun_i_0ary_t) __GMP_PROTO ((void));
-
-typedef void (*mpexpr_fun_unary_t) __GMP_PROTO ((mpX_ptr, mpX_srcptr));
-typedef void (*mpexpr_fun_unary_ui_t) __GMP_PROTO ((mpX_ptr, unsigned long));
-typedef int (*mpexpr_fun_i_unary_t) __GMP_PROTO ((mpX_srcptr));
-typedef int (*mpexpr_fun_i_unary_ui_t) __GMP_PROTO ((unsigned long));
-
-typedef void (*mpexpr_fun_binary_t) __GMP_PROTO ((mpX_ptr, mpX_srcptr, mpX_srcptr));
-typedef void (*mpexpr_fun_binary_ui_t) __GMP_PROTO ((mpX_ptr, mpX_srcptr, unsigned long));
-typedef int (*mpexpr_fun_i_binary_t) __GMP_PROTO ((mpX_srcptr, mpX_srcptr));
-typedef int (*mpexpr_fun_i_binary_ui_t) __GMP_PROTO ((mpX_srcptr, unsigned long));
-
-typedef void (*mpexpr_fun_ternary_t)
- __GMP_PROTO ((mpX_ptr, mpX_srcptr, mpX_srcptr, mpX_srcptr));
-typedef void (*mpexpr_fun_ternary_ui_t)
- __GMP_PROTO ((mpX_ptr, mpX_srcptr, mpX_srcptr, unsigned long));
-typedef int (*mpexpr_fun_i_ternary_t)
- __GMP_PROTO ((mpX_srcptr, mpX_srcptr, mpX_srcptr));
-typedef int (*mpexpr_fun_i_ternary_ui_t)
- __GMP_PROTO ((mpX_srcptr, mpX_srcptr, unsigned long));
-
-typedef size_t (*mpexpr_fun_number_t)
- __GMP_PROTO ((mpX_ptr, __gmp_const char *str, size_t len, int base));
-typedef void (*mpexpr_fun_swap_t) __GMP_PROTO ((mpX_ptr, mpX_ptr));
-typedef unsigned long (*mpexpr_fun_get_ui_t) __GMP_PROTO ((mpX_srcptr));
-typedef void (*mpexpr_fun_set_si_t) __GMP_PROTO ((mpX_srcptr, long));
+typedef const union mpX_t *mpX_srcptr;
+
+typedef void (*mpexpr_fun_one_t) (mpX_ptr);
+typedef unsigned long (*mpexpr_fun_ui_one_t) (mpX_ptr);
+
+typedef void (*mpexpr_fun_0ary_t) (mpX_ptr);
+typedef int (*mpexpr_fun_i_0ary_t) (void);
+
+typedef void (*mpexpr_fun_unary_t) (mpX_ptr, mpX_srcptr);
+typedef void (*mpexpr_fun_unary_ui_t) (mpX_ptr, unsigned long);
+typedef int (*mpexpr_fun_i_unary_t) (mpX_srcptr);
+typedef int (*mpexpr_fun_i_unary_ui_t) (unsigned long);
+
+typedef void (*mpexpr_fun_binary_t) (mpX_ptr, mpX_srcptr, mpX_srcptr);
+typedef void (*mpexpr_fun_binary_ui_t) (mpX_ptr, mpX_srcptr, unsigned long);
+typedef int (*mpexpr_fun_i_binary_t) (mpX_srcptr, mpX_srcptr);
+typedef int (*mpexpr_fun_i_binary_ui_t) (mpX_srcptr, unsigned long);
+
+typedef void (*mpexpr_fun_ternary_t) (mpX_ptr, mpX_srcptr, mpX_srcptr, mpX_srcptr);
+typedef void (*mpexpr_fun_ternary_ui_t) (mpX_ptr, mpX_srcptr, mpX_srcptr, unsigned long);
+typedef int (*mpexpr_fun_i_ternary_t) (mpX_srcptr, mpX_srcptr, mpX_srcptr);
+typedef int (*mpexpr_fun_i_ternary_ui_t) (mpX_srcptr, mpX_srcptr, unsigned long);
+
+typedef size_t (*mpexpr_fun_number_t) (mpX_ptr, const char *str, size_t len, int base);
+typedef void (*mpexpr_fun_swap_t) (mpX_ptr, mpX_ptr);
+typedef unsigned long (*mpexpr_fun_get_ui_t) (mpX_srcptr);
+typedef void (*mpexpr_fun_set_si_t) (mpX_srcptr, long);
struct mpexpr_control_t {
- __gmp_const struct mpexpr_operator_t *op;
- int argcount;
+ const struct mpexpr_operator_t *op;
+ int argcount;
};
#define MPEXPR_VARIABLES 26
struct mpexpr_parse_t {
- __gmp_const struct mpexpr_operator_t *table;
-
- mpX_ptr res;
- int base;
- unsigned long prec;
- __gmp_const char *e;
- size_t elen;
- mpX_srcptr *var;
- int error_code;
-
- int token;
- __gmp_const struct mpexpr_operator_t *token_op;
-
- union mpX_t *data_stack;
- int data_top;
- int data_alloc;
- int data_inited;
-
- struct mpexpr_control_t *control_stack;
- int control_top;
- int control_alloc;
-
-
- mpexpr_fun_0ary_t mpX_clear;
- mpexpr_fun_i_unary_t mpX_ulong_p;
- mpexpr_fun_get_ui_t mpX_get_ui;
- mpexpr_fun_unary_ui_t mpX_init;
- mpexpr_fun_number_t mpX_number;
- mpexpr_fun_unary_t mpX_set;
- mpexpr_fun_unary_t mpX_set_or_swap;
- mpexpr_fun_set_si_t mpX_set_si;
- mpexpr_fun_swap_t mpX_swap;
+ const struct mpexpr_operator_t *table;
+
+ mpX_ptr res;
+ int base;
+ unsigned long prec;
+ const char *e;
+ size_t elen;
+ mpX_srcptr *var;
+ int error_code;
+
+ int token;
+ const struct mpexpr_operator_t *token_op;
+
+ union mpX_t *data_stack;
+ int data_top;
+ int data_alloc;
+ int data_inited;
+
+ struct mpexpr_control_t *control_stack;
+ int control_top;
+ int control_alloc;
+
+ mpexpr_fun_0ary_t mpX_clear;
+ mpexpr_fun_i_unary_t mpX_ulong_p;
+ mpexpr_fun_get_ui_t mpX_get_ui;
+ mpexpr_fun_unary_ui_t mpX_init;
+ mpexpr_fun_number_t mpX_number;
+ mpexpr_fun_unary_t mpX_set;
+ mpexpr_fun_unary_t mpX_set_or_swap;
+ mpexpr_fun_set_si_t mpX_set_si;
+ mpexpr_fun_swap_t mpX_swap;
};
-int mpexpr_evaluate __GMP_PROTO ((struct mpexpr_parse_t *p));
-int mpexpr_va_to_var __GMP_PROTO ((void *var[], va_list ap));
-size_t mpexpr_mpz_number __GMP_PROTO ((mpz_ptr res,
- __gmp_const char *e, size_t elen, int base));
+int mpexpr_evaluate (struct mpexpr_parse_t *p);
+int mpexpr_va_to_var (void *var[], va_list ap);
+size_t mpexpr_mpz_number (mpz_ptr res, const char *e, size_t elen, int base);
static int
lookahead (struct mpexpr_parse_t *p, int prefix)
{
- __gmp_const struct mpexpr_operator_t *op, *op_found;
+ const struct mpexpr_operator_t *op, *op_found;
size_t oplen, oplen_found, wlen;
int i;
a reference through CP. */
#define CONTROL_PUSH(opptr,args) \
do { \
- __gmp_const struct mpexpr_operator_t *op = opptr; \
+ const struct mpexpr_operator_t *op = opptr; \
struct mpexpr_control_t *cp; \
CONTROL_SPACE (); \
p->control_top++; \
/* "done" is a special sentinel at the bottom of the control stack,
precedence -1 is lower than any normal operator. */
{
- static __gmp_const struct mpexpr_operator_t operator_done
+ static const struct mpexpr_operator_t operator_done
= { "DONE", NULL, MPEXPR_TYPE_DONE, -1 };
p->control_alloc = 20;
#define MPEXPR_TYPE_OPERATOR 0x2000
-typedef void (*mpexpr_fun_t) __GMP_PROTO ((void));
+typedef void (*mpexpr_fun_t) (void);
struct mpexpr_operator_t {
- __gmp_const char *name;
- mpexpr_fun_t fun;
- int type;
- int precedence;
+ const char *name;
+ mpexpr_fun_t fun;
+ int type;
+ int precedence;
};
-int mpf_expr_a __GMP_PROTO ((__gmp_const struct mpexpr_operator_t *table,
- mpf_ptr res, int base, unsigned long prec,
- __gmp_const char *e, size_t elen,
- mpf_srcptr var[26]));
-int mpf_expr __GMP_PROTO ((mpf_ptr res, int base, __gmp_const char *e, ...));
+int mpf_expr_a (const struct mpexpr_operator_t *, mpf_ptr, int,
+ unsigned long, const char *, size_t, mpf_srcptr [26]);
+int mpf_expr (mpf_ptr, int, const char *, ...);
-int mpq_expr_a __GMP_PROTO ((__gmp_const struct mpexpr_operator_t *table,
- mpq_ptr res, int base,
- __gmp_const char *e, size_t elen,
- mpq_srcptr var[26]));
-int mpq_expr __GMP_PROTO ((mpq_ptr res, int base, __gmp_const char *e, ...));
+int mpq_expr_a (const struct mpexpr_operator_t *, mpq_ptr,
+ int, const char *, size_t, mpq_srcptr [26]);
+int mpq_expr (mpq_ptr, int, const char *, ...);
-int mpz_expr_a __GMP_PROTO ((__gmp_const struct mpexpr_operator_t *table,
- mpz_ptr res, int base,
- __gmp_const char *e, size_t elen,
- mpz_srcptr var[26]));
-int mpz_expr __GMP_PROTO ((mpz_ptr res, int base, __gmp_const char *e, ...));
+int mpz_expr_a (const struct mpexpr_operator_t *, mpz_ptr, int,
+ const char *, size_t, mpz_srcptr [26]);
+int mpz_expr (mpz_ptr, int, const char *, ...);
#endif
}
-static __gmp_const struct mpexpr_operator_t _mpf_expr_standard_table[] = {
+static const struct mpexpr_operator_t _mpf_expr_standard_table[] = {
{ "**", (mpexpr_fun_t) mpf_pow_ui,
MPEXPR_TYPE_BINARY_UI | MPEXPR_TYPE_RIGHTASSOC, 220 },
{ NULL }
};
-__gmp_const struct mpexpr_operator_t * __gmp_const mpf_expr_standard_table
+const struct mpexpr_operator_t * const mpf_expr_standard_table
= _mpf_expr_standard_table;
int
#if HAVE_STDARG
-mpf_expr (mpf_ptr res, int base, __gmp_const char *e, ...)
+mpf_expr (mpf_ptr res, int base, const char *e, ...)
#else
mpf_expr (va_alist)
va_dcl
#if HAVE_STDARG
va_start (ap, e);
#else
- mpf_ptr res;
- int base;
- __gmp_const char *e;
+ mpf_ptr res;
+ int base;
+ const char *e;
va_start (ap);
res = va_arg (ap, mpf_ptr);
base = va_arg (ap, int);
- e = va_arg (ap, __gmp_const char *);
+ e = va_arg (ap, const char *);
#endif
TRACE (printf ("mpf_expr(): base %d, %s\n", base, e));
static size_t
-e_mpf_number (mpf_ptr res, __gmp_const char *e, size_t elen, int base)
+e_mpf_number (mpf_ptr res, const char *e, size_t elen, int base)
{
char *edup;
size_t i, ret, extra=0;
int
-mpf_expr_a (__gmp_const struct mpexpr_operator_t *table,
+mpf_expr_a (const struct mpexpr_operator_t *table,
mpf_ptr res, int base, unsigned long prec,
- __gmp_const char *e, size_t elen,
+ const char *e, size_t elen,
mpf_srcptr var[26])
{
struct mpexpr_parse_t p;
}
-static __gmp_const struct mpexpr_operator_t _mpq_expr_standard_table[] = {
+static const struct mpexpr_operator_t _mpq_expr_standard_table[] = {
{ "**", (mpexpr_fun_t) e_mpq_pow_ui,
MPEXPR_TYPE_BINARY_UI | MPEXPR_TYPE_RIGHTASSOC, 220 },
{ NULL }
};
-__gmp_const struct mpexpr_operator_t * __gmp_const mpq_expr_standard_table
+const struct mpexpr_operator_t * const mpq_expr_standard_table
= _mpq_expr_standard_table;
int
#if HAVE_STDARG
-mpq_expr (mpq_ptr res, int base, __gmp_const char *e, ...)
+mpq_expr (mpq_ptr res, int base, const char *e, ...)
#else
mpq_expr (va_alist)
va_dcl
#if HAVE_STDARG
va_start (ap, e);
#else
- mpq_ptr res;
- int base;
- __gmp_const char *e;
+ mpq_ptr res;
+ int base;
+ const char *e;
va_start (ap);
res = va_arg (ap, mpq_ptr);
base = va_arg (ap, int);
- e = va_arg (ap, __gmp_const char *);
+ e = va_arg (ap, const char *);
#endif
TRACE (printf ("mpq_expr(): base %d, %s\n", base, e));
/* The same as mpz, but putting the result in the numerator. Negatives and
fractions aren't parsed here because '-' and '/' are operators. */
static size_t
-e_mpq_number (mpq_ptr res, __gmp_const char *e, size_t elen, int base)
+e_mpq_number (mpq_ptr res, const char *e, size_t elen, int base)
{
mpz_set_ui (mpq_denref (res), 1L);
return mpexpr_mpz_number (mpq_numref (res), e, elen, base);
}
int
-mpq_expr_a (__gmp_const struct mpexpr_operator_t *table,
+mpq_expr_a (const struct mpexpr_operator_t *table,
mpq_ptr res, int base,
- __gmp_const char *e, size_t elen,
+ const char *e, size_t elen,
mpq_srcptr var[26])
{
struct mpexpr_parse_t p;
mpz_clrbit (w, n);
}
-static __gmp_const struct mpexpr_operator_t _mpz_expr_standard_table[] = {
+static const struct mpexpr_operator_t _mpz_expr_standard_table[] = {
{ "**", (mpexpr_fun_t) mpz_pow_ui,
MPEXPR_TYPE_BINARY_UI | MPEXPR_TYPE_RIGHTASSOC, 220 },
/* The table is available globally only through a pointer, so the table size
can change without breaking binary compatibility. */
-__gmp_const struct mpexpr_operator_t * __gmp_const mpz_expr_standard_table
+const struct mpexpr_operator_t * const mpz_expr_standard_table
= _mpz_expr_standard_table;
int
#if HAVE_STDARG
-mpz_expr (mpz_ptr res, int base, __gmp_const char *e, ...)
+mpz_expr (mpz_ptr res, int base, const char *e, ...)
#else
mpz_expr (va_alist)
va_dcl
#if HAVE_STDARG
va_start (ap, e);
#else
- mpz_ptr res;
- int base;
- __gmp_const char *e;
+ mpz_ptr res;
+ int base;
+ const char *e;
va_start (ap);
res = va_arg (ap, mpz_ptr);
base = va_arg (ap, int);
- e = va_arg (ap, __gmp_const char *);
+ e = va_arg (ap, const char *);
#endif
TRACE (printf ("mpz_expr(): base %d, %s\n", base, e));
/* No need to parse '-' since that's handled as an operator.
This function is also used by mpq_expr_a, so it's not static. */
size_t
-mpexpr_mpz_number (mpz_ptr res, __gmp_const char *e, size_t elen, int base)
+mpexpr_mpz_number (mpz_ptr res, const char *e, size_t elen, int base)
{
char *edup;
size_t i, ret;
}
int
-mpz_expr_a (__gmp_const struct mpexpr_operator_t *table,
+mpz_expr_a (const struct mpexpr_operator_t *table,
mpz_ptr res, int base,
- __gmp_const char *e, size_t elen,
+ const char *e, size_t elen,
mpz_srcptr var[26])
{
struct mpexpr_parse_t p;
/* Factoring with Pollard's rho method.
-Copyright 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2005, 2009
+Copyright 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2005, 2009, 2012
Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify it under
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
+#include <inttypes.h>
#include "gmp.h"
+static unsigned char primes_diff[] = {
+#define P(a,b,c) a,
+#include "primes.h"
+#undef P
+};
+#define PRIMES_PTAB_ENTRIES (sizeof(primes_diff) / sizeof(primes_diff[0]))
+
int flag_verbose = 0;
-static unsigned add[] = {4, 2, 4, 2, 4, 6, 2, 6};
+/* Prove primality or run probabilistic tests. */
+int flag_prove_primality = 1;
+
+/* Number of Miller-Rabin tests to run when not proving primality. */
+#define MR_REPS 25
+
+/* Growable list of prime factors with multiplicities, filled in by
+ factor_insert and released by factor_clear. */
+struct factors
+{
+ mpz_t *p; /* distinct primes; factor_insert keeps them in increasing order */
+ unsigned long *e; /* e[i] counts how many times p[i] has been inserted */
+ long nfactors; /* number of entries in use in p[] and e[] */
+};
+
+void factor (mpz_t, struct factors *);
+/* Set FACTORS to the empty list. Any previous contents are not released
+ here. */
void
-factor_using_division (mpz_t t, unsigned int limit)
+factor_init (struct factors *factors)
{
- mpz_t q, r;
- unsigned long int f;
- int ai;
- unsigned *addv = add;
- unsigned int failures;
+ /* One-byte placeholder blocks; factor_insert resizes both arrays with
+ realloc before anything is stored in them. */
+ factors->p = malloc (1);
+ factors->e = malloc (1);
+ factors->nfactors = 0;
+}
- if (flag_verbose > 0)
+/* Release everything held by FACTORS: each stored prime is cleared, then
+ the two arrays themselves are freed. */
+void
+factor_clear (struct factors *factors)
+{
+ int idx = 0;
+
+ while (idx < factors->nfactors)
+ {
+ mpz_clear (factors->p[idx]);
+ idx++;
+ }
+
+ free (factors->e);
+ free (factors->p);
+}
+
+/* Record one occurrence of PRIME in FACTORS.
+
+ The list is kept sorted in increasing order. If PRIME is already present
+ its exponent is incremented; otherwise both arrays grow by one slot and a
+ copy of PRIME is stored at its sorted position with exponent 1. PRIME
+ itself is only read. The program is aborted if memory cannot be
+ obtained. */
+void
+factor_insert (struct factors *factors, mpz_t prime)
+{
+ long nfactors = factors->nfactors;
+ mpz_t *p = factors->p;
+ unsigned long *e = factors->e;
+ long i, j;
+
+ /* Scan down from the top for the last entry that is <= PRIME; i ends at
+ -1 when every stored prime is larger. */
+ for (i = nfactors - 1; i >= 0; i--)
{
- printf ("[trial division (%u)] ", limit);
- fflush (stdout);
+ if (mpz_cmp (p[i], prime) <= 0)
+ break;
}
- mpz_init (q);
- mpz_init (r);
+ if (i < 0 || mpz_cmp (p[i], prime) != 0)
+ {
+ /* New prime: grow both arrays. Each result is checked and stored back
+ at once, so FACTORS never keeps a pointer realloc has released. */
+ p = realloc (p, (nfactors + 1) * sizeof p[0]);
+ if (p == NULL)
+ {
+ fprintf (stderr, "factor_insert: out of memory\n");
+ abort ();
+ }
+ factors->p = p;
+
+ e = realloc (e, (nfactors + 1) * sizeof e[0]);
+ if (e == NULL)
+ {
+ fprintf (stderr, "factor_insert: out of memory\n");
+ abort ();
+ }
+ factors->e = e;
+
+ /* Open a gap at index i + 1 by moving the larger entries up one slot. */
+ mpz_init (p[nfactors]);
+ for (j = nfactors - 1; j > i; j--)
+ {
+ mpz_set (p[j + 1], p[j]);
+ e[j + 1] = e[j];
+ }
+ mpz_set (p[i + 1], prime);
+ e[i + 1] = 1;
- f = mpz_scan1 (t, 0);
- mpz_div_2exp (t, t, f);
- while (f)
+ factors->nfactors = nfactors + 1;
+ }
+ else
{
- printf ("2 ");
- fflush (stdout);
- --f;
+ e[i] += 1;
}
+}
- for (;;)
+/* Record the word-sized PRIME in FACTORS. A temporary mpz_t carries the
+ value into factor_insert and is cleared again afterwards. */
+void
+factor_insert_ui (struct factors *factors, unsigned long prime)
+{
+ mpz_t tmp;
+
+ mpz_init (tmp);
+ mpz_set_ui (tmp, prime);
+ factor_insert (factors, tmp);
+ mpz_clear (tmp);
+}
+
+
+/* Strip small prime factors from T by trial division, recording each one
+ in FACTORS.
+
+ Factors of 2 are removed by shifting. T is then divided by the odd primes
+ generated from the primes_diff gap table, starting at 3. The search stops
+ once T is smaller than the square of the next candidate, or once the last
+ tabulated prime has been tried. T is overwritten with the remaining
+ cofactor. The caller visible in this file only passes nonzero T. */
+void
+factor_using_division (mpz_t t, struct factors *factors)
+{
+ unsigned long int p;
+ size_t i;
+
+ if (flag_verbose > 0)
{
- mpz_tdiv_qr_ui (q, r, t, 3);
- if (mpz_cmp_ui (r, 0) != 0)
- break;
- mpz_set (t, q);
- printf ("3 ");
- fflush (stdout);
+ printf ("[trial division] ");
}
- for (;;)
+ /* The index of the lowest set bit is the number of factors of 2. */
+ p = mpz_scan1 (t, 0);
+ mpz_div_2exp (t, t, p);
+ while (p)
{
- mpz_tdiv_qr_ui (q, r, t, 5);
- if (mpz_cmp_ui (r, 0) != 0)
- break;
- mpz_set (t, q);
- printf ("5 ");
- fflush (stdout);
+ factor_insert_ui (factors, 2);
+ --p;
}
- failures = 0;
- f = 7;
- ai = 0;
- while (mpz_cmp_ui (t, 1) != 0)
+ /* primes_diff[i] is the gap leading to the next prime; p holds the prime
+ reached after consuming entries 0 .. i-1. */
+ p = 3;
+ for (i = 1;;)
{
- mpz_tdiv_qr_ui (q, r, t, f);
- if (mpz_cmp_ui (r, 0) != 0)
+ if (! mpz_divisible_ui_p (t, p))
{
- f += addv[ai];
- if (mpz_cmp_ui (q, f) < 0)
- break;
- ai = (ai + 1) & 7;
- failures++;
- if (failures > limit)
+ /* p was the last tabulated prime: stop here rather than read a gap
+ from beyond the end of primes_diff. */
+ if (i >= PRIMES_PTAB_ENTRIES)
+ break;
+ p += primes_diff[i++];
+ if (mpz_cmp_ui (t, p * p) < 0)
break;
}
else
{
- mpz_swap (t, q);
- printf ("%lu ", f);
- fflush (stdout);
- failures = 0;
+ mpz_tdiv_q_ui (t, t, p);
+ factor_insert_ui (factors, p);
}
}
- mpz_clears (q, r, NULL);
}
-void
-factor_using_division_2kp (mpz_t t, unsigned int limit, unsigned long p)
+/* One Miller-Rabin round for N with base X.
+
+ NM1 is N - 1, and Q and K are the caller's decomposition of it (the caller
+ in this file computes N - 1 = 2**K * Q with Q odd). Y is scratch space and
+ is overwritten. Returns 1 when X**Q mod N is 1 or N - 1, or when one of up
+ to K - 1 further squarings reaches N - 1. Returns 0 as soon as a squaring
+ yields 1, or when the squarings run out. */
+static int
+mp_millerrabin (mpz_srcptr n, mpz_srcptr nm1, mpz_ptr x, mpz_ptr y,
+ mpz_srcptr q, unsigned long int k)
{
- mpz_t r;
- mpz_t f;
- unsigned int k;
+ unsigned long int i;
- if (flag_verbose > 0)
+ mpz_powm (y, x, q, n);
+
+ if (mpz_cmp_ui (y, 1) == 0 || mpz_cmp (y, nm1) == 0)
+ return 1;
+
+ for (i = 1; i < k; i++)
+ {
+ mpz_powm_ui (y, y, 2, n);
+ if (mpz_cmp (y, nm1) == 0)
+ return 1;
+ if (mpz_cmp_ui (y, 1) == 0)
+ return 0;
+ }
+ return 0;
+}
+
+/* Return nonzero if N is accepted as prime, zero otherwise.
+
+ N <= 1 gives 0. N below FIRST_OMITTED_PRIME squared gives 1 without any
+ test, which relies on small prime factors having been removed already.
+ Otherwise a Miller-Rabin round with base 2 is run first. When
+ flag_prove_primality is set, N - 1 is then factored and successive bases
+ are tried until the Lucas condition holds for every prime factor of
+ N - 1 (result 1) or a Miller-Rabin round fails (result 0). When the flag
+ is clear, N is accepted once MR_REPS Miller-Rabin rounds have passed.
+ The program aborts if the base table is exhausted without a verdict. */
+int
+mp_prime_p (mpz_t n)
+{
+ int k, r, is_prime;
+ mpz_t q, a, nm1, tmp;
+ struct factors factors;
+
+ if (mpz_cmp_ui (n, 1) <= 0)
+ return 0;
+
+ /* We have already cast out small primes. */
+ if (mpz_cmp_ui (n, (long) FIRST_OMITTED_PRIME * FIRST_OMITTED_PRIME) < 0)
+ return 1;
+
+ mpz_inits (q, a, nm1, tmp, NULL);
+
+ /* Precomputation for Miller-Rabin. */
+ mpz_sub_ui (nm1, n, 1);
+
+ /* Find q and k, where q is odd and n = 1 + 2**k * q. */
+ k = mpz_scan1 (nm1, 0);
+ mpz_tdiv_q_2exp (q, nm1, k);
+
+ mpz_set_ui (a, 2);
+
+ /* Perform a Miller-Rabin test, finds most composites quickly. */
+ if (!mp_millerrabin (n, nm1, a, tmp, q, k))
{
- printf ("[trial division (%u)] ", limit);
- fflush (stdout);
+ is_prime = 0;
+ goto ret2;
}
- mpz_init (r);
- mpz_init_set_ui (f, 2 * p);
- mpz_add_ui (f, f, 1);
- for (k = 1; k < limit; k++)
+ if (flag_prove_primality)
{
- mpz_tdiv_r (r, t, f);
- while (mpz_cmp_ui (r, 0) == 0)
+ /* Factor n-1 for Lucas. */
+ mpz_set (tmp, nm1);
+ factor (tmp, &factors);
+ }
+
+ /* Loop until Lucas proves our number prime, or Miller-Rabin proves our
+ number composite. */
+ for (r = 0; r < PRIMES_PTAB_ENTRIES; r++)
+ {
+ int i;
+
+ if (flag_prove_primality)
+ {
+ /* Base a passes only if a**((n-1)/p) mod n differs from 1 for every
+ prime factor p of n-1. */
+ is_prime = 1;
+ for (i = 0; i < factors.nfactors && is_prime; i++)
+ {
+ mpz_divexact (tmp, nm1, factors.p[i]);
+ mpz_powm (tmp, a, tmp, n);
+ is_prime = mpz_cmp_ui (tmp, 1) != 0;
+ }
+ }
+ else
{
- mpz_tdiv_q (t, t, f);
- mpz_tdiv_r (r, t, f);
- mpz_out_str (stdout, 10, f);
- fflush (stdout);
- fputc (' ', stdout);
+ /* After enough Miller-Rabin runs, be content. */
+ is_prime = (r == MR_REPS - 1);
+ }
+
+ if (is_prime)
+ goto ret1;
+
+ mpz_add_ui (a, a, primes_diff[r]); /* Establish new base. */
+
+ if (!mp_millerrabin (n, nm1, a, tmp, q, k))
+ {
+ is_prime = 0;
+ goto ret1;
}
- mpz_add_ui (f, f, 2 * p);
}
- mpz_clears (f, r, NULL);
+ fprintf (stderr, "Lucas prime test failure. This should not happen\n");
+ abort ();
+
+ /* ret1 also releases the factorisation of n-1, which exists only in
+ proving mode; ret2 is used before that factorisation is made. */
+ ret1:
+ if (flag_prove_primality)
+ factor_clear (&factors);
+ ret2:
+ mpz_clears (q, a, nm1, tmp, NULL);
+
+ return is_prime;
}
void
-factor_using_pollard_rho (mpz_t n, unsigned long a, unsigned long p)
+factor_using_pollard_rho (mpz_t n, unsigned long a, struct factors *factors)
{
- mpz_t x, x1, y, P;
- mpz_t t1, t2;
+ mpz_t x, z, y, P;
+ mpz_t t, t2;
unsigned long long k, l, i;
if (flag_verbose > 0)
{
printf ("[pollard-rho (%lu)] ", a);
- fflush (stdout);
}
- mpz_inits (t1, t2, NULL);
+ mpz_inits (t, t2, NULL);
mpz_init_set_si (y, 2);
mpz_init_set_si (x, 2);
- mpz_init_set_si (x1, 2);
+ mpz_init_set_si (z, 2);
mpz_init_set_ui (P, 1);
k = 1;
l = 1;
{
do
{
- if (p != 0)
- {
- mpz_powm_ui (x, x, p, n);
- mpz_add_ui (x, x, a);
- }
- else
- {
- mpz_mul (t1, x, x);
- mpz_mod (x, t1, n);
- mpz_add_ui (x, x, a);
- }
+ mpz_mul (t, x, x);
+ mpz_mod (x, t, n);
+ mpz_add_ui (x, x, a);
- mpz_sub (t1, x1, x);
- mpz_mul (t2, P, t1);
+ mpz_sub (t, z, x);
+ mpz_mul (t2, P, t);
mpz_mod (P, t2, n);
if (k % 32 == 1)
{
- mpz_gcd (t1, P, n);
- if (mpz_cmp_ui (t1, 1) != 0)
+ mpz_gcd (t, P, n);
+ if (mpz_cmp_ui (t, 1) != 0)
goto factor_found;
mpz_set (y, x);
}
}
while (--k != 0);
- mpz_gcd (t1, P, n);
- if (mpz_cmp_ui (t1, 1) != 0)
- goto factor_found;
-
- mpz_set (x1, x);
+ mpz_set (z, x);
k = l;
l = 2 * l;
for (i = 0; i < k; i++)
{
- if (p != 0)
- {
- mpz_powm_ui (x, x, p, n);
- mpz_add_ui (x, x, a);
- }
- else
- {
- mpz_mul (t1, x, x);
- mpz_mod (x, t1, n);
- mpz_add_ui (x, x, a);
- }
+ mpz_mul (t, x, x);
+ mpz_mod (x, t, n);
+ mpz_add_ui (x, x, a);
}
mpz_set (y, x);
}
factor_found:
do
{
- if (p != 0)
- {
- mpz_powm_ui (y, y, p, n); mpz_add_ui (y, y, a);
- }
- else
- {
- mpz_mul (t1, y, y);
- mpz_mod (y, t1, n);
- mpz_add_ui (y, y, a);
- }
- mpz_sub (t1, x1, y);
- mpz_gcd (t1, t1, n);
+ mpz_mul (t, y, y);
+ mpz_mod (y, t, n);
+ mpz_add_ui (y, y, a);
+
+ mpz_sub (t, z, y);
+ mpz_gcd (t, t, n);
}
- while (mpz_cmp_ui (t1, 1) == 0);
+ while (mpz_cmp_ui (t, 1) == 0);
- mpz_divexact (n, n, t1); /* divide by t1, before t1 is overwritten */
+ mpz_divexact (n, n, t); /* divide by t, before t is overwritten */
- if (!mpz_probab_prime_p (t1, 25))
+ if (!mp_prime_p (t))
{
- do
- {
- mp_limb_t a_limb;
- mpn_random (&a_limb, (mp_size_t) 1);
- a = a_limb;
- }
- while (a == 0);
-
if (flag_verbose > 0)
{
printf ("[composite factor--restarting pollard-rho] ");
- fflush (stdout);
}
- factor_using_pollard_rho (t1, a, p);
+ factor_using_pollard_rho (t, a + 1, factors);
}
else
{
- mpz_out_str (stdout, 10, t1);
- fflush (stdout);
- fputc (' ', stdout);
+ factor_insert (factors, t);
}
- mpz_mod (x, x, n);
- mpz_mod (x1, x1, n);
- mpz_mod (y, y, n);
- if (mpz_probab_prime_p (n, 25))
+
+ if (mp_prime_p (n))
{
- mpz_out_str (stdout, 10, n);
- fflush (stdout);
- fputc (' ', stdout);
+ factor_insert (factors, n);
break;
}
+
+ mpz_mod (x, x, n);
+ mpz_mod (z, z, n);
+ mpz_mod (y, y, n);
}
- mpz_clears (P, t2, t1, x1, x, y, NULL);
+ mpz_clears (P, t2, t, z, x, y, NULL);
}
+/* Compute the prime factorisation of T into FACTORS.
+
+ FACTORS is initialised here without releasing any previous contents; the
+ callers in this file release it with factor_clear afterwards. T is used
+ as working storage and is modified. When T is zero FACTORS is left empty.
+ Otherwise small primes are removed by trial division, and whatever
+ remains above 1 is inserted directly if mp_prime_p accepts it, or handed
+ to Pollard rho if not. */
void
-factor (mpz_t t, unsigned long p)
+factor (mpz_t t, struct factors *factors)
{
- unsigned int division_limit;
-
- if (mpz_sgn (t) == 0)
- return;
-
- /* Set the trial division limit according the size of t. */
- division_limit = mpz_sizeinbase (t, 2);
- if (division_limit > 1000)
- division_limit = 1000 * 1000;
- else
- division_limit = division_limit * division_limit;
+ factor_init (factors);
- if (p != 0)
- factor_using_division_2kp (t, division_limit / 10, p);
- else
- factor_using_division (t, division_limit);
-
- if (mpz_cmp_ui (t, 1) != 0)
+ if (mpz_sgn (t) != 0)
{
- if (flag_verbose > 0)
+ factor_using_division (t, factors);
+
+ if (mpz_cmp_ui (t, 1) != 0)
{
- printf ("[is number prime?] ");
- fflush (stdout);
+ if (flag_verbose > 0)
+ {
+ printf ("[is number prime?] ");
+ }
+ if (mp_prime_p (t))
+ factor_insert (factors, t);
+ else
+ factor_using_pollard_rho (t, 1, factors);
}
- if (mpz_probab_prime_p (t, 25))
- mpz_out_str (stdout, 10, t);
- else
- factor_using_pollard_rho (t, 1L, p);
}
}
main (int argc, char *argv[])
{
mpz_t t;
- unsigned long p;
- int i;
+ int i, j, k;
+ struct factors factors;
- if (argc > 1 && !strcmp (argv[1], "-v"))
+ while (argc > 1)
{
- flag_verbose = 1;
- argv++;
- argc--;
- }
- if (argc > 1 && !strcmp (argv[1], "-q"))
- {
- flag_verbose = -1;
+ if (!strcmp (argv[1], "-v"))
+ flag_verbose = 1;
+ else if (!strcmp (argv[1], "-w"))
+ flag_prove_primality = 0;
+ else
+ break;
+
argv++;
argc--;
}
mpz_init (t);
if (argc > 1)
{
- p = 0;
for (i = 1; i < argc; i++)
{
- if (!strncmp (argv[i], "-Mp", 3))
- {
- p = atoi (argv[i] + 3);
- mpz_set_ui (t, 1);
- mpz_mul_2exp (t, t, p);
- mpz_sub_ui (t, t, 1);
- }
- else if (!strncmp (argv[i], "-2kp", 4))
- {
- p = atoi (argv[i] + 4);
- continue;
- }
- else
- {
- mpz_set_str (t, argv[i], 0);
- }
+ mpz_set_str (t, argv[i], 0);
- if (mpz_cmp_ui (t, 0) == 0)
- puts ("-");
- else
- {
- factor (t, p);
- puts ("");
- }
+ gmp_printf ("%Zd:", t);
+ factor (t, &factors);
+
+ for (j = 0; j < factors.nfactors; j++)
+ for (k = 0; k < factors.e[j]; k++)
+ gmp_printf (" %Zd", factors.p[j]);
+
+ puts ("");
+ factor_clear (&factors);
}
}
else
mpz_inp_str (t, stdin, 0);
if (feof (stdin))
break;
- if (flag_verbose >= 0)
- {
- mpz_out_str (stdout, 10, t); printf (" = ");
- }
- factor (t, 0);
+
+ gmp_printf ("%Zd:", t);
+ factor (t, &factors);
+
+ for (j = 0; j < factors.nfactors; j++)
+ for (k = 0; k < factors.e[j]; k++)
+ gmp_printf (" %Zd", factors.p[j]);
+
puts ("");
+ factor_clear (&factors);
}
}
/* Classify numbers as probable primes, primes or composites.
With -q return true if the following argument is a (probable) prime.
-Copyright 1999, 2000, 2002, 2005 Free Software Foundation, Inc.
+Copyright 1999, 2000, 2002, 2005, 2012 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
/* Program for computing integer expressions using the GNU Multiple Precision
Arithmetic Library.
-Copyright 1997, 1999, 2000, 2001, 2002, 2005 Free Software Foundation, Inc.
+Copyright 1997, 1999, 2000, 2001, 2002, 2005, 2008, 2012 Free Software
+Foundation, Inc.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
typedef struct expr *expr_t;
-void cleanup_and_exit __GMP_PROTO ((int));
-
-char *skipspace __GMP_PROTO ((char *));
-void makeexp __GMP_PROTO ((expr_t *, enum op_t, expr_t, expr_t));
-void free_expr __GMP_PROTO ((expr_t));
-char *expr __GMP_PROTO ((char *, expr_t *));
-char *term __GMP_PROTO ((char *, expr_t *));
-char *power __GMP_PROTO ((char *, expr_t *));
-char *factor __GMP_PROTO ((char *, expr_t *));
-int match __GMP_PROTO ((char *, char *));
-int matchp __GMP_PROTO ((char *, char *));
-int cputime __GMP_PROTO ((void));
-
-void mpz_eval_expr __GMP_PROTO ((mpz_ptr, expr_t));
-void mpz_eval_mod_expr __GMP_PROTO ((mpz_ptr, expr_t, mpz_ptr));
+void cleanup_and_exit (int);
+
+char *skipspace (char *);
+void makeexp (expr_t *, enum op_t, expr_t, expr_t);
+void free_expr (expr_t);
+char *expr (char *, expr_t *);
+char *term (char *, expr_t *);
+char *power (char *, expr_t *);
+char *factor (char *, expr_t *);
+int match (char *, char *);
+int matchp (char *, char *);
+int cputime (void);
+
+void mpz_eval_expr (mpz_ptr, expr_t);
+void mpz_eval_mod_expr (mpz_ptr, expr_t, mpz_ptr);
char *error;
int flag_print = 1;
Written by tege while on holiday in Rodupp, August 2001.
Between 10 and 500 times faster than previous program.
-Copyright 2001, 2002, 2006 Free Software Foundation, Inc.
+Copyright 2001, 2002, 2006, 2012 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
struct primes *primes;
unsigned long n_primes;
-void find_primes __GMP_PROTO ((unsigned char *, mpz_t, unsigned long, mpz_t));
-void sieve_region __GMP_PROTO ((unsigned char *, mpz_t, unsigned long));
-void make_primelist __GMP_PROTO ((unsigned long));
+void find_primes (unsigned char *, mpz_t, unsigned long, mpz_t);
+void sieve_region (unsigned char *, mpz_t, unsigned long);
+void make_primelist (unsigned long);
int flag_print = 1;
int flag_count = 0;
--- /dev/null
+P( 1, 0xaaaaaaaaaaaaaaabUL, 0x5555555555555555UL) /* 3 */
+P( 2, 0xcccccccccccccccdUL, 0x3333333333333333UL) /* 5 */
+P( 2, 0x6db6db6db6db6db7UL, 0x2492492492492492UL) /* 7 */
+P( 4, 0x2e8ba2e8ba2e8ba3UL, 0x1745d1745d1745d1UL) /* 11 */
+P( 2, 0x4ec4ec4ec4ec4ec5UL, 0x13b13b13b13b13b1UL) /* 13 */
+P( 4, 0xf0f0f0f0f0f0f0f1UL, 0x0f0f0f0f0f0f0f0fUL) /* 17 */
+P( 2, 0x86bca1af286bca1bUL, 0x0d79435e50d79435UL) /* 19 */
+P( 4, 0xd37a6f4de9bd37a7UL, 0x0b21642c8590b216UL) /* 23 */
+P( 6, 0x34f72c234f72c235UL, 0x08d3dcb08d3dcb08UL) /* 29 */
+P( 2, 0xef7bdef7bdef7bdfUL, 0x0842108421084210UL) /* 31 */
+P( 6, 0x14c1bacf914c1badUL, 0x06eb3e45306eb3e4UL) /* 37 */
+P( 4, 0x8f9c18f9c18f9c19UL, 0x063e7063e7063e70UL) /* 41 */
+P( 2, 0x82fa0be82fa0be83UL, 0x05f417d05f417d05UL) /* 43 */
+P( 4, 0x51b3bea3677d46cfUL, 0x0572620ae4c415c9UL) /* 47 */
+P( 6, 0x21cfb2b78c13521dUL, 0x04d4873ecade304dUL) /* 53 */
+P( 6, 0xcbeea4e1a08ad8f3UL, 0x0456c797dd49c341UL) /* 59 */
+P( 2, 0x4fbcda3ac10c9715UL, 0x04325c53ef368eb0UL) /* 61 */
+P( 6, 0xf0b7672a07a44c6bUL, 0x03d226357e16ece5UL) /* 67 */
+P( 4, 0x193d4bb7e327a977UL, 0x039b0ad12073615aUL) /* 71 */
+P( 2, 0x7e3f1f8fc7e3f1f9UL, 0x0381c0e070381c0eUL) /* 73 */
+P( 6, 0x9b8b577e613716afUL, 0x033d91d2a2067b23UL) /* 79 */
+P( 4, 0xa3784a062b2e43dbUL, 0x03159721ed7e7534UL) /* 83 */
+P( 6, 0xf47e8fd1fa3f47e9UL, 0x02e05c0b81702e05UL) /* 89 */
+P( 8, 0xa3a0fd5c5f02a3a1UL, 0x02a3a0fd5c5f02a3UL) /* 97 */
+P( 4, 0x3a4c0a237c32b16dUL, 0x0288df0cac5b3f5dUL) /* 101 */
+P( 2, 0xdab7ec1dd3431b57UL, 0x027c45979c95204fUL) /* 103 */
+P( 4, 0x77a04c8f8d28ac43UL, 0x02647c69456217ecUL) /* 107 */
+P( 2, 0xa6c0964fda6c0965UL, 0x02593f69b02593f6UL) /* 109 */
+P( 4, 0x90fdbc090fdbc091UL, 0x0243f6f0243f6f02UL) /* 113 */
+P(14, 0x7efdfbf7efdfbf7fUL, 0x0204081020408102UL) /* 127 */
+P( 4, 0x03e88cb3c9484e2bUL, 0x01f44659e4a42715UL) /* 131 */
+P( 6, 0xe21a291c077975b9UL, 0x01de5d6e3f8868a4UL) /* 137 */
+P( 2, 0x3aef6ca970586723UL, 0x01d77b654b82c339UL) /* 139 */
+P(10, 0xdf5b0f768ce2cabdUL, 0x01b7d6c3dda338b2UL) /* 149 */
+P( 2, 0x6fe4dfc9bf937f27UL, 0x01b2036406c80d90UL) /* 151 */
+P( 6, 0x5b4fe5e92c0685b5UL, 0x01a16d3f97a4b01aUL) /* 157 */
+P( 6, 0x1f693a1c451ab30bUL, 0x01920fb49d0e228dUL) /* 163 */
+P( 4, 0x8d07aa27db35a717UL, 0x01886e5f0abb0499UL) /* 167 */
+P( 6, 0x882383b30d516325UL, 0x017ad2208e0ecc35UL) /* 173 */
+P( 6, 0xed6866f8d962ae7bUL, 0x016e1f76b4337c6cUL) /* 179 */
+P( 2, 0x3454dca410f8ed9dUL, 0x016a13cd15372904UL) /* 181 */
+P(10, 0x1d7ca632ee936f3fUL, 0x01571ed3c506b39aUL) /* 191 */
+P( 2, 0x70bf015390948f41UL, 0x015390948f40feacUL) /* 193 */
+P( 4, 0xc96bdb9d3d137e0dUL, 0x014cab88725af6e7UL) /* 197 */
+P( 2, 0x2697cc8aef46c0f7UL, 0x0149539e3b2d066eUL) /* 199 */
+P(12, 0xc0e8f2a76e68575bUL, 0x013698df3de07479UL) /* 211 */
+P(12, 0x687763dfdb43bb1fUL, 0x0125e22708092f11UL) /* 223 */
+P( 4, 0x1b10ea929ba144cbUL, 0x0120b470c67c0d88UL) /* 227 */
+P( 2, 0x1d10c4c0478bbcedUL, 0x011e2ef3b3fb8744UL) /* 229 */
+P( 4, 0x63fb9aeb1fdcd759UL, 0x0119453808ca29c0UL) /* 233 */
+P( 6, 0x64afaa4f437b2e0fUL, 0x0112358e75d30336UL) /* 239 */
+P( 2, 0xf010fef010fef011UL, 0x010fef010fef010fUL) /* 241 */
+P(10, 0x28cbfbeb9a020a33UL, 0x0105197f7d734041UL) /* 251 */
+P( 6, 0xff00ff00ff00ff01UL, 0x00ff00ff00ff00ffUL) /* 257 */
+P( 6, 0xd624fd1470e99cb7UL, 0x00f92fb2211855a8UL) /* 263 */
+P( 6, 0x8fb3ddbd6205b5c5UL, 0x00f3a0d52cba8723UL) /* 269 */
+P( 2, 0xd57da36ca27acdefUL, 0x00f1d48bcee0d399UL) /* 271 */
+P( 6, 0xee70c03b25e4463dUL, 0x00ec979118f3fc4dUL) /* 277 */
+P( 4, 0xc5b1a6b80749cb29UL, 0x00e939651fe2d8d3UL) /* 281 */
+P( 2, 0x47768073c9b97113UL, 0x00e79372e225fe30UL) /* 283 */
+P(10, 0x2591e94884ce32adUL, 0x00dfac1f74346c57UL) /* 293 */
+P(14, 0xf02806abc74be1fbUL, 0x00d578e97c3f5fe5UL) /* 307 */
+P( 4, 0x7ec3e8f3a7198487UL, 0x00d2ba083b445250UL) /* 311 */
+P( 2, 0x58550f8a39409d09UL, 0x00d161543e28e502UL) /* 313 */
+P( 4, 0xec9e48ae6f71de15UL, 0x00cebcf8bb5b4169UL) /* 317 */
+P(14, 0x2ff3a018bfce8063UL, 0x00c5fe740317f9d0UL) /* 331 */
+P( 6, 0x7f9ec3fcf61fe7b1UL, 0x00c2780613c0309eUL) /* 337 */
+P(10, 0x89f5abe570e046d3UL, 0x00bcdd535db1cc5bUL) /* 347 */
+P( 2, 0xda971b23f1545af5UL, 0x00bbc8408cd63069UL) /* 349 */
+P( 4, 0x79d5f00b9a7862a1UL, 0x00b9a7862a0ff465UL) /* 353 */
+P( 6, 0x4dba1df32a128a57UL, 0x00b68d31340e4307UL) /* 359 */
+P( 8, 0x87530217b7747d8fUL, 0x00b2927c29da5519UL) /* 367 */
+P( 6, 0x30baae53bb5e06ddUL, 0x00afb321a1496fdfUL) /* 373 */
+P( 6, 0xee70206c12e9b5b3UL, 0x00aceb0f891e6551UL) /* 379 */
+P( 4, 0xcdde9462ec9dbe7fUL, 0x00ab1cbdd3e2970fUL) /* 383 */
+P( 6, 0xafb64b05ec41cf4dUL, 0x00a87917088e262bUL) /* 389 */
+P( 8, 0x02944ff5aec02945UL, 0x00a513fd6bb00a51UL) /* 397 */
+P( 4, 0x2cb033128382df71UL, 0x00a36e71a2cb0331UL) /* 401 */
+P( 8, 0x1ccacc0c84b1c2a9UL, 0x00a03c1688732b30UL) /* 409 */
+P(10, 0x19a93db575eb3a0bUL, 0x009c69169b30446dUL) /* 419 */
+P( 2, 0xcebeef94fa86fe2dUL, 0x009baade8e4a2f6eUL) /* 421 */
+P(10, 0x6faa77fb3f8df54fUL, 0x00980e4156201301UL) /* 431 */
+P( 2, 0x68a58af00975a751UL, 0x00975a750ff68a58UL) /* 433 */
+P( 6, 0xd56e36d0c3efac07UL, 0x009548e4979e0829UL) /* 439 */
+P( 4, 0xd8b44c47a8299b73UL, 0x0093efd1c50e726bUL) /* 443 */
+P( 6, 0x02d9ccaf9ba70e41UL, 0x0091f5bcb8bb02d9UL) /* 449 */
+P( 8, 0x0985e1c023d9e879UL, 0x008f67a1e3fdc261UL) /* 457 */
+P( 4, 0x2a343316c494d305UL, 0x008e2917e0e702c6UL) /* 461 */
+P( 2, 0x70cb7916ab67652fUL, 0x008d8be33f95d715UL) /* 463 */
+P( 4, 0xd398f132fb10fe5bUL, 0x008c55841c815ed5UL) /* 467 */
+P(12, 0x6f2a38a6bf54fa1fUL, 0x0088d180cd3a4133UL) /* 479 */
+P( 8, 0x211df689b98f81d7UL, 0x00869222b1acf1ceUL) /* 487 */
+P( 4, 0x0e994983e90f1ec3UL, 0x0085797b917765abUL) /* 491 */
+P( 8, 0xad671e44bed87f3bUL, 0x008355ace3c897dbUL) /* 499 */
+P( 4, 0xf9623a0516e70fc7UL, 0x00824a4e60b3262bUL) /* 503 */
+P( 6, 0x4b7129be9dece355UL, 0x0080c121b28bd1baUL) /* 509 */
+P(12, 0x190f3b7473f62c39UL, 0x007dc9f3397d4c29UL) /* 521 */
+P( 2, 0x63dacc9aad46f9a3UL, 0x007d4ece8fe88139UL) /* 523 */
+P(18, 0xc1108fda24e8d035UL, 0x0079237d65bcce50UL) /* 541 */
+P( 6, 0xb77578472319bd8bUL, 0x0077cf53c5f7936cUL) /* 547 */
+P(10, 0x473d20a1c7ed9da5UL, 0x0075a8accfbdd11eUL) /* 557 */
+P( 6, 0xfbe85af0fea2c8fbUL, 0x007467ac557c228eUL) /* 563 */
+P( 6, 0x58a1f7e6ce0f4c09UL, 0x00732d70ed8db8e9UL) /* 569 */
+P( 2, 0x1a00e58c544986f3UL, 0x0072c62a24c3797fUL) /* 571 */
+P( 6, 0x7194a17f55a10dc1UL, 0x007194a17f55a10dUL) /* 577 */
+P(10, 0x7084944785e33763UL, 0x006fa549b41da7e7UL) /* 587 */
+P( 6, 0xba10679bd84886b1UL, 0x006e8419e6f61221UL) /* 593 */
+P( 6, 0xebe9c6bb31260967UL, 0x006d68b5356c207bUL) /* 599 */
+P( 2, 0x97a3fe4bd1ff25e9UL, 0x006d0b803685c01bUL) /* 601 */
+P( 6, 0x6c6388395b84d99fUL, 0x006bf790a8b2d207UL) /* 607 */
+P( 6, 0x8c51da6a1335df6dUL, 0x006ae907ef4b96c2UL) /* 613 */
+P( 4, 0x46f3234475d5add9UL, 0x006a37991a23aeadUL) /* 617 */
+P( 2, 0x905605ca3c619a43UL, 0x0069dfbdd4295b66UL) /* 619 */
+P(12, 0xcee8dff304767747UL, 0x0067dc4c45c8033eUL) /* 631 */
+P(10, 0xff99c27f00663d81UL, 0x00663d80ff99c27fUL) /* 641 */
+P( 2, 0xacca407f671ddc2bUL, 0x0065ec17e3559948UL) /* 643 */
+P( 4, 0xe71298bac1e12337UL, 0x00654ac835cfba5cUL) /* 647 */
+P( 6, 0xfa1e94309cd09045UL, 0x00645c854ae10772UL) /* 653 */
+P( 6, 0xbebccb8e91496b9bUL, 0x006372990e5f901fUL) /* 659 */
+P( 2, 0x312fa30cc7d7b8bdUL, 0x006325913c07beefUL) /* 661 */
+P(12, 0x6160ff9e9f006161UL, 0x006160ff9e9f0061UL) /* 673 */
+P( 4, 0x6b03673b5e28152dUL, 0x0060cdb520e5e88eUL) /* 677 */
+P( 6, 0xfe802ffa00bfe803UL, 0x005ff4017fd005ffUL) /* 683 */
+P( 8, 0xe66fe25c9e907c7bUL, 0x005ed79e31a4dccdUL) /* 691 */
+P(10, 0x3f8b236c76528895UL, 0x005d7d42d48ac5efUL) /* 701 */
+P( 8, 0xf6f923bf01ce2c0dUL, 0x005c6f35ccba5028UL) /* 709 */
+P(10, 0x6c3d3d98bed7c42fUL, 0x005b2618ec6ad0a5UL) /* 719 */
+P( 8, 0x30981efcd4b010e7UL, 0x005a2553748e42e7UL) /* 727 */
+P( 6, 0x6f691fc81ebbe575UL, 0x0059686cf744cd5bUL) /* 733 */
+P( 6, 0xb10480ddb47b52cbUL, 0x0058ae97bab79976UL) /* 739 */
+P( 4, 0x74cd59ed64f3f0d7UL, 0x0058345f1876865fUL) /* 743 */
+P( 8, 0x0105cb81316d6c0fUL, 0x005743d5bb24795aUL) /* 751 */
+P( 6, 0x9be64c6d91c1195dUL, 0x005692c4d1ab74abUL) /* 757 */
+P( 4, 0x71b3f945a27b1f49UL, 0x00561e46a4d5f337UL) /* 761 */
+P( 8, 0x77d80d50e508fd01UL, 0x005538ed06533997UL) /* 769 */
+P( 4, 0xa5eb778e133551cdUL, 0x0054c807f2c0bec2UL) /* 773 */
+P(14, 0x18657d3c2d8a3f1bUL, 0x005345efbc572d36UL) /* 787 */
+P(10, 0x2e40e220c34ad735UL, 0x00523a758f941345UL) /* 797 */
+P(12, 0xa76593c70a714919UL, 0x005102370f816c89UL) /* 809 */
+P( 2, 0x1eef452124eea383UL, 0x0050cf129fb94acfUL) /* 811 */
+P(10, 0x38206dc242ba771dUL, 0x004fd31941cafdd1UL) /* 821 */
+P( 2, 0x4cd4c35807772287UL, 0x004fa1704aa75945UL) /* 823 */
+P( 4, 0x83de917d5e69ddf3UL, 0x004f3ed6d45a63adUL) /* 827 */
+P( 2, 0x882ef0403b4a6c15UL, 0x004f0de57154ebedUL) /* 829 */
+P(10, 0xf8fb6c51c606b677UL, 0x004e1cae8815f811UL) /* 839 */
+P(14, 0xb4abaac446d3e1fdUL, 0x004cd47ba5f6ff19UL) /* 853 */
+P( 4, 0xa9f83bbe484a14e9UL, 0x004c78ae734df709UL) /* 857 */
+P( 2, 0x0bebbc0d1ce874d3UL, 0x004c4b19ed85cfb8UL) /* 859 */
+P( 4, 0xbd418eaf0473189fUL, 0x004bf093221d1218UL) /* 863 */
+P(14, 0x44e3af6f372b7e65UL, 0x004aba3c21dc633fUL) /* 877 */
+P( 4, 0xc87fdace4f9e5d91UL, 0x004a6360c344de00UL) /* 881 */
+P( 2, 0xec93479c446bd9bbUL, 0x004a383e9f74d68aUL) /* 883 */
+P( 4, 0xdac4d592e777c647UL, 0x0049e28fbabb9940UL) /* 887 */
+P(20, 0xa63ea8c8f61f0c23UL, 0x0048417b57c78cd7UL) /* 907 */
+P( 4, 0xe476062ea5cbbb6fUL, 0x0047f043713f3a2bUL) /* 911 */
+P( 8, 0xdf68761c69daac27UL, 0x00474ff2a10281cfUL) /* 919 */
+P(10, 0xb813d737637aa061UL, 0x00468b6f9a978f91UL) /* 929 */
+P( 8, 0xa3a77aac1fb15099UL, 0x0045f13f1caff2e2UL) /* 937 */
+P( 4, 0x17f0c3e0712c5825UL, 0x0045a5228cec23e9UL) /* 941 */
+P( 6, 0xfd912a70ff30637bUL, 0x0045342c556c66b9UL) /* 947 */
+P( 6, 0xfbb3b5dc01131289UL, 0x0044c4a23feeced7UL) /* 953 */
+P(14, 0x856d560a0f5acdf7UL, 0x0043c5c20d3c9fe6UL) /* 967 */
+P( 4, 0x96472f314d3f89e3UL, 0x00437e494b239798UL) /* 971 */
+P( 6, 0xa76f5c7ed2253531UL, 0x0043142d118e47cbUL) /* 977 */
+P( 6, 0x816eae7c7bf69fe7UL, 0x0042ab5c73a13458UL) /* 983 */
+P( 8, 0xb6a2bea4cfb1781fUL, 0x004221950db0f3dbUL) /* 991 */
+P( 6, 0xa3900c53318e81edUL, 0x0041bbb2f80a4553UL) /* 997 */
+P(12, 0x60aa7f5d9f148d11UL, 0x0040f391612c6680UL) /* 1009 */
+P( 4, 0x6be8c0102c7a505dUL, 0x0040b1e94173fefdUL) /* 1013 */
+P( 6, 0x8ff3f0ed28728f33UL, 0x004050647d9d0445UL) /* 1019 */
+P( 2, 0x680e0a87e5ec7155UL, 0x004030241b144f3bUL) /* 1021 */
+P(10, 0xbbf70fa49fe829b7UL, 0x003f90c2ab542cb1UL) /* 1031 */
+P( 2, 0xd69d1e7b6a50ca39UL, 0x003f71412d59f597UL) /* 1033 */
+P( 6, 0x1a1e0f46b6d26aefUL, 0x003f137701b98841UL) /* 1039 */
+P(10, 0x7429f9a7a8251829UL, 0x003e79886b60e278UL) /* 1049 */
+P( 2, 0xd9c2219d1b863613UL, 0x003e5b1916a7181dUL) /* 1051 */
+P(10, 0x91406c1820d077adUL, 0x003dc4a50968f524UL) /* 1061 */
+P( 2, 0x521f4ec02e3d2b97UL, 0x003da6e4c9550321UL) /* 1063 */
+P( 6, 0xbb8283b63dc8eba5UL, 0x003d4e4f06f1def3UL) /* 1069 */
+P(18, 0x431eda153229ebbfUL, 0x003c4a6bdd24f9a4UL) /* 1087 */
+P( 4, 0xaf0bf78d7e01686bUL, 0x003c11d54b525c73UL) /* 1091 */
+P( 2, 0xa9ced0742c086e8dUL, 0x003bf5b1c5721065UL) /* 1093 */
+P( 4, 0xc26458ad9f632df9UL, 0x003bbdb9862f23b4UL) /* 1097 */
+P( 6, 0xbbff1255dff892afUL, 0x003b6a8801db5440UL) /* 1103 */
+P( 6, 0xcbd49a333f04d8fdUL, 0x003b183cf0fed886UL) /* 1109 */
+P( 8, 0xec84ed6f9cfdeff5UL, 0x003aabe394bdc3f4UL) /* 1117 */
+P( 6, 0x97980cc40bda9d4bUL, 0x003a5ba3e76156daUL) /* 1123 */
+P( 6, 0x777f34d524f5cbd9UL, 0x003a0c3e953378dbUL) /* 1129 */
+P(22, 0x2797051d94cbbb7fUL, 0x0038f03561320b1eUL) /* 1151 */
+P( 2, 0xea769051b4f43b81UL, 0x0038d6ecaef5908aUL) /* 1153 */
+P(10, 0xce7910f3034d4323UL, 0x003859cf221e6069UL) /* 1163 */
+P( 8, 0x92791d1374f5b99bUL, 0x0037f7415dc9588aUL) /* 1171 */
+P(10, 0x89a5645cc68ea1b5UL, 0x00377df0d3902626UL) /* 1181 */
+P( 6, 0x5f8aacf796c0cf0bUL, 0x00373622136907faUL) /* 1187 */
+P( 6, 0xf2e90a15e33edf99UL, 0x0036ef0c3b39b92fUL) /* 1193 */
+P( 8, 0x8e99e5feb897c451UL, 0x0036915f47d55e6dUL) /* 1201 */
+P(12, 0xaca2eda38fb91695UL, 0x0036072cf3f866fdUL) /* 1213 */
+P( 4, 0x5d9b737be5ea8b41UL, 0x0035d9b737be5ea8UL) /* 1217 */
+P( 6, 0x4aefe1db93fd7cf7UL, 0x0035961559cc81c7UL) /* 1223 */
+P( 6, 0xa0994ef20b3f8805UL, 0x0035531c897a4592UL) /* 1229 */
+P( 2, 0x103890bda912822fUL, 0x00353ceebd3e98a4UL) /* 1231 */
+P( 6, 0xb441659d13a9147dUL, 0x0034fad381585e5eUL) /* 1237 */
+P(12, 0x1e2134440c4c3f21UL, 0x00347884d1103130UL) /* 1249 */
+P(10, 0x263a27727a6883c3UL, 0x00340dd3ac39bf56UL) /* 1259 */
+P(18, 0x78e221472ab33855UL, 0x003351fdfecc140cUL) /* 1277 */
+P( 2, 0x95eac88e82e6faffUL, 0x00333d72b089b524UL) /* 1279 */
+P( 4, 0xf66c258317be8dabUL, 0x0033148d44d6b261UL) /* 1283 */
+P( 6, 0x09ee202c7cb91939UL, 0x0032d7aef8412458UL) /* 1289 */
+P( 2, 0x8d2fca1042a09ea3UL, 0x0032c3850e79c0f1UL) /* 1291 */
+P( 6, 0x82779c856d8b8bf1UL, 0x00328766d59048a2UL) /* 1297 */
+P( 4, 0x3879361cba8a223dUL, 0x00325fa18cb11833UL) /* 1301 */
+P( 2, 0xf23f43639c3182a7UL, 0x00324bd659327e22UL) /* 1303 */
+P( 4, 0xa03868fc474bcd13UL, 0x0032246e784360f4UL) /* 1307 */
+P(12, 0x651e78b8c5311a97UL, 0x0031afa5f1a33a08UL) /* 1319 */
+P( 2, 0x8ffce639c00c6719UL, 0x00319c63ff398e70UL) /* 1321 */
+P( 6, 0xf7b460754b0b61cfUL, 0x003162f7519a86a7UL) /* 1327 */
+P(34, 0x7b03f3359b8e63b1UL, 0x0030271fc9d3fc3cUL) /* 1361 */
+P( 6, 0xa55c5326041eb667UL, 0x002ff104ae89750bUL) /* 1367 */
+P( 6, 0x647f88ab896a76f5UL, 0x002fbb62a236d133UL) /* 1373 */
+P( 8, 0x8fd971434a55a46dUL, 0x002f74997d2070b4UL) /* 1381 */
+P(18, 0x9fbf969958046447UL, 0x002ed84aa8b6fce3UL) /* 1399 */
+P(10, 0x9986feba69be3a81UL, 0x002e832df7a46dbdUL) /* 1409 */
+P(14, 0xa668b3e6d053796fUL, 0x002e0e0846857cabUL) /* 1423 */
+P( 4, 0x97694e6589f4e09bUL, 0x002decfbdfb55ee6UL) /* 1427 */
+P( 2, 0x37890c00b7721dbdUL, 0x002ddc876f3ff488UL) /* 1429 */
+P( 4, 0x5ac094a235f37ea9UL, 0x002dbbc1d4c482c4UL) /* 1433 */
+P( 6, 0x31cff775f2d5d65fUL, 0x002d8af0e0de0556UL) /* 1439 */
+P( 8, 0xddad8e6b36505217UL, 0x002d4a7b7d14b30aUL) /* 1447 */
+P( 4, 0x5a27df897062cd03UL, 0x002d2a85073bcf4eUL) /* 1451 */
+P( 2, 0xe2396fe0fdb5a625UL, 0x002d1a9ab13e8be4UL) /* 1453 */
+P( 6, 0xb352a4957e82317bUL, 0x002ceb1eb4b9fd8bUL) /* 1459 */
+P(12, 0xd8ab3f2c60c2ea3fUL, 0x002c8d503a79794cUL) /* 1471 */
+P(10, 0x6893f702f0452479UL, 0x002c404d708784edUL) /* 1481 */
+P( 2, 0x9686fdc182acf7e3UL, 0x002c31066315ec52UL) /* 1483 */
+P( 4, 0x6854037173dce12fUL, 0x002c1297d80f2664UL) /* 1487 */
+P( 2, 0x7f0ded1685c27331UL, 0x002c037044c55f6bUL) /* 1489 */
+P( 4, 0xeeda72e1fe490b7dUL, 0x002be5404cd13086UL) /* 1493 */
+P( 6, 0x9e7bfc959a8e6e53UL, 0x002bb845adaf0cceUL) /* 1499 */
+P(12, 0x49b314d6d4753dd7UL, 0x002b5f62c639f16dUL) /* 1511 */
+P(12, 0x2e8f8c5ac4aa1b3bUL, 0x002b07e6734f2b88UL) /* 1523 */
+P( 8, 0xb8ef723481163d33UL, 0x002ace569d8342b7UL) /* 1531 */
+P(12, 0x6a2ec96a594287b7UL, 0x002a791d5dbd4dcfUL) /* 1543 */
+P( 6, 0xdba41c6d13aab8c5UL, 0x002a4eff8113017cUL) /* 1549 */
+P( 4, 0xc2adbe648dc3aaf1UL, 0x002a3319e156df32UL) /* 1553 */
+P( 6, 0x87a2bade565f91a7UL, 0x002a0986286526eaUL) /* 1559 */
+P( 8, 0x4d6fe8798c01f5dfUL, 0x0029d29551d91e39UL) /* 1567 */
+P( 4, 0x3791310c8c23d98bUL, 0x0029b7529e109f0aUL) /* 1571 */
+P( 8, 0xf80e446b01228883UL, 0x00298137491ea465UL) /* 1579 */
+P( 4, 0x9aed1436fbf500cfUL, 0x0029665e1eb9f9daUL) /* 1583 */
+P(14, 0x7839b54cc8b24115UL, 0x002909752e019a5eUL) /* 1597 */
+P( 4, 0xc128c646ad0309c1UL, 0x0028ef35e2e5efb0UL) /* 1601 */
+P( 6, 0x14de631624a3c377UL, 0x0028c815aa4b8278UL) /* 1607 */
+P( 2, 0x3f7b9fe68b0ecbf9UL, 0x0028bb1b867199daUL) /* 1609 */
+P( 4, 0x284ffd75ec00a285UL, 0x0028a13ff5d7b002UL) /* 1613 */
+P( 6, 0x37803cb80dea2ddbUL, 0x00287ab3f173e755UL) /* 1619 */
+P( 2, 0x86b63f7c9ac4c6fdUL, 0x00286dead67713bdUL) /* 1621 */
+P( 6, 0x8b6851d1bd99b9d3UL, 0x002847bfcda6503eUL) /* 1627 */
+P(10, 0xb62fda77ca343b6dUL, 0x002808c1ea6b4777UL) /* 1637 */
+P(20, 0x1f0dc009e34383c9UL, 0x00278d0e0f23ff61UL) /* 1657 */
+P( 6, 0x496dc21ddd35b97fUL, 0x002768863c093c7fUL) /* 1663 */
+P( 4, 0xb0e96ce17090f82bUL, 0x0027505115a73ca8UL) /* 1667 */
+P( 2, 0xaadf05acdd7d024dUL, 0x00274441a61dc1b9UL) /* 1669 */
+P(24, 0xcb138196746eafb5UL, 0x0026b5c166113cf0UL) /* 1693 */
+P( 4, 0x347f523736755d61UL, 0x00269e65ad07b18eUL) /* 1697 */
+P( 2, 0xd14a48a051f7dd0bUL, 0x002692c25f877560UL) /* 1699 */
+P(10, 0x474d71b1ce914d25UL, 0x002658fa7523cd11UL) /* 1709 */
+P(12, 0x386063f5e28c1f89UL, 0x0026148710cf0f9eUL) /* 1721 */
+P( 2, 0x1db7325e32d04e73UL, 0x002609363b22524fUL) /* 1723 */
+P(10, 0xfef748d3893b880dUL, 0x0025d1065a1c1122UL) /* 1733 */
+P( 8, 0x2f3351506e935605UL, 0x0025a48a382b863fUL) /* 1741 */
+P( 6, 0x7a3637fa2376415bUL, 0x0025837190eccdbcUL) /* 1747 */
+P( 6, 0x4ac525d2baa21969UL, 0x00256292e95d510cUL) /* 1753 */
+P( 6, 0x3a11c16b42cd351fUL, 0x002541eda98d068cUL) /* 1759 */
+P(18, 0x6c7abde0049c2a11UL, 0x0024e15087fed8f5UL) /* 1777 */
+P( 6, 0x54dad0303e069ac7UL, 0x0024c18b20979e5dUL) /* 1783 */
+P( 4, 0xebf1ac9fdfe91433UL, 0x0024ac7b336de0c5UL) /* 1787 */
+P( 2, 0xfafdda8237cec655UL, 0x0024a1fc478c60bbUL) /* 1789 */
+P(12, 0xdce3ff6e71ffb739UL, 0x002463801231c009UL) /* 1801 */
+P(10, 0xbed5737d6286db1bUL, 0x0024300fd506ed33UL) /* 1811 */
+P(12, 0xe479e431fe08b4dfUL, 0x0023f314a494da81UL) /* 1823 */
+P( 8, 0x9dd9b0dd7742f897UL, 0x0023cadedd2fad3aUL) /* 1831 */
+P(16, 0x8f09d7402c5a5e87UL, 0x00237b7ed2664a03UL) /* 1847 */
+P(14, 0x9216d5c4d958738dUL, 0x0023372967dbaf1dUL) /* 1861 */
+P( 6, 0xb3139ba11d34ca63UL, 0x00231a308a371f20UL) /* 1867 */
+P( 4, 0x47d54f7ed644afafUL, 0x002306fa63e1e600UL) /* 1871 */
+P( 2, 0x92a81d85cf11a1b1UL, 0x0022fd6731575684UL) /* 1873 */
+P( 4, 0x754b26533253bdfdUL, 0x0022ea507805749cUL) /* 1877 */
+P( 2, 0xbbe0efc980bfd467UL, 0x0022e0cce8b3d720UL) /* 1879 */
+P(10, 0xc0d8d594f024dca1UL, 0x0022b1887857d161UL) /* 1889 */
+P(12, 0x8238d43bcaac1a65UL, 0x00227977fcc49cc0UL) /* 1901 */
+P( 6, 0x27779c1fae6175bbUL, 0x00225db37b5e5f4fUL) /* 1907 */
+P( 6, 0xa746ca9af708b2c9UL, 0x0022421b91322ed6UL) /* 1913 */
+P(18, 0x93f3cd9f389be823UL, 0x0021f05b35f52102UL) /* 1931 */
+P( 2, 0x5cb4a4c04c489345UL, 0x0021e75de5c70d60UL) /* 1933 */
+P(16, 0xbf6047743e85b6b5UL, 0x0021a01d6c19be96UL) /* 1949 */
+P( 2, 0x61c147831563545fUL, 0x0021974a6615c81aUL) /* 1951 */
+P(22, 0xedb47c0ae62dee9dUL, 0x00213767697cf36aUL) /* 1973 */
+P( 6, 0x0a3824386673a573UL, 0x00211d9f7fad35f1UL) /* 1979 */
+P( 8, 0xa4a77d19e575a0ebUL, 0x0020fb7d9dd36c18UL) /* 1987 */
+P( 6, 0xa2bee045e066c279UL, 0x0020e2123d661e0eUL) /* 1993 */
+P( 4, 0xc23618de8ab43d05UL, 0x0020d135b66ae990UL) /* 1997 */
+P( 2, 0x266b515216cb9f2fUL, 0x0020c8cded4d7a8eUL) /* 1999 */
+P( 4, 0xe279edd9e9c2e85bUL, 0x0020b80b3f43ddbfUL) /* 2003 */
+P( 8, 0xd0c591c221dc9c53UL, 0x002096b9180f46a6UL) /* 2011 */
+P( 6, 0x06da8ee9c9ee7c21UL, 0x00207de7e28de5daUL) /* 2017 */
+P(10, 0x9dfebcaf4c27e8c3UL, 0x002054dec8cf1fb3UL) /* 2027 */
+P( 2, 0x49aeff9f19dd6de5UL, 0x00204cb630b3aab5UL) /* 2029 */
+P(10, 0x86976a57a296e9c7UL, 0x00202428adc37bebUL) /* 2039 */
+P(14, 0xa3b9abf4872b84cdUL, 0x001fec0c7834def4UL) /* 2053 */
+P(10, 0x34fca6483895e6efUL, 0x001fc46fae98a1d0UL) /* 2063 */
+P( 6, 0x34b5a333988f873dUL, 0x001facda430ff619UL) /* 2069 */
+P(12, 0xd9dd4f19b5f17be1UL, 0x001f7e17dd8e15e5UL) /* 2081 */
+P( 2, 0xb935b507fd0ce78bUL, 0x001f765a3556a4eeUL) /* 2083 */
+P( 4, 0xb450f5540660e797UL, 0x001f66ea49d802f1UL) /* 2087 */
+P( 2, 0x63ff82831ffc1419UL, 0x001f5f3800faf9c0UL) /* 2089 */
+P(10, 0x8992f718c22a32fbUL, 0x001f38f4e6c0f1f9UL) /* 2099 */
+P(12, 0x5f3253ad0d37e7bfUL, 0x001f0b8546752578UL) /* 2111 */
+P( 2, 0x007c0ffe0fc007c1UL, 0x001f03ff83f001f0UL) /* 2113 */
+P(16, 0x4d8ebadc0c0640b1UL, 0x001ec853b0a3883cUL) /* 2129 */
+P( 2, 0xe2729af831037bdbUL, 0x001ec0ee573723ebUL) /* 2131 */
+P( 6, 0xb8f64bf30feebfe9UL, 0x001eaad38e6f6894UL) /* 2137 */
+P( 4, 0xda93124b544c0bf5UL, 0x001e9c28a765fe53UL) /* 2141 */
+P( 2, 0x9cf7ff0b593c539fUL, 0x001e94d8758c2003UL) /* 2143 */
+P(10, 0xd6bd8861fa0e07d9UL, 0x001e707ba8f65e68UL) /* 2153 */
+P( 8, 0x5cfe75c0bd8ab891UL, 0x001e53a2a68f574eUL) /* 2161 */
+P(18, 0x43e808757c2e862bUL, 0x001e1380a56b438dUL) /* 2179 */
+P(24, 0x90caa96d595c9d93UL, 0x001dbf9f513a3802UL) /* 2203 */
+P( 4, 0x8fd550625d07135fUL, 0x001db1d1d58bc600UL) /* 2207 */
+P( 6, 0x76b010a86e209f2dUL, 0x001d9d358f53de38UL) /* 2213 */
+P( 8, 0xecc0426447769b25UL, 0x001d81e6df6165c7UL) /* 2221 */
+P(16, 0xe381339caabe3295UL, 0x001d4bdf7fd40e30UL) /* 2237 */
+P( 2, 0xd1b190a2d0c7673fUL, 0x001d452c7a1c958dUL) /* 2239 */
+P( 4, 0xc3bce3cf26b0e7ebUL, 0x001d37cf9b902659UL) /* 2243 */
+P( 8, 0x5f87e76f56c61ce3UL, 0x001d1d3a5791e97bUL) /* 2251 */
+P(16, 0xc06c6857a124b353UL, 0x001ce89fe6b47416UL) /* 2267 */
+P( 2, 0x38c040fcba630f75UL, 0x001ce219f3235071UL) /* 2269 */
+P( 4, 0xd078bc4fbd533b21UL, 0x001cd516dcf92139UL) /* 2273 */
+P( 8, 0xde8e15c5dd354f59UL, 0x001cbb33bd1c2b8bUL) /* 2281 */
+P( 6, 0xca61d53d7414260fUL, 0x001ca7e7d2546688UL) /* 2287 */
+P( 6, 0xb56bf5ba8eae635dUL, 0x001c94b5c1b3dbd3UL) /* 2293 */
+P( 4, 0x44a72cb0fb6e3949UL, 0x001c87f7f9c241c1UL) /* 2297 */
+P(12, 0x879839a714f45bcdUL, 0x001c6202706c35a9UL) /* 2309 */
+P( 2, 0x02a8994fde5314b7UL, 0x001c5bb8a9437632UL) /* 2311 */
+P(22, 0xb971920cf2b90135UL, 0x001c174343b4111eUL) /* 2333 */
+P( 6, 0x8a8fd0b7df9a6e8bUL, 0x001c04d0d3e46b42UL) /* 2339 */
+P( 2, 0xb31f9a84c1c6eaadUL, 0x001bfeb00fbf4308UL) /* 2341 */
+P( 6, 0x92293b02823c6d83UL, 0x001bec5dce0b202dUL) /* 2347 */
+P( 4, 0xeee77ff20fe5ddcfUL, 0x001be03444620037UL) /* 2351 */
+P( 6, 0x0e1ea0f6c496c11dUL, 0x001bce09c66f6fc3UL) /* 2357 */
+P(14, 0xfdf2d3d6f88ccb6bUL, 0x001ba40228d02b30UL) /* 2371 */
+P( 6, 0xfa9d74a3457738f9UL, 0x001b9225b1cf8919UL) /* 2377 */
+P( 4, 0xefc3ca3db71a5785UL, 0x001b864a2ff3f53fUL) /* 2381 */
+P( 2, 0x8e2071718d0d6dafUL, 0x001b80604150e49bUL) /* 2383 */
+P( 6, 0xbc0fdbfeb6cfabfdUL, 0x001b6eb1aaeaacf3UL) /* 2389 */
+P( 4, 0x1eeab613e5e5aee9UL, 0x001b62f48da3c8ccUL) /* 2393 */
+P( 6, 0x2d2388e90e9e929fUL, 0x001b516babe96092UL) /* 2399 */
+P(12, 0x81dbafba588ddb43UL, 0x001b2e9cef1e0c87UL) /* 2411 */
+P( 6, 0x52eebc51c4799791UL, 0x001b1d56bedc849bUL) /* 2417 */
+P( 6, 0x1c6bc4693b45a047UL, 0x001b0c267546aec0UL) /* 2423 */
+P(14, 0x06eee0974498874dUL, 0x001ae45f62024fa0UL) /* 2437 */
+P( 4, 0xd85b7377a9953cb9UL, 0x001ad917631b5f54UL) /* 2441 */
+P( 6, 0x4b6df412d4caf56fUL, 0x001ac83d18cb608fUL) /* 2447 */
+P(12, 0x6b8afbbb4a053493UL, 0x001aa6c7ad8c063fUL) /* 2459 */
+P( 8, 0xcc5299c96ac7720bUL, 0x001a90a7b1228e2aUL) /* 2467 */
+P( 6, 0xadce84b5c710aa99UL, 0x001a8027c03ba059UL) /* 2473 */
+P( 4, 0x9d673f5aa3804225UL, 0x001a7533289deb89UL) /* 2477 */
+P(26, 0xe6541268efbce7f7UL, 0x001a2ed7ce16b49fUL) /* 2503 */
+P(18, 0xfcf41e76cf5be669UL, 0x0019fefc0a279a73UL) /* 2521 */
+P(10, 0x5c3eb5dc31c383cbUL, 0x0019e4b0cd873b5fUL) /* 2531 */
+P( 8, 0x301832d11d8ad6c3UL, 0x0019cfcdfd60e514UL) /* 2539 */
+P( 4, 0x2e9c0942f1ce450fUL, 0x0019c56932d66c85UL) /* 2543 */
+P( 6, 0x97f3f2be37a39a5dUL, 0x0019b5e1ab6fc7c2UL) /* 2549 */
+P( 2, 0xe8b7d8a9654187c7UL, 0x0019b0b8a62f2a73UL) /* 2551 */
+P( 6, 0xb5d024d7da5b1b55UL, 0x0019a149fc98942cUL) /* 2557 */
+P(22, 0xb8ba9d6e7ae3501bUL, 0x001969517ec25b85UL) /* 2579 */
+P(12, 0xf50865f71b90f1dfUL, 0x00194b3083360ba8UL) /* 2591 */
+P( 2, 0x739c1682847df9e1UL, 0x00194631f4bebdc1UL) /* 2593 */
+P(16, 0xc470a4d842b90ed1UL, 0x00191e84127268fdUL) /* 2609 */
+P( 8, 0x1fb1be11698cc409UL, 0x00190adbb543984fUL) /* 2617 */
+P( 4, 0xd8d5512a7cd35d15UL, 0x001901130bd18200UL) /* 2621 */
+P(12, 0xa5496821723e07f9UL, 0x0018e3e6b889ac94UL) /* 2633 */
+P(14, 0xbcc8c6d7abaa8167UL, 0x0018c233420e1ec1UL) /* 2647 */
+P(10, 0x52c396c95eb619a1UL, 0x0018aa5872d92bd6UL) /* 2657 */
+P( 2, 0x6eb7e380878ec74bUL, 0x0018a5989945ccf9UL) /* 2659 */
+P( 4, 0x3d5513b504537157UL, 0x00189c1e60b57f60UL) /* 2663 */
+P( 8, 0x314391f8862e948fUL, 0x0018893fbc8690b9UL) /* 2671 */
+P( 6, 0xdc0b17cfcd81f5ddUL, 0x00187b2bb3e1041cUL) /* 2677 */
+P( 6, 0x2f6bea3ec89044b3UL, 0x00186d27c9cdcfb8UL) /* 2683 */
+P( 4, 0xce13a05869f1b57fUL, 0x001863d8bf4f2c1cUL) /* 2687 */
+P( 2, 0x7593474e8ace3581UL, 0x00185f33e2ad7593UL) /* 2689 */
+P( 4, 0x07fc329295a05e4dUL, 0x001855ef75973e13UL) /* 2693 */
+P( 6, 0xb05377cba4908d23UL, 0x001848160153f134UL) /* 2699 */
+P( 8, 0xe7b2131a628aa39bUL, 0x001835b72e6f0656UL) /* 2707 */
+P( 4, 0x9031dbed7de01527UL, 0x00182c922d83eb39UL) /* 2711 */
+P( 2, 0x76844b1c670aa9a9UL, 0x0018280243c0365aUL) /* 2713 */
+P( 6, 0x6a03f4533b08915fUL, 0x00181a5cd5898e73UL) /* 2719 */
+P(10, 0x1dbca579db0a3999UL, 0x001803c0961773aaUL) /* 2729 */
+P( 2, 0x002ffe800bffa003UL, 0x0017ff4005ffd001UL) /* 2731 */
+P(10, 0x478ab1a3e936139dUL, 0x0017e8d670433edbUL) /* 2741 */
+P( 8, 0x66e722bc4c5cc095UL, 0x0017d7066cf4bb5dUL) /* 2749 */
+P( 4, 0x7a8f63c717278541UL, 0x0017ce285b806b1fUL) /* 2753 */
+P(14, 0xdf6eee24d292bc2fUL, 0x0017af52cdf27e02UL) /* 2767 */
+P(10, 0x9fc20d17237dd569UL, 0x0017997d47d01039UL) /* 2777 */
+P(12, 0xcdf9932356bda2edUL, 0x00177f7ec2c6d0baUL) /* 2789 */
+P( 2, 0x97b5e332e80f68d7UL, 0x00177b2f3cd00756UL) /* 2791 */
+P( 6, 0x46eee26fd875e2e5UL, 0x00176e4a22f692a0UL) /* 2797 */
+P( 4, 0x3548a8e65157a611UL, 0x001765b94271e11bUL) /* 2801 */
+P( 2, 0xc288d03be9b71e3bUL, 0x001761732b044ae4UL) /* 2803 */
+P(16, 0x8151186db38937abUL, 0x00173f7a5300a2bcUL) /* 2819 */
+P(14, 0x7800b910895a45f1UL, 0x001722112b48be1fUL) /* 2833 */
+P( 4, 0xaee0b024182eec3dUL, 0x001719b7a16eb843UL) /* 2837 */
+P( 6, 0x96323eda173b5713UL, 0x00170d3c99cc5052UL) /* 2843 */
+P( 8, 0x0ed0dbd03ae77c8bUL, 0x0016fcad7aed3bb6UL) /* 2851 */
+P( 6, 0xf73800b7828dc119UL, 0x0016f051b8231ffdUL) /* 2857 */
+P( 4, 0x1b61715ec22b7ca5UL, 0x0016e81beae20643UL) /* 2861 */
+P(18, 0xa8533a991ead64bfUL, 0x0016c3721584c1d8UL) /* 2879 */
+P( 8, 0x7f6c7290e46c2e77UL, 0x0016b34c2ba09663UL) /* 2887 */
+P(10, 0x6325e8d907b01db1UL, 0x00169f3ce292ddcdUL) /* 2897 */
+P( 6, 0x28909f70152a1067UL, 0x00169344b2220a0dUL) /* 2903 */
+P( 6, 0xea7077af0997a0f5UL, 0x001687592593c1b1UL) /* 2909 */
+P( 8, 0x7e605cad10c32e6dUL, 0x00167787f1418ec9UL) /* 2917 */
+P(10, 0x471b33570635b38fUL, 0x001663e190395ff2UL) /* 2927 */
+P(12, 0xab559fa997a61bb3UL, 0x00164c7a4b6eb5b3UL) /* 2939 */
+P(14, 0xad4bdae562bddab9UL, 0x0016316a061182fdUL) /* 2953 */
+P( 4, 0x055e1b2f2ed62f45UL, 0x001629ba914584e4UL) /* 2957 */
+P( 6, 0x03cd328b1a2dca9bUL, 0x00161e3d57de21b2UL) /* 2963 */
+P( 6, 0xd28f4e08733218a9UL, 0x001612cc01b977f0UL) /* 2969 */
+P( 2, 0xb6800b077f186293UL, 0x00160efe30c525ffUL) /* 2971 */
+P(28, 0x6fbd138c3fd9c207UL, 0x0015da45249ec5deUL) /* 2999 */
+P( 2, 0xb117ccd12ae88a89UL, 0x0015d68ab4acff92UL) /* 3001 */
+P(10, 0x2f1a1a044046bcebUL, 0x0015c3f989d1eb15UL) /* 3011 */
+P( 8, 0x548aba0b060541e3UL, 0x0015b535ad11b8f0UL) /* 3019 */
+P( 4, 0xcf4e808cea111b2fUL, 0x0015addb3f424ec1UL) /* 3023 */
+P(14, 0xdbec1b4fa855a475UL, 0x00159445cb91be6bUL) /* 3037 */
+P( 4, 0xe3f794eb600d7821UL, 0x00158d0199771e63UL) /* 3041 */
+P( 8, 0x34fae0d9a11f7c59UL, 0x00157e87d9b69e04UL) /* 3049 */
+P(12, 0xf006b0ccbbac085dUL, 0x001568f58bc01ac3UL) /* 3061 */
+P( 6, 0x3f45076dc3114733UL, 0x00155e3c993fda9bUL) /* 3067 */
+P(12, 0xeef49bfa58a1a1b7UL, 0x001548eacc5e1e6eUL) /* 3079 */
+P( 4, 0x12c4218bea691fa3UL, 0x001541d8f91ba6a7UL) /* 3083 */
+P( 6, 0xbc7504e3bd5e64f1UL, 0x00153747060cc340UL) /* 3089 */
+P(20, 0x4ee21c292bb92fadUL, 0x001514569f93f7c4UL) /* 3109 */
+P(10, 0x34338b7327a4bacfUL, 0x00150309705d3d79UL) /* 3119 */
+P( 2, 0x3fe5c0833d6fccd1UL, 0x0014ff97020cf5bfUL) /* 3121 */
+P(16, 0xb1e70743535203c1UL, 0x0014e42c114cf47eUL) /* 3137 */
+P(26, 0xefbb5dcdfb4e43d3UL, 0x0014b835bdcb6447UL) /* 3163 */
+P( 4, 0xca68467ca5394f9fUL, 0x0014b182b53a9ab7UL) /* 3167 */
+P( 2, 0x8c51c081408b97a1UL, 0x0014ae2ad094a3d3UL) /* 3169 */
+P(12, 0x3275a899dfa5dd65UL, 0x00149a320ea59f96UL) /* 3181 */
+P( 6, 0x9e674cb62e1b78bbUL, 0x001490441de1a2fbUL) /* 3187 */
+P( 4, 0xa37ff5bb2a998d47UL, 0x001489aacce57200UL) /* 3191 */
+P(12, 0x792a999db131a22bUL, 0x001475f82ad6ff99UL) /* 3203 */
+P( 6, 0x1b48841bc30d29b9UL, 0x00146c2cfe53204fUL) /* 3209 */
+P( 8, 0xf06721d2011d3471UL, 0x00145f2ca490d4a1UL) /* 3217 */
+P( 4, 0x93fd2386dff85ebdUL, 0x001458b2aae0ec87UL) /* 3221 */
+P( 8, 0x4ce72f54c07ed9b5UL, 0x00144bcb0a3a3150UL) /* 3229 */
+P(22, 0xd6d0fd3e71dd827bUL, 0x001428a1e65441d4UL) /* 3251 */
+P( 2, 0x856405fb1eed819dUL, 0x00142575a6c210d7UL) /* 3253 */
+P( 4, 0x8ea8aceb7c443989UL, 0x00141f2025ba5c46UL) /* 3257 */
+P( 2, 0x34a13026f62e5873UL, 0x00141bf6e35420fdUL) /* 3259 */
+P(12, 0x1eea0208ec0af4f7UL, 0x001409141d1d313aUL) /* 3271 */
+P(28, 0x63679853cea598cbUL, 0x0013dd8bc19c3513UL) /* 3299 */
+P( 2, 0xc30b3ebd61f2d0edUL, 0x0013da76f714dc8fUL) /* 3301 */
+P( 6, 0x7eb9037bc7f43bc3UL, 0x0013d13e50f8f49eUL) /* 3307 */
+P( 6, 0xa583e6f6ce016411UL, 0x0013c80e37ca3819UL) /* 3313 */
+P( 6, 0xf1938d895f1a74c7UL, 0x0013bee69fa99ccfUL) /* 3319 */
+P( 4, 0x80cf1491c1e81e33UL, 0x0013b8d0ede55835UL) /* 3323 */
+P( 6, 0x3c0f12886ba8f301UL, 0x0013afb7680bb054UL) /* 3329 */
+P( 2, 0x0e4b786e0dfcc5abUL, 0x0013acb0c3841c96UL) /* 3331 */
+P(12, 0x672684c93f2d41efUL, 0x00139a9c5f434fdeUL) /* 3343 */
+P( 4, 0xe00757badb35c51bUL, 0x0013949cf33a0d9dUL) /* 3347 */
+P(12, 0xd6d84afe66472edfUL, 0x001382b4a00c31b0UL) /* 3359 */
+P( 2, 0xfbbc0eedcbbfb6e1UL, 0x00137fbbc0eedcbbUL) /* 3361 */
+P(10, 0x250f43aa08a84983UL, 0x001370ecf047b069UL) /* 3371 */
+P( 2, 0x04400e927b1acaa5UL, 0x00136df9790e3155UL) /* 3373 */
+P(16, 0x56572be34b9d3215UL, 0x0013567dd8defd5bUL) /* 3389 */
+P( 2, 0x87964ef7781c62bfUL, 0x0013539261fdbc34UL) /* 3391 */
+P(16, 0x29ed84051c06e9afUL, 0x00133c564292d28aUL) /* 3407 */
+P( 6, 0xb00acd11ed3f87fdUL, 0x001333ae178d6388UL) /* 3413 */
+P(20, 0x06307881744152d9UL, 0x0013170ad00d1fd7UL) /* 3433 */
+P(16, 0x7a786459f5c1ccc9UL, 0x0013005f01db0947UL) /* 3449 */
+P( 8, 0x1308125d74563281UL, 0x0012f51d40342210UL) /* 3457 */
+P( 4, 0x395310a480b3e34dUL, 0x0012ef815e4ed950UL) /* 3461 */
+P( 2, 0x35985baa8b202837UL, 0x0012ecb4abccd827UL) /* 3463 */
+P( 4, 0x96304a6e052b3223UL, 0x0012e71dc1d3d820UL) /* 3467 */
+P( 2, 0xbd8265fc9af8fd45UL, 0x0012e45389a16495UL) /* 3469 */
+P(22, 0x1b6d0b383ec58e0bUL, 0x0012c5d9226476ccUL) /* 3491 */
+P( 8, 0xc21a7c3b68b28503UL, 0x0012badc391156fdUL) /* 3499 */
+P(12, 0x236fa180fbfd6007UL, 0x0012aa78e412f522UL) /* 3511 */
+P( 6, 0xc42accd440ed9595UL, 0x0012a251f5f47fd1UL) /* 3517 */
+P(10, 0x7acf7128236ba3f7UL, 0x001294cb85c53534UL) /* 3527 */
+P( 2, 0xf909367a987b9c79UL, 0x0012921963beb65eUL) /* 3529 */
+P( 4, 0xb64efb252bfba705UL, 0x00128cb777c69ca8UL) /* 3533 */
+P( 6, 0x980d4f5a7e4cd25bUL, 0x001284aa6cf07294UL) /* 3539 */
+P( 2, 0xe1ecc4ef27b0c37dUL, 0x001281fcf6ac7f87UL) /* 3541 */
+P( 6, 0x9111aebb81d72653UL, 0x001279f937367db9UL) /* 3547 */
+P(10, 0x8951f985cb2c67edUL, 0x00126cad0488be94UL) /* 3557 */
+P( 2, 0xc439d4fc54e0b5d7UL, 0x00126a06794646a2UL) /* 3559 */
+P(12, 0xe857bf31896d533bUL, 0x00125a2f2bcd3e95UL) /* 3571 */
+P(10, 0xb614bb4cb5023755UL, 0x00124d108389e6b1UL) /* 3581 */
+P( 2, 0x938a89e5473bf1ffUL, 0x00124a73083771acUL) /* 3583 */
+P(10, 0xeac481aca34de039UL, 0x00123d6acda0620aUL) /* 3593 */
+P(14, 0x14b961badf4809a7UL, 0x00122b4b2917eafdUL) /* 3607 */
+P( 6, 0x76784fecba352435UL, 0x00122391bfce1e2fUL) /* 3613 */
+P( 4, 0xefa689bb58aef5e1UL, 0x00121e6f1ea579f2UL) /* 3617 */
+P( 6, 0xb2b2c4db9c3a8197UL, 0x001216c09e471568UL) /* 3623 */
+P( 8, 0x2503bc992279f8cfUL, 0x00120c8cb9d93909UL) /* 3631 */
+P( 6, 0xd2ab9aec5ca1541dUL, 0x001204ed58e64ef9UL) /* 3637 */
+P( 6, 0x3e78ba1460f99af3UL, 0x0011fd546578f00cUL) /* 3643 */
+P(16, 0x0a01426572cfcb63UL, 0x0011e9310b8b4c9cUL) /* 3659 */
+P(12, 0xbea857968f3cbd67UL, 0x0011da3405db9911UL) /* 3671 */
+P( 2, 0x78db213eefe659e9UL, 0x0011d7b6f4eb055dUL) /* 3673 */
+P( 4, 0x963e8541a74d35f5UL, 0x0011d2bee748c145UL) /* 3677 */
+P(14, 0x9e22d152776f2e43UL, 0x0011c1706ddce7a7UL) /* 3691 */
+P( 6, 0x05d10d39d1e1f291UL, 0x0011ba0fed2a4f14UL) /* 3697 */
+P( 4, 0x374468dccaced1ddUL, 0x0011b528538ed64aUL) /* 3701 */
+P( 8, 0x8d145c7d110c5ad5UL, 0x0011ab61404242acUL) /* 3709 */
+P(10, 0x3251a39f5acb5737UL, 0x00119f378ce81d2fUL) /* 3719 */
+P( 8, 0xa66e50171443506fUL, 0x001195889ece79daUL) /* 3727 */
+P( 6, 0x124f69ad91dd4cbdUL, 0x00118e4c65387077UL) /* 3733 */
+P( 6, 0xec24f8f2a61a2793UL, 0x001187161d70e725UL) /* 3739 */
+P(22, 0xb472148e656b7a51UL, 0x00116cd6d1c85239UL) /* 3761 */
+P( 6, 0x0adf9570e1142f07UL, 0x001165bbe7ce86b1UL) /* 3767 */
+P( 2, 0x89bf33b065119789UL, 0x0011635ee344ce36UL) /* 3769 */
+P(10, 0x8f0149803cb291ebUL, 0x0011579767b6d679UL) /* 3779 */
+P(14, 0x8334b63afd190a31UL, 0x00114734711e2b54UL) /* 3793 */
+P( 4, 0x920908d50d6aba7dUL, 0x0011428b90147f05UL) /* 3797 */
+P( 6, 0x57d8b018c5a33d53UL, 0x00113b92f3021636UL) /* 3803 */
+P(18, 0xea1773092dc27ee5UL, 0x001126cabc886884UL) /* 3821 */
+P( 2, 0xcae5f38b7bf2e00fUL, 0x0011247eb1b85976UL) /* 3823 */
+P(10, 0x2bd02df34f695349UL, 0x0011190bb01efd65UL) /* 3833 */
+P(14, 0xddfecd5be62e2eb7UL, 0x0011091de0fd679cUL) /* 3847 */
+P( 4, 0xdbf849ebec96c4a3UL, 0x001104963c7e4e0bUL) /* 3851 */
+P( 2, 0xda31d4d0187357c5UL, 0x00110253516420b0UL) /* 3853 */
+P(10, 0xe34e21cc2d5418a7UL, 0x0010f70db7c41797UL) /* 3863 */
+P(14, 0x68ca5137a9e574adUL, 0x0010e75ee2bf9ecdUL) /* 3877 */
+P( 4, 0x3eaa0d0f804bfd19UL, 0x0010e2e91c6e0676UL) /* 3881 */
+P( 8, 0x554fb753cc20e9d1UL, 0x0010da049b9d428dUL) /* 3889 */
+P(18, 0x797afcca1300756bUL, 0x0010c6248fe3b1a2UL) /* 3907 */
+P( 4, 0x8b8d950b52eeea77UL, 0x0010c1c03ed690ebUL) /* 3911 */
+P( 6, 0xfb6cd166acabc185UL, 0x0010bb2e1379e3a2UL) /* 3917 */
+P( 2, 0x4eb6c5ed9437a7afUL, 0x0010b8fe7f61228eUL) /* 3919 */
+P( 4, 0xd1eddbd91b790cdbUL, 0x0010b4a10d60a4f7UL) /* 3923 */
+P( 6, 0x93d714ea4d8948e9UL, 0x0010ae192681ec0fUL) /* 3929 */
+P( 2, 0x3ca13ed8145188d3UL, 0x0010abecfbe5b0aeUL) /* 3931 */
+P(12, 0x829086016da89c57UL, 0x00109eefd568b96dUL) /* 3943 */
+P( 4, 0xd7da1f432124a543UL, 0x00109a9ff178b40cUL) /* 3947 */
+P(20, 0x7ead5581632fb07fUL, 0x00108531e22f9ff9UL) /* 3967 */
+P(22, 0x35443837f63ec3bdUL, 0x00106ddec1af4417UL) /* 3989 */
+
+#undef FIRST_OMITTED_PRIME
+#define FIRST_OMITTED_PRIME 4001
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.6 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
# You should have received a copy of the GNU Lesser General Public License
# along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
VPATH = @srcdir@
+am__make_dryrun = \
+ { \
+ am__dry=no; \
+ case $$MAKEFLAGS in \
+ *\\[\ \ ]*) \
+ echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \
+ | grep '^AM OK$$' >/dev/null || am__dry=yes;; \
+ *) \
+ for am__flg in $$MAKEFLAGS; do \
+ case $$am__flg in \
+ *=*|--*) ;; \
+ *n*) am__dry=yes; break;; \
+ esac; \
+ done;; \
+ esac; \
+ test $$am__dry = yes; \
+ }
pkgdatadir = $(datadir)/@PACKAGE@
pkgincludedir = $(includedir)/@PACKAGE@
pkglibdir = $(libdir)/@PACKAGE@
$(srcdir)/version.texi mdate-sh texinfo.tex
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
- $(top_srcdir)/configure.in
+ $(top_srcdir)/configure.ac
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
$(ACLOCAL_M4)
mkinstalldirs = $(install_sh) -d
MAKEINFOHTML = $(MAKEINFO) --html
AM_MAKEINFOHTMLFLAGS = $(AM_MAKEINFOFLAGS)
DVIPS = dvips
+am__can_run_installinfo = \
+ case $$AM_UPDATE_INFO_DIR in \
+ n|no|NO) false;; \
+ *) (install-info --version) >/dev/null 2>&1;; \
+ esac
am__installdirs = "$(DESTDIR)$(infodir)"
am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
am__vpath_adj = case $$p in \
am__base_list = \
sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
ABI = @ABI@
ACLOCAL = @ACLOCAL@
SPEED_CYCLECOUNTER_OBJ = @SPEED_CYCLECOUNTER_OBJ@
STRIP = @STRIP@
TAL_OBJECT = @TAL_OBJECT@
+TUNE_LIBS = @TUNE_LIBS@
TUNE_SQR_OBJ = @TUNE_SQR_OBJ@
-U = @U@
U_FOR_BUILD = @U_FOR_BUILD@
VERSION = @VERSION@
WITH_READLINE_01 = @WITH_READLINE_01@
mkdir_p = @mkdir_p@
mpn_objects = @mpn_objects@
mpn_objs_in_libgmp = @mpn_objs_in_libgmp@
-mpn_objs_in_libmp = @mpn_objs_in_libmp@
oldincludedir = @oldincludedir@
pdfdir = @pdfdir@
prefix = @prefix@
uninstall-info-am:
@$(PRE_UNINSTALL)
- @if test -d '$(DESTDIR)$(infodir)' && \
- (install-info --version && \
- install-info --version 2>&1 | sed 1q | grep -i -v debian) >/dev/null 2>&1; then \
+ @if test -d '$(DESTDIR)$(infodir)' && $(am__can_run_installinfo); then \
list='$(INFO_DEPS)'; \
for file in $$list; do \
relfile=`echo "$$file" | sed 's|^.*/||'`; \
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
install-dvi-am: $(DVIS)
@$(NORMAL_INSTALL)
- test -z "$(dvidir)" || $(MKDIR_P) "$(DESTDIR)$(dvidir)"
@list='$(DVIS)'; test -n "$(dvidir)" || list=; \
+ if test -n "$$list"; then \
+ echo " $(MKDIR_P) '$(DESTDIR)$(dvidir)'"; \
+ $(MKDIR_P) "$(DESTDIR)$(dvidir)" || exit 1; \
+ fi; \
for p in $$list; do \
if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
echo "$$d$$p"; \
install-html-am: $(HTMLS)
@$(NORMAL_INSTALL)
- test -z "$(htmldir)" || $(MKDIR_P) "$(DESTDIR)$(htmldir)"
@list='$(HTMLS)'; list2=; test -n "$(htmldir)" || list=; \
+ if test -n "$$list"; then \
+ echo " $(MKDIR_P) '$(DESTDIR)$(htmldir)'"; \
+ $(MKDIR_P) "$(DESTDIR)$(htmldir)" || exit 1; \
+ fi; \
for p in $$list; do \
if test -f "$$p" || test -d "$$p"; then d=; else d="$(srcdir)/"; fi; \
$(am__strip_dir) \
- if test -d "$$d$$p"; then \
+ d2=$$d$$p; \
+ if test -d "$$d2"; then \
echo " $(MKDIR_P) '$(DESTDIR)$(htmldir)/$$f'"; \
$(MKDIR_P) "$(DESTDIR)$(htmldir)/$$f" || exit 1; \
- echo " $(INSTALL_DATA) '$$d$$p'/* '$(DESTDIR)$(htmldir)/$$f'"; \
- $(INSTALL_DATA) "$$d$$p"/* "$(DESTDIR)$(htmldir)/$$f" || exit $$?; \
+ echo " $(INSTALL_DATA) '$$d2'/* '$(DESTDIR)$(htmldir)/$$f'"; \
+ $(INSTALL_DATA) "$$d2"/* "$(DESTDIR)$(htmldir)/$$f" || exit $$?; \
else \
- list2="$$list2 $$d$$p"; \
+ list2="$$list2 $$d2"; \
fi; \
done; \
test -z "$$list2" || { echo "$$list2" | $(am__base_list) | \
install-info-am: $(INFO_DEPS)
@$(NORMAL_INSTALL)
- test -z "$(infodir)" || $(MKDIR_P) "$(DESTDIR)$(infodir)"
@srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; \
list='$(INFO_DEPS)'; test -n "$(infodir)" || list=; \
+ if test -n "$$list"; then \
+ echo " $(MKDIR_P) '$(DESTDIR)$(infodir)'"; \
+ $(MKDIR_P) "$(DESTDIR)$(infodir)" || exit 1; \
+ fi; \
for file in $$list; do \
case $$file in \
$(srcdir)/*) file=`echo "$$file" | sed "s|^$$srcdirstrip/||"`;; \
echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(infodir)'"; \
$(INSTALL_DATA) $$files "$(DESTDIR)$(infodir)" || exit $$?; done
@$(POST_INSTALL)
- @if (install-info --version && \
- install-info --version 2>&1 | sed 1q | grep -i -v debian) >/dev/null 2>&1; then \
+ @if $(am__can_run_installinfo); then \
list='$(INFO_DEPS)'; test -n "$(infodir)" || list=; \
for file in $$list; do \
relfile=`echo "$$file" | sed 's|^.*/||'`; \
install-pdf-am: $(PDFS)
@$(NORMAL_INSTALL)
- test -z "$(pdfdir)" || $(MKDIR_P) "$(DESTDIR)$(pdfdir)"
@list='$(PDFS)'; test -n "$(pdfdir)" || list=; \
+ if test -n "$$list"; then \
+ echo " $(MKDIR_P) '$(DESTDIR)$(pdfdir)'"; \
+ $(MKDIR_P) "$(DESTDIR)$(pdfdir)" || exit 1; \
+ fi; \
for p in $$list; do \
if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
echo "$$d$$p"; \
install-ps-am: $(PSS)
@$(NORMAL_INSTALL)
- test -z "$(psdir)" || $(MKDIR_P) "$(DESTDIR)$(psdir)"
@list='$(PSS)'; test -n "$(psdir)" || list=; \
+ if test -n "$$list"; then \
+ echo " $(MKDIR_P) '$(DESTDIR)$(psdir)'"; \
+ $(MKDIR_P) "$(DESTDIR)$(psdir)" || exit 1; \
+ fi; \
for p in $$list; do \
if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
echo "$$d$$p"; \
iii) If HAVE_NATIVE_func is going to be used, then add a #undef to
the AH_VERBATIM([HAVE_NATIVE] block in configure.ac.
- iv) Add file.c to nodist_libdummy_la_SOURCES in mpn/Makefile.am (in
- order to get an ansi2knr rule). If the file is only in
- assembler then this step is unnecessary, but do it anyway so as
- not to forget if later a .c version is added.
-
- v) If the function can be provided by a multi-function file, then
- add to the "case" statement in configure.in which lists each
- multi-function filename and what function files it can provide.
+ iv) If the function can be provided by a multi-function file, then
+ add to the "case" statement in configure.ac which lists each
+ multi-function filename and what function files it can provide.
** Adding a test program
ltmain.sh comes from libtool. Remove it and run "libtoolize --copy",
or just copy the file by hand.
-ansi2knr.c, ansi2knr.1, install-sh and doc/mdate-sh come from automake
-and can be updated by copying or by removing and running "automake
---add-missing --copy".
-
texinfo.tex can be updated from ftp.gnu.org. Check it still works
with "make gmp.dvi", "make gmp.ps" and "make gmp.pdf".
--disable-shared will make builds go much faster, though of course
shared or shared+static should be tested too.
---enable-mpbsd grabs various bits of mpz, which might need to be
-adjusted if things in those routines are changed. Building mpbsd all
-the time doesn't cost much.
-
--prefix to a dummy directory followed by "make install" will show
what's installed.
used in such cases, for the benefit of K&R compilers with int!=long
and where the difference matters in function calls.
-** K&R support
-
-Function definitions must be in the GNU stylized form to work. See
-the ansi2knr.1 man page (included in the GMP sources).
-
-__GMP_PROTO is used for function prototypes, other ANSI / K&R
-differences are conditionalized in various places.
-
-Proper testing of the K&R support requires a compiler which gives an
-error for ANSI-isms. Configuring with --host=none is a good idea, to
-test all the generic C code.
-
-When using an ANSI compiler, the ansi2knr setups can be partially
-tested with
-
- ./configure am_cv_prog_cc_stdc=no ac_cv_prog_cc_stdc=no
-
-This will test the use of $U and the like in the makefiles, but not
-much else.
-
-Forcing the cache variables can be used with a compiler like HP C
-which is K&R by default but to which configure normally adds ANSI mode
-flags. This then should be a good full K&R test.
-
* Other Notes
** Compatibility
will get this in the mangled name because C++ "sees through" the
typedef mpz_t to the underlying struct.
- Incidentally, this probably means for C++ that our mp.h is not
- compatible with an original BSD mp.h, since we use struct
- __mpz_struct for MINT in ours. Maybe we could change to whatever
- the original did, but it seems unlikely anyone would be using C++
- with mp.h.
-
__gmpn - note that glibc defines some __mpn symbols, old versions of
some mpn routines, which it uses for floating point printfs.
../../gmp/doc/gmp.texi.
This manual describes how to install and use the GNU multiple precision
-arithmetic library, version 5.0.5.
+arithmetic library, version 5.1.3.
Copyright 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
-2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
-Free Software Foundation, Inc.
+2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012,
+2013 Free Software Foundation, Inc.
Permission is granted to copy, distribute and/or modify this
document under the terms of the GNU Free Documentation License, Version
\1f
Indirect:
-gmp.info-1: 991
-gmp.info-2: 299801
+gmp.info-1: 997
+gmp.info-2: 300733
\1f
Tag Table:
(Indirect)
-Node: Top\7f991
-Node: Copying\7f3233
-Node: Introduction to GMP\7f5084
-Node: Installing GMP\7f7795
-Node: Build Options\7f8527
-Node: ABI and ISA\7f24620
-Node: Notes for Package Builds\7f34306
-Node: Notes for Particular Systems\7f37393
-Node: Known Build Problems\7f43950
-Node: Performance optimization\7f47484
-Node: GMP Basics\7f48613
-Node: Headers and Libraries\7f49261
-Node: Nomenclature and Types\7f50685
-Node: Function Classes\7f52681
-Node: Variable Conventions\7f54374
-Node: Parameter Conventions\7f55983
-Node: Memory Management\7f58039
-Node: Reentrancy\7f59167
-Node: Useful Macros and Constants\7f61040
-Node: Compatibility with older versions\7f62038
-Node: Demonstration Programs\7f62999
-Node: Efficiency\7f64864
-Node: Debugging\7f72488
-Node: Profiling\7f79386
-Node: Autoconf\7f83377
-Node: Emacs\7f85156
-Node: Reporting Bugs\7f85762
-Node: Integer Functions\7f88305
-Node: Initializing Integers\7f89081
-Node: Assigning Integers\7f91228
-Node: Simultaneous Integer Init & Assign\7f92815
-Node: Converting Integers\7f94440
-Node: Integer Arithmetic\7f97364
-Node: Integer Division\7f98950
-Node: Integer Exponentiation\7f105260
-Node: Integer Roots\7f106700
-Node: Number Theoretic Functions\7f108374
-Node: Integer Comparisons\7f115063
-Node: Integer Logic and Bit Fiddling\7f116441
-Node: I/O of Integers\7f118988
-Node: Integer Random Numbers\7f121957
-Node: Integer Import and Export\7f124568
-Node: Miscellaneous Integer Functions\7f128578
-Node: Integer Special Functions\7f130438
-Node: Rational Number Functions\7f133525
-Node: Initializing Rationals\7f134718
-Node: Rational Conversions\7f137179
-Node: Rational Arithmetic\7f138911
-Node: Comparing Rationals\7f140215
-Node: Applying Integer Functions\7f141582
-Node: I/O of Rationals\7f143065
-Node: Floating-point Functions\7f145107
-Node: Initializing Floats\7f147992
-Node: Assigning Floats\7f152079
-Node: Simultaneous Float Init & Assign\7f154646
-Node: Converting Floats\7f156174
-Node: Float Arithmetic\7f159424
-Node: Float Comparison\7f161437
-Node: I/O of Floats\7f163018
-Node: Miscellaneous Float Functions\7f165701
-Node: Low-level Functions\7f167643
-Node: Random Number Functions\7f192196
-Node: Random State Initialization\7f193264
-Node: Random State Seeding\7f196123
-Node: Random State Miscellaneous\7f197512
-Node: Formatted Output\7f198154
-Node: Formatted Output Strings\7f198399
-Node: Formatted Output Functions\7f203778
-Node: C++ Formatted Output\7f207853
-Node: Formatted Input\7f210535
-Node: Formatted Input Strings\7f210771
-Node: Formatted Input Functions\7f215423
-Node: C++ Formatted Input\7f218392
-Node: C++ Class Interface\7f220295
-Node: C++ Interface General\7f221296
-Node: C++ Interface Integers\7f224366
-Node: C++ Interface Rationals\7f227711
-Node: C++ Interface Floats\7f231298
-Node: C++ Interface Random Numbers\7f237000
-Node: C++ Interface Limitations\7f239404
-Node: BSD Compatible Functions\7f242224
-Node: Custom Allocation\7f246935
-Node: Language Bindings\7f251295
-Node: Algorithms\7f255250
-Node: Multiplication Algorithms\7f255950
-Node: Basecase Multiplication\7f257039
-Node: Karatsuba Multiplication\7f258947
-Node: Toom 3-Way Multiplication\7f262573
-Node: Toom 4-Way Multiplication\7f268992
-Node: Higher degree Toom'n'half\7f270371
-Node: FFT Multiplication\7f271656
-Node: Other Multiplication\7f276991
-Node: Unbalanced Multiplication\7f279465
-Node: Division Algorithms\7f280253
-Node: Single Limb Division\7f280632
-Node: Basecase Division\7f283522
-Node: Divide and Conquer Division\7f284725
-Node: Block-Wise Barrett Division\7f286794
-Node: Exact Division\7f287446
-Node: Exact Remainder\7f290611
-Node: Small Quotient Division\7f292838
-Node: Greatest Common Divisor Algorithms\7f294436
-Node: Binary GCD\7f294733
-Node: Lehmer's Algorithm\7f297582
-Node: Subquadratic GCD\7f299801
-Node: Extended GCD\7f302258
-Node: Jacobi Symbol\7f303570
-Node: Powering Algorithms\7f304486
-Node: Normal Powering Algorithm\7f304749
-Node: Modular Powering Algorithm\7f305277
-Node: Root Extraction Algorithms\7f306057
-Node: Square Root Algorithm\7f306372
-Node: Nth Root Algorithm\7f308513
-Node: Perfect Square Algorithm\7f309298
-Node: Perfect Power Algorithm\7f311385
-Node: Radix Conversion Algorithms\7f312006
-Node: Binary to Radix\7f312382
-Node: Radix to Binary\7f316312
-Node: Other Algorithms\7f318400
-Node: Prime Testing Algorithm\7f318752
-Node: Factorial Algorithm\7f319936
-Node: Binomial Coefficients Algorithm\7f321339
-Node: Fibonacci Numbers Algorithm\7f322233
-Node: Lucas Numbers Algorithm\7f324707
-Node: Random Number Algorithms\7f325428
-Node: Assembly Coding\7f327549
-Node: Assembly Code Organisation\7f328509
-Node: Assembly Basics\7f329476
-Node: Assembly Carry Propagation\7f330626
-Node: Assembly Cache Handling\7f332457
-Node: Assembly Functional Units\7f334618
-Node: Assembly Floating Point\7f336231
-Node: Assembly SIMD Instructions\7f340009
-Node: Assembly Software Pipelining\7f340991
-Node: Assembly Loop Unrolling\7f342053
-Node: Assembly Writing Guide\7f344268
-Node: Internals\7f347033
-Node: Integer Internals\7f347545
-Node: Rational Internals\7f349801
-Node: Float Internals\7f351039
-Node: Raw Output Internals\7f358453
-Node: C++ Interface Internals\7f359647
-Node: Contributors\7f362933
-Node: References\7f367884
-Node: GNU Free Documentation License\7f373639
-Node: Concept Index\7f398808
-Node: Function Index\7f445058
+Node: Top\7f997
+Node: Copying\7f3183
+Node: Introduction to GMP\7f5034
+Node: Installing GMP\7f7745
+Node: Build Options\7f8477
+Node: ABI and ISA\7f24261
+Node: Notes for Package Builds\7f33832
+Node: Notes for Particular Systems\7f36919
+Node: Known Build Problems\7f43516
+Node: Performance optimization\7f47050
+Node: GMP Basics\7f48179
+Node: Headers and Libraries\7f48827
+Node: Nomenclature and Types\7f50251
+Node: Function Classes\7f52247
+Node: Variable Conventions\7f53781
+Node: Parameter Conventions\7f55390
+Node: Memory Management\7f57446
+Node: Reentrancy\7f58574
+Node: Useful Macros and Constants\7f60447
+Node: Compatibility with older versions\7f61438
+Node: Demonstration Programs\7f62349
+Node: Efficiency\7f64214
+Node: Debugging\7f71838
+Node: Profiling\7f78863
+Node: Autoconf\7f82854
+Node: Emacs\7f84633
+Node: Reporting Bugs\7f85239
+Node: Integer Functions\7f87865
+Node: Initializing Integers\7f88641
+Node: Assigning Integers\7f91017
+Node: Simultaneous Integer Init & Assign\7f92628
+Node: Converting Integers\7f94275
+Node: Integer Arithmetic\7f97239
+Node: Integer Division\7f98975
+Node: Integer Exponentiation\7f105727
+Node: Integer Roots\7f107221
+Node: Number Theoretic Functions\7f108941
+Node: Integer Comparisons\7f116416
+Node: Integer Logic and Bit Fiddling\7f117854
+Node: I/O of Integers\7f120499
+Node: Integer Random Numbers\7f123490
+Node: Integer Import and Export\7f126107
+Node: Miscellaneous Integer Functions\7f130123
+Node: Integer Special Functions\7f132037
+Node: Rational Number Functions\7f135136
+Node: Initializing Rationals\7f136329
+Node: Rational Conversions\7f138808
+Node: Rational Arithmetic\7f140558
+Node: Comparing Rationals\7f141970
+Node: Applying Integer Functions\7f143378
+Node: I/O of Rationals\7f144897
+Node: Floating-point Functions\7f146955
+Node: Initializing Floats\7f149840
+Node: Assigning Floats\7f153933
+Node: Simultaneous Float Init & Assign\7f156524
+Node: Converting Floats\7f158074
+Node: Float Arithmetic\7f161364
+Node: Float Comparison\7f163517
+Node: I/O of Floats\7f165177
+Node: Miscellaneous Float Functions\7f167866
+Node: Low-level Functions\7f169868
+Node: Random Number Functions\7f194385
+Node: Random State Initialization\7f195453
+Node: Random State Seeding\7f198318
+Node: Random State Miscellaneous\7f199723
+Node: Formatted Output\7f200365
+Node: Formatted Output Strings\7f200610
+Node: Formatted Output Functions\7f205989
+Node: C++ Formatted Output\7f210064
+Node: Formatted Input\7f212746
+Node: Formatted Input Strings\7f212982
+Node: Formatted Input Functions\7f217634
+Node: C++ Formatted Input\7f220603
+Node: C++ Class Interface\7f222506
+Node: C++ Interface General\7f223500
+Node: C++ Interface Integers\7f226570
+Node: C++ Interface Rationals\7f230285
+Node: C++ Interface Floats\7f234302
+Node: C++ Interface Random Numbers\7f240306
+Node: C++ Interface Limitations\7f242708
+Node: Custom Allocation\7f245528
+Node: Language Bindings\7f249747
+Node: Algorithms\7f253702
+Node: Multiplication Algorithms\7f254402
+Node: Basecase Multiplication\7f255491
+Node: Karatsuba Multiplication\7f257399
+Node: Toom 3-Way Multiplication\7f261025
+Node: Toom 4-Way Multiplication\7f267444
+Node: Higher degree Toom'n'half\7f268823
+Node: FFT Multiplication\7f270108
+Node: Other Multiplication\7f275443
+Node: Unbalanced Multiplication\7f277917
+Node: Division Algorithms\7f278705
+Node: Single Limb Division\7f279084
+Node: Basecase Division\7f281974
+Node: Divide and Conquer Division\7f283177
+Node: Block-Wise Barrett Division\7f285246
+Node: Exact Division\7f285898
+Node: Exact Remainder\7f289063
+Node: Small Quotient Division\7f291313
+Node: Greatest Common Divisor Algorithms\7f292911
+Node: Binary GCD\7f293208
+Node: Lehmer's Algorithm\7f296057
+Node: Subquadratic GCD\7f298276
+Node: Extended GCD\7f300733
+Node: Jacobi Symbol\7f302045
+Node: Powering Algorithms\7f303060
+Node: Normal Powering Algorithm\7f303323
+Node: Modular Powering Algorithm\7f303851
+Node: Root Extraction Algorithms\7f304633
+Node: Square Root Algorithm\7f304948
+Node: Nth Root Algorithm\7f307089
+Node: Perfect Square Algorithm\7f307874
+Node: Perfect Power Algorithm\7f309961
+Node: Radix Conversion Algorithms\7f310582
+Node: Binary to Radix\7f310958
+Node: Radix to Binary\7f314888
+Node: Other Algorithms\7f316976
+Node: Prime Testing Algorithm\7f317328
+Node: Factorial Algorithm\7f318512
+Node: Binomial Coefficients Algorithm\7f320902
+Node: Fibonacci Numbers Algorithm\7f321796
+Node: Lucas Numbers Algorithm\7f324270
+Node: Random Number Algorithms\7f324991
+Node: Assembly Coding\7f327113
+Node: Assembly Code Organisation\7f328073
+Node: Assembly Basics\7f329040
+Node: Assembly Carry Propagation\7f330190
+Node: Assembly Cache Handling\7f332021
+Node: Assembly Functional Units\7f334182
+Node: Assembly Floating Point\7f335795
+Node: Assembly SIMD Instructions\7f339573
+Node: Assembly Software Pipelining\7f340555
+Node: Assembly Loop Unrolling\7f341617
+Node: Assembly Writing Guide\7f343832
+Node: Internals\7f346597
+Node: Integer Internals\7f347109
+Node: Rational Internals\7f349365
+Node: Float Internals\7f350603
+Node: Raw Output Internals\7f358017
+Node: C++ Interface Internals\7f359211
+Node: Contributors\7f362497
+Node: References\7f368247
+Node: GNU Free Documentation License\7f374002
+Node: Concept Index\7f399171
+Node: Function Index\7f445060
\1f
End Tag Table
../../gmp/doc/gmp.texi.
This manual describes how to install and use the GNU multiple precision
-arithmetic library, version 5.0.5.
+arithmetic library, version 5.1.3.
Copyright 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
-2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
-Free Software Foundation, Inc.
+2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012,
+2013 Free Software Foundation, Inc.
Permission is granted to copy, distribute and/or modify this
document under the terms of the GNU Free Documentation License, Version
******
This manual describes how to install and use the GNU multiple
-precision arithmetic library, version 5.0.5.
+precision arithmetic library, version 5.1.3.
Copyright 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
-2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
-Free Software Foundation, Inc.
+2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012,
+2013 Free Software Foundation, Inc.
Permission is granted to copy, distribute and/or modify this
document under the terms of the GNU Free Documentation License, Version
* Formatted Output:: `printf' style output.
* Formatted Input:: `scanf' style input.
* C++ Class Interface:: Class wrappers around GMP types.
-* BSD Compatible Functions:: All functions found in BSD MP.
* Custom Allocation:: How to customize the internal allocation.
* Language Bindings:: Using GMP from other languages.
* Algorithms:: What happens behind the scenes.
directory. For example
cd /my/build/dir
- /my/sources/gmp-5.0.5/configure
+ /my/sources/gmp-5.1.3/configure
Not all `make' programs have the necessary features (`VPATH') to
support this. In particular, SunOS and Slowaris `make' have bugs
The best idea is always to build GMP for the exact machine type
you intend to run it on.
- The following CPUs have specific support. See `configure.in' for
+ The following CPUs have specific support. See `configure.ac' for
details of what code and compiler options they select.
* Alpha: alpha, alphaev5, alphaev56, alphapca56, alphapca57,
Generic C Build
If some of the assembly code causes problems, or if otherwise
- desired, the generic C code can be selected with CPU `none'. For
- example,
-
- ./configure --host=none-unknown-freebsd3.5
+ desired, the generic C code can be selected with the configure
+ option `--disable-assembly'.
Note that this will run quite slowly, but it should be portable
and should at least make it possible to get something running if
Compiling is done with both `CPPFLAGS' and `CFLAGS', but
preprocessing uses just `CPPFLAGS'. This distinction is because
most preprocessors won't accept all the flags the compiler does.
- Preprocessing is done separately in some configure tests, and in
- the `ansi2knr' support for K&R compilers.
+ Preprocessing is done separately in some configure tests.
`CC_FOR_BUILD'
Some build-time programs are compiled and run to generate
to very large operands and can be disabled to save code size if
desired.
-Berkeley MP, `--enable-mpbsd'
- The Berkeley MP compatibility library (`libmp') and header file
- (`mp.h') are built and installed only if `--enable-mpbsd' is used.
- *Note BSD Compatible Functions::.
-
Assertion Checking, `--enable-assert'
This option enables some consistency checking within the library.
This can be of use while debugging, *note Debugging::.
PowerPC 64 (`powerpc64', `powerpc620', `powerpc630', `powerpc970', `power4', `power5')
- `ABI=aix64'
+ `ABI=mode64'
The AIX 64 ABI uses 64-bit limbs and pointers and is the
default on PowerPC 64 `*-*-aix*' systems. Applications must
be compiled with
gcc -maix64
xlc -q64
- `ABI=mode64'
- The `mode64' ABI uses 64-bit limbs and pointers, and is the
- default on 64-bit GNU/Linux, BSD, and Mac OS X/Darwin
- systems. Applications must be compiled with
+ On 64-bit GNU/Linux, BSD, and Mac OS X/Darwin systems, the
+ applications must be compiled with
gcc -m64
chip still in 32-bit mode and using 32-bit calling
conventions. This is the default for systems where the true
64-bit ABI is unavailable. No special compiler options are
- typically needed for applications.
+ typically needed for applications. This ABI is not available
+ under AIX.
`ABI=32'
This is the basic 32-bit PowerPC ABI, with a 32-bit limb. No
special compiler options are needed for applications.
- GMP's speed is greatest for `aix64' and `mode64'. In `ABI=32'
- only the 32-bit ISA is used and this doesn't make full use of a
- 64-bit chip. On a suitable system we could perhaps use more of
- the ISA, but there are no plans to do so.
+ GMP's speed is greatest for the `mode64' ABI; the `mode32' ABI is
+ 2nd best. In `ABI=32' only the 32-bit ISA is used and this
+ doesn't make full use of a 64-bit chip.
Sparc V9 (`sparc64', `sparcv9', `ultrasparc*')
choose the right one for the CPU that will be used. Currently GMP
has no assembly code support for using just the common instruction
subset. To get executables that run on both, the current
- suggestion is to use the generic C code (CPU `none'), possibly
- with appropriate compiler options (like `-mcpu=common' for `gcc').
- CPU `rs6000' (which is not a CPU but a family of workstations) is
- accepted by `config.sub', but is currently equivalent to `none'.
+ suggestion is to use the generic C code (`--disable-assembly'),
+ possibly with appropriate compiler options (like `-mcpu=common' for
+ `gcc'). CPU `rs6000' (which is not a CPU but a family of
+ workstations) is accepted by `config.sub', but is currently
+ equivalent to `--disable-assembly'.
Sparc CPU Types
`sparcv8' or `supersparc' on relevant systems will give a
`-mcmodel=embmedany' (which uses `g4' as a data segment pointer),
and for applications wanting to use those registers for special
purposes. In these cases the only suggestion currently is to
- build GMP with CPU `none' to avoid the assembly code.
+ build GMP with `--disable-assembly' to avoid the assembly code.
SunOS 4
`/usr/bin/m4' lacks various features needed to process `.asm'
`mpf_'. The associated type is `mpf_t'. There are about 60
functions in this class. (*note Floating-point Functions::)
- 4. Functions compatible with Berkeley MP, such as `itom', `madd', and
- `mult'. The associated type is `MINT'. (*note BSD Compatible
- Functions::)
-
- 5. Fast low-level functions that operate on natural numbers. These
+ 4. Fast low-level functions that operate on natural numbers. These
are used by the functions in the preceding groups, and you can
also call them directly from very time-critical user programs.
These functions' names begin with `mpn_'. The associated type is
array of `mp_limb_t'. There are about 30 (hard-to-use) functions
in this class. (*note Low-level Functions::)
- 6. Miscellaneous functions. Functions for setting up custom
+ 5. Miscellaneous functions. Functions for setting up custom
allocation and functions for generating random numbers. (*note
Custom Allocation::, and *note Random Number Functions::)
-- Global Constant: const char * const gmp_version
The GMP version number, as a null-terminated string, in the form
- "i.j.k". This release is "5.0.5". Note that the format "i.j" was
- used when k was zero was used before version 4.3.0.
+ "i.j.k". This release is "5.1.3". Note that the format "i.j" was
+ used, before version 4.3.0, when k was zero.
-- Macro: __GMP_CC
-- Macro: __GMP_CFLAGS
3.9 Compatibility with older versions
=====================================
-This version of GMP is upwardly binary compatible with all 4.x and 3.x
-versions, and upwardly compatible at the source level with all 2.x
-versions, with the following exceptions.
+This version of GMP is upwardly binary compatible with all 5.x, 4.x,
+and 3.x versions, and upwardly compatible at the source level with all
+2.x versions, with the following exceptions.
* `mpn_gcd' had its source arguments swapped as of GMP 3.0, for
consistency with other `mpn' functions.
* `mpf_get_prec' counted precision slightly differently in GMP 3.0
and 3.0.1, but in 3.1 reverted to the 2.x style.
+ * `mpn_bdivmod', documented as preliminary in GMP 4, has been
+ removed.
+
There are a number of compatibility issues between GMP 1 and GMP 2
that of course also apply when porting applications from GMP 1 to GMP
-4. Please see the GMP 2 manual for details.
-
- The Berkeley MP compatibility library (*note BSD Compatible
-Functions::) is source and binary compatible with the standard `libmp'.
+5. Please see the GMP 2 manual for details.
\1f
File: gmp.info, Node: Demonstration Programs, Next: Efficiency, Prev: Compatibility with older versions, Up: GMP Basics
path to the source directory.
cd /my/build/dir
- /my/source/dir/gmp-5.0.5/configure
+ /my/source/dir/gmp-5.1.3/configure
This works via `VPATH', and might require GNU `make'. Alternately
it might be possible to change the `.c.lo' rules appropriately.
benefit from `--enable-assert' since it adds checks on the
parameters of most such functions, many of which have subtle
restrictions on their usage. Note however that only the generic C
- code has checks, not the assembly code, so CPU `none' should be
- used for maximum checking.
+ code has checks, not the assembly code, so `--disable-assembly'
+ should be used for maximum checking.
Temporary Memory Checking
The build option `--enable-alloca=debug' arranges that each block
would be
./configure --disable-shared --enable-assert \
- --enable-alloca=debug --host=none CFLAGS=-g
+ --enable-alloca=debug --disable-assembly CFLAGS=-g
For C++, add `--enable-cxx CXXFLAGS=-g'.
A build of GMP with checking within GMP itself can be made. This
will run very very slowly. On GNU/Linux for example,
- ./configure --host=none-pc-linux-gnu CC=checkergcc
+ ./configure --disable-assembly CC=checkergcc
- `--host=none' must be used, since the GMP assembly code doesn't
- support the checking scheme. The GMP C++ features cannot be used,
- since current versions of checker (0.9.9.1) don't yet support the
- standard C++ library.
+ `--disable-assembly' must be used, since the GMP assembly code
+ doesn't support the checking scheme. The GMP C++ features cannot
+ be used, since current versions of checker (0.9.9.1) don't yet
+ support the standard C++ library.
Valgrind
- The valgrind program (`http://valgrind.org/') is a memory checker
- for x86s. It translates and emulates machine instructions to do
- strong checks for uninitialized data (at the level of individual
- bits), memory accesses through bad pointers, and memory leaks.
+ Valgrind (`http://valgrind.org/') is a memory checker for x86,
+ ARM, MIPS, PowerPC, and S/390. It translates and emulates machine
+ instructions to do strong checks for uninitialized data (at the
+ level of individual bits), memory accesses through bad pointers,
+ and memory leaks.
- Recent versions of Valgrind are getting support for MMX and
- SSE/SSE2 instructions, for past versions GMP will need to be
- configured not to use those, i.e. for an x86 without them (for
- instance plain `i486').
+ Valgrind does not always support every possible instruction, in
+ particular ones recently added to an ISA. Valgrind might
+ therefore be incompatible with a recent GMP or even a less recent
+ GMP which is compiled using a recent GCC.
GMP's assembly code sometimes promotes a read of the limbs to some
larger size, for efficiency. GMP will do this even at the start
- and end of a multilimb operand, using naturaly aligned operations
+ and end of a multilimb operand, using naturally aligned operations
on the larger type. This may lead to benign reads outside of
- allocated areas, triggering complants from Valgrind.
+ allocated areas, triggering complaints from Valgrind. Valgrind's
+ option `--partial-loads-ok=yes' should help.
Other Problems
Any suspected bug in GMP itself should be isolated to make sure
* Please do not send core dumps, executables or `strace's.
- * The configuration options you used when building GMP, if any.
+ * The `configure' options you used when building GMP, if any.
+
+ * The output from `configure', as printed to stdout, with any
+ options used.
* The name of the compiler and its version. For `gcc', get the
version with `gcc -v', otherwise perhaps `what `which cc`', or
necessary; reallocation is handled automatically by GMP when
needed.
- N is only the initial space, X will grow automatically in the
+ While N defines the initial space, X will grow automatically in the
normal way, if necessary, for subsequent values stored.
`mpz_init2' makes it possible to avoid such reallocations if a
maximum size is known in advance.
+ In preparation for an operation, GMP often allocates one limb more
+ than ultimately needed. To make sure GMP will not perform
+ reallocation for X, you need to add the number of bits in
+ `mp_limb_t' to N.
+
-- Function: void mpz_clear (mpz_t X)
Free the space occupied by X. Call this function for all `mpz_t'
variables when you are done with them.
These functions assign new values to already initialized integers
(*note Initializing Integers::).
- -- Function: void mpz_set (mpz_t ROP, mpz_t OP)
+ -- Function: void mpz_set (mpz_t ROP, const mpz_t OP)
-- Function: void mpz_set_ui (mpz_t ROP, unsigned long int OP)
-- Function: void mpz_set_si (mpz_t ROP, signed long int OP)
-- Function: void mpz_set_d (mpz_t ROP, double OP)
- -- Function: void mpz_set_q (mpz_t ROP, mpq_t OP)
- -- Function: void mpz_set_f (mpz_t ROP, mpf_t OP)
+ -- Function: void mpz_set_q (mpz_t ROP, const mpq_t OP)
+ -- Function: void mpz_set_f (mpz_t ROP, const mpf_t OP)
Set the value of ROP from OP.
`mpz_set_d', `mpz_set_q' and `mpz_set_f' truncate OP to make it an
integer.
- -- Function: int mpz_set_str (mpz_t ROP, char *STR, int BASE)
+ -- Function: int mpz_set_str (mpz_t ROP, const char *STR, int BASE)
Set the value of ROP from STR, a null-terminated C string in base
BASE. White space is allowed in the string, and is simply ignored.
ordinary integer functions. Don't use an initialize-and-set function
on a variable already initialized!
- -- Function: void mpz_init_set (mpz_t ROP, mpz_t OP)
+ -- Function: void mpz_init_set (mpz_t ROP, const mpz_t OP)
-- Function: void mpz_init_set_ui (mpz_t ROP, unsigned long int OP)
-- Function: void mpz_init_set_si (mpz_t ROP, signed long int OP)
-- Function: void mpz_init_set_d (mpz_t ROP, double OP)
Initialize ROP with limb space and set the initial numeric value
from OP.
- -- Function: int mpz_init_set_str (mpz_t ROP, char *STR, int BASE)
+ -- Function: int mpz_init_set_str (mpz_t ROP, const char *STR, int
+ BASE)
Initialize ROP and set its value like `mpz_set_str' (see its
documentation above for details).
standard C types. Functions for converting _to_ GMP integers are
described in *note Assigning Integers:: and *note I/O of Integers::.
- -- Function: unsigned long int mpz_get_ui (mpz_t OP)
+ -- Function: unsigned long int mpz_get_ui (const mpz_t OP)
Return the value of OP as an `unsigned long'.
If OP is too big to fit an `unsigned long' then just the least
significant bits that do fit are returned. The sign of OP is
ignored; only the absolute value is used.
- -- Function: signed long int mpz_get_si (mpz_t OP)
+ -- Function: signed long int mpz_get_si (const mpz_t OP)
If OP fits into a `signed long int' return the value of OP.
Otherwise return the least significant part of OP, with the same
sign as OP.
result is probably not very useful. To find out if the value will
fit, use the function `mpz_fits_slong_p'.
- -- Function: double mpz_get_d (mpz_t OP)
+ -- Function: double mpz_get_d (const mpz_t OP)
Convert OP to a `double', truncating if necessary (i.e. rounding
towards zero).
system dependent. An infinity is returned where available. A
hardware overflow trap may or may not occur.
- -- Function: double mpz_get_d_2exp (signed long int *EXP, mpz_t OP)
+ -- Function: double mpz_get_d_2exp (signed long int *EXP, const mpz_t
+ OP)
Convert OP to a `double', truncating if necessary (i.e. rounding
towards zero), and returning the exponent separately.
This is similar to the standard C `frexp' function (*note
Normalization Functions: (libc)Normalization Functions.).
- -- Function: char * mpz_get_str (char *STR, int BASE, mpz_t OP)
+ -- Function: char * mpz_get_str (char *STR, int BASE, const mpz_t OP)
Convert OP to a string of digits in base BASE. The base argument
may vary from 2 to 62 or from -2 to -36.
5.5 Arithmetic Functions
========================
- -- Function: void mpz_add (mpz_t ROP, mpz_t OP1, mpz_t OP2)
- -- Function: void mpz_add_ui (mpz_t ROP, mpz_t OP1, unsigned long int
- OP2)
+ -- Function: void mpz_add (mpz_t ROP, const mpz_t OP1, const mpz_t OP2)
+ -- Function: void mpz_add_ui (mpz_t ROP, const mpz_t OP1, unsigned
+ long int OP2)
Set ROP to OP1 + OP2.
- -- Function: void mpz_sub (mpz_t ROP, mpz_t OP1, mpz_t OP2)
- -- Function: void mpz_sub_ui (mpz_t ROP, mpz_t OP1, unsigned long int
- OP2)
- -- Function: void mpz_ui_sub (mpz_t ROP, unsigned long int OP1, mpz_t
- OP2)
+ -- Function: void mpz_sub (mpz_t ROP, const mpz_t OP1, const mpz_t OP2)
+ -- Function: void mpz_sub_ui (mpz_t ROP, const mpz_t OP1, unsigned
+ long int OP2)
+ -- Function: void mpz_ui_sub (mpz_t ROP, unsigned long int OP1, const
+ mpz_t OP2)
Set ROP to OP1 - OP2.
- -- Function: void mpz_mul (mpz_t ROP, mpz_t OP1, mpz_t OP2)
- -- Function: void mpz_mul_si (mpz_t ROP, mpz_t OP1, long int OP2)
- -- Function: void mpz_mul_ui (mpz_t ROP, mpz_t OP1, unsigned long int
- OP2)
+ -- Function: void mpz_mul (mpz_t ROP, const mpz_t OP1, const mpz_t OP2)
+ -- Function: void mpz_mul_si (mpz_t ROP, const mpz_t OP1, long int OP2)
+ -- Function: void mpz_mul_ui (mpz_t ROP, const mpz_t OP1, unsigned
+ long int OP2)
Set ROP to OP1 times OP2.
- -- Function: void mpz_addmul (mpz_t ROP, mpz_t OP1, mpz_t OP2)
- -- Function: void mpz_addmul_ui (mpz_t ROP, mpz_t OP1, unsigned long
- int OP2)
+ -- Function: void mpz_addmul (mpz_t ROP, const mpz_t OP1, const mpz_t
+ OP2)
+ -- Function: void mpz_addmul_ui (mpz_t ROP, const mpz_t OP1, unsigned
+ long int OP2)
Set ROP to ROP + OP1 times OP2.
- -- Function: void mpz_submul (mpz_t ROP, mpz_t OP1, mpz_t OP2)
- -- Function: void mpz_submul_ui (mpz_t ROP, mpz_t OP1, unsigned long
- int OP2)
+ -- Function: void mpz_submul (mpz_t ROP, const mpz_t OP1, const mpz_t
+ OP2)
+ -- Function: void mpz_submul_ui (mpz_t ROP, const mpz_t OP1, unsigned
+ long int OP2)
Set ROP to ROP - OP1 times OP2.
- -- Function: void mpz_mul_2exp (mpz_t ROP, mpz_t OP1, mp_bitcnt_t OP2)
+ -- Function: void mpz_mul_2exp (mpz_t ROP, const mpz_t OP1,
+ mp_bitcnt_t OP2)
Set ROP to OP1 times 2 raised to OP2. This operation can also be
defined as a left shift by OP2 bits.
- -- Function: void mpz_neg (mpz_t ROP, mpz_t OP)
+ -- Function: void mpz_neg (mpz_t ROP, const mpz_t OP)
Set ROP to -OP.
- -- Function: void mpz_abs (mpz_t ROP, mpz_t OP)
+ -- Function: void mpz_abs (mpz_t ROP, const mpz_t OP)
Set ROP to the absolute value of OP.
\1f
division by zero. This lets a program handle arithmetic exceptions in
these functions the same way as for normal C `int' arithmetic.
- -- Function: void mpz_cdiv_q (mpz_t Q, mpz_t N, mpz_t D)
- -- Function: void mpz_cdiv_r (mpz_t R, mpz_t N, mpz_t D)
- -- Function: void mpz_cdiv_qr (mpz_t Q, mpz_t R, mpz_t N, mpz_t D)
- -- Function: unsigned long int mpz_cdiv_q_ui (mpz_t Q, mpz_t N,
+ -- Function: void mpz_cdiv_q (mpz_t Q, const mpz_t N, const mpz_t D)
+ -- Function: void mpz_cdiv_r (mpz_t R, const mpz_t N, const mpz_t D)
+ -- Function: void mpz_cdiv_qr (mpz_t Q, mpz_t R, const mpz_t N, const
+ mpz_t D)
+ -- Function: unsigned long int mpz_cdiv_q_ui (mpz_t Q, const mpz_t N,
unsigned long int D)
- -- Function: unsigned long int mpz_cdiv_r_ui (mpz_t R, mpz_t N,
+ -- Function: unsigned long int mpz_cdiv_r_ui (mpz_t R, const mpz_t N,
unsigned long int D)
-- Function: unsigned long int mpz_cdiv_qr_ui (mpz_t Q, mpz_t R,
- mpz_t N, unsigned long int D)
- -- Function: unsigned long int mpz_cdiv_ui (mpz_t N,
+ const mpz_t N, unsigned long int D)
+ -- Function: unsigned long int mpz_cdiv_ui (const mpz_t N,
unsigned long int D)
- -- Function: void mpz_cdiv_q_2exp (mpz_t Q, mpz_t N, mp_bitcnt_t B)
- -- Function: void mpz_cdiv_r_2exp (mpz_t R, mpz_t N, mp_bitcnt_t B)
-
- -- Function: void mpz_fdiv_q (mpz_t Q, mpz_t N, mpz_t D)
- -- Function: void mpz_fdiv_r (mpz_t R, mpz_t N, mpz_t D)
- -- Function: void mpz_fdiv_qr (mpz_t Q, mpz_t R, mpz_t N, mpz_t D)
- -- Function: unsigned long int mpz_fdiv_q_ui (mpz_t Q, mpz_t N,
+ -- Function: void mpz_cdiv_q_2exp (mpz_t Q, const mpz_t N,
+ mp_bitcnt_t B)
+ -- Function: void mpz_cdiv_r_2exp (mpz_t R, const mpz_t N,
+ mp_bitcnt_t B)
+
+ -- Function: void mpz_fdiv_q (mpz_t Q, const mpz_t N, const mpz_t D)
+ -- Function: void mpz_fdiv_r (mpz_t R, const mpz_t N, const mpz_t D)
+ -- Function: void mpz_fdiv_qr (mpz_t Q, mpz_t R, const mpz_t N, const
+ mpz_t D)
+ -- Function: unsigned long int mpz_fdiv_q_ui (mpz_t Q, const mpz_t N,
unsigned long int D)
- -- Function: unsigned long int mpz_fdiv_r_ui (mpz_t R, mpz_t N,
+ -- Function: unsigned long int mpz_fdiv_r_ui (mpz_t R, const mpz_t N,
unsigned long int D)
-- Function: unsigned long int mpz_fdiv_qr_ui (mpz_t Q, mpz_t R,
- mpz_t N, unsigned long int D)
- -- Function: unsigned long int mpz_fdiv_ui (mpz_t N,
+ const mpz_t N, unsigned long int D)
+ -- Function: unsigned long int mpz_fdiv_ui (const mpz_t N,
unsigned long int D)
- -- Function: void mpz_fdiv_q_2exp (mpz_t Q, mpz_t N, mp_bitcnt_t B)
- -- Function: void mpz_fdiv_r_2exp (mpz_t R, mpz_t N, mp_bitcnt_t B)
-
- -- Function: void mpz_tdiv_q (mpz_t Q, mpz_t N, mpz_t D)
- -- Function: void mpz_tdiv_r (mpz_t R, mpz_t N, mpz_t D)
- -- Function: void mpz_tdiv_qr (mpz_t Q, mpz_t R, mpz_t N, mpz_t D)
- -- Function: unsigned long int mpz_tdiv_q_ui (mpz_t Q, mpz_t N,
+ -- Function: void mpz_fdiv_q_2exp (mpz_t Q, const mpz_t N,
+ mp_bitcnt_t B)
+ -- Function: void mpz_fdiv_r_2exp (mpz_t R, const mpz_t N,
+ mp_bitcnt_t B)
+
+ -- Function: void mpz_tdiv_q (mpz_t Q, const mpz_t N, const mpz_t D)
+ -- Function: void mpz_tdiv_r (mpz_t R, const mpz_t N, const mpz_t D)
+ -- Function: void mpz_tdiv_qr (mpz_t Q, mpz_t R, const mpz_t N, const
+ mpz_t D)
+ -- Function: unsigned long int mpz_tdiv_q_ui (mpz_t Q, const mpz_t N,
unsigned long int D)
- -- Function: unsigned long int mpz_tdiv_r_ui (mpz_t R, mpz_t N,
+ -- Function: unsigned long int mpz_tdiv_r_ui (mpz_t R, const mpz_t N,
unsigned long int D)
-- Function: unsigned long int mpz_tdiv_qr_ui (mpz_t Q, mpz_t R,
- mpz_t N, unsigned long int D)
- -- Function: unsigned long int mpz_tdiv_ui (mpz_t N,
+ const mpz_t N, unsigned long int D)
+ -- Function: unsigned long int mpz_tdiv_ui (const mpz_t N,
unsigned long int D)
- -- Function: void mpz_tdiv_q_2exp (mpz_t Q, mpz_t N, mp_bitcnt_t B)
- -- Function: void mpz_tdiv_r_2exp (mpz_t R, mpz_t N, mp_bitcnt_t B)
+ -- Function: void mpz_tdiv_q_2exp (mpz_t Q, const mpz_t N,
+ mp_bitcnt_t B)
+ -- Function: void mpz_tdiv_r_2exp (mpz_t R, const mpz_t N,
+ mp_bitcnt_t B)
Divide N by D, forming a quotient Q and/or remainder R. For the
`2exp' functions, D=2^B. The rounding is in three styles, each
the same as the bitwise logical functions do, whereas
`mpz_tdiv_q_2exp' effectively treats N as sign and magnitude.
- -- Function: void mpz_mod (mpz_t R, mpz_t N, mpz_t D)
- -- Function: unsigned long int mpz_mod_ui (mpz_t R, mpz_t N,
+ -- Function: void mpz_mod (mpz_t R, const mpz_t N, const mpz_t D)
+ -- Function: unsigned long int mpz_mod_ui (mpz_t R, const mpz_t N,
unsigned long int D)
Set R to N `mod' D. The sign of the divisor is ignored; the
result is always non-negative.
remainder as well as setting R. See `mpz_fdiv_ui' above if only
the return value is wanted.
- -- Function: void mpz_divexact (mpz_t Q, mpz_t N, mpz_t D)
- -- Function: void mpz_divexact_ui (mpz_t Q, mpz_t N, unsigned long D)
+ -- Function: void mpz_divexact (mpz_t Q, const mpz_t N, const mpz_t D)
+ -- Function: void mpz_divexact_ui (mpz_t Q, const mpz_t N, unsigned
+ long D)
Set Q to N/D. These functions produce correct results only when
it is known in advance that D divides N.
and are the best choice when exact division is known to occur, for
example reducing a rational to lowest terms.
- -- Function: int mpz_divisible_p (mpz_t N, mpz_t D)
- -- Function: int mpz_divisible_ui_p (mpz_t N, unsigned long int D)
- -- Function: int mpz_divisible_2exp_p (mpz_t N, mp_bitcnt_t B)
+ -- Function: int mpz_divisible_p (const mpz_t N, const mpz_t D)
+ -- Function: int mpz_divisible_ui_p (const mpz_t N, unsigned long int
+ D)
+ -- Function: int mpz_divisible_2exp_p (const mpz_t N, mp_bitcnt_t B)
Return non-zero if N is exactly divisible by D, or in the case of
`mpz_divisible_2exp_p' by 2^B.
following the rule it can be seen that only 0 is considered
divisible by 0.
- -- Function: int mpz_congruent_p (mpz_t N, mpz_t C, mpz_t D)
- -- Function: int mpz_congruent_ui_p (mpz_t N, unsigned long int C,
- unsigned long int D)
- -- Function: int mpz_congruent_2exp_p (mpz_t N, mpz_t C, mp_bitcnt_t B)
+ -- Function: int mpz_congruent_p (const mpz_t N, const mpz_t C, const
+ mpz_t D)
+ -- Function: int mpz_congruent_ui_p (const mpz_t N, unsigned long int
+ C, unsigned long int D)
+ -- Function: int mpz_congruent_2exp_p (const mpz_t N, const mpz_t C,
+ mp_bitcnt_t B)
Return non-zero if N is congruent to C modulo D, or in the case of
`mpz_congruent_2exp_p' modulo 2^B.
5.7 Exponentiation Functions
============================
- -- Function: void mpz_powm (mpz_t ROP, mpz_t BASE, mpz_t EXP, mpz_t
- MOD)
- -- Function: void mpz_powm_ui (mpz_t ROP, mpz_t BASE, unsigned long
- int EXP, mpz_t MOD)
+ -- Function: void mpz_powm (mpz_t ROP, const mpz_t BASE, const mpz_t
+ EXP, const mpz_t MOD)
+ -- Function: void mpz_powm_ui (mpz_t ROP, const mpz_t BASE, unsigned
+ long int EXP, const mpz_t MOD)
Set ROP to (BASE raised to EXP) modulo MOD.
Negative EXP is supported if an inverse BASE^-1 mod MOD exists
(see `mpz_invert' in *note Number Theoretic Functions::). If an
inverse doesn't exist then a divide by zero is raised.
- -- Function: void mpz_powm_sec (mpz_t ROP, mpz_t BASE, mpz_t EXP,
- mpz_t MOD)
+ -- Function: void mpz_powm_sec (mpz_t ROP, const mpz_t BASE, const
+ mpz_t EXP, const mpz_t MOD)
Set ROP to (BASE raised to EXP) modulo MOD.
It is required that EXP > 0 and that MOD is odd.
is intended for cryptographic purposes, where resilience to
side-channel attacks is desired.
- -- Function: void mpz_pow_ui (mpz_t ROP, mpz_t BASE, unsigned long int
- EXP)
+ -- Function: void mpz_pow_ui (mpz_t ROP, const mpz_t BASE, unsigned
+ long int EXP)
-- Function: void mpz_ui_pow_ui (mpz_t ROP, unsigned long int BASE,
unsigned long int EXP)
Set ROP to BASE raised to EXP. The case 0^0 yields 1.
5.8 Root Extraction Functions
=============================
- -- Function: int mpz_root (mpz_t ROP, mpz_t OP, unsigned long int N)
+ -- Function: int mpz_root (mpz_t ROP, const mpz_t OP, unsigned long
+ int N)
Set ROP to the truncated integer part of the Nth root of OP.
Return non-zero if the computation was exact, i.e., if OP is ROP
to the Nth power.
- -- Function: void mpz_rootrem (mpz_t ROOT, mpz_t REM, mpz_t U,
+ -- Function: void mpz_rootrem (mpz_t ROOT, mpz_t REM, const mpz_t U,
unsigned long int N)
Set ROOT to the truncated integer part of the Nth root of U. Set
REM to the remainder, U-ROOT**N.
- -- Function: void mpz_sqrt (mpz_t ROP, mpz_t OP)
+ -- Function: void mpz_sqrt (mpz_t ROP, const mpz_t OP)
Set ROP to the truncated integer part of the square root of OP.
- -- Function: void mpz_sqrtrem (mpz_t ROP1, mpz_t ROP2, mpz_t OP)
+ -- Function: void mpz_sqrtrem (mpz_t ROP1, mpz_t ROP2, const mpz_t OP)
Set ROP1 to the truncated integer part of the square root of OP,
like `mpz_sqrt'. Set ROP2 to the remainder OP-ROP1*ROP1, which
will be zero if OP is a perfect square.
If ROP1 and ROP2 are the same variable, the results are undefined.
- -- Function: int mpz_perfect_power_p (mpz_t OP)
+ -- Function: int mpz_perfect_power_p (const mpz_t OP)
Return non-zero if OP is a perfect power, i.e., if there exist
integers A and B, with B>1, such that OP equals A raised to the
power B.
powers. Negative values of OP are accepted, but of course can
only be odd perfect powers.
- -- Function: int mpz_perfect_square_p (mpz_t OP)
+ -- Function: int mpz_perfect_square_p (const mpz_t OP)
Return non-zero if OP is a perfect square, i.e., if the square
root of OP is an integer. Under this definition both 0 and 1 are
considered to be perfect squares.
5.9 Number Theoretic Functions
==============================
- -- Function: int mpz_probab_prime_p (mpz_t N, int REPS)
+ -- Function: int mpz_probab_prime_p (const mpz_t N, int REPS)
Determine whether N is prime. Return 2 if N is definitely prime,
return 1 if N is probably prime (without being certain), or return
0 if N is definitely composite.
This function does some trial divisions, then some Miller-Rabin
- probabilistic primality tests. REPS controls how many such tests
- are done, 5 to 10 is a reasonable number, more will reduce the
- chances of a composite being returned as "probably prime".
+ probabilistic primality tests. The argument REPS controls how
+ many such tests are done; a higher value will reduce the chances
+ of a composite being returned as "probably prime". A reasonable
+ value is 25; a composite number will then be identified as a
+ prime with a probability of less than 2^(-50).
Miller-Rabin and similar tests can be more properly called
compositeness tests. Numbers which fail are known to be composite
few composites pass, hence those which pass are considered
probably prime.
- -- Function: void mpz_nextprime (mpz_t ROP, mpz_t OP)
+ -- Function: void mpz_nextprime (mpz_t ROP, const mpz_t OP)
Set ROP to the next prime greater than OP.
This function uses a probabilistic algorithm to identify primes.
For practical purposes it's adequate; the chance of a composite
passing will be extremely small.
- -- Function: void mpz_gcd (mpz_t ROP, mpz_t OP1, mpz_t OP2)
+ -- Function: void mpz_gcd (mpz_t ROP, const mpz_t OP1, const mpz_t OP2)
Set ROP to the greatest common divisor of OP1 and OP2. The result
is always positive, even if one or both input operands are
negative, except when both inputs are zero; in that case this
function defines gcd(0,0) = 0.
- -- Function: unsigned long int mpz_gcd_ui (mpz_t ROP, mpz_t OP1,
+ -- Function: unsigned long int mpz_gcd_ui (mpz_t ROP, const mpz_t OP1,
unsigned long int OP2)
Compute the greatest common divisor of OP1 and OP2. If ROP is not
`NULL', store the result there.
result is equal to the argument OP1. Note that the result will
always fit if OP2 is non-zero.
- -- Function: void mpz_gcdext (mpz_t G, mpz_t S, mpz_t T, mpz_t A,
- mpz_t B)
+ -- Function: void mpz_gcdext (mpz_t G, mpz_t S, mpz_t T, const mpz_t
+ A, const mpz_t B)
Set G to the greatest common divisor of A and B, and in addition
set S and T to coefficients satisfying A*S + B*T = G. The value
in G is always positive, even if one or both of A and B are
If T is `NULL' then that value is not computed.
- -- Function: void mpz_lcm (mpz_t ROP, mpz_t OP1, mpz_t OP2)
- -- Function: void mpz_lcm_ui (mpz_t ROP, mpz_t OP1, unsigned long OP2)
+ -- Function: void mpz_lcm (mpz_t ROP, const mpz_t OP1, const mpz_t OP2)
+ -- Function: void mpz_lcm_ui (mpz_t ROP, const mpz_t OP1, unsigned
+ long OP2)
Set ROP to the least common multiple of OP1 and OP2. ROP is
always positive, irrespective of the signs of OP1 and OP2. ROP
will be zero if either OP1 or OP2 is zero.
- -- Function: int mpz_invert (mpz_t ROP, mpz_t OP1, mpz_t OP2)
+ -- Function: int mpz_invert (mpz_t ROP, const mpz_t OP1, const mpz_t
+ OP2)
Compute the inverse of OP1 modulo OP2 and put the result in ROP.
If the inverse exists, the return value is non-zero and ROP will
satisfy 0 < ROP < abs(OP2). If an inverse doesn't exist the
return value is zero and ROP is undefined. The behaviour of this
function is undefined when OP2 is zero.
- -- Function: int mpz_jacobi (mpz_t A, mpz_t B)
+ -- Function: int mpz_jacobi (const mpz_t A, const mpz_t B)
Calculate the Jacobi symbol (A/B). This is defined only for B odd.
- -- Function: int mpz_legendre (mpz_t A, mpz_t P)
+ -- Function: int mpz_legendre (const mpz_t A, const mpz_t P)
Calculate the Legendre symbol (A/P). This is defined only for P
an odd positive prime, and for such P it's identical to the Jacobi
symbol.
- -- Function: int mpz_kronecker (mpz_t A, mpz_t B)
- -- Function: int mpz_kronecker_si (mpz_t A, long B)
- -- Function: int mpz_kronecker_ui (mpz_t A, unsigned long B)
- -- Function: int mpz_si_kronecker (long A, mpz_t B)
- -- Function: int mpz_ui_kronecker (unsigned long A, mpz_t B)
+ -- Function: int mpz_kronecker (const mpz_t A, const mpz_t B)
+ -- Function: int mpz_kronecker_si (const mpz_t A, long B)
+ -- Function: int mpz_kronecker_ui (const mpz_t A, unsigned long B)
+ -- Function: int mpz_si_kronecker (long A, const mpz_t B)
+ -- Function: int mpz_ui_kronecker (unsigned long A, const mpz_t B)
Calculate the Jacobi symbol (A/B) with the Kronecker extension
(a/2)=(2/a) when a is odd, or (a/2)=0 when a is even.
References::), or any number theory textbook. See also the
example program `demos/qcn.c' which uses `mpz_kronecker_ui'.
- -- Function: mp_bitcnt_t mpz_remove (mpz_t ROP, mpz_t OP, mpz_t F)
+ -- Function: mp_bitcnt_t mpz_remove (mpz_t ROP, const mpz_t OP, const
+ mpz_t F)
Remove all occurrences of the factor F from OP and store the
result in ROP. The return value is how many such occurrences were
removed.
- -- Function: void mpz_fac_ui (mpz_t ROP, unsigned long int OP)
- Set ROP to OP!, the factorial of OP.
+ -- Function: void mpz_fac_ui (mpz_t ROP, unsigned long int N)
+ -- Function: void mpz_2fac_ui (mpz_t ROP, unsigned long int N)
+ -- Function: void mpz_mfac_uiui (mpz_t ROP, unsigned long int N,
+ unsigned long int M)
+ Set ROP to the factorial of N: `mpz_fac_ui' computes the plain
+ factorial N!, `mpz_2fac_ui' computes the double-factorial N!!, and
+ `mpz_mfac_uiui' the M-multi-factorial N!^(M).
- -- Function: void mpz_bin_ui (mpz_t ROP, mpz_t N, unsigned long int K)
+ -- Function: void mpz_primorial_ui (mpz_t ROP, unsigned long int N)
+ Set ROP to the primorial of N, i.e. the product of all positive
+ prime numbers <=N.
+
+ -- Function: void mpz_bin_ui (mpz_t ROP, const mpz_t N, unsigned long
+ int K)
-- Function: void mpz_bin_uiui (mpz_t ROP, unsigned long int N,
unsigned long int K)
Compute the binomial coefficient N over K and store the result in
5.10 Comparison Functions
=========================
- -- Function: int mpz_cmp (mpz_t OP1, mpz_t OP2)
- -- Function: int mpz_cmp_d (mpz_t OP1, double OP2)
- -- Macro: int mpz_cmp_si (mpz_t OP1, signed long int OP2)
- -- Macro: int mpz_cmp_ui (mpz_t OP1, unsigned long int OP2)
+ -- Function: int mpz_cmp (const mpz_t OP1, const mpz_t OP2)
+ -- Function: int mpz_cmp_d (const mpz_t OP1, double OP2)
+ -- Macro: int mpz_cmp_si (const mpz_t OP1, signed long int OP2)
+ -- Macro: int mpz_cmp_ui (const mpz_t OP1, unsigned long int OP2)
Compare OP1 and OP2. Return a positive value if OP1 > OP2, zero
if OP1 = OP2, or a negative value if OP1 < OP2.
arguments more than once. `mpz_cmp_d' can be called with an
infinity, but results are undefined for a NaN.
- -- Function: int mpz_cmpabs (mpz_t OP1, mpz_t OP2)
- -- Function: int mpz_cmpabs_d (mpz_t OP1, double OP2)
- -- Function: int mpz_cmpabs_ui (mpz_t OP1, unsigned long int OP2)
+ -- Function: int mpz_cmpabs (const mpz_t OP1, const mpz_t OP2)
+ -- Function: int mpz_cmpabs_d (const mpz_t OP1, double OP2)
+ -- Function: int mpz_cmpabs_ui (const mpz_t OP1, unsigned long int OP2)
Compare the absolute values of OP1 and OP2. Return a positive
value if abs(OP1) > abs(OP2), zero if abs(OP1) = abs(OP2), or a
negative value if abs(OP1) < abs(OP2).
`mpz_cmpabs_d' can be called with an infinity, but results are
undefined for a NaN.
- -- Macro: int mpz_sgn (mpz_t OP)
+ -- Macro: int mpz_sgn (const mpz_t OP)
Return +1 if OP > 0, 0 if OP = 0, and -1 if OP < 0.
This function is actually implemented as a macro. It evaluates
(although sign-magnitude is the actual implementation). The least
significant bit is number 0.
- -- Function: void mpz_and (mpz_t ROP, mpz_t OP1, mpz_t OP2)
+ -- Function: void mpz_and (mpz_t ROP, const mpz_t OP1, const mpz_t OP2)
Set ROP to OP1 bitwise-and OP2.
- -- Function: void mpz_ior (mpz_t ROP, mpz_t OP1, mpz_t OP2)
+ -- Function: void mpz_ior (mpz_t ROP, const mpz_t OP1, const mpz_t OP2)
Set ROP to OP1 bitwise inclusive-or OP2.
- -- Function: void mpz_xor (mpz_t ROP, mpz_t OP1, mpz_t OP2)
+ -- Function: void mpz_xor (mpz_t ROP, const mpz_t OP1, const mpz_t OP2)
Set ROP to OP1 bitwise exclusive-or OP2.
- -- Function: void mpz_com (mpz_t ROP, mpz_t OP)
+ -- Function: void mpz_com (mpz_t ROP, const mpz_t OP)
Set ROP to the one's complement of OP.
- -- Function: mp_bitcnt_t mpz_popcount (mpz_t OP)
+ -- Function: mp_bitcnt_t mpz_popcount (const mpz_t OP)
If OP>=0, return the population count of OP, which is the number
of 1 bits in the binary representation. If OP<0, the number of 1s
is infinite, and the return value is the largest possible
`mp_bitcnt_t'.
- -- Function: mp_bitcnt_t mpz_hamdist (mpz_t OP1, mpz_t OP2)
+ -- Function: mp_bitcnt_t mpz_hamdist (const mpz_t OP1, const mpz_t OP2)
If OP1 and OP2 are both >=0 or both <0, return the hamming
distance between the two operands, which is the number of bit
positions where OP1 and OP2 have different bit values. If one
is infinite, and the return value is the largest possible
`mp_bitcnt_t'.
- -- Function: mp_bitcnt_t mpz_scan0 (mpz_t OP, mp_bitcnt_t STARTING_BIT)
- -- Function: mp_bitcnt_t mpz_scan1 (mpz_t OP, mp_bitcnt_t STARTING_BIT)
+ -- Function: mp_bitcnt_t mpz_scan0 (const mpz_t OP, mp_bitcnt_t
+ STARTING_BIT)
+ -- Function: mp_bitcnt_t mpz_scan1 (const mpz_t OP, mp_bitcnt_t
+ STARTING_BIT)
Scan OP, starting from bit STARTING_BIT, towards more significant
bits, until the first 0 or 1 bit (respectively) is found. Return
the index of the found bit.
-- Function: void mpz_combit (mpz_t ROP, mp_bitcnt_t BIT_INDEX)
Complement bit BIT_INDEX in ROP.
- -- Function: int mpz_tstbit (mpz_t OP, mp_bitcnt_t BIT_INDEX)
+ -- Function: int mpz_tstbit (const mpz_t OP, mp_bitcnt_t BIT_INDEX)
Test bit BIT_INDEX in OP and return 0 or 1 accordingly.
\1f
See also *note Formatted Output:: and *note Formatted Input::.
- -- Function: size_t mpz_out_str (FILE *STREAM, int BASE, mpz_t OP)
+ -- Function: size_t mpz_out_str (FILE *STREAM, int BASE, const mpz_t
+ OP)
Output OP on stdio stream STREAM, as a string of digits in base
BASE. The base argument may vary from 2 to 62 or from -2 to -36.
Return the number of bytes read, or if an error occurred, return 0.
- -- Function: size_t mpz_out_raw (FILE *STREAM, mpz_t OP)
+ -- Function: size_t mpz_out_raw (FILE *STREAM, const mpz_t OP)
Output OP on stdio stream STREAM, in raw binary format. The
integer is written in a portable format, with 4 bytes of size
information, and that many bytes of limbs. Both the size and the
before invoking this function.
-- Function: void mpz_urandomm (mpz_t ROP, gmp_randstate_t STATE,
- mpz_t N)
+ const mpz_t N)
Generate a uniform random integer in the range 0 to N-1, inclusive.
The variable STATE must be initialized by calling one of the
instance `8*sizeof(int)-INT_BIT'.
-- Function: void * mpz_export (void *ROP, size_t *COUNTP, int ORDER,
- size_t SIZE, int ENDIAN, size_t NAILS, mpz_t OP)
+ size_t SIZE, int ENDIAN, size_t NAILS, const mpz_t OP)
Fill ROP with word data from OP.
The parameters specify the format of the data produced. Each word
5.15 Miscellaneous Functions
============================
- -- Function: int mpz_fits_ulong_p (mpz_t OP)
- -- Function: int mpz_fits_slong_p (mpz_t OP)
- -- Function: int mpz_fits_uint_p (mpz_t OP)
- -- Function: int mpz_fits_sint_p (mpz_t OP)
- -- Function: int mpz_fits_ushort_p (mpz_t OP)
- -- Function: int mpz_fits_sshort_p (mpz_t OP)
+ -- Function: int mpz_fits_ulong_p (const mpz_t OP)
+ -- Function: int mpz_fits_slong_p (const mpz_t OP)
+ -- Function: int mpz_fits_uint_p (const mpz_t OP)
+ -- Function: int mpz_fits_sint_p (const mpz_t OP)
+ -- Function: int mpz_fits_ushort_p (const mpz_t OP)
+ -- Function: int mpz_fits_sshort_p (const mpz_t OP)
Return non-zero iff the value of OP fits in an `unsigned long int',
`signed long int', `unsigned int', `signed int', `unsigned short
int', or `signed short int', respectively. Otherwise, return zero.
- -- Macro: int mpz_odd_p (mpz_t OP)
- -- Macro: int mpz_even_p (mpz_t OP)
+ -- Macro: int mpz_odd_p (const mpz_t OP)
+ -- Macro: int mpz_even_p (const mpz_t OP)
Determine whether OP is odd or even, respectively. Return
non-zero if yes, zero if no. These macros evaluate their argument
more than once.
- -- Function: size_t mpz_sizeinbase (mpz_t OP, int BASE)
+ -- Function: size_t mpz_sizeinbase (const mpz_t OP, int BASE)
Return the size of OP measured in number of digits in the given
BASE. BASE can vary from 2 to 62. The sign of OP is ignored,
just the absolute value is used. The result will be either exact
changes like this. `mpz_realloc2' and `_mpz_realloc' are the same
except that `_mpz_realloc' takes its size in limbs.
- -- Function: mp_limb_t mpz_getlimbn (mpz_t OP, mp_size_t N)
+ -- Function: mp_limb_t mpz_getlimbn (const mpz_t OP, mp_size_t N)
Return limb number N from OP. The sign of OP is ignored, just the
absolute value is used. The least significant limb is number 0.
`mpz_getlimbn' returns zero if N is outside the range 0 to
`mpz_size(OP)-1'.
- -- Function: size_t mpz_size (mpz_t OP)
+ -- Function: size_t mpz_size (const mpz_t OP)
Return the size of OP measured in number of limbs. If OP is zero,
the returned value will be zero.
Free the space occupied by a NULL-terminated list of `mpq_t'
variables.
- -- Function: void mpq_set (mpq_t ROP, mpq_t OP)
- -- Function: void mpq_set_z (mpq_t ROP, mpz_t OP)
+ -- Function: void mpq_set (mpq_t ROP, const mpq_t OP)
+ -- Function: void mpq_set_z (mpq_t ROP, const mpz_t OP)
Assign ROP from OP.
-- Function: void mpq_set_ui (mpq_t ROP, unsigned long int OP1,
common factors, ROP has to be passed to `mpq_canonicalize' before
any operations are performed on ROP.
- -- Function: int mpq_set_str (mpq_t ROP, char *STR, int BASE)
+ -- Function: int mpq_set_str (mpq_t ROP, const char *STR, int BASE)
Set ROP from a null-terminated string STR in the given BASE.
The string can be an integer like "41" or a fraction like
6.2 Conversion Functions
========================
- -- Function: double mpq_get_d (mpq_t OP)
+ -- Function: double mpq_get_d (const mpq_t OP)
Convert OP to a `double', truncating if necessary (i.e. rounding
towards zero).
may or may not occur.
-- Function: void mpq_set_d (mpq_t ROP, double OP)
- -- Function: void mpq_set_f (mpq_t ROP, mpf_t OP)
+ -- Function: void mpq_set_f (mpq_t ROP, const mpf_t OP)
Set ROP to the value of OP. There is no rounding; this conversion
is exact.
- -- Function: char * mpq_get_str (char *STR, int BASE, mpq_t OP)
+ -- Function: char * mpq_get_str (char *STR, int BASE, const mpq_t OP)
Convert OP to a string of digits in base BASE. The base may vary
from 2 to 36. The string will be of the form `num/den', or if the
denominator is 1 then just `num'.
6.3 Arithmetic Functions
========================
- -- Function: void mpq_add (mpq_t SUM, mpq_t ADDEND1, mpq_t ADDEND2)
+ -- Function: void mpq_add (mpq_t SUM, const mpq_t ADDEND1, const mpq_t
+ ADDEND2)
Set SUM to ADDEND1 + ADDEND2.
- -- Function: void mpq_sub (mpq_t DIFFERENCE, mpq_t MINUEND, mpq_t
- SUBTRAHEND)
+ -- Function: void mpq_sub (mpq_t DIFFERENCE, const mpq_t MINUEND,
+ const mpq_t SUBTRAHEND)
Set DIFFERENCE to MINUEND - SUBTRAHEND.
- -- Function: void mpq_mul (mpq_t PRODUCT, mpq_t MULTIPLIER, mpq_t
- MULTIPLICAND)
+ -- Function: void mpq_mul (mpq_t PRODUCT, const mpq_t MULTIPLIER,
+ const mpq_t MULTIPLICAND)
Set PRODUCT to MULTIPLIER times MULTIPLICAND.
- -- Function: void mpq_mul_2exp (mpq_t ROP, mpq_t OP1, mp_bitcnt_t OP2)
+ -- Function: void mpq_mul_2exp (mpq_t ROP, const mpq_t OP1,
+ mp_bitcnt_t OP2)
Set ROP to OP1 times 2 raised to OP2.
- -- Function: void mpq_div (mpq_t QUOTIENT, mpq_t DIVIDEND, mpq_t
- DIVISOR)
+ -- Function: void mpq_div (mpq_t QUOTIENT, const mpq_t DIVIDEND, const
+ mpq_t DIVISOR)
Set QUOTIENT to DIVIDEND/DIVISOR.
- -- Function: void mpq_div_2exp (mpq_t ROP, mpq_t OP1, mp_bitcnt_t OP2)
+ -- Function: void mpq_div_2exp (mpq_t ROP, const mpq_t OP1,
+ mp_bitcnt_t OP2)
Set ROP to OP1 divided by 2 raised to OP2.
- -- Function: void mpq_neg (mpq_t NEGATED_OPERAND, mpq_t OPERAND)
+ -- Function: void mpq_neg (mpq_t NEGATED_OPERAND, const mpq_t OPERAND)
Set NEGATED_OPERAND to -OPERAND.
- -- Function: void mpq_abs (mpq_t ROP, mpq_t OP)
+ -- Function: void mpq_abs (mpq_t ROP, const mpq_t OP)
Set ROP to the absolute value of OP.
- -- Function: void mpq_inv (mpq_t INVERTED_NUMBER, mpq_t NUMBER)
+ -- Function: void mpq_inv (mpq_t INVERTED_NUMBER, const mpq_t NUMBER)
Set INVERTED_NUMBER to 1/NUMBER. If the new denominator is zero,
this routine will divide by zero.
6.4 Comparison Functions
========================
- -- Function: int mpq_cmp (mpq_t OP1, mpq_t OP2)
+ -- Function: int mpq_cmp (const mpq_t OP1, const mpq_t OP2)
Compare OP1 and OP2. Return a positive value if OP1 > OP2, zero
if OP1 = OP2, and a negative value if OP1 < OP2.
To determine if two rationals are equal, `mpq_equal' is faster than
`mpq_cmp'.
- -- Macro: int mpq_cmp_ui (mpq_t OP1, unsigned long int NUM2, unsigned
+ -- Macro: int mpq_cmp_ui (const mpq_t OP1, unsigned long int NUM2,
+ unsigned long int DEN2)
+ -- Macro: int mpq_cmp_si (const mpq_t OP1, long int NUM2, unsigned
long int DEN2)
- -- Macro: int mpq_cmp_si (mpq_t OP1, long int NUM2, unsigned long int
- DEN2)
Compare OP1 and NUM2/DEN2. Return a positive value if OP1 >
NUM2/DEN2, zero if OP1 = NUM2/DEN2, and a negative value if OP1 <
NUM2/DEN2.
These functions are implemented as macros and evaluate their
arguments multiple times.
- -- Macro: int mpq_sgn (mpq_t OP)
+ -- Macro: int mpq_sgn (const mpq_t OP)
Return +1 if OP > 0, 0 if OP = 0, and -1 if OP < 0.
This function is actually implemented as a macro. It evaluates its
- arguments multiple times.
+ argument multiple times.
- -- Function: int mpq_equal (mpq_t OP1, mpq_t OP2)
+ -- Function: int mpq_equal (const mpq_t OP1, const mpq_t OP2)
Return non-zero if OP1 and OP2 are equal, zero if they are
non-equal. Although `mpq_cmp' can be used for the same purpose,
this function is much faster.
`mpq_canonicalize' must be called before any other `mpq' functions are
applied to that `mpq_t'.
- -- Macro: mpz_t mpq_numref (mpq_t OP)
- -- Macro: mpz_t mpq_denref (mpq_t OP)
+ -- Macro: mpz_t mpq_numref (const mpq_t OP)
+ -- Macro: mpz_t mpq_denref (const mpq_t OP)
Return a reference to the numerator and denominator of OP,
respectively. The `mpz' functions can be used on the result of
these macros.
- -- Function: void mpq_get_num (mpz_t NUMERATOR, mpq_t RATIONAL)
- -- Function: void mpq_get_den (mpz_t DENOMINATOR, mpq_t RATIONAL)
- -- Function: void mpq_set_num (mpq_t RATIONAL, mpz_t NUMERATOR)
- -- Function: void mpq_set_den (mpq_t RATIONAL, mpz_t DENOMINATOR)
+ -- Function: void mpq_get_num (mpz_t NUMERATOR, const mpq_t RATIONAL)
+ -- Function: void mpq_get_den (mpz_t DENOMINATOR, const mpq_t RATIONAL)
+ -- Function: void mpq_set_num (mpq_t RATIONAL, const mpz_t NUMERATOR)
+ -- Function: void mpq_set_den (mpq_t RATIONAL, const mpz_t DENOMINATOR)
Get or set the numerator or denominator of a rational. These
functions are equivalent to calling `mpz_set' with an appropriate
`mpq_numref' or `mpq_denref'. Direct use of `mpq_numref' or
See also *note Formatted Output:: and *note Formatted Input::.
- -- Function: size_t mpq_out_str (FILE *STREAM, int BASE, mpq_t OP)
+ -- Function: size_t mpq_out_str (FILE *STREAM, int BASE, const mpq_t
+ OP)
Output OP on stdio stream STREAM, as a string of digits in base
BASE. The base may vary from 2 to 36. Output is in the form
`num/den' or if the denominator is 1 then just `num'.
the computation precision closely match the actual accurate part of the
numbers.
- -- Function: mp_bitcnt_t mpf_get_prec (mpf_t OP)
+ -- Function: mp_bitcnt_t mpf_get_prec (const mpf_t OP)
Return the current precision of OP, in bits.
-- Function: void mpf_set_prec (mpf_t ROP, mp_bitcnt_t PREC)
These functions assign new values to already initialized floats (*note
Initializing Floats::).
- -- Function: void mpf_set (mpf_t ROP, mpf_t OP)
+ -- Function: void mpf_set (mpf_t ROP, const mpf_t OP)
-- Function: void mpf_set_ui (mpf_t ROP, unsigned long int OP)
-- Function: void mpf_set_si (mpf_t ROP, signed long int OP)
-- Function: void mpf_set_d (mpf_t ROP, double OP)
- -- Function: void mpf_set_z (mpf_t ROP, mpz_t OP)
- -- Function: void mpf_set_q (mpf_t ROP, mpq_t OP)
+ -- Function: void mpf_set_z (mpf_t ROP, const mpz_t OP)
+ -- Function: void mpf_set_q (mpf_t ROP, const mpq_t OP)
Set the value of ROP from OP.
- -- Function: int mpf_set_str (mpf_t ROP, char *STR, int BASE)
+ -- Function: int mpf_set_str (mpf_t ROP, const char *STR, int BASE)
Set the value of ROP from the string in STR. The string is of the
form `M@N' or, if the base is 10 or less, alternatively `MeN'.
`M' is the mantissa and `N' is the exponent. The mantissa is
ordinary float functions. Don't use an initialize-and-set function on
a variable already initialized!
- -- Function: void mpf_init_set (mpf_t ROP, mpf_t OP)
+ -- Function: void mpf_init_set (mpf_t ROP, const mpf_t OP)
-- Function: void mpf_init_set_ui (mpf_t ROP, unsigned long int OP)
-- Function: void mpf_init_set_si (mpf_t ROP, signed long int OP)
-- Function: void mpf_init_set_d (mpf_t ROP, double OP)
The precision of ROP will be taken from the active default
precision, as set by `mpf_set_default_prec'.
- -- Function: int mpf_init_set_str (mpf_t ROP, char *STR, int BASE)
+ -- Function: int mpf_init_set_str (mpf_t ROP, const char *STR, int
+ BASE)
Initialize ROP and set its value from the string in STR. See
`mpf_set_str' above for details on the assignment operation.
7.4 Conversion Functions
========================
- -- Function: double mpf_get_d (mpf_t OP)
+ -- Function: double mpf_get_d (const mpf_t OP)
Convert OP to a `double', truncating if necessary (i.e. rounding
towards zero).
returned when available. For too small 0.0 is normally returned.
Hardware overflow, underflow and denorm traps may or may not occur.
- -- Function: double mpf_get_d_2exp (signed long int *EXP, mpf_t OP)
+ -- Function: double mpf_get_d_2exp (signed long int *EXP, const mpf_t
+ OP)
Convert OP to a `double', truncating if necessary (i.e. rounding
towards zero), and with an exponent returned separately.
This is similar to the standard C `frexp' function (*note
Normalization Functions: (libc)Normalization Functions.).
- -- Function: long mpf_get_si (mpf_t OP)
- -- Function: unsigned long mpf_get_ui (mpf_t OP)
+ -- Function: long mpf_get_si (const mpf_t OP)
+ -- Function: unsigned long mpf_get_ui (const mpf_t OP)
Convert OP to a `long' or `unsigned long', truncating any fraction
part. If OP is too big for the return type, the result is
undefined.
Miscellaneous Float Functions::).
-- Function: char * mpf_get_str (char *STR, mp_exp_t *EXPPTR, int
- BASE, size_t N_DIGITS, mpf_t OP)
+ BASE, size_t N_DIGITS, const mpf_t OP)
Convert OP to a string of digits in base BASE. The base argument
may vary from 2 to 62 or from -2 to -36. Up to N_DIGITS digits
will be generated. Trailing zeros are not returned. No more
7.5 Arithmetic Functions
========================
- -- Function: void mpf_add (mpf_t ROP, mpf_t OP1, mpf_t OP2)
- -- Function: void mpf_add_ui (mpf_t ROP, mpf_t OP1, unsigned long int
- OP2)
+ -- Function: void mpf_add (mpf_t ROP, const mpf_t OP1, const mpf_t OP2)
+ -- Function: void mpf_add_ui (mpf_t ROP, const mpf_t OP1, unsigned
+ long int OP2)
Set ROP to OP1 + OP2.
- -- Function: void mpf_sub (mpf_t ROP, mpf_t OP1, mpf_t OP2)
- -- Function: void mpf_ui_sub (mpf_t ROP, unsigned long int OP1, mpf_t
- OP2)
- -- Function: void mpf_sub_ui (mpf_t ROP, mpf_t OP1, unsigned long int
- OP2)
+ -- Function: void mpf_sub (mpf_t ROP, const mpf_t OP1, const mpf_t OP2)
+ -- Function: void mpf_ui_sub (mpf_t ROP, unsigned long int OP1, const
+ mpf_t OP2)
+ -- Function: void mpf_sub_ui (mpf_t ROP, const mpf_t OP1, unsigned
+ long int OP2)
Set ROP to OP1 - OP2.
- -- Function: void mpf_mul (mpf_t ROP, mpf_t OP1, mpf_t OP2)
- -- Function: void mpf_mul_ui (mpf_t ROP, mpf_t OP1, unsigned long int
- OP2)
+ -- Function: void mpf_mul (mpf_t ROP, const mpf_t OP1, const mpf_t OP2)
+ -- Function: void mpf_mul_ui (mpf_t ROP, const mpf_t OP1, unsigned
+ long int OP2)
Set ROP to OP1 times OP2.
Division is undefined if the divisor is zero, and passing a zero
divide by zero. This lets the user handle arithmetic exceptions in
these functions in the same manner as other arithmetic exceptions.
- -- Function: void mpf_div (mpf_t ROP, mpf_t OP1, mpf_t OP2)
- -- Function: void mpf_ui_div (mpf_t ROP, unsigned long int OP1, mpf_t
- OP2)
- -- Function: void mpf_div_ui (mpf_t ROP, mpf_t OP1, unsigned long int
- OP2)
+ -- Function: void mpf_div (mpf_t ROP, const mpf_t OP1, const mpf_t OP2)
+ -- Function: void mpf_ui_div (mpf_t ROP, unsigned long int OP1, const
+ mpf_t OP2)
+ -- Function: void mpf_div_ui (mpf_t ROP, const mpf_t OP1, unsigned
+ long int OP2)
Set ROP to OP1/OP2.
- -- Function: void mpf_sqrt (mpf_t ROP, mpf_t OP)
+ -- Function: void mpf_sqrt (mpf_t ROP, const mpf_t OP)
-- Function: void mpf_sqrt_ui (mpf_t ROP, unsigned long int OP)
Set ROP to the square root of OP.
- -- Function: void mpf_pow_ui (mpf_t ROP, mpf_t OP1, unsigned long int
- OP2)
+ -- Function: void mpf_pow_ui (mpf_t ROP, const mpf_t OP1, unsigned
+ long int OP2)
Set ROP to OP1 raised to the power OP2.
- -- Function: void mpf_neg (mpf_t ROP, mpf_t OP)
+ -- Function: void mpf_neg (mpf_t ROP, const mpf_t OP)
Set ROP to -OP.
- -- Function: void mpf_abs (mpf_t ROP, mpf_t OP)
+ -- Function: void mpf_abs (mpf_t ROP, const mpf_t OP)
Set ROP to the absolute value of OP.
- -- Function: void mpf_mul_2exp (mpf_t ROP, mpf_t OP1, mp_bitcnt_t OP2)
+ -- Function: void mpf_mul_2exp (mpf_t ROP, const mpf_t OP1,
+ mp_bitcnt_t OP2)
Set ROP to OP1 times 2 raised to OP2.
- -- Function: void mpf_div_2exp (mpf_t ROP, mpf_t OP1, mp_bitcnt_t OP2)
+ -- Function: void mpf_div_2exp (mpf_t ROP, const mpf_t OP1,
+ mp_bitcnt_t OP2)
Set ROP to OP1 divided by 2 raised to OP2.
\1f
7.6 Comparison Functions
========================
- -- Function: int mpf_cmp (mpf_t OP1, mpf_t OP2)
- -- Function: int mpf_cmp_d (mpf_t OP1, double OP2)
- -- Function: int mpf_cmp_ui (mpf_t OP1, unsigned long int OP2)
- -- Function: int mpf_cmp_si (mpf_t OP1, signed long int OP2)
+ -- Function: int mpf_cmp (const mpf_t OP1, const mpf_t OP2)
+ -- Function: int mpf_cmp_d (const mpf_t OP1, double OP2)
+ -- Function: int mpf_cmp_ui (const mpf_t OP1, unsigned long int OP2)
+ -- Function: int mpf_cmp_si (const mpf_t OP1, signed long int OP2)
Compare OP1 and OP2. Return a positive value if OP1 > OP2, zero
if OP1 = OP2, and a negative value if OP1 < OP2.
`mpf_cmp_d' can be called with an infinity, but results are
undefined for a NaN.
- -- Function: int mpf_eq (mpf_t OP1, mpf_t OP2, mp_bitcnt_t op3)
+ -- Function: int mpf_eq (const mpf_t OP1, const mpf_t OP2, mp_bitcnt_t
+ OP3)
Return non-zero if the first OP3 bits of OP1 and OP2 are equal,
zero otherwise. I.e., test if OP1 and OP2 are approximately equal.
bits. Such numbers are really just one ulp off, and should be
considered equal.
- -- Function: void mpf_reldiff (mpf_t ROP, mpf_t OP1, mpf_t OP2)
+ -- Function: void mpf_reldiff (mpf_t ROP, const mpf_t OP1, const mpf_t
+ OP2)
Compute the relative difference between OP1 and OP2 and store the
result in ROP. This is abs(OP1-OP2)/OP1.
- -- Macro: int mpf_sgn (mpf_t OP)
+ -- Macro: int mpf_sgn (const mpf_t OP)
Return +1 if OP > 0, 0 if OP = 0, and -1 if OP < 0.
This function is actually implemented as a macro. It evaluates
- its arguments multiple times.
+ its argument multiple times.
\1f
File: gmp.info, Node: I/O of Floats, Next: Miscellaneous Float Functions, Prev: Float Comparison, Up: Floating-point Functions
See also *note Formatted Output:: and *note Formatted Input::.
-- Function: size_t mpf_out_str (FILE *STREAM, int BASE, size_t
- N_DIGITS, mpf_t OP)
+ N_DIGITS, const mpf_t OP)
Print OP to STREAM, as a string of digits. Return the number of
bytes written, or if an error occurred, return 0.
7.8 Miscellaneous Functions
===========================
- -- Function: void mpf_ceil (mpf_t ROP, mpf_t OP)
- -- Function: void mpf_floor (mpf_t ROP, mpf_t OP)
- -- Function: void mpf_trunc (mpf_t ROP, mpf_t OP)
+ -- Function: void mpf_ceil (mpf_t ROP, const mpf_t OP)
+ -- Function: void mpf_floor (mpf_t ROP, const mpf_t OP)
+ -- Function: void mpf_trunc (mpf_t ROP, const mpf_t OP)
Set ROP to OP rounded to an integer. `mpf_ceil' rounds to the
next higher integer, `mpf_floor' to the next lower, and `mpf_trunc'
to the integer towards zero.
- -- Function: int mpf_integer_p (mpf_t OP)
+ -- Function: int mpf_integer_p (const mpf_t OP)
Return non-zero if OP is an integer.
- -- Function: int mpf_fits_ulong_p (mpf_t OP)
- -- Function: int mpf_fits_slong_p (mpf_t OP)
- -- Function: int mpf_fits_uint_p (mpf_t OP)
- -- Function: int mpf_fits_sint_p (mpf_t OP)
- -- Function: int mpf_fits_ushort_p (mpf_t OP)
- -- Function: int mpf_fits_sshort_p (mpf_t OP)
+ -- Function: int mpf_fits_ulong_p (const mpf_t OP)
+ -- Function: int mpf_fits_slong_p (const mpf_t OP)
+ -- Function: int mpf_fits_uint_p (const mpf_t OP)
+ -- Function: int mpf_fits_sint_p (const mpf_t OP)
+ -- Function: int mpf_fits_ushort_p (const mpf_t OP)
+ -- Function: int mpf_fits_sshort_p (const mpf_t OP)
Return non-zero if OP would fit in the respective C data type, when
truncated to an integer.
This function requires that S1N is greater than or equal to S2N.
- -- Function: void mpn_neg (mp_limb_t *RP, const mp_limb_t *SP,
+ -- Function: mp_limb_t mpn_neg (mp_limb_t *RP, const mp_limb_t *SP,
mp_size_t N)
Perform the negation of {SP, N}, and write the result to {RP, N}.
Return carry-out.
{YP, YN}. The result can be up to YN limbs, the return value is
the actual number produced. Both source operands are destroyed.
- {XP, XN} must have at least as many bits as {YP, YN}. {YP, YN}
- must be odd. Both operands must have non-zero most significant
- limbs. No overlap is permitted between {XP, XN} and {YP, YN}.
+ It is required that XN >= YN > 0, and the most significant limb of
+ {YP, YN} must be non-zero. No overlap is permitted between {XP,
+ XN} and {YP, YN}.
-- Function: mp_limb_t mpn_gcd_1 (const mp_limb_t *XP, mp_size_t XN,
mp_limb_t YLIMB)
remainder would have been zero or non-zero.
A return value of zero indicates a perfect square. See also
- `mpz_perfect_square_p'.
+ `mpn_perfect_square_p'.
-- Function: mp_size_t mpn_get_str (unsigned char *STR, int BASE,
mp_limb_t *S1P, mp_size_t S1N)
Initialize STATE for a Mersenne Twister algorithm. This algorithm
is fast and has good randomness properties.
- -- Function: void gmp_randinit_lc_2exp (gmp_randstate_t STATE, mpz_t
- A, unsigned long C, mp_bitcnt_t M2EXP)
+ -- Function: void gmp_randinit_lc_2exp (gmp_randstate_t STATE, const
+ mpz_t A, unsigned long C, mp_bitcnt_t M2EXP)
Initialize STATE with a linear congruential algorithm X = (A*X +
C) mod 2^M2EXP.
9.2 Random State Seeding
========================
- -- Function: void gmp_randseed (gmp_randstate_t STATE, mpz_t SEED)
+ -- Function: void gmp_randseed (gmp_randstate_t STATE, const mpz_t
+ SEED)
-- Function: void gmp_randseed_ui (gmp_randstate_t STATE,
unsigned long int SEED)
Set an initial seed value into STATE.
results. For classes with overloading, see *note C++ Class Interface::.
\1f
-File: gmp.info, Node: C++ Class Interface, Next: BSD Compatible Functions, Prev: Formatted Input, Up: Top
+File: gmp.info, Node: C++ Class Interface, Next: Custom Allocation, Prev: Formatted Input, Up: Top
12 C++ Class Interface
**********************
-- Function: mpz_class::mpz_class (type N)
Construct an `mpz_class'. All the standard C++ types may be used,
except `long long' and `long double', and all the GMP C++ classes
- can be used. Any necessary conversion follows the corresponding C
- function, for example `double' follows `mpz_set_d' (*note
- Assigning Integers::).
+ can be used, although conversions from `mpq_class' and `mpf_class'
+ are `explicit'. Any necessary conversion follows the
+ corresponding C function, for example `double' follows `mpz_set_d'
+ (*note Assigning Integers::).
-- Function: explicit mpz_class::mpz_class (mpz_t Z)
Construct an `mpz_class' from an `mpz_t'. The value in Z is
If the string is not a valid integer, an `std::invalid_argument'
exception is thrown. The same applies to `operator='.
+ -- Function: mpz_class operator"" _mpz (const char *STR)
+ With C++11 compilers, integers can be constructed with the syntax
+ `123_mpz' which is equivalent to `mpz_class("123")'.
+
-- Function: mpz_class operator/ (mpz_class A, mpz_class D)
-- Function: mpz_class operator% (mpz_class A, mpz_class D)
Divisions involving `mpz_class' round towards zero, as per the
...
mpz_fdiv_q (q.get_mpz_t(), a.get_mpz_t(), d.get_mpz_t());
- -- Function: mpz_class abs (mpz_class OP1)
+ -- Function: mpz_class abs (mpz_class OP)
-- Function: int cmp (mpz_class OP1, type OP2)
-- Function: int cmp (type OP1, mpz_class OP2)
-- Function: bool mpz_class::fits_sint_p (void)
-- Function: int mpz_class::set_str (const string& STR, int BASE)
-- Function: int sgn (mpz_class OP)
-- Function: mpz_class sqrt (mpz_class OP)
+ -- Function: void mpz_class::swap (mpz_class& OP)
+ -- Function: void swap (mpz_class& OP1, mpz_class& OP2)
These functions provide a C++ class interface to the corresponding
GMP C routines.
-- Function: mpq_class::mpq_class (type OP)
-- Function: mpq_class::mpq_class (integer NUM, integer DEN)
Construct an `mpq_class'. The initial value can be a single value
- of any type, or a pair of integers (`mpz_class' or standard C++
- integer types) representing a fraction, except that `long long'
- and `long double' are not supported. For example,
+ of any type (conversion from `mpf_class' is `explicit'), or a pair
+ of integers (`mpz_class' or standard C++ integer types)
+ representing a fraction, except that `long long' and `long double'
+ are not supported. For example,
mpq_class q (99);
mpq_class q (1.75);
If the string is not a valid rational, an `std::invalid_argument'
exception is thrown. The same applies to `operator='.
+ -- Function: mpq_class operator"" _mpq (const char *STR)
+ With C++11 compilers, integral rationals can be constructed with
+ the syntax `123_mpq' which is equivalent to `mpq_class(123_mpz)'.
+ Other rationals can be built as `-1_mpq/2' or `0xb_mpq/123456_mpz'.
+
-- Function: void mpq_class::canonicalize ()
Put an `mpq_class' into canonical form, as per *note Rational
Number Functions::. All arithmetic operators require their
-- Function: int mpq_class::set_str (const char *STR, int BASE)
-- Function: int mpq_class::set_str (const string& STR, int BASE)
-- Function: int sgn (mpq_class OP)
+ -- Function: void mpq_class::swap (mpq_class& OP)
+ -- Function: void swap (mpq_class& OP1, mpq_class& OP2)
These functions provide a C++ class interface to the corresponding
GMP C routines.
If the string is not a valid float, an `std::invalid_argument'
exception is thrown. The same applies to `operator='.
+ -- Function: mpf_class operator"" _mpf (const char *STR)
+ With C++11 compilers, floats can be constructed with the syntax
+ `1.23e-1_mpf' which is equivalent to `mpf_class("1.23e-1")'.
+
-- Function: mpf_class& mpf_class::operator= (type OP)
Convert and store the given OP value to an `mpf_class' object. The
same types are accepted as for the constructors above.
-- Function: int mpf_class::set_str (const string& STR, int BASE)
-- Function: int sgn (mpf_class OP)
-- Function: mpf_class sqrt (mpf_class OP)
+ -- Function: void mpf_class::swap (mpf_class& OP)
+ -- Function: void swap (mpf_class& OP1, mpf_class& OP2)
-- Function: mpf_class trunc (mpf_class OP)
These functions provide a C++ class interface to the corresponding
GMP C routines.
Seed a random number generator. See *note Random Number
Functions::, for how to choose a good seed.
- -- Function: mpz_class gmp_randclass::get_z_bits (unsigned long BITS)
+ -- Function: mpz_class gmp_randclass::get_z_bits (mp_bitcnt_t BITS)
-- Function: mpz_class gmp_randclass::get_z_bits (mpz_class BITS)
Generate a random integer with a specified number of bits.
}
\1f
-File: gmp.info, Node: BSD Compatible Functions, Next: Custom Allocation, Prev: C++ Class Interface, Up: Top
-
-13 Berkeley MP Compatible Functions
-***********************************
-
-These functions are intended to be fully compatible with the Berkeley MP
-library which is available on many BSD derived U*ix systems. The
-`--enable-mpbsd' option must be used when building GNU MP to make these
-available (*note Installing GMP::).
-
- The original Berkeley MP library has a usage restriction: you cannot
-use the same variable as both source and destination in a single
-function call. The compatible functions in GNU MP do not share this
-restriction--inputs and outputs may overlap.
-
- It is not recommended that new programs are written using these
-functions. Apart from the incomplete set of functions, the interface
-for initializing `MINT' objects is more error prone, and the `pow'
-function collides with `pow' in `libm.a'.
-
- Include the header `mp.h' to get the definition of the necessary
-types and functions. If you are on a BSD derived system, make sure to
-include GNU `mp.h' if you are going to link the GNU `libmp.a' to your
-program. This means that you probably need to give the `-I<dir>'
-option to the compiler, where `<dir>' is the directory where you have
-GNU `mp.h'.
-
- -- Function: MINT * itom (signed short int INITIAL_VALUE)
- Allocate an integer consisting of a `MINT' object and dynamic limb
- space. Initialize the integer to INITIAL_VALUE. Return a pointer
- to the `MINT' object.
-
- -- Function: MINT * xtom (char *INITIAL_VALUE)
- Allocate an integer consisting of a `MINT' object and dynamic limb
- space. Initialize the integer from INITIAL_VALUE, a hexadecimal,
- null-terminated C string. Return a pointer to the `MINT' object.
-
- -- Function: void move (MINT *SRC, MINT *DEST)
- Set DEST to SRC by copying. Both variables must be previously
- initialized.
-
- -- Function: void madd (MINT *SRC_1, MINT *SRC_2, MINT *DESTINATION)
- Add SRC_1 and SRC_2 and put the sum in DESTINATION.
-
- -- Function: void msub (MINT *SRC_1, MINT *SRC_2, MINT *DESTINATION)
- Subtract SRC_2 from SRC_1 and put the difference in DESTINATION.
-
- -- Function: void mult (MINT *SRC_1, MINT *SRC_2, MINT *DESTINATION)
- Multiply SRC_1 and SRC_2 and put the product in DESTINATION.
-
- -- Function: void mdiv (MINT *DIVIDEND, MINT *DIVISOR, MINT *QUOTIENT,
- MINT *REMAINDER)
- -- Function: void sdiv (MINT *DIVIDEND, signed short int DIVISOR, MINT
- *QUOTIENT, signed short int *REMAINDER)
- Set QUOTIENT to DIVIDEND/DIVISOR, and REMAINDER to DIVIDEND mod
- DIVISOR. The quotient is rounded towards zero; the remainder has
- the same sign as the dividend unless it is zero.
-
- Some implementations of these functions work differently--or not
- at all--for negative arguments.
-
- -- Function: void msqrt (MINT *OP, MINT *ROOT, MINT *REMAINDER)
- Set ROOT to the truncated integer part of the square root of OP,
- like `mpz_sqrt'. Set REMAINDER to OP-ROOT*ROOT, i.e. zero if OP
- is a perfect square.
-
- If ROOT and REMAINDER are the same variable, the results are
- undefined.
-
- -- Function: void pow (MINT *BASE, MINT *EXP, MINT *MOD, MINT *DEST)
- Set DEST to (BASE raised to EXP) modulo MOD.
-
- Note that the name `pow' clashes with `pow' from the standard C
- math library (*note Exponentiation and Logarithms: (libc)Exponents
- and Logarithms.). An application will only be able to use one or
- the other.
-
- -- Function: void rpow (MINT *BASE, signed short int EXP, MINT *DEST)
- Set DEST to BASE raised to EXP.
-
- -- Function: void gcd (MINT *OP1, MINT *OP2, MINT *RES)
- Set RES to the greatest common divisor of OP1 and OP2.
-
- -- Function: int mcmp (MINT *OP1, MINT *OP2)
- Compare OP1 and OP2. Return a positive value if OP1 > OP2, zero
- if OP1 = OP2, and a negative value if OP1 < OP2.
-
- -- Function: void min (MINT *DEST)
- Input a decimal string from `stdin', and put the read integer in
- DEST. SPC and TAB are allowed in the number string, and are
- ignored.
-
- -- Function: void mout (MINT *SRC)
- Output SRC to `stdout', as a decimal string. Also output a
- newline.
-
- -- Function: char * mtox (MINT *OP)
- Convert OP to a hexadecimal string, and return a pointer to the
- string. The returned string is allocated using the default memory
- allocation function, `malloc' by default. It will be
- `strlen(str)+1' bytes, that being exactly enough for the string
- and null-terminator.
-
- -- Function: void mfree (MINT *OP)
- De-allocate, the space used by OP. *This function should only be
- passed a value returned by `itom' or `xtom'.*
-
-\1f
-File: gmp.info, Node: Custom Allocation, Next: Language Bindings, Prev: BSD Compatible Functions, Up: Top
+File: gmp.info, Node: Custom Allocation, Next: Language Bindings, Prev: C++ Class Interface, Up: Top
-14 Custom Allocation
+13 Custom Allocation
********************
By default GMP uses `malloc', `realloc' and `free' for memory
different way or to have a different error action on running out of
memory.
- This feature is available in the Berkeley compatibility library
-(*note BSD Compatible Functions::) as well as the main GMP library.
-
-- Function: void mp_set_memory_functions (
void *(*ALLOC_FUNC_PTR) (size_t),
void *(*REALLOC_FUNC_PTR) (void *, size_t, size_t),
\1f
File: gmp.info, Node: Language Bindings, Next: Algorithms, Prev: Custom Allocation, Up: Top
-15 Language Bindings
+14 Language Bindings
********************
The following packages and projects offer access to GMP from languages
\1f
File: gmp.info, Node: Algorithms, Next: Internals, Prev: Language Bindings, Up: Top
-16 Algorithms
+15 Algorithms
*************
This chapter is an introduction to some of the algorithms used for
\1f
File: gmp.info, Node: Multiplication Algorithms, Next: Division Algorithms, Prev: Algorithms, Up: Algorithms
-16.1 Multiplication
+15.1 Multiplication
===================
NxN limb multiplications and squares are done using one of seven
\1f
File: gmp.info, Node: Basecase Multiplication, Next: Karatsuba Multiplication, Prev: Multiplication Algorithms, Up: Multiplication Algorithms
-16.1.1 Basecase Multiplication
+15.1.1 Basecase Multiplication
------------------------------
Basecase NxM multiplication is a straightforward rectangular set of
\1f
File: gmp.info, Node: Karatsuba Multiplication, Next: Toom 3-Way Multiplication, Prev: Basecase Multiplication, Up: Multiplication Algorithms
-16.1.2 Karatsuba Multiplication
+15.1.2 Karatsuba Multiplication
-------------------------------
The Karatsuba multiplication algorithm is described in Knuth section
\1f
File: gmp.info, Node: Toom 3-Way Multiplication, Next: Toom 4-Way Multiplication, Prev: Karatsuba Multiplication, Up: Multiplication Algorithms
-16.1.3 Toom 3-Way Multiplication
+15.1.3 Toom 3-Way Multiplication
--------------------------------
The Karatsuba formula is the simplest case of a general approach to
\1f
File: gmp.info, Node: Toom 4-Way Multiplication, Next: Higher degree Toom'n'half, Prev: Toom 3-Way Multiplication, Up: Multiplication Algorithms
-16.1.4 Toom 4-Way Multiplication
+15.1.4 Toom 4-Way Multiplication
--------------------------------
Karatsuba and Toom-3 split the operands into 2 and 3 coefficients,
\1f
File: gmp.info, Node: Higher degree Toom'n'half, Next: FFT Multiplication, Prev: Toom 4-Way Multiplication, Up: Multiplication Algorithms
-16.1.5 Higher degree Toom'n'half
+15.1.5 Higher degree Toom'n'half
--------------------------------
The Toom algorithms described above (*note Toom 3-Way Multiplication::,
\1f
File: gmp.info, Node: FFT Multiplication, Next: Other Multiplication, Prev: Higher degree Toom'n'half, Up: Multiplication Algorithms
-16.1.6 FFT Multiplication
+15.1.6 FFT Multiplication
-------------------------
At large to very large sizes a Fermat style FFT multiplication is used,
\1f
File: gmp.info, Node: Other Multiplication, Next: Unbalanced Multiplication, Prev: FFT Multiplication, Up: Multiplication Algorithms
-16.1.7 Other Multiplication
+15.1.7 Other Multiplication
---------------------------
The Toom algorithms described above (*note Toom 3-Way Multiplication::,
\1f
File: gmp.info, Node: Unbalanced Multiplication, Prev: Other Multiplication, Up: Multiplication Algorithms
-16.1.8 Unbalanced Multiplication
+15.1.8 Unbalanced Multiplication
--------------------------------
Multiplication of operands with different sizes, both below
\1f
File: gmp.info, Node: Division Algorithms, Next: Greatest Common Divisor Algorithms, Prev: Multiplication Algorithms, Up: Algorithms
-16.2 Division Algorithms
+15.2 Division Algorithms
========================
* Menu:
\1f
File: gmp.info, Node: Single Limb Division, Next: Basecase Division, Prev: Division Algorithms, Up: Division Algorithms
-16.2.1 Single Limb Division
+15.2.1 Single Limb Division
---------------------------
Nx1 division is implemented using repeated 2x1 divisions from high to
\1f
File: gmp.info, Node: Basecase Division, Next: Divide and Conquer Division, Prev: Single Limb Division, Up: Division Algorithms
-16.2.2 Basecase Division
+15.2.2 Basecase Division
------------------------
Basecase NxM division is like long division done by hand, but in base
\1f
File: gmp.info, Node: Divide and Conquer Division, Next: Block-Wise Barrett Division, Prev: Basecase Division, Up: Division Algorithms
-16.2.3 Divide and Conquer Division
+15.2.3 Divide and Conquer Division
----------------------------------
For divisors larger than `DC_DIV_QR_THRESHOLD', division is done by
\1f
File: gmp.info, Node: Block-Wise Barrett Division, Next: Exact Division, Prev: Divide and Conquer Division, Up: Division Algorithms
-16.2.4 Block-Wise Barrett Division
+15.2.4 Block-Wise Barrett Division
----------------------------------
For the largest divisions, a block-wise Barrett division algorithm is
\1f
File: gmp.info, Node: Exact Division, Next: Exact Remainder, Prev: Block-Wise Barrett Division, Up: Division Algorithms
-16.2.5 Exact Division
+15.2.5 Exact Division
---------------------
A so-called exact division is when the dividend is known to be an exact
\1f
File: gmp.info, Node: Exact Remainder, Next: Small Quotient Division, Prev: Exact Division, Up: Division Algorithms
-16.2.6 Exact Remainder
+15.2.6 Exact Remainder
----------------------
If the exact division algorithm is done with a full subtraction at each
simplifications arise, providing good speedups on a number of
processors.
- `mpn_divexact_by3', `mpn_modexact_1_odd' and the `mpn_redc_X'
-functions differ subtly in how they return r, leading to some negations
-in the above formula, but all are essentially the same.
+ The functions `mpn_divexact_by3', `mpn_modexact_1_odd' and the
+internal `mpn_redc_X' functions differ subtly in how they return r,
+leading to some negations in the above formula, but all are essentially
+the same.
Clearly r is zero when a is a multiple of d, and this leads to
divisibility or congruence tests which are potentially more efficient
\1f
File: gmp.info, Node: Small Quotient Division, Prev: Exact Remainder, Up: Division Algorithms
-16.2.7 Small Quotient Division
+15.2.7 Small Quotient Division
------------------------------
An NxM division where the number of quotient limbs Q=N-M is small can
\1f
File: gmp.info, Node: Greatest Common Divisor Algorithms, Next: Powering Algorithms, Prev: Division Algorithms, Up: Algorithms
-16.3 Greatest Common Divisor
+15.3 Greatest Common Divisor
============================
* Menu:
\1f
File: gmp.info, Node: Binary GCD, Next: Lehmer's Algorithm, Prev: Greatest Common Divisor Algorithms, Up: Greatest Common Divisor Algorithms
-16.3.1 Binary GCD
+15.3.1 Binary GCD
-----------------
At small sizes GMP uses an O(N^2) binary style GCD. This is described
\1f
File: gmp.info, Node: Lehmer's Algorithm, Next: Subquadratic GCD, Prev: Binary GCD, Up: Greatest Common Divisor Algorithms
-16.3.2 Lehmer's algorithm
+15.3.2 Lehmer's algorithm
-------------------------
Lehmer's improvement of the Euclidean algorithms is based on the
reduced in size from two limbs to one and a half.
+\1f
+File: gmp.info, Node: Subquadratic GCD, Next: Extended GCD, Prev: Lehmer's Algorithm, Up: Greatest Common Divisor Algorithms
+
+15.3.3 Subquadratic GCD
+-----------------------
+
+For inputs larger than `GCD_DC_THRESHOLD', GCD is computed via the HGCD
+(Half GCD) function, as a generalization of Lehmer's algorithm.
+
+ Let the inputs a,b be of size N limbs each. Put S = floor(N/2) + 1.
+Then HGCD(a,b) returns a transformation matrix T with non-negative
+elements, and reduced numbers (c;d) = T^-1 (a;b). The reduced numbers
+c,d must be larger than S limbs, while their difference abs(c-d) must
+fit in S limbs. The matrix elements will also be of size roughly N/2.
+
+ The HGCD base case uses Lehmer's algorithm, but with the above stop
+condition that returns reduced numbers and the corresponding
+transformation matrix half-way through. For inputs larger than
+`HGCD_THRESHOLD', HGCD is computed recursively, using the divide and
+conquer algorithm in "On Schönhage's algorithm and subquadratic integer
+GCD computation" by Möller (*note References::). The recursive
+algorithm consists of these main steps.
+
+ * Call HGCD recursively, on the most significant N/2 limbs. Apply the
+ resulting matrix T_1 to the full numbers, reducing them to a size
+ just above 3N/2.
+
+ * Perform a small number of division or subtraction steps to reduce
+ the numbers to size below 3N/2. This is essential mainly for the
+ unlikely case of large quotients.
+
+ * Call HGCD recursively, on the most significant N/2 limbs of the
+ reduced numbers. Apply the resulting matrix T_2 to the full
+ numbers, reducing them to a size just above N/2.
+
+ * Compute T = T_1 T_2.
+
+ * Perform a small number of division and subtraction steps to
+ satisfy the requirements, and return.
+
+ GCD is then implemented as a loop around HGCD, similarly to Lehmer's
+algorithm. Where Lehmer repeatedly chops off the top two limbs, calls
+`mpn_hgcd2', and applies the resulting matrix to the full numbers, the
+subquadratic GCD chops off the most significant third of the limbs (the
+proportion is a tuning parameter, and 1/3 seems to be more efficient
+than, e.g., 1/2), calls `mpn_hgcd', and applies the resulting matrix.
+Once the input numbers are reduced to size below `GCD_DC_THRESHOLD',
+Lehmer's algorithm is used for the rest of the work.
+
+ The asymptotic running time of both HGCD and GCD is O(M(N)*log(N)),
+where M(N) is the time for multiplying two N-limb numbers.
+
\1f
Local Variables:
../../gmp/doc/gmp.texi.
This manual describes how to install and use the GNU multiple precision
-arithmetic library, version 5.0.5.
+arithmetic library, version 5.1.3.
Copyright 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
-2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
-Free Software Foundation, Inc.
+2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012,
+2013 Free Software Foundation, Inc.
Permission is granted to copy, distribute and/or modify this
document under the terms of the GNU Free Documentation License, Version
* gmp: (gmp). GNU Multiple Precision Arithmetic Library.
END-INFO-DIR-ENTRY
-\1f
-File: gmp.info, Node: Subquadratic GCD, Next: Extended GCD, Prev: Lehmer's Algorithm, Up: Greatest Common Divisor Algorithms
-
-16.3.3 Subquadratic GCD
------------------------
-
-For inputs larger than `GCD_DC_THRESHOLD', GCD is computed via the HGCD
-(Half GCD) function, as a generalization to Lehmer's algorithm.
-
- Let the inputs a,b be of size N limbs each. Put S = floor(N/2) + 1.
-Then HGCD(a,b) returns a transformation matrix T with non-negative
-elements, and reduced numbers (c;d) = T^-1 (a;b). The reduced numbers
-c,d must be larger than S limbs, while their difference abs(c-d) must
-fit in S limbs. The matrix elements will also be of size roughly N/2.
-
- The HGCD base case uses Lehmer's algorithm, but with the above stop
-condition that returns reduced numbers and the corresponding
-transformation matrix half-way through. For inputs larger than
-`HGCD_THRESHOLD', HGCD is computed recursively, using the divide and
-conquer algorithm in "On Schönhage's algorithm and subquadratic integer
-GCD computation" by Möller (*note References::). The recursive
-algorithm consists of these main steps.
-
- * Call HGCD recursively, on the most significant N/2 limbs. Apply the
- resulting matrix T_1 to the full numbers, reducing them to a size
- just above 3N/2.
-
- * Perform a small number of division or subtraction steps to reduce
- the numbers to size below 3N/2. This is essential mainly for the
- unlikely case of large quotients.
-
- * Call HGCD recursively, on the most significant N/2 limbs of the
- reduced numbers. Apply the resulting matrix T_2 to the full
- numbers, reducing them to a size just above N/2.
-
- * Compute T = T_1 T_2.
-
- * Perform a small number of division and subtraction steps to
- satisfy the requirements, and return.
-
- GCD is then implemented as a loop around HGCD, similarly to Lehmer's
-algorithm. Where Lehmer repeatedly chops off the top two limbs, calls
-`mpn_hgcd2', and applies the resulting matrix to the full numbers, the
-subquadratic GCD chops off the most significant third of the limbs (the
-proportion is a tuning parameter, and 1/3 seems to be more efficient
-than, e.g, 1/2), calls `mpn_hgcd', and applies the resulting matrix.
-Once the input numbers are reduced to size below `GCD_DC_THRESHOLD',
-Lehmer's algorithm is used for the rest of the work.
-
- The asymptotic running time of both HGCD and GCD is O(M(N)*log(N)),
-where M(N) is the time for multiplying two N-limb numbers.
-
\1f
File: gmp.info, Node: Extended GCD, Next: Jacobi Symbol, Prev: Subquadratic GCD, Up: Greatest Common Divisor Algorithms
-16.3.4 Extended GCD
+15.3.4 Extended GCD
-------------------
The extended GCD function, or GCDEXT, calculates gcd(a,b) and also
\1f
File: gmp.info, Node: Jacobi Symbol, Prev: Extended GCD, Up: Greatest Common Divisor Algorithms
-16.3.5 Jacobi Symbol
+15.3.5 Jacobi Symbol
--------------------
-`mpz_jacobi' and `mpz_kronecker' are currently implemented with a
+[This section is obsolete. The current Jacobi code actually uses a very
+efficient algorithm.]
+
+ `mpz_jacobi' and `mpz_kronecker' are currently implemented with a
simple binary algorithm similar to that described for the GCDs (*note
Binary GCD::). They're not very fast when both inputs are large.
Lehmer's multi-step improvement or a binary based multi-step algorithm
\1f
File: gmp.info, Node: Powering Algorithms, Next: Root Extraction Algorithms, Prev: Greatest Common Divisor Algorithms, Up: Algorithms
-16.4 Powering Algorithms
+15.4 Powering Algorithms
========================
* Menu:
\1f
File: gmp.info, Node: Normal Powering Algorithm, Next: Modular Powering Algorithm, Prev: Powering Algorithms, Up: Powering Algorithms
-16.4.1 Normal Powering
+15.4.1 Normal Powering
----------------------
Normal `mpz' or `mpf' powering uses a simple binary algorithm,
\1f
File: gmp.info, Node: Modular Powering Algorithm, Prev: Normal Powering Algorithm, Up: Powering Algorithms
-16.4.2 Modular Powering
+15.4.2 Modular Powering
-----------------------
Modular powering is implemented using a 2^k-ary sliding window
made to minimize the average number of multiplications that must
supplement the squaring.
- The modular multiplies and squares use either a simple division or
+ The modular multiplies and squarings use either a simple division or
the REDC method by Montgomery (*note References::). REDC is a little
faster, essentially saving N single limb divisions in a fashion similar
to an exact remainder (*note Exact Remainder::).
\1f
File: gmp.info, Node: Root Extraction Algorithms, Next: Radix Conversion Algorithms, Prev: Powering Algorithms, Up: Algorithms
-16.5 Root Extraction Algorithms
+15.5 Root Extraction Algorithms
===============================
* Menu:
\1f
File: gmp.info, Node: Square Root Algorithm, Next: Nth Root Algorithm, Prev: Root Extraction Algorithms, Up: Root Extraction Algorithms
-16.5.1 Square Root
+15.5.1 Square Root
------------------
Square roots are taken using the "Karatsuba Square Root" algorithm by
\1f
File: gmp.info, Node: Nth Root Algorithm, Next: Perfect Square Algorithm, Prev: Square Root Algorithm, Up: Root Extraction Algorithms
-16.5.2 Nth Root
+15.5.2 Nth Root
---------------
Integer Nth roots are taken using Newton's method with the following
\1f
File: gmp.info, Node: Perfect Square Algorithm, Next: Perfect Power Algorithm, Prev: Nth Root Algorithm, Up: Root Extraction Algorithms
-16.5.3 Perfect Square
+15.5.3 Perfect Square
---------------------
A significant fraction of non-squares can be quickly identified by
\1f
File: gmp.info, Node: Perfect Power Algorithm, Prev: Perfect Square Algorithm, Up: Root Extraction Algorithms
-16.5.4 Perfect Power
+15.5.4 Perfect Power
--------------------
Detecting perfect powers is required by some factorization algorithms.
\1f
File: gmp.info, Node: Radix Conversion Algorithms, Next: Other Algorithms, Prev: Root Extraction Algorithms, Up: Algorithms
-16.6 Radix Conversion
+15.6 Radix Conversion
=====================
Radix conversions are less important than other algorithms. A program
\1f
File: gmp.info, Node: Binary to Radix, Next: Radix to Binary, Prev: Radix Conversion Algorithms, Up: Radix Conversion Algorithms
-16.6.1 Binary to Radix
+15.6.1 Binary to Radix
----------------------
Conversions from binary to a power-of-2 radix use a simple and fast
\1f
File: gmp.info, Node: Radix to Binary, Prev: Binary to Radix, Up: Radix Conversion Algorithms
-16.6.2 Radix to Binary
+15.6.2 Radix to Binary
----------------------
*This section needs to be rewritten, it currently describes the
\1f
File: gmp.info, Node: Other Algorithms, Next: Assembly Coding, Prev: Radix Conversion Algorithms, Up: Algorithms
-16.7 Other Algorithms
+15.7 Other Algorithms
=====================
* Menu:
\1f
File: gmp.info, Node: Prime Testing Algorithm, Next: Factorial Algorithm, Prev: Other Algorithms, Up: Other Algorithms
-16.7.1 Prime Testing
+15.7.1 Prime Testing
--------------------
The primality testing in `mpz_probab_prime_p' (*note Number Theoretic
\1f
File: gmp.info, Node: Factorial Algorithm, Next: Binomial Coefficients Algorithm, Prev: Prime Testing Algorithm, Up: Other Algorithms
-16.7.2 Factorial
+15.7.2 Factorial
----------------
-Factorials are calculated by a combination of removal of twos,
-powering, and binary splitting. The procedure can be best illustrated
+Factorials are calculated by a combination of two algorithms. Both share
+one idea: compute the odd part of the factorial first; a final step then
+accounts for the power-of-2 factor by shifting.
+
+ For small n, the odd factor of n! is computed with the simple
+observation that it is equal to the product of all positive odd numbers
+not exceeding n times the odd factor of [n/2]!, where [x] is the integer
+part of x, and so on recursively. The procedure can be best illustrated
with an example,
- 23! = 1.2.3.4.5.6.7.8.9.10.11.12.13.14.15.16.17.18.19.20.21.22.23
+ 23! = (23.21.19.17.15.13.11.9.7.5.3)(11.9.7.5.3)(5.3)2^19
-has factors of two removed,
+ The current code collects all the factors in a single list, with a loop
+and no recursion, and computes the product, with no special care for
+repeated chunks.
- 23! = 2^19.1.1.3.1.5.3.7.1.9.5.11.3.13.7.15.1.17.9.19.5.21.11.23
+ When n is larger, the computation passes through prime sieving. A helper
+function is used, as suggested by Peter Luschny:
-and the resulting terms collected up according to their multiplicity,
+ msf(n) = \frac{n!}{[n/2]!^2 \cdot 2^k} = \prod_{p=3}^{n} p^{L(p,n)}
- 23! = 2^19.(3.5)^3.(7.9.11)^2.(13.15.17.19.21.23)
+ Here p ranges over the odd prime numbers. The exponent k is chosen to
+obtain an odd integer number: k is the number of 1 bits in the binary
+representation of [n/2]. The function L(p,n) can be defined as zero
+when p is composite, and, for any prime p, it is computed with:
- Each sequence such as 13.15.17.19.21.23 is evaluated by splitting
-into every second term, as for instance (13.17.21).(15.19.23), and the
-same recursively on each half. This is implemented iteratively using
-some bit twiddling.
+ L(p,n) = \sum_{i>0} \left( \left[ \frac{n}{p^i} \right] \bmod 2 \right) \le \log_p(n).
+
+ With this helper function, we are able to compute the odd part of n!
+using the recursion implied by n!=[n/2]!^2*msf(n)*2^k. The recursion
+stops using the small-n algorithm on some [n/2^i].
+
+ Both the above algorithms use binary splitting to compute the
+product of many small factors. At first as many products as possible
+are accumulated in a single register, generating a list of factors that
+fit in a machine word. This list is then split into halves, and the
+product is computed recursively.
Such splitting is more efficient than repeated Nx1 multiplies since
it forms big multiplies, allowing Karatsuba and higher algorithms to be
used. And even below the Karatsuba threshold a big block of work can
be more efficient for the basecase algorithm.
- Splitting into subsequences of every second term keeps the resulting
-products more nearly equal in size than would the simpler approach of
-say taking the first half and second half of the sequence. Nearly
-equal products are more efficient for the current multiply
-implementation.
-
\1f
File: gmp.info, Node: Binomial Coefficients Algorithm, Next: Fibonacci Numbers Algorithm, Prev: Factorial Algorithm, Up: Other Algorithms
-16.7.3 Binomial Coefficients
+15.7.3 Binomial Coefficients
----------------------------
Binomial coefficients C(n,k) are calculated by first arranging k <= n/2
\1f
File: gmp.info, Node: Fibonacci Numbers Algorithm, Next: Lucas Numbers Algorithm, Prev: Binomial Coefficients Algorithm, Up: Other Algorithms
-16.7.4 Fibonacci Numbers
+15.7.4 Fibonacci Numbers
------------------------
The Fibonacci functions `mpz_fib_ui' and `mpz_fib2_ui' are designed for
\1f
File: gmp.info, Node: Lucas Numbers Algorithm, Next: Random Number Algorithms, Prev: Fibonacci Numbers Algorithm, Up: Other Algorithms
-16.7.5 Lucas Numbers
+15.7.5 Lucas Numbers
--------------------
`mpz_lucnum2_ui' derives a pair of Lucas numbers from a pair of
\1f
File: gmp.info, Node: Random Number Algorithms, Prev: Lucas Numbers Algorithm, Up: Other Algorithms
-16.7.6 Random Numbers
+15.7.6 Random Numbers
---------------------
For the `urandomb' functions, random numbers are generated simply by
Linear congruential generators are described in many text books, for
instance Knuth volume 2 (*note References::). With a modulus M and
-parameters A and C, a integer state S is iterated by the formula S <-
+parameters A and C, an integer state S is iterated by the formula S <-
A*S+C mod M. At each step the new state is a linear function of the
previous, mod M, hence the name of the generator.
\1f
File: gmp.info, Node: Assembly Coding, Prev: Other Algorithms, Up: Algorithms
-16.8 Assembly Coding
+15.8 Assembly Coding
====================
The assembly subroutines in GMP are the most significant source of
\1f
File: gmp.info, Node: Assembly Code Organisation, Next: Assembly Basics, Prev: Assembly Coding, Up: Assembly Coding
-16.8.1 Code Organisation
+15.8.1 Code Organisation
------------------------
The various `mpn' subdirectories contain machine-dependent code, written
\1f
File: gmp.info, Node: Assembly Basics, Next: Assembly Carry Propagation, Prev: Assembly Code Organisation, Up: Assembly Coding
-16.8.2 Assembly Basics
+15.8.2 Assembly Basics
----------------------
`mpn_addmul_1' and `mpn_submul_1' are the most important routines for
\1f
File: gmp.info, Node: Assembly Carry Propagation, Next: Assembly Cache Handling, Prev: Assembly Basics, Up: Assembly Coding
-16.8.3 Carry Propagation
+15.8.3 Carry Propagation
------------------------
The problem that presents most challenges in GMP is propagating carries
\1f
File: gmp.info, Node: Assembly Cache Handling, Next: Assembly Functional Units, Prev: Assembly Carry Propagation, Up: Assembly Coding
-16.8.4 Cache Handling
+15.8.4 Cache Handling
---------------------
GMP aims to perform well both on operands that fit entirely in L1 cache
\1f
File: gmp.info, Node: Assembly Functional Units, Next: Assembly Floating Point, Prev: Assembly Cache Handling, Up: Assembly Coding
-16.8.5 Functional Units
+15.8.5 Functional Units
-----------------------
When choosing an approach for an assembly loop, consideration is given
\1f
File: gmp.info, Node: Assembly Floating Point, Next: Assembly SIMD Instructions, Prev: Assembly Functional Units, Up: Assembly Coding
-16.8.6 Floating Point
+15.8.6 Floating Point
---------------------
Floating point arithmetic is used in GMP for multiplications on CPUs
\1f
File: gmp.info, Node: Assembly SIMD Instructions, Next: Assembly Software Pipelining, Prev: Assembly Floating Point, Up: Assembly Coding
-16.8.7 SIMD Instructions
+15.8.7 SIMD Instructions
------------------------
The single-instruction multiple-data support in current microprocessors
\1f
File: gmp.info, Node: Assembly Software Pipelining, Next: Assembly Loop Unrolling, Prev: Assembly SIMD Instructions, Up: Assembly Coding
-16.8.8 Software Pipelining
+15.8.8 Software Pipelining
--------------------------
Software pipelining consists of scheduling instructions around the
\1f
File: gmp.info, Node: Assembly Loop Unrolling, Next: Assembly Writing Guide, Prev: Assembly Software Pipelining, Up: Assembly Coding
-16.8.9 Loop Unrolling
+15.8.9 Loop Unrolling
---------------------
Loop unrolling consists of replicating code so that several limbs are
\1f
File: gmp.info, Node: Assembly Writing Guide, Prev: Assembly Loop Unrolling, Up: Assembly Coding
-16.8.10 Writing Guide
+15.8.10 Writing Guide
---------------------
This is a guide to writing software pipelined loops for processing limb
\1f
File: gmp.info, Node: Internals, Next: Contributors, Prev: Algorithms, Up: Top
-17 Internals
+16 Internals
************
*This chapter is provided only for informational purposes and the
\1f
File: gmp.info, Node: Integer Internals, Next: Rational Internals, Prev: Internals, Up: Internals
-17.1 Integer Internals
+16.1 Integer Internals
======================
`mpz_t' variables represent integers using sign and magnitude, in space
\1f
File: gmp.info, Node: Rational Internals, Next: Float Internals, Prev: Integer Internals, Up: Internals
-17.2 Rational Internals
+16.2 Rational Internals
=======================
`mpq_t' variables represent rationals using an `mpz_t' numerator and
\1f
File: gmp.info, Node: Float Internals, Next: Raw Output Internals, Prev: Rational Internals, Up: Internals
-17.3 Float Internals
+16.3 Float Internals
====================
Efficient calculation is the primary aim of GMP floats and the use of
\1f
File: gmp.info, Node: Raw Output Internals, Next: C++ Interface Internals, Prev: Float Internals, Up: Internals
-17.4 Raw Output Internals
+16.4 Raw Output Internals
=========================
`mpz_out_raw' uses the following format.
\1f
File: gmp.info, Node: C++ Interface Internals, Prev: Raw Output Internals, Up: Internals
-17.5 C++ Interface Internals
+16.5 C++ Interface Internals
============================
A system of expression templates is used to ensure something like
Pedro Gimeno implemented the Mersenne Twister and made other random
number improvements.
- Niels Möller wrote the sub-quadratic GCD and extended GCD code, the
-quadratic Hensel division code, and (with Torbjörn) the new divide and
-conquer division code for GMP 4.3. Niels also helped implement the new
-Toom multiply code for GMP 4.3 and implemented helper functions to
-simplify Toom evaluations for GMP 5.0. He wrote the original version
-of mpn_mulmod_bnm1.
+ Niels Möller wrote the sub-quadratic GCD, extended GCD and Jacobi
+code, the quadratic Hensel division code, and (with Torbjörn) the new
+divide and conquer division code for GMP 4.3. Niels also helped
+implement the new Toom multiply code for GMP 4.3 and implemented helper
+functions to simplify Toom evaluations for GMP 5.0. He wrote the
+original version of mpn_mulmod_bnm1, and he is the main author of the
+mini-gmp package used for GMP bootstrapping.
Alberto Zanoni and Marco Bodrato suggested the unbalanced multiply
strategy, and found the optimal strategies for evaluation and
4.3 and implemented most of the new Toom multiply and squaring code for
5.0. He is the main author of the current mpn_mulmod_bnm1 and
mpn_mullo_n. Marco also wrote the functions mpn_invert and
-mpn_invertappr.
+mpn_invertappr. He is the author of the current combinatorial
+functions: binomial, factorial, multifactorial, primorial.
David Harvey suggested the internal function `mpn_bdiv_dbm1',
implementing division relevant to Toom multiplication. He also worked
on fast assembly sequences, in particular on a fast AMD64
-`mpn_mul_basecase'.
+`mpn_mul_basecase'. He wrote the internal middle product functions
+`mpn_mulmid_basecase', `mpn_toom42_mulmid', `mpn_mulmid_n' and related
+helper routines.
Martin Boij wrote `mpn_perfect_power_p'.
+ Marc Glisse improved `gmpxx.h': use fewer temporaries (faster),
+specializations of `numeric_limits' and `common_type', C++11 features
+(move constructors, explicit bool conversion, UDL), make the conversion
+from `mpq_class' to `mpz_class' explicit, optimize operations where one
+argument is a small compile-time constant, replace some heap
+allocations by stack allocations. He also fixed the eofbit handling of
+C++ streams, and removed one division from `mpq/aors.c'.
+
(This list is chronological, not ordered after significance. If you
have contributed to GMP but are not listed above, please tell
<gmp-devel@gmplib.org> about the omission!)
* #include: Headers and Libraries.
(line 6)
* --build: Build Options. (line 52)
-* --disable-fft: Build Options. (line 317)
+* --disable-fft: Build Options. (line 314)
* --disable-shared: Build Options. (line 45)
* --disable-static: Build Options. (line 45)
-* --enable-alloca: Build Options. (line 278)
-* --enable-assert: Build Options. (line 328)
-* --enable-cxx: Build Options. (line 230)
-* --enable-fat: Build Options. (line 164)
-* --enable-mpbsd: Build Options. (line 323)
-* --enable-profiling <1>: Profiling. (line 6)
-* --enable-profiling: Build Options. (line 332)
+* --enable-alloca: Build Options. (line 275)
+* --enable-assert: Build Options. (line 320)
+* --enable-cxx: Build Options. (line 227)
+* --enable-fat: Build Options. (line 162)
+* --enable-profiling <1>: Build Options. (line 324)
+* --enable-profiling: Profiling. (line 6)
* --exec-prefix: Build Options. (line 32)
* --host: Build Options. (line 66)
* --prefix: Build Options. (line 32)
* 68000: Notes for Particular Systems.
(line 80)
* 80x86: Notes for Particular Systems.
- (line 126)
-* ABI <1>: Build Options. (line 171)
+ (line 127)
+* ABI <1>: Build Options. (line 169)
* ABI: ABI and ISA. (line 6)
* About this manual: Introduction to GMP. (line 58)
* AC_CHECK_LIB: Autoconf. (line 11)
-* AIX <1>: ABI and ISA. (line 169)
-* AIX: Notes for Particular Systems.
+* AIX <1>: Notes for Particular Systems.
(line 7)
+* AIX: ABI and ISA. (line 169)
* Algorithms: Algorithms. (line 6)
-* alloca: Build Options. (line 278)
+* alloca: Build Options. (line 275)
* Allocation of memory: Custom Allocation. (line 6)
* AMD64: ABI and ISA. (line 44)
* Anonymous FTP of latest version: Introduction to GMP. (line 38)
* Application Binary Interface: ABI and ISA. (line 6)
-* Arithmetic functions <1>: Float Arithmetic. (line 6)
-* Arithmetic functions <2>: Integer Arithmetic. (line 6)
-* Arithmetic functions: Rational Arithmetic. (line 6)
+* Arithmetic functions <1>: Rational Arithmetic. (line 6)
+* Arithmetic functions <2>: Float Arithmetic. (line 6)
+* Arithmetic functions: Integer Arithmetic. (line 6)
* ARM: Notes for Particular Systems.
(line 20)
* Assembly cache handling: Assembly Cache Handling.
(line 6)
* Assembly writing guide: Assembly Writing Guide.
(line 6)
-* Assertion checking <1>: Debugging. (line 79)
-* Assertion checking: Build Options. (line 328)
+* Assertion checking <1>: Build Options. (line 320)
+* Assertion checking: Debugging. (line 79)
* Assignment functions <1>: Assigning Integers. (line 6)
-* Assignment functions <2>: Simultaneous Float Init & Assign.
+* Assignment functions <2>: Initializing Rationals.
(line 6)
* Assignment functions <3>: Assigning Floats. (line 6)
-* Assignment functions <4>: Initializing Rationals.
+* Assignment functions <4>: Simultaneous Float Init & Assign.
(line 6)
* Assignment functions: Simultaneous Integer Init & Assign.
(line 6)
* Autoconf: Autoconf. (line 6)
* Basics: GMP Basics. (line 6)
-* Berkeley MP compatible functions <1>: Build Options. (line 323)
-* Berkeley MP compatible functions: BSD Compatible Functions.
- (line 6)
* Binomial coefficient algorithm: Binomial Coefficients Algorithm.
(line 6)
* Binomial coefficient functions: Number Theoretic Functions.
- (line 113)
+ (line 128)
* Binutils strip: Known Build Problems.
(line 28)
* Bit manipulation functions: Integer Logic and Bit Fiddling.
(line 6)
* Bit scanning functions: Integer Logic and Bit Fiddling.
- (line 38)
-* Bit shift left: Integer Arithmetic. (line 35)
-* Bit shift right: Integer Division. (line 53)
+ (line 40)
+* Bit shift left: Integer Arithmetic. (line 38)
+* Bit shift right: Integer Division. (line 62)
* Bits per limb: Useful Macros and Constants.
(line 7)
-* BSD MP compatible functions <1>: BSD Compatible Functions.
- (line 6)
-* BSD MP compatible functions: Build Options. (line 323)
* Bug reporting: Reporting Bugs. (line 6)
* Build directory: Build Options. (line 19)
* Build notes for binary packaging: Notes for Package Builds.
* Build system: Build Options. (line 52)
* Building GMP: Installing GMP. (line 6)
* Bus error: Debugging. (line 7)
-* C compiler: Build Options. (line 182)
-* C++ compiler: Build Options. (line 254)
+* C compiler: Build Options. (line 180)
+* C++ compiler: Build Options. (line 251)
* C++ interface: C++ Class Interface. (line 6)
* C++ interface internals: C++ Interface Internals.
(line 6)
* C++ istream input: C++ Formatted Input. (line 6)
* C++ ostream output: C++ Formatted Output.
(line 6)
-* C++ support: Build Options. (line 230)
-* CC: Build Options. (line 182)
-* CC_FOR_BUILD: Build Options. (line 217)
-* CFLAGS: Build Options. (line 182)
+* C++ support: Build Options. (line 227)
+* CC: Build Options. (line 180)
+* CC_FOR_BUILD: Build Options. (line 214)
+* CFLAGS: Build Options. (line 180)
* Checker: Debugging. (line 115)
* checkergcc: Debugging. (line 122)
* Code organisation: Assembly Code Organisation.
(line 6)
* Compaq C++: Notes for Particular Systems.
(line 25)
-* Comparison functions <1>: Float Comparison. (line 6)
-* Comparison functions <2>: Integer Comparisons. (line 6)
-* Comparison functions: Comparing Rationals. (line 6)
+* Comparison functions <1>: Comparing Rationals. (line 6)
+* Comparison functions <2>: Float Comparison. (line 6)
+* Comparison functions: Integer Comparisons. (line 6)
* Compatibility with older versions: Compatibility with older versions.
(line 6)
* Conditions for copying GNU MP: Copying. (line 6)
* Configuring GMP: Installing GMP. (line 6)
-* Congruence algorithm: Exact Remainder. (line 29)
-* Congruence functions: Integer Division. (line 124)
+* Congruence algorithm: Exact Remainder. (line 30)
+* Congruence functions: Integer Division. (line 137)
* Constants: Useful Macros and Constants.
(line 6)
* Contributors: Contributors. (line 6)
(line 6)
* Conventions for variables: Variable Conventions.
(line 6)
-* Conversion functions <1>: Rational Conversions.
+* Conversion functions <1>: Converting Integers. (line 6)
+* Conversion functions <2>: Converting Floats. (line 6)
+* Conversion functions: Rational Conversions.
(line 6)
-* Conversion functions <2>: Converting Integers. (line 6)
-* Conversion functions: Converting Floats. (line 6)
* Copying conditions: Copying. (line 6)
-* CPPFLAGS: Build Options. (line 208)
+* CPPFLAGS: Build Options. (line 206)
* CPU types <1>: Introduction to GMP. (line 24)
* CPU types: Build Options. (line 108)
* Cross compiling: Build Options. (line 66)
* Custom allocation: Custom Allocation. (line 6)
-* CXX: Build Options. (line 254)
-* CXXFLAGS: Build Options. (line 254)
+* CXX: Build Options. (line 251)
+* CXXFLAGS: Build Options. (line 251)
* Cygwin: Notes for Particular Systems.
(line 43)
* Darwin: Known Build Problems.
(line 6)
* Digits in an integer: Miscellaneous Integer Functions.
(line 23)
-* Divisibility algorithm: Exact Remainder. (line 29)
-* Divisibility functions: Integer Division. (line 112)
+* Divisibility algorithm: Exact Remainder. (line 30)
+* Divisibility functions: Integer Division. (line 137)
* Divisibility testing: Efficiency. (line 91)
* Division algorithms: Division Algorithms. (line 6)
-* Division functions <1>: Float Arithmetic. (line 33)
-* Division functions <2>: Rational Arithmetic. (line 22)
+* Division functions <1>: Rational Arithmetic. (line 24)
+* Division functions <2>: Float Arithmetic. (line 33)
* Division functions: Integer Division. (line 6)
* DJGPP <1>: Notes for Particular Systems.
(line 43)
(line 18)
* DLLs: Notes for Particular Systems.
(line 56)
-* DocBook: Build Options. (line 355)
-* Documentation formats: Build Options. (line 348)
+* DocBook: Build Options. (line 347)
+* Documentation formats: Build Options. (line 340)
* Documentation license: GNU Free Documentation License.
(line 6)
-* DVI: Build Options. (line 351)
+* DVI: Build Options. (line 343)
* Efficiency: Efficiency. (line 6)
* Emacs: Emacs. (line 6)
-* Exact division functions: Integer Division. (line 102)
+* Exact division functions: Integer Division. (line 112)
* Exact remainder: Exact Remainder. (line 6)
* Example programs: Demonstration Programs.
(line 6)
* Exec prefix: Build Options. (line 32)
* Execution profiling <1>: Profiling. (line 6)
-* Execution profiling: Build Options. (line 332)
-* Exponentiation functions <1>: Integer Exponentiation.
+* Execution profiling: Build Options. (line 324)
+* Exponentiation functions <1>: Float Arithmetic. (line 41)
+* Exponentiation functions: Integer Exponentiation.
(line 6)
-* Exponentiation functions: Float Arithmetic. (line 41)
* Export: Integer Import and Export.
(line 45)
* Expression parsing demo: Demonstration Programs.
- (line 15)
+ (line 18)
* Extended GCD: Number Theoretic Functions.
- (line 47)
+ (line 49)
* Factor removal functions: Number Theoretic Functions.
- (line 103)
+ (line 108)
* Factorial algorithm: Factorial Algorithm. (line 6)
* Factorial functions: Number Theoretic Functions.
- (line 108)
+ (line 116)
* Factorization demo: Demonstration Programs.
(line 25)
* Fast Fourier Transform: FFT Multiplication. (line 6)
-* Fat binary: Build Options. (line 164)
-* FFT multiplication <1>: Build Options. (line 317)
+* Fat binary: Build Options. (line 162)
+* FFT multiplication <1>: Build Options. (line 314)
* FFT multiplication: FFT Multiplication. (line 6)
* Fibonacci number algorithm: Fibonacci Numbers Algorithm.
(line 6)
* Fibonacci sequence functions: Number Theoretic Functions.
- (line 121)
+ (line 136)
* Float arithmetic functions: Float Arithmetic. (line 6)
* Float assignment functions <1>: Simultaneous Float Init & Assign.
(line 6)
(line 27)
* Float rounding functions: Miscellaneous Float Functions.
(line 9)
-* Float sign tests: Float Comparison. (line 33)
+* Float sign tests: Float Comparison. (line 35)
* Floating point mode: Notes for Particular Systems.
(line 34)
* Floating-point functions: Floating-point Functions.
* Formatted output: Formatted Output. (line 6)
* Free Documentation License: GNU Free Documentation License.
(line 6)
-* frexp <1>: Converting Floats. (line 23)
-* frexp: Converting Integers. (line 42)
+* frexp <1>: Converting Integers. (line 43)
+* frexp: Converting Floats. (line 24)
* FTP of latest version: Introduction to GMP. (line 38)
* Function classes: Function Classes. (line 6)
* FunctionCheck: Profiling. (line 77)
* GCD algorithms: Greatest Common Divisor Algorithms.
(line 6)
* GCD extended: Number Theoretic Functions.
- (line 47)
+ (line 49)
* GCD functions: Number Theoretic Functions.
- (line 30)
+ (line 32)
* GDB: Debugging. (line 58)
* Generic C: Build Options. (line 153)
* GMP Perl module: Demonstration Programs.
* Greatest common divisor algorithms: Greatest Common Divisor Algorithms.
(line 6)
* Greatest common divisor functions: Number Theoretic Functions.
- (line 30)
+ (line 32)
* Hardware floating point mode: Notes for Particular Systems.
(line 34)
* Headers: Headers and Libraries.
* HP-UX: ABI and ISA. (line 107)
* HPPA: ABI and ISA. (line 68)
* I/O functions <1>: I/O of Floats. (line 6)
-* I/O functions <2>: I/O of Integers. (line 6)
-* I/O functions: I/O of Rationals. (line 6)
+* I/O functions <2>: I/O of Rationals. (line 6)
+* I/O functions: I/O of Integers. (line 6)
* i386: Notes for Particular Systems.
- (line 126)
+ (line 127)
* IA-64: ABI and ISA. (line 107)
* Import: Integer Import and Export.
(line 11)
* Include files: Headers and Libraries.
(line 6)
* info-lookup-symbol: Emacs. (line 6)
-* Initialization functions <1>: Initializing Integers.
+* Initialization functions <1>: Simultaneous Float Init & Assign.
(line 6)
* Initialization functions <2>: Random State Initialization.
(line 6)
-* Initialization functions <3>: Initializing Rationals.
+* Initialization functions <3>: Initializing Floats. (line 6)
+* Initialization functions <4>: Simultaneous Integer Init & Assign.
(line 6)
-* Initialization functions <4>: Initializing Floats. (line 6)
-* Initialization functions <5>: Simultaneous Float Init & Assign.
+* Initialization functions <5>: Initializing Rationals.
(line 6)
-* Initialization functions: Simultaneous Integer Init & Assign.
+* Initialization functions: Initializing Integers.
(line 6)
* Initializing and clearing: Efficiency. (line 21)
-* Input functions <1>: I/O of Floats. (line 6)
+* Input functions <1>: I/O of Integers. (line 6)
* Input functions <2>: I/O of Rationals. (line 6)
-* Input functions <3>: I/O of Integers. (line 6)
-* Input functions: Formatted Input Functions.
+* Input functions <3>: Formatted Input Functions.
(line 6)
+* Input functions: I/O of Floats. (line 6)
* Install prefix: Build Options. (line 32)
* Installing GMP: Installing GMP. (line 6)
* Instruction Set Architecture: ABI and ISA. (line 6)
* Integer functions: Integer Functions. (line 6)
* Integer import: Integer Import and Export.
(line 11)
-* Integer initialization functions <1>: Initializing Integers.
+* Integer initialization functions <1>: Simultaneous Integer Init & Assign.
(line 6)
-* Integer initialization functions: Simultaneous Integer Init & Assign.
+* Integer initialization functions: Initializing Integers.
(line 6)
* Integer input and output functions: I/O of Integers. (line 6)
* Integer internals: Integer Internals. (line 6)
* Internals: Internals. (line 6)
* Introduction: Introduction to GMP. (line 6)
* Inverse modulo functions: Number Theoretic Functions.
- (line 72)
+ (line 76)
* IRIX <1>: Known Build Problems.
(line 38)
* IRIX: ABI and ISA. (line 132)
* istream input: C++ Formatted Input. (line 6)
* Jacobi symbol algorithm: Jacobi Symbol. (line 6)
* Jacobi symbol functions: Number Theoretic Functions.
- (line 79)
+ (line 83)
* Karatsuba multiplication: Karatsuba Multiplication.
(line 6)
* Karatsuba square root algorithm: Square Root Algorithm.
(line 6)
* Kronecker symbol functions: Number Theoretic Functions.
- (line 91)
+ (line 95)
* Language bindings: Language Bindings. (line 6)
* Latest version of GMP: Introduction to GMP. (line 38)
* LCM functions: Number Theoretic Functions.
- (line 67)
+ (line 70)
* Least common multiple functions: Number Theoretic Functions.
- (line 67)
+ (line 70)
* Legendre symbol functions: Number Theoretic Functions.
- (line 82)
+ (line 86)
* libgmp: Headers and Libraries.
(line 22)
* libgmpxx: Headers and Libraries.
* Linear congruential algorithm: Random Number Algorithms.
(line 25)
* Linear congruential random numbers: Random State Initialization.
- (line 18)
+ (line 32)
* Linking: Headers and Libraries.
(line 22)
* Logical functions: Integer Logic and Bit Fiddling.
* Lucas number algorithm: Lucas Numbers Algorithm.
(line 6)
* Lucas number functions: Number Theoretic Functions.
- (line 132)
+ (line 147)
* MacOS X: Known Build Problems.
(line 51)
* Mailing lists: Introduction to GMP. (line 45)
* Miscellaneous integer functions: Miscellaneous Integer Functions.
(line 6)
* MMX: Notes for Particular Systems.
- (line 132)
+ (line 133)
* Modular inverse functions: Number Theoretic Functions.
- (line 72)
+ (line 76)
* Most significant bit: Miscellaneous Integer Functions.
(line 34)
-* mp.h: BSD Compatible Functions.
- (line 21)
-* MPN_PATH: Build Options. (line 336)
+* MPN_PATH: Build Options. (line 328)
* MS Windows: Notes for Particular Systems.
(line 43)
* MS-DOS: Notes for Particular Systems.
* NeXT: Known Build Problems.
(line 57)
* Next prime function: Number Theoretic Functions.
- (line 23)
+ (line 25)
* Nomenclature: Nomenclature and Types.
(line 6)
* Non-Unix systems: Build Options. (line 11)
* ostream output: C++ Formatted Output.
(line 6)
* Other languages: Language Bindings. (line 6)
-* Output functions <1>: I/O of Integers. (line 6)
-* Output functions <2>: I/O of Rationals. (line 6)
-* Output functions <3>: Formatted Output Functions.
+* Output functions <1>: Formatted Output Functions.
(line 6)
-* Output functions: I/O of Floats. (line 6)
+* Output functions <2>: I/O of Rationals. (line 6)
+* Output functions <3>: I/O of Floats. (line 6)
+* Output functions: I/O of Integers. (line 6)
* Packaged builds: Notes for Package Builds.
(line 6)
* Parameter conventions: Parameter Conventions.
(line 6)
* Parsing expressions demo: Demonstration Programs.
- (line 21)
+ (line 15)
* Particular systems: Notes for Particular Systems.
(line 6)
* Past GMP versions: Compatibility with older versions.
(line 6)
-* PDF: Build Options. (line 351)
+* PDF: Build Options. (line 343)
* Perfect power algorithm: Perfect Power Algorithm.
(line 6)
-* Perfect power functions: Integer Roots. (line 27)
+* Perfect power functions: Integer Roots. (line 28)
* Perfect square algorithm: Perfect Square Algorithm.
(line 6)
-* Perfect square functions: Integer Roots. (line 36)
+* Perfect square functions: Integer Roots. (line 37)
* perl: Demonstration Programs.
(line 35)
* Perl module: Demonstration Programs.
(line 35)
-* Postscript: Build Options. (line 351)
-* Power/PowerPC <1>: Known Build Problems.
- (line 63)
-* Power/PowerPC: Notes for Particular Systems.
+* Postscript: Build Options. (line 343)
+* Power/PowerPC <1>: Notes for Particular Systems.
(line 92)
+* Power/PowerPC: Known Build Problems.
+ (line 63)
* Powering algorithms: Powering Algorithms. (line 6)
-* Powering functions <1>: Float Arithmetic. (line 41)
-* Powering functions: Integer Exponentiation.
+* Powering functions <1>: Integer Exponentiation.
(line 6)
+* Powering functions: Float Arithmetic. (line 41)
* PowerPC: ABI and ISA. (line 167)
* Precision of floats: Floating-point Functions.
(line 6)
(line 6)
* Prime testing functions: Number Theoretic Functions.
(line 7)
+* Primorial functions: Number Theoretic Functions.
+ (line 121)
* printf formatted output: Formatted Output. (line 6)
* Probable prime testing functions: Number Theoretic Functions.
(line 7)
(line 6)
* Random number algorithms: Random Number Algorithms.
(line 6)
-* Random number functions <1>: Random Number Functions.
- (line 6)
-* Random number functions <2>: Miscellaneous Float Functions.
+* Random number functions <1>: Miscellaneous Float Functions.
(line 27)
+* Random number functions <2>: Random Number Functions.
+ (line 6)
* Random number functions: Integer Random Numbers.
(line 6)
* Random number seeding: Random State Seeding.
* Reentrancy: Reentrancy. (line 6)
* References: References. (line 6)
* Remove factor functions: Number Theoretic Functions.
- (line 103)
+ (line 108)
* Reporting bugs: Reporting Bugs. (line 6)
* Root extraction algorithm: Nth Root Algorithm. (line 6)
* Root extraction algorithms: Root Extraction Algorithms.
(line 6)
-* Root extraction functions <1>: Float Arithmetic. (line 37)
-* Root extraction functions: Integer Roots. (line 6)
-* Root testing functions: Integer Roots. (line 27)
+* Root extraction functions <1>: Integer Roots. (line 6)
+* Root extraction functions: Float Arithmetic. (line 37)
+* Root testing functions: Integer Roots. (line 37)
* Rounding functions: Miscellaneous Float Functions.
(line 9)
* Sample programs: Demonstration Programs.
(line 6)
* Scan bit functions: Integer Logic and Bit Fiddling.
- (line 38)
+ (line 40)
* scanf formatted input: Formatted Input. (line 6)
* SCO: Known Build Problems.
(line 38)
(line 9)
* Sign tests <1>: Integer Comparisons. (line 28)
* Sign tests <2>: Comparing Rationals. (line 27)
-* Sign tests: Float Comparison. (line 33)
+* Sign tests: Float Comparison. (line 35)
* Size in digits: Miscellaneous Integer Functions.
(line 23)
* Small operands: Efficiency. (line 7)
* Solaris <1>: Known Build Problems.
+ (line 72)
+* Solaris <2>: ABI and ISA. (line 199)
+* Solaris: Known Build Problems.
(line 78)
-* Solaris: ABI and ISA. (line 201)
* Sparc: Notes for Particular Systems.
- (line 103)
-* Sparc V9: ABI and ISA. (line 201)
+ (line 109)
+* Sparc V9: ABI and ISA. (line 199)
* Special integer functions: Integer Special Functions.
(line 6)
* Square root algorithm: Square Root Algorithm.
(line 6)
* SSE2: Notes for Particular Systems.
- (line 132)
+ (line 133)
* Stack backtrace: Debugging. (line 50)
-* Stack overflow <1>: Build Options. (line 278)
+* Stack overflow <1>: Build Options. (line 275)
* Stack overflow: Debugging. (line 7)
* Static linking: Efficiency. (line 14)
* stdarg.h: Headers and Libraries.
(line 11)
* Stripped libraries: Known Build Problems.
(line 28)
-* Sun: ABI and ISA. (line 201)
+* Sun: ABI and ISA. (line 199)
* SunOS: Notes for Particular Systems.
- (line 120)
+ (line 121)
* Systems: Notes for Particular Systems.
(line 6)
-* Temporary memory: Build Options. (line 278)
-* Texinfo: Build Options. (line 348)
+* Temporary memory: Build Options. (line 275)
+* Texinfo: Build Options. (line 340)
* Text input/output: Efficiency. (line 153)
* Thread safety: Reentrancy. (line 6)
* Toom multiplication <1>: Other Multiplication.
(line 6)
* Toom multiplication <2>: Toom 3-Way Multiplication.
(line 6)
-* Toom multiplication <3>: Toom 4-Way Multiplication.
+* Toom multiplication <3>: Higher degree Toom'n'half.
(line 6)
-* Toom multiplication: Higher degree Toom'n'half.
+* Toom multiplication: Toom 4-Way Multiplication.
(line 6)
* Types: Nomenclature and Types.
(line 6)
(line 12)
* Web page: Introduction to GMP. (line 34)
* Windows: Notes for Particular Systems.
- (line 43)
+ (line 56)
* x86: Notes for Particular Systems.
- (line 126)
+ (line 127)
* x87: Notes for Particular Systems.
(line 34)
-* XML: Build Options. (line 355)
+* XML: Build Options. (line 347)
\1f
File: gmp.info, Node: Function Index, Prev: Concept Index, Up: Top
(line 12)
* _mpz_realloc: Integer Special Functions.
(line 51)
-* abs <1>: C++ Interface Floats.
- (line 79)
-* abs <2>: C++ Interface Rationals.
- (line 43)
+* abs <1>: C++ Interface Rationals.
+ (line 49)
+* abs <2>: C++ Interface Floats.
+ (line 83)
* abs: C++ Interface Integers.
- (line 42)
+ (line 47)
* ceil: C++ Interface Floats.
- (line 80)
-* cmp <1>: C++ Interface Floats.
- (line 81)
+ (line 84)
+* cmp <1>: C++ Interface Rationals.
+ (line 51)
* cmp <2>: C++ Interface Integers.
- (line 43)
-* cmp <3>: C++ Interface Floats.
- (line 82)
-* cmp <4>: C++ Interface Rationals.
- (line 45)
-* cmp: C++ Interface Integers.
- (line 44)
+ (line 49)
+* cmp <3>: C++ Interface Rationals.
+ (line 50)
+* cmp: C++ Interface Floats.
+ (line 86)
* floor: C++ Interface Floats.
- (line 89)
-* gcd: BSD Compatible Functions.
- (line 82)
+ (line 93)
* gmp_asprintf: Formatted Output Functions.
(line 65)
* gmp_errno: Random State Initialization.
* gmp_randclass: C++ Interface Random Numbers.
(line 7)
* gmp_randclass::get_f: C++ Interface Random Numbers.
- (line 45)
+ (line 46)
* gmp_randclass::get_z_bits: C++ Interface Random Numbers.
(line 38)
* gmp_randclass::get_z_range: C++ Interface Random Numbers.
(line 42)
* gmp_randclass::gmp_randclass: C++ Interface Random Numbers.
- (line 27)
+ (line 13)
* gmp_randclass::seed: C++ Interface Random Numbers.
- (line 34)
+ (line 33)
* gmp_randclear: Random State Initialization.
(line 62)
* gmp_randinit: Random State Initialization.
* gmp_randinit_set: Random State Initialization.
(line 43)
* gmp_randseed: Random State Seeding.
- (line 7)
+ (line 8)
* gmp_randseed_ui: Random State Seeding.
- (line 9)
+ (line 10)
* gmp_randstate_t: Nomenclature and Types.
(line 46)
* gmp_scanf: Formatted Input Functions.
* gmp_vsscanf: Formatted Input Functions.
(line 31)
* hypot: C++ Interface Floats.
- (line 90)
-* itom: BSD Compatible Functions.
- (line 29)
-* madd: BSD Compatible Functions.
- (line 43)
-* mcmp: BSD Compatible Functions.
- (line 85)
-* mdiv: BSD Compatible Functions.
- (line 53)
-* mfree: BSD Compatible Functions.
- (line 105)
-* min: BSD Compatible Functions.
- (line 89)
-* MINT: BSD Compatible Functions.
- (line 21)
-* mout: BSD Compatible Functions.
(line 94)
-* move: BSD Compatible Functions.
- (line 39)
* mp_bitcnt_t: Nomenclature and Types.
(line 42)
* mp_bits_per_limb: Useful Macros and Constants.
(line 7)
* mp_exp_t: Nomenclature and Types.
(line 27)
-* mp_get_memory_functions: Custom Allocation. (line 93)
+* mp_get_memory_functions: Custom Allocation. (line 90)
* mp_limb_t: Nomenclature and Types.
(line 31)
-* mp_set_memory_functions: Custom Allocation. (line 21)
+* mp_set_memory_functions: Custom Allocation. (line 18)
* mp_size_t: Nomenclature and Types.
(line 37)
* mpf_abs: Float Arithmetic. (line 47)
* mpf_class: C++ Interface General.
(line 20)
* mpf_class::fits_sint_p: C++ Interface Floats.
- (line 83)
+ (line 87)
* mpf_class::fits_slong_p: C++ Interface Floats.
- (line 84)
+ (line 88)
* mpf_class::fits_sshort_p: C++ Interface Floats.
- (line 85)
+ (line 89)
* mpf_class::fits_uint_p: C++ Interface Floats.
- (line 86)
+ (line 90)
* mpf_class::fits_ulong_p: C++ Interface Floats.
- (line 87)
+ (line 91)
* mpf_class::fits_ushort_p: C++ Interface Floats.
- (line 88)
+ (line 92)
* mpf_class::get_d: C++ Interface Floats.
- (line 91)
+ (line 95)
* mpf_class::get_mpf_t: C++ Interface General.
(line 66)
* mpf_class::get_prec: C++ Interface Floats.
- (line 109)
+ (line 115)
* mpf_class::get_si: C++ Interface Floats.
- (line 92)
+ (line 96)
* mpf_class::get_str: C++ Interface Floats.
- (line 94)
+ (line 98)
* mpf_class::get_ui: C++ Interface Floats.
- (line 95)
+ (line 99)
* mpf_class::mpf_class: C++ Interface Floats.
(line 12)
* mpf_class::operator=: C++ Interface Floats.
- (line 56)
+ (line 60)
* mpf_class::set_prec: C++ Interface Floats.
- (line 110)
+ (line 116)
* mpf_class::set_prec_raw: C++ Interface Floats.
- (line 111)
+ (line 117)
* mpf_class::set_str: C++ Interface Floats.
- (line 97)
+ (line 101)
+* mpf_class::swap: C++ Interface Floats.
+ (line 104)
* mpf_clear: Initializing Floats. (line 37)
* mpf_clears: Initializing Floats. (line 41)
* mpf_cmp: Float Comparison. (line 7)
* mpf_cmp_si: Float Comparison. (line 10)
* mpf_cmp_ui: Float Comparison. (line 9)
* mpf_div: Float Arithmetic. (line 29)
-* mpf_div_2exp: Float Arithmetic. (line 53)
+* mpf_div_2exp: Float Arithmetic. (line 55)
* mpf_div_ui: Float Arithmetic. (line 33)
-* mpf_eq: Float Comparison. (line 17)
+* mpf_eq: Float Comparison. (line 18)
* mpf_fits_sint_p: Miscellaneous Float Functions.
(line 20)
* mpf_fits_slong_p: Miscellaneous Float Functions.
* mpf_floor: Miscellaneous Float Functions.
(line 8)
* mpf_get_d: Converting Floats. (line 7)
-* mpf_get_d_2exp: Converting Floats. (line 16)
+* mpf_get_d_2exp: Converting Floats. (line 17)
* mpf_get_default_prec: Initializing Floats. (line 12)
* mpf_get_prec: Initializing Floats. (line 62)
-* mpf_get_si: Converting Floats. (line 27)
-* mpf_get_str: Converting Floats. (line 37)
-* mpf_get_ui: Converting Floats. (line 28)
+* mpf_get_si: Converting Floats. (line 28)
+* mpf_get_str: Converting Floats. (line 38)
+* mpf_get_ui: Converting Floats. (line 29)
* mpf_init: Initializing Floats. (line 19)
* mpf_init2: Initializing Floats. (line 26)
* mpf_init_set: Simultaneous Float Init & Assign.
* mpf_init_set_si: Simultaneous Float Init & Assign.
(line 18)
* mpf_init_set_str: Simultaneous Float Init & Assign.
- (line 25)
+ (line 26)
* mpf_init_set_ui: Simultaneous Float Init & Assign.
(line 17)
* mpf_inits: Initializing Floats. (line 31)
* mpf_integer_p: Miscellaneous Float Functions.
(line 14)
* mpf_mul: Float Arithmetic. (line 19)
-* mpf_mul_2exp: Float Arithmetic. (line 50)
+* mpf_mul_2exp: Float Arithmetic. (line 51)
* mpf_mul_ui: Float Arithmetic. (line 21)
* mpf_neg: Float Arithmetic. (line 44)
* mpf_out_str: I/O of Floats. (line 19)
* mpf_pow_ui: Float Arithmetic. (line 41)
* mpf_random2: Miscellaneous Float Functions.
(line 37)
-* mpf_reldiff: Float Comparison. (line 29)
+* mpf_reldiff: Float Comparison. (line 31)
* mpf_set: Assigning Floats. (line 10)
* mpf_set_d: Assigning Floats. (line 13)
* mpf_set_default_prec: Initializing Floats. (line 7)
* mpf_set_str: Assigning Floats. (line 18)
* mpf_set_ui: Assigning Floats. (line 11)
* mpf_set_z: Assigning Floats. (line 14)
-* mpf_sgn: Float Comparison. (line 33)
+* mpf_sgn: Float Comparison. (line 35)
* mpf_sqrt: Float Arithmetic. (line 36)
* mpf_sqrt_ui: Float Arithmetic. (line 37)
* mpf_sub: Float Arithmetic. (line 12)
* mpn_xnor_n: Low-level Functions. (line 462)
* mpn_xor_n: Low-level Functions. (line 437)
* mpn_zero: Low-level Functions. (line 479)
-* mpq_abs: Rational Arithmetic. (line 31)
-* mpq_add: Rational Arithmetic. (line 7)
+* mpq_abs: Rational Arithmetic. (line 34)
+* mpq_add: Rational Arithmetic. (line 8)
* mpq_canonicalize: Rational Number Functions.
(line 22)
* mpq_class: C++ Interface General.
(line 19)
* mpq_class::canonicalize: C++ Interface Rationals.
- (line 37)
+ (line 43)
* mpq_class::get_d: C++ Interface Rationals.
- (line 46)
+ (line 52)
* mpq_class::get_den: C++ Interface Rationals.
- (line 58)
+ (line 66)
* mpq_class::get_den_mpz_t: C++ Interface Rationals.
- (line 68)
+ (line 76)
* mpq_class::get_mpq_t: C++ Interface General.
(line 65)
* mpq_class::get_num: C++ Interface Rationals.
- (line 57)
+ (line 65)
* mpq_class::get_num_mpz_t: C++ Interface Rationals.
- (line 67)
+ (line 75)
* mpq_class::get_str: C++ Interface Rationals.
- (line 47)
+ (line 53)
* mpq_class::mpq_class: C++ Interface Rationals.
- (line 30)
+ (line 23)
* mpq_class::set_str: C++ Interface Rationals.
- (line 48)
+ (line 54)
+* mpq_class::swap: C++ Interface Rationals.
+ (line 57)
* mpq_clear: Initializing Rationals.
(line 16)
* mpq_clears: Initializing Rationals.
* mpq_cmp_ui: Comparing Rationals. (line 15)
* mpq_denref: Applying Integer Functions.
(line 18)
-* mpq_div: Rational Arithmetic. (line 22)
-* mpq_div_2exp: Rational Arithmetic. (line 25)
+* mpq_div: Rational Arithmetic. (line 24)
+* mpq_div_2exp: Rational Arithmetic. (line 28)
* mpq_equal: Comparing Rationals. (line 33)
* mpq_get_d: Rational Conversions.
(line 7)
(line 7)
* mpq_inits: Initializing Rationals.
(line 12)
-* mpq_inp_str: I/O of Rationals. (line 26)
-* mpq_inv: Rational Arithmetic. (line 34)
-* mpq_mul: Rational Arithmetic. (line 15)
-* mpq_mul_2exp: Rational Arithmetic. (line 18)
-* mpq_neg: Rational Arithmetic. (line 28)
+* mpq_inp_str: I/O of Rationals. (line 27)
+* mpq_inv: Rational Arithmetic. (line 37)
+* mpq_mul: Rational Arithmetic. (line 16)
+* mpq_mul_2exp: Rational Arithmetic. (line 20)
+* mpq_neg: Rational Arithmetic. (line 31)
* mpq_numref: Applying Integer Functions.
(line 17)
-* mpq_out_str: I/O of Rationals. (line 18)
+* mpq_out_str: I/O of Rationals. (line 19)
* mpq_set: Initializing Rationals.
(line 24)
* mpq_set_d: Rational Conversions.
* mpq_set_z: Initializing Rationals.
(line 25)
* mpq_sgn: Comparing Rationals. (line 27)
-* mpq_sub: Rational Arithmetic. (line 11)
+* mpq_sub: Rational Arithmetic. (line 12)
* mpq_swap: Initializing Rationals.
(line 56)
* mpq_t: Nomenclature and Types.
(line 16)
-* mpz_abs: Integer Arithmetic. (line 42)
+* mpz_2fac_ui: Number Theoretic Functions.
+ (line 114)
+* mpz_abs: Integer Arithmetic. (line 45)
* mpz_add: Integer Arithmetic. (line 7)
* mpz_add_ui: Integer Arithmetic. (line 9)
-* mpz_addmul: Integer Arithmetic. (line 25)
-* mpz_addmul_ui: Integer Arithmetic. (line 27)
+* mpz_addmul: Integer Arithmetic. (line 26)
+* mpz_addmul_ui: Integer Arithmetic. (line 28)
* mpz_and: Integer Logic and Bit Fiddling.
(line 11)
* mpz_array_init: Integer Special Functions.
(line 11)
* mpz_bin_ui: Number Theoretic Functions.
- (line 111)
+ (line 126)
* mpz_bin_uiui: Number Theoretic Functions.
- (line 113)
+ (line 128)
* mpz_cdiv_q: Integer Division. (line 13)
-* mpz_cdiv_q_2exp: Integer Division. (line 24)
-* mpz_cdiv_q_ui: Integer Division. (line 17)
-* mpz_cdiv_qr: Integer Division. (line 15)
-* mpz_cdiv_qr_ui: Integer Division. (line 21)
+* mpz_cdiv_q_2exp: Integer Division. (line 26)
+* mpz_cdiv_q_ui: Integer Division. (line 18)
+* mpz_cdiv_qr: Integer Division. (line 16)
+* mpz_cdiv_qr_ui: Integer Division. (line 22)
* mpz_cdiv_r: Integer Division. (line 14)
-* mpz_cdiv_r_2exp: Integer Division. (line 25)
-* mpz_cdiv_r_ui: Integer Division. (line 19)
-* mpz_cdiv_ui: Integer Division. (line 23)
+* mpz_cdiv_r_2exp: Integer Division. (line 28)
+* mpz_cdiv_r_ui: Integer Division. (line 20)
+* mpz_cdiv_ui: Integer Division. (line 24)
* mpz_class: C++ Interface General.
(line 18)
* mpz_class::fits_sint_p: C++ Interface Integers.
- (line 45)
+ (line 50)
* mpz_class::fits_slong_p: C++ Interface Integers.
- (line 46)
+ (line 51)
* mpz_class::fits_sshort_p: C++ Interface Integers.
- (line 47)
+ (line 52)
* mpz_class::fits_uint_p: C++ Interface Integers.
- (line 48)
+ (line 53)
* mpz_class::fits_ulong_p: C++ Interface Integers.
- (line 49)
+ (line 54)
* mpz_class::fits_ushort_p: C++ Interface Integers.
- (line 50)
+ (line 55)
* mpz_class::get_d: C++ Interface Integers.
- (line 51)
+ (line 56)
* mpz_class::get_mpz_t: C++ Interface General.
(line 64)
* mpz_class::get_si: C++ Interface Integers.
- (line 52)
+ (line 57)
* mpz_class::get_str: C++ Interface Integers.
- (line 53)
+ (line 58)
* mpz_class::get_ui: C++ Interface Integers.
- (line 54)
+ (line 59)
* mpz_class::mpz_class: C++ Interface Integers.
- (line 20)
+ (line 7)
* mpz_class::set_str: C++ Interface Integers.
- (line 55)
+ (line 60)
+* mpz_class::swap: C++ Interface Integers.
+ (line 64)
* mpz_clear: Initializing Integers.
- (line 44)
+ (line 49)
* mpz_clears: Initializing Integers.
- (line 48)
+ (line 53)
* mpz_clrbit: Integer Logic and Bit Fiddling.
- (line 54)
+ (line 56)
* mpz_cmp: Integer Comparisons. (line 7)
* mpz_cmp_d: Integer Comparisons. (line 8)
* mpz_cmp_si: Integer Comparisons. (line 9)
* mpz_com: Integer Logic and Bit Fiddling.
(line 20)
* mpz_combit: Integer Logic and Bit Fiddling.
- (line 57)
-* mpz_congruent_2exp_p: Integer Division. (line 124)
-* mpz_congruent_p: Integer Division. (line 121)
-* mpz_congruent_ui_p: Integer Division. (line 123)
-* mpz_divexact: Integer Division. (line 101)
-* mpz_divexact_ui: Integer Division. (line 102)
-* mpz_divisible_2exp_p: Integer Division. (line 112)
-* mpz_divisible_p: Integer Division. (line 110)
-* mpz_divisible_ui_p: Integer Division. (line 111)
+ (line 59)
+* mpz_congruent_2exp_p: Integer Division. (line 137)
+* mpz_congruent_p: Integer Division. (line 133)
+* mpz_congruent_ui_p: Integer Division. (line 135)
+* mpz_divexact: Integer Division. (line 110)
+* mpz_divexact_ui: Integer Division. (line 112)
+* mpz_divisible_2exp_p: Integer Division. (line 123)
+* mpz_divisible_p: Integer Division. (line 120)
+* mpz_divisible_ui_p: Integer Division. (line 122)
* mpz_even_p: Miscellaneous Integer Functions.
(line 18)
* mpz_export: Integer Import and Export.
(line 45)
* mpz_fac_ui: Number Theoretic Functions.
- (line 108)
-* mpz_fdiv_q: Integer Division. (line 27)
-* mpz_fdiv_q_2exp: Integer Division. (line 38)
-* mpz_fdiv_q_ui: Integer Division. (line 31)
-* mpz_fdiv_qr: Integer Division. (line 29)
-* mpz_fdiv_qr_ui: Integer Division. (line 35)
-* mpz_fdiv_r: Integer Division. (line 28)
-* mpz_fdiv_r_2exp: Integer Division. (line 39)
-* mpz_fdiv_r_ui: Integer Division. (line 33)
-* mpz_fdiv_ui: Integer Division. (line 37)
+ (line 113)
+* mpz_fdiv_q: Integer Division. (line 30)
+* mpz_fdiv_q_2exp: Integer Division. (line 43)
+* mpz_fdiv_q_ui: Integer Division. (line 35)
+* mpz_fdiv_qr: Integer Division. (line 33)
+* mpz_fdiv_qr_ui: Integer Division. (line 39)
+* mpz_fdiv_r: Integer Division. (line 31)
+* mpz_fdiv_r_2exp: Integer Division. (line 45)
+* mpz_fdiv_r_ui: Integer Division. (line 37)
+* mpz_fdiv_ui: Integer Division. (line 41)
* mpz_fib2_ui: Number Theoretic Functions.
- (line 121)
+ (line 136)
* mpz_fib_ui: Number Theoretic Functions.
- (line 119)
+ (line 134)
* mpz_fits_sint_p: Miscellaneous Integer Functions.
(line 10)
* mpz_fits_slong_p: Miscellaneous Integer Functions.
* mpz_fits_ushort_p: Miscellaneous Integer Functions.
(line 11)
* mpz_gcd: Number Theoretic Functions.
- (line 30)
+ (line 32)
* mpz_gcd_ui: Number Theoretic Functions.
- (line 37)
+ (line 39)
* mpz_gcdext: Number Theoretic Functions.
- (line 47)
+ (line 49)
* mpz_get_d: Converting Integers. (line 27)
-* mpz_get_d_2exp: Converting Integers. (line 35)
+* mpz_get_d_2exp: Converting Integers. (line 36)
* mpz_get_si: Converting Integers. (line 18)
-* mpz_get_str: Converting Integers. (line 46)
+* mpz_get_str: Converting Integers. (line 47)
* mpz_get_ui: Converting Integers. (line 11)
* mpz_getlimbn: Integer Special Functions.
(line 60)
* mpz_init_set_si: Simultaneous Integer Init & Assign.
(line 29)
* mpz_init_set_str: Simultaneous Integer Init & Assign.
- (line 34)
+ (line 35)
* mpz_init_set_ui: Simultaneous Integer Init & Assign.
(line 28)
* mpz_inits: Initializing Integers.
(line 29)
-* mpz_inp_raw: I/O of Integers. (line 61)
-* mpz_inp_str: I/O of Integers. (line 30)
+* mpz_inp_raw: I/O of Integers. (line 62)
+* mpz_inp_str: I/O of Integers. (line 31)
* mpz_invert: Number Theoretic Functions.
- (line 72)
+ (line 76)
* mpz_ior: Integer Logic and Bit Fiddling.
(line 14)
* mpz_jacobi: Number Theoretic Functions.
- (line 79)
+ (line 83)
* mpz_kronecker: Number Theoretic Functions.
- (line 87)
+ (line 91)
* mpz_kronecker_si: Number Theoretic Functions.
- (line 88)
+ (line 92)
* mpz_kronecker_ui: Number Theoretic Functions.
- (line 89)
+ (line 93)
* mpz_lcm: Number Theoretic Functions.
- (line 66)
+ (line 68)
* mpz_lcm_ui: Number Theoretic Functions.
- (line 67)
+ (line 70)
* mpz_legendre: Number Theoretic Functions.
- (line 82)
+ (line 86)
* mpz_lucnum2_ui: Number Theoretic Functions.
- (line 132)
+ (line 147)
* mpz_lucnum_ui: Number Theoretic Functions.
- (line 130)
-* mpz_mod: Integer Division. (line 91)
-* mpz_mod_ui: Integer Division. (line 93)
+ (line 145)
+* mpz_mfac_uiui: Number Theoretic Functions.
+ (line 116)
+* mpz_mod: Integer Division. (line 100)
+* mpz_mod_ui: Integer Division. (line 102)
* mpz_mul: Integer Arithmetic. (line 19)
-* mpz_mul_2exp: Integer Arithmetic. (line 35)
+* mpz_mul_2exp: Integer Arithmetic. (line 38)
* mpz_mul_si: Integer Arithmetic. (line 20)
* mpz_mul_ui: Integer Arithmetic. (line 22)
-* mpz_neg: Integer Arithmetic. (line 39)
+* mpz_neg: Integer Arithmetic. (line 42)
* mpz_nextprime: Number Theoretic Functions.
- (line 23)
+ (line 25)
* mpz_odd_p: Miscellaneous Integer Functions.
(line 17)
-* mpz_out_raw: I/O of Integers. (line 45)
-* mpz_out_str: I/O of Integers. (line 18)
-* mpz_perfect_power_p: Integer Roots. (line 27)
-* mpz_perfect_square_p: Integer Roots. (line 36)
+* mpz_out_raw: I/O of Integers. (line 46)
+* mpz_out_str: I/O of Integers. (line 19)
+* mpz_perfect_power_p: Integer Roots. (line 28)
+* mpz_perfect_square_p: Integer Roots. (line 37)
* mpz_popcount: Integer Logic and Bit Fiddling.
(line 23)
* mpz_pow_ui: Integer Exponentiation.
(line 18)
* mpz_powm_ui: Integer Exponentiation.
(line 10)
+* mpz_primorial_ui: Number Theoretic Functions.
+ (line 121)
* mpz_probab_prime_p: Number Theoretic Functions.
(line 7)
* mpz_random: Integer Random Numbers.
* mpz_random2: Integer Random Numbers.
(line 51)
* mpz_realloc2: Initializing Integers.
- (line 52)
+ (line 57)
* mpz_remove: Number Theoretic Functions.
- (line 103)
-* mpz_root: Integer Roots. (line 7)
-* mpz_rootrem: Integer Roots. (line 13)
+ (line 108)
+* mpz_root: Integer Roots. (line 8)
+* mpz_rootrem: Integer Roots. (line 14)
* mpz_rrandomb: Integer Random Numbers.
(line 31)
* mpz_scan0: Integer Logic and Bit Fiddling.
- (line 37)
-* mpz_scan1: Integer Logic and Bit Fiddling.
(line 38)
+* mpz_scan1: Integer Logic and Bit Fiddling.
+ (line 40)
* mpz_set: Assigning Integers. (line 10)
* mpz_set_d: Assigning Integers. (line 13)
* mpz_set_f: Assigning Integers. (line 15)
* mpz_set_str: Assigning Integers. (line 21)
* mpz_set_ui: Assigning Integers. (line 11)
* mpz_setbit: Integer Logic and Bit Fiddling.
- (line 51)
+ (line 53)
* mpz_sgn: Integer Comparisons. (line 28)
* mpz_si_kronecker: Number Theoretic Functions.
- (line 90)
+ (line 94)
* mpz_size: Integer Special Functions.
(line 68)
* mpz_sizeinbase: Miscellaneous Integer Functions.
(line 23)
-* mpz_sqrt: Integer Roots. (line 17)
-* mpz_sqrtrem: Integer Roots. (line 20)
+* mpz_sqrt: Integer Roots. (line 18)
+* mpz_sqrtrem: Integer Roots. (line 21)
* mpz_sub: Integer Arithmetic. (line 12)
* mpz_sub_ui: Integer Arithmetic. (line 14)
-* mpz_submul: Integer Arithmetic. (line 30)
-* mpz_submul_ui: Integer Arithmetic. (line 32)
+* mpz_submul: Integer Arithmetic. (line 32)
+* mpz_submul_ui: Integer Arithmetic. (line 34)
* mpz_swap: Assigning Integers. (line 37)
* mpz_t: Nomenclature and Types.
(line 6)
-* mpz_tdiv_q: Integer Division. (line 41)
-* mpz_tdiv_q_2exp: Integer Division. (line 52)
-* mpz_tdiv_q_ui: Integer Division. (line 45)
-* mpz_tdiv_qr: Integer Division. (line 43)
-* mpz_tdiv_qr_ui: Integer Division. (line 49)
-* mpz_tdiv_r: Integer Division. (line 42)
-* mpz_tdiv_r_2exp: Integer Division. (line 53)
-* mpz_tdiv_r_ui: Integer Division. (line 47)
-* mpz_tdiv_ui: Integer Division. (line 51)
+* mpz_tdiv_q: Integer Division. (line 47)
+* mpz_tdiv_q_2exp: Integer Division. (line 60)
+* mpz_tdiv_q_ui: Integer Division. (line 52)
+* mpz_tdiv_qr: Integer Division. (line 50)
+* mpz_tdiv_qr_ui: Integer Division. (line 56)
+* mpz_tdiv_r: Integer Division. (line 48)
+* mpz_tdiv_r_2exp: Integer Division. (line 62)
+* mpz_tdiv_r_ui: Integer Division. (line 54)
+* mpz_tdiv_ui: Integer Division. (line 58)
* mpz_tstbit: Integer Logic and Bit Fiddling.
- (line 60)
+ (line 62)
* mpz_ui_kronecker: Number Theoretic Functions.
- (line 91)
+ (line 95)
* mpz_ui_pow_ui: Integer Exponentiation.
(line 33)
* mpz_ui_sub: Integer Arithmetic. (line 16)
(line 23)
* mpz_xor: Integer Logic and Bit Fiddling.
(line 17)
-* msqrt: BSD Compatible Functions.
- (line 63)
-* msub: BSD Compatible Functions.
- (line 46)
-* mtox: BSD Compatible Functions.
- (line 98)
-* mult: BSD Compatible Functions.
- (line 49)
-* operator%: C++ Interface Integers.
+* operator"" <1>: C++ Interface Integers.
(line 30)
+* operator"" <2>: C++ Interface Floats.
+ (line 56)
+* operator"": C++ Interface Rationals.
+ (line 38)
+* operator%: C++ Interface Integers.
+ (line 35)
* operator/: C++ Interface Integers.
- (line 29)
+ (line 34)
* operator<<: C++ Formatted Output.
- (line 11)
-* operator>> <1>: C++ Formatted Input. (line 11)
-* operator>> <2>: C++ Interface Rationals.
- (line 77)
-* operator>>: C++ Formatted Input. (line 14)
-* pow: BSD Compatible Functions.
- (line 71)
-* rpow: BSD Compatible Functions.
- (line 79)
-* sdiv: BSD Compatible Functions.
- (line 55)
+ (line 20)
+* operator>> <1>: C++ Interface Rationals.
+ (line 85)
+* operator>>: C++ Formatted Input. (line 25)
* sgn <1>: C++ Interface Rationals.
- (line 50)
+ (line 56)
* sgn <2>: C++ Interface Integers.
- (line 57)
+ (line 62)
* sgn: C++ Interface Floats.
- (line 98)
-* sqrt <1>: C++ Interface Floats.
- (line 99)
-* sqrt: C++ Interface Integers.
+ (line 102)
+* sqrt <1>: C++ Interface Integers.
+ (line 63)
+* sqrt: C++ Interface Floats.
+ (line 103)
+* swap <1>: C++ Interface Floats.
+ (line 105)
+* swap <2>: C++ Interface Integers.
+ (line 65)
+* swap: C++ Interface Rationals.
(line 58)
* trunc: C++ Interface Floats.
- (line 100)
-* xtom: BSD Compatible Functions.
- (line 34)
+ (line 106)
arithmetic library, version @value{VERSION}.
Copyright 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Free Software
+2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Free Software
Foundation, Inc.
Permission is granted to copy, distribute and/or modify this document under
* Formatted Output:: @code{printf} style output.
* Formatted Input:: @code{scanf} style input.
* C++ Class Interface:: Class wrappers around GMP types.
-* BSD Compatible Functions:: All functions found in BSD MP.
* Custom Allocation:: How to customize the internal allocation.
* Language Bindings:: Using GMP from other languages.
* Algorithms:: What happens behind the scenes.
other members, older or newer. The best idea is always to build GMP for the
exact machine type you intend to run it on.
-The following CPUs have specific support. See @file{configure.in} for details
+The following CPUs have specific support. See @file{configure.ac} for details
of what code and compiler options they select.
@itemize @bullet
@item Generic C Build
@cindex Generic C
If some of the assembly code causes problems, or if otherwise desired, the
-generic C code can be selected with CPU @samp{none}. For example,
-
-@example
-./configure --host=none-unknown-freebsd3.5
-@end example
+generic C code can be selected with the configure option @option{--disable-assembly}.
Note that this will run quite slowly, but it should be portable and should at
least make it possible to get something running if all else fails.
Compiling is done with both @samp{CPPFLAGS} and @samp{CFLAGS}, but
preprocessing uses just @samp{CPPFLAGS}. This distinction is because most
preprocessors won't accept all the flags the compiler does. Preprocessing is
-done separately in some configure tests, and in the @samp{ansi2knr} support
-for K&R compilers.
+done separately in some configure tests.
@item @option{CC_FOR_BUILD}
@cindex @code{CC_FOR_BUILD}
Toom, and Fermat FFT@. The FFT is only used on large to very large operands
and can be disabled to save code size if desired.
-@item Berkeley MP, @option{--enable-mpbsd}
-@cindex Berkeley MP compatible functions
-@cindex BSD MP compatible functions
-@cindex @code{--enable-mpbsd}
-The Berkeley MP compatibility library (@file{libmp}) and header file
-(@file{mp.h}) are built and installed only if @option{--enable-mpbsd} is used.
-@xref{BSD Compatible Functions}.
-
@item Assertion Checking, @option{--enable-assert}
@cindex Assertion checking
@cindex @code{--enable-assert}
@item PowerPC 64 (@samp{powerpc64}, @samp{powerpc620}, @samp{powerpc630}, @samp{powerpc970}, @samp{power4}, @samp{power5})
@cindex PowerPC
@table @asis
-@item @samp{ABI=aix64}
+@item @samp{ABI=mode64}
@cindex AIX
The AIX 64 ABI uses 64-bit limbs and pointers and is the default on PowerPC 64
@samp{*-*-aix*} systems. Applications must be compiled with
xlc -q64
@end example
-@item @samp{ABI=mode64}
-The @samp{mode64} ABI uses 64-bit limbs and pointers, and is the default on
-64-bit GNU/Linux, BSD, and Mac OS X/Darwin systems. Applications must be
-compiled with
+On 64-bit GNU/Linux, BSD, and Mac OS X/Darwin systems, applications must be
+compiled with
@example
gcc -m64
@end example
@item @samp{ABI=mode32}
-@cindex AIX
The @samp{mode32} ABI uses a 64-bit @code{long long} limb but with the chip
still in 32-bit mode and using 32-bit calling conventions. This is the default
for systems where the true 64-bit ABI is unavailable. No special compiler
-options are typically needed for applications.
+options are typically needed for applications. This ABI is not available under
+AIX.
@item @samp{ABI=32}
This is the basic 32-bit PowerPC ABI, with a 32-bit limb. No special compiler
options are needed for applications.
@end table
-GMP's speed is greatest for @samp{aix64} and @samp{mode64}. In @samp{ABI=32}
-only the 32-bit ISA is used and this doesn't make full use of a 64-bit chip.
-On a suitable system we could perhaps use more of the ISA, but there are no
-plans to do so.
+GMP's speed is greatest for the @samp{mode64} ABI; the @samp{mode32} ABI is
+second best. In @samp{ABI=32} only the 32-bit ISA is used and this doesn't make full
+use of a 64-bit chip.
@sp 1
@need 1000
not available on the other, so it's important to choose the right one for the
CPU that will be used. Currently GMP has no assembly code support for using
just the common instruction subset. To get executables that run on both, the
-current suggestion is to use the generic C code (CPU @samp{none}), possibly
-with appropriate compiler options (like @samp{-mcpu=common} for
+current suggestion is to use the generic C code (@option{--disable-assembly}),
+possibly with appropriate compiler options (like @samp{-mcpu=common} for
@command{gcc}). CPU @samp{rs6000} (which is not a CPU but a family of
workstations) is accepted by @file{config.sub}, but is currently equivalent to
-@samp{none}.
+@option{--disable-assembly}.
@item Sparc CPU Types
@cindex Sparc
This makes that code unsuitable for use with the special V9
@samp{-mcmodel=embmedany} (which uses @code{g4} as a data segment pointer), and
for applications wanting to use those registers for special purposes. In these
-cases the only suggestion currently is to build GMP with CPU @samp{none} to
-avoid the assembly code.
+cases the only suggestion currently is to build GMP with
+@option{--disable-assembly} to avoid the assembly code.
@item SunOS 4
@cindex SunOS
@code{mpf_}. The associated type is @code{mpf_t}. There are about 60
functions in this class. (@pxref{Floating-point Functions})
-@item
-Functions compatible with Berkeley MP, such as @code{itom}, @code{madd}, and
-@code{mult}. The associated type is @code{MINT}. (@pxref{BSD Compatible
-Functions})
-
@item
Fast low-level functions that operate on natural numbers. These are used by
the functions in the preceding groups, and you can also call them directly
@findex gmp_version
The GMP version number, as a null-terminated string, in the form ``i.j.k''.
This release is @nicode{"@value{VERSION}"}. Note that the format ``i.j'' was
-used when k was zero was used before version 4.3.0.
+used, before version 4.3.0, when k was zero.
@end deftypevr
@defmac __GMP_CC
@cindex Past GMP versions
@cindex Upward compatibility
-This version of GMP is upwardly binary compatible with all 4.x and 3.x
+This version of GMP is upwardly binary compatible with all 5.x, 4.x, and 3.x
versions, and upwardly compatible at the source level with all 2.x versions,
with the following exceptions.
@item
@code{mpf_get_prec} counted precision slightly differently in GMP 3.0 and
3.0.1, but in 3.1 reverted to the 2.x style.
+
+@item
+@code{mpn_bdivmod}, documented as preliminary in GMP 4, has been removed.
@end itemize
There are a number of compatibility issues between GMP 1 and GMP 2 that of
-course also apply when porting applications from GMP 1 to GMP 4. Please
+course also apply when porting applications from GMP 1 to GMP 5. Please
see the GMP 2 manual for details.
-The Berkeley MP compatibility library (@pxref{BSD Compatible Functions}) is
-source and binary compatible with the standard @file{libmp}.
-
-@c @enumerate
@c @item Integer division functions round the result differently. The obsolete
@c functions (@code{mpz_div}, @code{mpz_divmod}, @code{mpz_mdiv},
@c @code{mpz_mdivmod}, etc) now all use floor rounding (i.e., they round the
from @option{--enable-assert} since it adds checks on the parameters of most
such functions, many of which have subtle restrictions on their usage. Note
however that only the generic C code has checks, not the assembly code, so
-CPU @samp{none} should be used for maximum checking.
+@option{--disable-assembly} should be used for maximum checking.
@item Temporary Memory Checking
The build option @option{--enable-alloca=debug} arranges that each block of
@example
./configure --disable-shared --enable-assert \
- --enable-alloca=debug --host=none CFLAGS=-g
+ --enable-alloca=debug --disable-assembly CFLAGS=-g
@end example
For C++, add @samp{--enable-cxx CXXFLAGS=-g}.
@cindex @command{checkergcc}
@example
-./configure --host=none-pc-linux-gnu CC=checkergcc
+./configure --disable-assembly CC=checkergcc
@end example
-@samp{--host=none} must be used, since the GMP assembly code doesn't support
-the checking scheme. The GMP C++ features cannot be used, since current
-versions of checker (0.9.9.1) don't yet support the standard C++ library.
+@option{--disable-assembly} must be used, since the GMP assembly code doesn't
+support the checking scheme. The GMP C++ features cannot be used, since
+current versions of checker (0.9.9.1) don't yet support the standard C++
+library.
@item Valgrind
@cindex Valgrind
-The valgrind program (@uref{http://valgrind.org/}) is a memory
-checker for x86s. It translates and emulates machine instructions to do
+Valgrind (@uref{http://valgrind.org/}) is a memory checker for x86, ARM, MIPS,
+PowerPC, and S/390. It translates and emulates machine instructions to do
strong checks for uninitialized data (at the level of individual bits), memory
accesses through bad pointers, and memory leaks.
-Recent versions of Valgrind are getting support for MMX and SSE/SSE2
-instructions, for past versions GMP will need to be configured not to use
-those, i.e.@: for an x86 without them (for instance plain @samp{i486}).
+Valgrind does not always support every possible instruction, in particular
+ones recently added to an ISA. Valgrind might therefore be incompatible with
+a recent GMP or even a less recent GMP which is compiled using a recent GCC.
GMP's assembly code sometimes promotes a read of the limbs to some larger size,
for efficiency. GMP will do this even at the start and end of a multilimb
-operand, using naturaly aligned operations on the larger type. This may lead
-to benign reads outside of allocated areas, triggering complants from Valgrind.
+operand, using naturally aligned operations on the larger type. This may lead
+to benign reads outside of allocated areas, triggering complaints from
+Valgrind. Valgrind's option @samp{--partial-loads-ok=yes} should help.
@item Other Problems
Any suspected bug in GMP itself should be isolated to make sure it's not an
Please do not send core dumps, executables or @command{strace}s.
@item
-The configuration options you used when building GMP, if any.
+The @samp{configure} options you used when building GMP, if any.
+
+@item
+The output from @samp{configure}, as printed to stdout, with any options used.
@item
The name of the compiler and its version. For @command{gcc}, get the version
Calling this function instead of @code{mpz_init} or @code{mpz_inits} is never
necessary; reallocation is handled automatically by GMP when needed.
-@var{n} is only the initial space, @var{x} will grow automatically in
-the normal way, if necessary, for subsequent values stored. @code{mpz_init2}
-makes it possible to avoid such reallocations if a maximum size is known in
-advance.
+While @var{n} defines the initial space, @var{x} will grow automatically in the
+normal way, if necessary, for subsequent values stored. @code{mpz_init2} makes
+it possible to avoid such reallocations if a maximum size is known in advance.
+
+In preparation for an operation, GMP often allocates one limb more than
+ultimately needed. To make sure GMP will not perform reallocation for
+@var{x}, you need to add the number of bits in @code{mp_limb_t} to @var{n}.
@end deftypefun
@deftypefun void mpz_clear (mpz_t @var{x})
These functions assign new values to already initialized integers
(@pxref{Initializing Integers}).
-@deftypefun void mpz_set (mpz_t @var{rop}, mpz_t @var{op})
+@deftypefun void mpz_set (mpz_t @var{rop}, const mpz_t @var{op})
@deftypefunx void mpz_set_ui (mpz_t @var{rop}, unsigned long int @var{op})
@deftypefunx void mpz_set_si (mpz_t @var{rop}, signed long int @var{op})
@deftypefunx void mpz_set_d (mpz_t @var{rop}, double @var{op})
-@deftypefunx void mpz_set_q (mpz_t @var{rop}, mpq_t @var{op})
-@deftypefunx void mpz_set_f (mpz_t @var{rop}, mpf_t @var{op})
+@deftypefunx void mpz_set_q (mpz_t @var{rop}, const mpq_t @var{op})
+@deftypefunx void mpz_set_f (mpz_t @var{rop}, const mpf_t @var{op})
Set the value of @var{rop} from @var{op}.
@code{mpz_set_d}, @code{mpz_set_q} and @code{mpz_set_f} truncate @var{op} to
make it an integer.
@end deftypefun
-@deftypefun int mpz_set_str (mpz_t @var{rop}, char *@var{str}, int @var{base})
+@deftypefun int mpz_set_str (mpz_t @var{rop}, const char *@var{str}, int @var{base})
Set the value of @var{rop} from @var{str}, a null-terminated C string in base
@var{base}. White space is allowed in the string, and is simply ignored.
integer functions. Don't use an initialize-and-set function on a variable
already initialized!
-@deftypefun void mpz_init_set (mpz_t @var{rop}, mpz_t @var{op})
+@deftypefun void mpz_init_set (mpz_t @var{rop}, const mpz_t @var{op})
@deftypefunx void mpz_init_set_ui (mpz_t @var{rop}, unsigned long int @var{op})
@deftypefunx void mpz_init_set_si (mpz_t @var{rop}, signed long int @var{op})
@deftypefunx void mpz_init_set_d (mpz_t @var{rop}, double @var{op})
@var{op}.
@end deftypefun
-@deftypefun int mpz_init_set_str (mpz_t @var{rop}, char *@var{str}, int @var{base})
+@deftypefun int mpz_init_set_str (mpz_t @var{rop}, const char *@var{str}, int @var{base})
Initialize @var{rop} and set its value like @code{mpz_set_str} (see its
documentation above for details).
types. Functions for converting @emph{to} GMP integers are described in
@ref{Assigning Integers} and @ref{I/O of Integers}.
-@deftypefun {unsigned long int} mpz_get_ui (mpz_t @var{op})
+@deftypefun {unsigned long int} mpz_get_ui (const mpz_t @var{op})
Return the value of @var{op} as an @code{unsigned long}.
If @var{op} is too big to fit an @code{unsigned long} then just the least
only the absolute value is used.
@end deftypefun
-@deftypefun {signed long int} mpz_get_si (mpz_t @var{op})
+@deftypefun {signed long int} mpz_get_si (const mpz_t @var{op})
If @var{op} fits into a @code{signed long int} return the value of @var{op}.
Otherwise return the least significant part of @var{op}, with the same sign
as @var{op}.
the function @code{mpz_fits_slong_p}.
@end deftypefun
-@deftypefun double mpz_get_d (mpz_t @var{op})
+@deftypefun double mpz_get_d (const mpz_t @var{op})
Convert @var{op} to a @code{double}, truncating if necessary (i.e.@: rounding
towards zero).
may or may not occur.
@end deftypefun
-@deftypefun double mpz_get_d_2exp (signed long int *@var{exp}, mpz_t @var{op})
+@deftypefun double mpz_get_d_2exp (signed long int *@var{exp}, const mpz_t @var{op})
Convert @var{op} to a @code{double}, truncating if necessary (i.e.@: rounding
towards zero), and returning the exponent separately.
Functions,,, libc, The GNU C Library Reference Manual}).
@end deftypefun
-@deftypefun {char *} mpz_get_str (char *@var{str}, int @var{base}, mpz_t @var{op})
+@deftypefun {char *} mpz_get_str (char *@var{str}, int @var{base}, const mpz_t @var{op})
Convert @var{op} to a string of digits in base @var{base}. The base argument
may vary from 2 to 62 or from @minus{}2 to @minus{}36.
@cindex Integer arithmetic functions
@cindex Arithmetic functions
-@deftypefun void mpz_add (mpz_t @var{rop}, mpz_t @var{op1}, mpz_t @var{op2})
-@deftypefunx void mpz_add_ui (mpz_t @var{rop}, mpz_t @var{op1}, unsigned long int @var{op2})
+@deftypefun void mpz_add (mpz_t @var{rop}, const mpz_t @var{op1}, const mpz_t @var{op2})
+@deftypefunx void mpz_add_ui (mpz_t @var{rop}, const mpz_t @var{op1}, unsigned long int @var{op2})
Set @var{rop} to @math{@var{op1} + @var{op2}}.
@end deftypefun
-@deftypefun void mpz_sub (mpz_t @var{rop}, mpz_t @var{op1}, mpz_t @var{op2})
-@deftypefunx void mpz_sub_ui (mpz_t @var{rop}, mpz_t @var{op1}, unsigned long int @var{op2})
-@deftypefunx void mpz_ui_sub (mpz_t @var{rop}, unsigned long int @var{op1}, mpz_t @var{op2})
+@deftypefun void mpz_sub (mpz_t @var{rop}, const mpz_t @var{op1}, const mpz_t @var{op2})
+@deftypefunx void mpz_sub_ui (mpz_t @var{rop}, const mpz_t @var{op1}, unsigned long int @var{op2})
+@deftypefunx void mpz_ui_sub (mpz_t @var{rop}, unsigned long int @var{op1}, const mpz_t @var{op2})
Set @var{rop} to @var{op1} @minus{} @var{op2}.
@end deftypefun
-@deftypefun void mpz_mul (mpz_t @var{rop}, mpz_t @var{op1}, mpz_t @var{op2})
-@deftypefunx void mpz_mul_si (mpz_t @var{rop}, mpz_t @var{op1}, long int @var{op2})
-@deftypefunx void mpz_mul_ui (mpz_t @var{rop}, mpz_t @var{op1}, unsigned long int @var{op2})
+@deftypefun void mpz_mul (mpz_t @var{rop}, const mpz_t @var{op1}, const mpz_t @var{op2})
+@deftypefunx void mpz_mul_si (mpz_t @var{rop}, const mpz_t @var{op1}, long int @var{op2})
+@deftypefunx void mpz_mul_ui (mpz_t @var{rop}, const mpz_t @var{op1}, unsigned long int @var{op2})
Set @var{rop} to @math{@var{op1} @GMPtimes{} @var{op2}}.
@end deftypefun
-@deftypefun void mpz_addmul (mpz_t @var{rop}, mpz_t @var{op1}, mpz_t @var{op2})
-@deftypefunx void mpz_addmul_ui (mpz_t @var{rop}, mpz_t @var{op1}, unsigned long int @var{op2})
+@deftypefun void mpz_addmul (mpz_t @var{rop}, const mpz_t @var{op1}, const mpz_t @var{op2})
+@deftypefunx void mpz_addmul_ui (mpz_t @var{rop}, const mpz_t @var{op1}, unsigned long int @var{op2})
Set @var{rop} to @math{@var{rop} + @var{op1} @GMPtimes{} @var{op2}}.
@end deftypefun
-@deftypefun void mpz_submul (mpz_t @var{rop}, mpz_t @var{op1}, mpz_t @var{op2})
-@deftypefunx void mpz_submul_ui (mpz_t @var{rop}, mpz_t @var{op1}, unsigned long int @var{op2})
+@deftypefun void mpz_submul (mpz_t @var{rop}, const mpz_t @var{op1}, const mpz_t @var{op2})
+@deftypefunx void mpz_submul_ui (mpz_t @var{rop}, const mpz_t @var{op1}, unsigned long int @var{op2})
Set @var{rop} to @math{@var{rop} - @var{op1} @GMPtimes{} @var{op2}}.
@end deftypefun
-@deftypefun void mpz_mul_2exp (mpz_t @var{rop}, mpz_t @var{op1}, mp_bitcnt_t @var{op2})
+@deftypefun void mpz_mul_2exp (mpz_t @var{rop}, const mpz_t @var{op1}, mp_bitcnt_t @var{op2})
@cindex Bit shift left
Set @var{rop} to @m{@var{op1} \times 2^{op2}, @var{op1} times 2 raised to
@var{op2}}. This operation can also be defined as a left shift by @var{op2}
bits.
@end deftypefun
-@deftypefun void mpz_neg (mpz_t @var{rop}, mpz_t @var{op})
+@deftypefun void mpz_neg (mpz_t @var{rop}, const mpz_t @var{op})
Set @var{rop} to @minus{}@var{op}.
@end deftypefun
-@deftypefun void mpz_abs (mpz_t @var{rop}, mpz_t @var{op})
+@deftypefun void mpz_abs (mpz_t @var{rop}, const mpz_t @var{op})
Set @var{rop} to the absolute value of @var{op}.
@end deftypefun
@c between each, and seem to let tex do a better job of page breaks than an
@c @sp 1 in the middle of one big set.
-@deftypefun void mpz_cdiv_q (mpz_t @var{q}, mpz_t @var{n}, mpz_t @var{d})
-@deftypefunx void mpz_cdiv_r (mpz_t @var{r}, mpz_t @var{n}, mpz_t @var{d})
-@deftypefunx void mpz_cdiv_qr (mpz_t @var{q}, mpz_t @var{r}, mpz_t @var{n}, mpz_t @var{d})
+@deftypefun void mpz_cdiv_q (mpz_t @var{q}, const mpz_t @var{n}, const mpz_t @var{d})
+@deftypefunx void mpz_cdiv_r (mpz_t @var{r}, const mpz_t @var{n}, const mpz_t @var{d})
+@deftypefunx void mpz_cdiv_qr (mpz_t @var{q}, mpz_t @var{r}, const mpz_t @var{n}, const mpz_t @var{d})
@maybepagebreak
-@deftypefunx {unsigned long int} mpz_cdiv_q_ui (mpz_t @var{q}, mpz_t @var{n}, @w{unsigned long int @var{d}})
-@deftypefunx {unsigned long int} mpz_cdiv_r_ui (mpz_t @var{r}, mpz_t @var{n}, @w{unsigned long int @var{d}})
-@deftypefunx {unsigned long int} mpz_cdiv_qr_ui (mpz_t @var{q}, mpz_t @var{r}, @w{mpz_t @var{n}}, @w{unsigned long int @var{d}})
-@deftypefunx {unsigned long int} mpz_cdiv_ui (mpz_t @var{n}, @w{unsigned long int @var{d}})
+@deftypefunx {unsigned long int} mpz_cdiv_q_ui (mpz_t @var{q}, const mpz_t @var{n}, @w{unsigned long int @var{d}})
+@deftypefunx {unsigned long int} mpz_cdiv_r_ui (mpz_t @var{r}, const mpz_t @var{n}, @w{unsigned long int @var{d}})
+@deftypefunx {unsigned long int} mpz_cdiv_qr_ui (mpz_t @var{q}, mpz_t @var{r}, @w{const mpz_t @var{n}}, @w{unsigned long int @var{d}})
+@deftypefunx {unsigned long int} mpz_cdiv_ui (const mpz_t @var{n}, @w{unsigned long int @var{d}})
@maybepagebreak
-@deftypefunx void mpz_cdiv_q_2exp (mpz_t @var{q}, mpz_t @var{n}, @w{mp_bitcnt_t @var{b}})
-@deftypefunx void mpz_cdiv_r_2exp (mpz_t @var{r}, mpz_t @var{n}, @w{mp_bitcnt_t @var{b}})
+@deftypefunx void mpz_cdiv_q_2exp (mpz_t @var{q}, const mpz_t @var{n}, @w{mp_bitcnt_t @var{b}})
+@deftypefunx void mpz_cdiv_r_2exp (mpz_t @var{r}, const mpz_t @var{n}, @w{mp_bitcnt_t @var{b}})
@end deftypefun
-@deftypefun void mpz_fdiv_q (mpz_t @var{q}, mpz_t @var{n}, mpz_t @var{d})
-@deftypefunx void mpz_fdiv_r (mpz_t @var{r}, mpz_t @var{n}, mpz_t @var{d})
-@deftypefunx void mpz_fdiv_qr (mpz_t @var{q}, mpz_t @var{r}, mpz_t @var{n}, mpz_t @var{d})
+@deftypefun void mpz_fdiv_q (mpz_t @var{q}, const mpz_t @var{n}, const mpz_t @var{d})
+@deftypefunx void mpz_fdiv_r (mpz_t @var{r}, const mpz_t @var{n}, const mpz_t @var{d})
+@deftypefunx void mpz_fdiv_qr (mpz_t @var{q}, mpz_t @var{r}, const mpz_t @var{n}, const mpz_t @var{d})
@maybepagebreak
-@deftypefunx {unsigned long int} mpz_fdiv_q_ui (mpz_t @var{q}, mpz_t @var{n}, @w{unsigned long int @var{d}})
-@deftypefunx {unsigned long int} mpz_fdiv_r_ui (mpz_t @var{r}, mpz_t @var{n}, @w{unsigned long int @var{d}})
-@deftypefunx {unsigned long int} mpz_fdiv_qr_ui (mpz_t @var{q}, mpz_t @var{r}, @w{mpz_t @var{n}}, @w{unsigned long int @var{d}})
-@deftypefunx {unsigned long int} mpz_fdiv_ui (mpz_t @var{n}, @w{unsigned long int @var{d}})
+@deftypefunx {unsigned long int} mpz_fdiv_q_ui (mpz_t @var{q}, const mpz_t @var{n}, @w{unsigned long int @var{d}})
+@deftypefunx {unsigned long int} mpz_fdiv_r_ui (mpz_t @var{r}, const mpz_t @var{n}, @w{unsigned long int @var{d}})
+@deftypefunx {unsigned long int} mpz_fdiv_qr_ui (mpz_t @var{q}, mpz_t @var{r}, @w{const mpz_t @var{n}}, @w{unsigned long int @var{d}})
+@deftypefunx {unsigned long int} mpz_fdiv_ui (const mpz_t @var{n}, @w{unsigned long int @var{d}})
@maybepagebreak
-@deftypefunx void mpz_fdiv_q_2exp (mpz_t @var{q}, mpz_t @var{n}, @w{mp_bitcnt_t @var{b}})
-@deftypefunx void mpz_fdiv_r_2exp (mpz_t @var{r}, mpz_t @var{n}, @w{mp_bitcnt_t @var{b}})
+@deftypefunx void mpz_fdiv_q_2exp (mpz_t @var{q}, const mpz_t @var{n}, @w{mp_bitcnt_t @var{b}})
+@deftypefunx void mpz_fdiv_r_2exp (mpz_t @var{r}, const mpz_t @var{n}, @w{mp_bitcnt_t @var{b}})
@end deftypefun
-@deftypefun void mpz_tdiv_q (mpz_t @var{q}, mpz_t @var{n}, mpz_t @var{d})
-@deftypefunx void mpz_tdiv_r (mpz_t @var{r}, mpz_t @var{n}, mpz_t @var{d})
-@deftypefunx void mpz_tdiv_qr (mpz_t @var{q}, mpz_t @var{r}, mpz_t @var{n}, mpz_t @var{d})
+@deftypefun void mpz_tdiv_q (mpz_t @var{q}, const mpz_t @var{n}, const mpz_t @var{d})
+@deftypefunx void mpz_tdiv_r (mpz_t @var{r}, const mpz_t @var{n}, const mpz_t @var{d})
+@deftypefunx void mpz_tdiv_qr (mpz_t @var{q}, mpz_t @var{r}, const mpz_t @var{n}, const mpz_t @var{d})
@maybepagebreak
-@deftypefunx {unsigned long int} mpz_tdiv_q_ui (mpz_t @var{q}, mpz_t @var{n}, @w{unsigned long int @var{d}})
-@deftypefunx {unsigned long int} mpz_tdiv_r_ui (mpz_t @var{r}, mpz_t @var{n}, @w{unsigned long int @var{d}})
-@deftypefunx {unsigned long int} mpz_tdiv_qr_ui (mpz_t @var{q}, mpz_t @var{r}, @w{mpz_t @var{n}}, @w{unsigned long int @var{d}})
-@deftypefunx {unsigned long int} mpz_tdiv_ui (mpz_t @var{n}, @w{unsigned long int @var{d}})
+@deftypefunx {unsigned long int} mpz_tdiv_q_ui (mpz_t @var{q}, const mpz_t @var{n}, @w{unsigned long int @var{d}})
+@deftypefunx {unsigned long int} mpz_tdiv_r_ui (mpz_t @var{r}, const mpz_t @var{n}, @w{unsigned long int @var{d}})
+@deftypefunx {unsigned long int} mpz_tdiv_qr_ui (mpz_t @var{q}, mpz_t @var{r}, @w{const mpz_t @var{n}}, @w{unsigned long int @var{d}})
+@deftypefunx {unsigned long int} mpz_tdiv_ui (const mpz_t @var{n}, @w{unsigned long int @var{d}})
@maybepagebreak
-@deftypefunx void mpz_tdiv_q_2exp (mpz_t @var{q}, mpz_t @var{n}, @w{mp_bitcnt_t @var{b}})
-@deftypefunx void mpz_tdiv_r_2exp (mpz_t @var{r}, mpz_t @var{n}, @w{mp_bitcnt_t @var{b}})
+@deftypefunx void mpz_tdiv_q_2exp (mpz_t @var{q}, const mpz_t @var{n}, @w{mp_bitcnt_t @var{b}})
+@deftypefunx void mpz_tdiv_r_2exp (mpz_t @var{r}, const mpz_t @var{n}, @w{mp_bitcnt_t @var{b}})
@cindex Bit shift right
@sp 1
effectively treats @var{n} as sign and magnitude.
@end deftypefun
-@deftypefun void mpz_mod (mpz_t @var{r}, mpz_t @var{n}, mpz_t @var{d})
-@deftypefunx {unsigned long int} mpz_mod_ui (mpz_t @var{r}, mpz_t @var{n}, @w{unsigned long int @var{d}})
+@deftypefun void mpz_mod (mpz_t @var{r}, const mpz_t @var{n}, const mpz_t @var{d})
+@deftypefunx {unsigned long int} mpz_mod_ui (mpz_t @var{r}, const mpz_t @var{n}, @w{unsigned long int @var{d}})
Set @var{r} to @var{n} @code{mod} @var{d}. The sign of the divisor is
ignored; the result is always non-negative.
the return value is wanted.
@end deftypefun
-@deftypefun void mpz_divexact (mpz_t @var{q}, mpz_t @var{n}, mpz_t @var{d})
-@deftypefunx void mpz_divexact_ui (mpz_t @var{q}, mpz_t @var{n}, unsigned long @var{d})
+@deftypefun void mpz_divexact (mpz_t @var{q}, const mpz_t @var{n}, const mpz_t @var{d})
+@deftypefunx void mpz_divexact_ui (mpz_t @var{q}, const mpz_t @var{n}, unsigned long @var{d})
@cindex Exact division functions
Set @var{q} to @var{n}/@var{d}. These functions produce correct results only
when it is known in advance that @var{d} divides @var{n}.
rational to lowest terms.
@end deftypefun
-@deftypefun int mpz_divisible_p (mpz_t @var{n}, mpz_t @var{d})
-@deftypefunx int mpz_divisible_ui_p (mpz_t @var{n}, unsigned long int @var{d})
-@deftypefunx int mpz_divisible_2exp_p (mpz_t @var{n}, mp_bitcnt_t @var{b})
+@deftypefun int mpz_divisible_p (const mpz_t @var{n}, const mpz_t @var{d})
+@deftypefunx int mpz_divisible_ui_p (const mpz_t @var{n}, unsigned long int @var{d})
+@deftypefunx int mpz_divisible_2exp_p (const mpz_t @var{n}, mp_bitcnt_t @var{b})
@cindex Divisibility functions
Return non-zero if @var{n} is exactly divisible by @var{d}, or in the case of
@code{mpz_divisible_2exp_p} by @m{2^b,2^@var{b}}.
that only 0 is considered divisible by 0.
@end deftypefun
-@deftypefun int mpz_congruent_p (mpz_t @var{n}, mpz_t @var{c}, mpz_t @var{d})
-@deftypefunx int mpz_congruent_ui_p (mpz_t @var{n}, unsigned long int @var{c}, unsigned long int @var{d})
-@deftypefunx int mpz_congruent_2exp_p (mpz_t @var{n}, mpz_t @var{c}, mp_bitcnt_t @var{b})
+@deftypefun int mpz_congruent_p (const mpz_t @var{n}, const mpz_t @var{c}, const mpz_t @var{d})
+@deftypefunx int mpz_congruent_ui_p (const mpz_t @var{n}, unsigned long int @var{c}, unsigned long int @var{d})
+@deftypefunx int mpz_congruent_2exp_p (const mpz_t @var{n}, const mpz_t @var{c}, mp_bitcnt_t @var{b})
@cindex Divisibility functions
@cindex Congruence functions
Return non-zero if @var{n} is congruent to @var{c} modulo @var{d}, or in the
@cindex Exponentiation functions
@cindex Powering functions
-@deftypefun void mpz_powm (mpz_t @var{rop}, mpz_t @var{base}, mpz_t @var{exp}, mpz_t @var{mod})
-@deftypefunx void mpz_powm_ui (mpz_t @var{rop}, mpz_t @var{base}, unsigned long int @var{exp}, mpz_t @var{mod})
+@deftypefun void mpz_powm (mpz_t @var{rop}, const mpz_t @var{base}, const mpz_t @var{exp}, const mpz_t @var{mod})
+@deftypefunx void mpz_powm_ui (mpz_t @var{rop}, const mpz_t @var{base}, unsigned long int @var{exp}, const mpz_t @var{mod})
Set @var{rop} to @m{base^{exp} \bmod mod, (@var{base} raised to @var{exp})
modulo @var{mod}}.
If an inverse doesn't exist then a divide by zero is raised.
@end deftypefun
-@deftypefun void mpz_powm_sec (mpz_t @var{rop}, mpz_t @var{base}, mpz_t @var{exp}, mpz_t @var{mod})
+@deftypefun void mpz_powm_sec (mpz_t @var{rop}, const mpz_t @var{base}, const mpz_t @var{exp}, const mpz_t @var{mod})
Set @var{rop} to @m{base^{exp} \bmod mod, (@var{base} raised to @var{exp})
modulo @var{mod}}.
resilience to side-channel attacks is desired.
@end deftypefun
-@deftypefun void mpz_pow_ui (mpz_t @var{rop}, mpz_t @var{base}, unsigned long int @var{exp})
+@deftypefun void mpz_pow_ui (mpz_t @var{rop}, const mpz_t @var{base}, unsigned long int @var{exp})
@deftypefunx void mpz_ui_pow_ui (mpz_t @var{rop}, unsigned long int @var{base}, unsigned long int @var{exp})
Set @var{rop} to @m{base^{exp}, @var{base} raised to @var{exp}}. The case
@math{0^0} yields 1.
@cindex Integer root functions
@cindex Root extraction functions
-@deftypefun int mpz_root (mpz_t @var{rop}, mpz_t @var{op}, unsigned long int @var{n})
+@deftypefun int mpz_root (mpz_t @var{rop}, const mpz_t @var{op}, unsigned long int @var{n})
Set @var{rop} to @m{\lfloor\root n \of {op}\rfloor@C{},} the truncated integer
part of the @var{n}th root of @var{op}. Return non-zero if the computation
was exact, i.e., if @var{op} is @var{rop} to the @var{n}th power.
@end deftypefun
-@deftypefun void mpz_rootrem (mpz_t @var{root}, mpz_t @var{rem}, mpz_t @var{u}, unsigned long int @var{n})
+@deftypefun void mpz_rootrem (mpz_t @var{root}, mpz_t @var{rem}, const mpz_t @var{u}, unsigned long int @var{n})
Set @var{root} to @m{\lfloor\root n \of {u}\rfloor@C{},} the truncated
integer part of the @var{n}th root of @var{u}. Set @var{rem} to the
remainder, @m{(@var{u} - @var{root}^n),
@var{u}@minus{}@var{root}**@var{n}}.
@end deftypefun
-@deftypefun void mpz_sqrt (mpz_t @var{rop}, mpz_t @var{op})
+@deftypefun void mpz_sqrt (mpz_t @var{rop}, const mpz_t @var{op})
Set @var{rop} to @m{\lfloor\sqrt{@var{op}}\rfloor@C{},} the truncated
integer part of the square root of @var{op}.
@end deftypefun
-@deftypefun void mpz_sqrtrem (mpz_t @var{rop1}, mpz_t @var{rop2}, mpz_t @var{op})
+@deftypefun void mpz_sqrtrem (mpz_t @var{rop1}, mpz_t @var{rop2}, const mpz_t @var{op})
Set @var{rop1} to @m{\lfloor\sqrt{@var{op}}\rfloor, the truncated integer part
of the square root of @var{op}}, like @code{mpz_sqrt}. Set @var{rop2} to the
remainder @m{(@var{op} - @var{rop1}^2),
undefined.
@end deftypefun
-@deftypefun int mpz_perfect_power_p (mpz_t @var{op})
+@deftypefun int mpz_perfect_power_p (const mpz_t @var{op})
@cindex Perfect power functions
@cindex Root testing functions
Return non-zero if @var{op} is a perfect power, i.e., if there exist integers
perfect powers.
@end deftypefun
-@deftypefun int mpz_perfect_square_p (mpz_t @var{op})
+@deftypefun int mpz_perfect_square_p (const mpz_t @var{op})
@cindex Perfect square functions
@cindex Root testing functions
Return non-zero if @var{op} is a perfect square, i.e., if the square root of
@section Number Theoretic Functions
@cindex Number theoretic functions
-@deftypefun int mpz_probab_prime_p (mpz_t @var{n}, int @var{reps})
+@deftypefun int mpz_probab_prime_p (const mpz_t @var{n}, int @var{reps})
@cindex Prime testing functions
@cindex Probable prime testing functions
Determine whether @var{n} is prime. Return 2 if @var{n} is definitely prime,
@var{n} is definitely composite.
This function does some trial divisions, then some Miller-Rabin probabilistic
-primality tests. @var{reps} controls how many such tests are done, 5 to 10 is
-a reasonable number, more will reduce the chances of a composite being
-returned as ``probably prime''.
+primality tests. The argument @var{reps} controls how many such tests are
+done; a higher value will reduce the chances of a composite being returned as
+``probably prime''. A reasonable value is 25; a composite number will then be
+identified as a prime with a probability of less than @m{2^{-50},2^(-50)}.
Miller-Rabin and similar tests can be more properly called compositeness
tests. Numbers which fail are known to be composite but those which pass
which pass are considered probably prime.
@end deftypefun
-@deftypefun void mpz_nextprime (mpz_t @var{rop}, mpz_t @var{op})
+@deftypefun void mpz_nextprime (mpz_t @var{rop}, const mpz_t @var{op})
@cindex Next prime function
Set @var{rop} to the next prime greater than @var{op}.
@c mpz_prime_p not implemented as of gmp 3.0.
-@c @deftypefun int mpz_prime_p (mpz_t @var{n})
+@c @deftypefun int mpz_prime_p (const mpz_t @var{n})
@c Return non-zero if @var{n} is prime and zero if @var{n} is a non-prime.
@c This function is far slower than @code{mpz_probab_prime_p}, but then it
@c never returns non-zero for composite numbers.
@c prime, if the @var{reps} argument is in the suggested range.)
@c @end deftypefun
-@deftypefun void mpz_gcd (mpz_t @var{rop}, mpz_t @var{op1}, mpz_t @var{op2})
+@deftypefun void mpz_gcd (mpz_t @var{rop}, const mpz_t @var{op1}, const mpz_t @var{op2})
@cindex Greatest common divisor functions
@cindex GCD functions
Set @var{rop} to the greatest common divisor of @var{op1} and @var{op2}. The
Except if both inputs are zero; then this function defines @math{gcd(0,0) = 0}.
@end deftypefun
-@deftypefun {unsigned long int} mpz_gcd_ui (mpz_t @var{rop}, mpz_t @var{op1}, unsigned long int @var{op2})
+@deftypefun {unsigned long int} mpz_gcd_ui (mpz_t @var{rop}, const mpz_t @var{op1}, unsigned long int @var{op2})
Compute the greatest common divisor of @var{op1} and @var{op2}. If
@var{rop} is not @code{NULL}, store the result there.
is non-zero.
@end deftypefun
-@deftypefun void mpz_gcdext (mpz_t @var{g}, mpz_t @var{s}, mpz_t @var{t}, mpz_t @var{a}, mpz_t @var{b})
+@deftypefun void mpz_gcdext (mpz_t @var{g}, mpz_t @var{s}, mpz_t @var{t}, const mpz_t @var{a}, const mpz_t @var{b})
@cindex Extended GCD
@cindex GCD extended
Set @var{g} to the greatest common divisor of @var{a} and @var{b}, and in
@var{b} are negative (or zero if both inputs are zero). The values in @var{s}
and @var{t} are chosen such that normally, @math{@GMPabs{@var{s}} <
@GMPabs{@var{b}} / (2 @var{g})} and @math{@GMPabs{@var{t}} < @GMPabs{@var{a}}
-/ (2 @var{g})}, and these relations define @var{s} and @var{t} uniquely. There
+/ (2 @var{g})}, and these relations define @var{s} and @var{t} uniquely. There
are a few exceptional cases:
If @math{@GMPabs{@var{a}} = @GMPabs{@var{b}}}, then @math{@var{s} = 0},
If @var{t} is @code{NULL} then that value is not computed.
@end deftypefun
-@deftypefun void mpz_lcm (mpz_t @var{rop}, mpz_t @var{op1}, mpz_t @var{op2})
-@deftypefunx void mpz_lcm_ui (mpz_t @var{rop}, mpz_t @var{op1}, unsigned long @var{op2})
+@deftypefun void mpz_lcm (mpz_t @var{rop}, const mpz_t @var{op1}, const mpz_t @var{op2})
+@deftypefunx void mpz_lcm_ui (mpz_t @var{rop}, const mpz_t @var{op1}, unsigned long @var{op2})
@cindex Least common multiple functions
@cindex LCM functions
Set @var{rop} to the least common multiple of @var{op1} and @var{op2}.
@var{op2}. @var{rop} will be zero if either @var{op1} or @var{op2} is zero.
@end deftypefun
-@deftypefun int mpz_invert (mpz_t @var{rop}, mpz_t @var{op1}, mpz_t @var{op2})
+@deftypefun int mpz_invert (mpz_t @var{rop}, const mpz_t @var{op1}, const mpz_t @var{op2})
@cindex Modular inverse functions
@cindex Inverse modulo functions
Compute the inverse of @var{op1} modulo @var{op2} and put the result in
this function is undefined when @var{op2} is zero.
@end deftypefun
-@deftypefun int mpz_jacobi (mpz_t @var{a}, mpz_t @var{b})
+@deftypefun int mpz_jacobi (const mpz_t @var{a}, const mpz_t @var{b})
@cindex Jacobi symbol functions
Calculate the Jacobi symbol @m{\left(a \over b\right),
(@var{a}/@var{b})}. This is defined only for @var{b} odd.
@end deftypefun
-@deftypefun int mpz_legendre (mpz_t @var{a}, mpz_t @var{p})
+@deftypefun int mpz_legendre (const mpz_t @var{a}, const mpz_t @var{p})
@cindex Legendre symbol functions
Calculate the Legendre symbol @m{\left(a \over p\right),
(@var{a}/@var{p})}. This is defined only for @var{p} an odd positive
prime, and for such @var{p} it's identical to the Jacobi symbol.
@end deftypefun
-@deftypefun int mpz_kronecker (mpz_t @var{a}, mpz_t @var{b})
-@deftypefunx int mpz_kronecker_si (mpz_t @var{a}, long @var{b})
-@deftypefunx int mpz_kronecker_ui (mpz_t @var{a}, unsigned long @var{b})
-@deftypefunx int mpz_si_kronecker (long @var{a}, mpz_t @var{b})
-@deftypefunx int mpz_ui_kronecker (unsigned long @var{a}, mpz_t @var{b})
+@deftypefun int mpz_kronecker (const mpz_t @var{a}, const mpz_t @var{b})
+@deftypefunx int mpz_kronecker_si (const mpz_t @var{a}, long @var{b})
+@deftypefunx int mpz_kronecker_ui (const mpz_t @var{a}, unsigned long @var{b})
+@deftypefunx int mpz_si_kronecker (long @var{a}, const mpz_t @var{b})
+@deftypefunx int mpz_ui_kronecker (unsigned long @var{a}, const mpz_t @var{b})
@cindex Kronecker symbol functions
Calculate the Jacobi symbol @m{\left(a \over b\right),
(@var{a}/@var{b})} with the Kronecker extension @m{\left(a \over
@file{demos/qcn.c} which uses @code{mpz_kronecker_ui}.
@end deftypefun
-@deftypefun {mp_bitcnt_t} mpz_remove (mpz_t @var{rop}, mpz_t @var{op}, mpz_t @var{f})
+@deftypefun {mp_bitcnt_t} mpz_remove (mpz_t @var{rop}, const mpz_t @var{op}, const mpz_t @var{f})
@cindex Remove factor functions
@cindex Factor removal functions
Remove all occurrences of the factor @var{f} from @var{op} and store the
removed.
@end deftypefun
-@deftypefun void mpz_fac_ui (mpz_t @var{rop}, unsigned long int @var{op})
+@deftypefun void mpz_fac_ui (mpz_t @var{rop}, unsigned long int @var{n})
+@deftypefunx void mpz_2fac_ui (mpz_t @var{rop}, unsigned long int @var{n})
+@deftypefunx void mpz_mfac_uiui (mpz_t @var{rop}, unsigned long int @var{n}, unsigned long int @var{m})
@cindex Factorial functions
-Set @var{rop} to @var{op}!, the factorial of @var{op}.
+Set @var{rop} to the factorial of @var{n}: @code{mpz_fac_ui} computes the plain factorial @var{n}!,
+@code{mpz_2fac_ui} computes the double-factorial @var{n}!!, and @code{mpz_mfac_uiui} the
+@var{m}-multi-factorial @m{n!^{(m)}, @var{n}!^(@var{m})}.
+@end deftypefun
+
+@deftypefun void mpz_primorial_ui (mpz_t @var{rop}, unsigned long int @var{n})
+@cindex Primorial functions
+Set @var{rop} to the primorial of @var{n}, i.e.@: the product of all positive
+prime numbers @math{@le{}@var{n}}.
@end deftypefun
-@deftypefun void mpz_bin_ui (mpz_t @var{rop}, mpz_t @var{n}, unsigned long int @var{k})
+@deftypefun void mpz_bin_ui (mpz_t @var{rop}, const mpz_t @var{n}, unsigned long int @var{k})
@deftypefunx void mpz_bin_uiui (mpz_t @var{rop}, unsigned long int @var{n}, @w{unsigned long int @var{k}})
@cindex Binomial coefficient functions
Compute the binomial coefficient @m{\left({n}\atop{k}\right), @var{n} over
@cindex Integer comparison functions
@cindex Comparison functions
-@deftypefn Function int mpz_cmp (mpz_t @var{op1}, mpz_t @var{op2})
-@deftypefnx Function int mpz_cmp_d (mpz_t @var{op1}, double @var{op2})
-@deftypefnx Macro int mpz_cmp_si (mpz_t @var{op1}, signed long int @var{op2})
-@deftypefnx Macro int mpz_cmp_ui (mpz_t @var{op1}, unsigned long int @var{op2})
+@deftypefn Function int mpz_cmp (const mpz_t @var{op1}, const mpz_t @var{op2})
+@deftypefnx Function int mpz_cmp_d (const mpz_t @var{op1}, double @var{op2})
+@deftypefnx Macro int mpz_cmp_si (const mpz_t @var{op1}, signed long int @var{op2})
+@deftypefnx Macro int mpz_cmp_ui (const mpz_t @var{op1}, unsigned long int @var{op2})
Compare @var{op1} and @var{op2}. Return a positive value if @math{@var{op1} >
@var{op2}}, zero if @math{@var{op1} = @var{op2}}, or a negative value if
@math{@var{op1} < @var{op2}}.
but results are undefined for a NaN.
@end deftypefn
-@deftypefn Function int mpz_cmpabs (mpz_t @var{op1}, mpz_t @var{op2})
-@deftypefnx Function int mpz_cmpabs_d (mpz_t @var{op1}, double @var{op2})
-@deftypefnx Function int mpz_cmpabs_ui (mpz_t @var{op1}, unsigned long int @var{op2})
+@deftypefn Function int mpz_cmpabs (const mpz_t @var{op1}, const mpz_t @var{op2})
+@deftypefnx Function int mpz_cmpabs_d (const mpz_t @var{op1}, double @var{op2})
+@deftypefnx Function int mpz_cmpabs_ui (const mpz_t @var{op1}, unsigned long int @var{op2})
Compare the absolute values of @var{op1} and @var{op2}. Return a positive
value if @math{@GMPabs{@var{op1}} > @GMPabs{@var{op2}}}, zero if
@math{@GMPabs{@var{op1}} = @GMPabs{@var{op2}}}, or a negative value if
for a NaN.
@end deftypefn
-@deftypefn Macro int mpz_sgn (mpz_t @var{op})
+@deftypefn Macro int mpz_sgn (const mpz_t @var{op})
@cindex Sign tests
@cindex Integer sign tests
Return @math{+1} if @math{@var{op} > 0}, 0 if @math{@var{op} = 0}, and
sign-magnitude is the actual implementation). The least significant bit is
number 0.
-@deftypefun void mpz_and (mpz_t @var{rop}, mpz_t @var{op1}, mpz_t @var{op2})
+@deftypefun void mpz_and (mpz_t @var{rop}, const mpz_t @var{op1}, const mpz_t @var{op2})
Set @var{rop} to @var{op1} bitwise-and @var{op2}.
@end deftypefun
-@deftypefun void mpz_ior (mpz_t @var{rop}, mpz_t @var{op1}, mpz_t @var{op2})
+@deftypefun void mpz_ior (mpz_t @var{rop}, const mpz_t @var{op1}, const mpz_t @var{op2})
Set @var{rop} to @var{op1} bitwise inclusive-or @var{op2}.
@end deftypefun
-@deftypefun void mpz_xor (mpz_t @var{rop}, mpz_t @var{op1}, mpz_t @var{op2})
+@deftypefun void mpz_xor (mpz_t @var{rop}, const mpz_t @var{op1}, const mpz_t @var{op2})
Set @var{rop} to @var{op1} bitwise exclusive-or @var{op2}.
@end deftypefun
-@deftypefun void mpz_com (mpz_t @var{rop}, mpz_t @var{op})
+@deftypefun void mpz_com (mpz_t @var{rop}, const mpz_t @var{op})
Set @var{rop} to the one's complement of @var{op}.
@end deftypefun
-@deftypefun {mp_bitcnt_t} mpz_popcount (mpz_t @var{op})
+@deftypefun {mp_bitcnt_t} mpz_popcount (const mpz_t @var{op})
If @math{@var{op}@ge{}0}, return the population count of @var{op}, which is the
number of 1 bits in the binary representation. If @math{@var{op}<0}, the
number of 1s is infinite, and the return value is the largest possible
@code{mp_bitcnt_t}.
@end deftypefun
-@deftypefun {mp_bitcnt_t} mpz_hamdist (mpz_t @var{op1}, mpz_t @var{op2})
+@deftypefun {mp_bitcnt_t} mpz_hamdist (const mpz_t @var{op1}, const mpz_t @var{op2})
If @var{op1} and @var{op2} are both @math{@ge{}0} or both @math{<0}, return the
hamming distance between the two operands, which is the number of bit positions
where @var{op1} and @var{op2} have different bit values. If one operand is
infinite, and the return value is the largest possible @code{mp_bitcnt_t}.
@end deftypefun
-@deftypefun {mp_bitcnt_t} mpz_scan0 (mpz_t @var{op}, mp_bitcnt_t @var{starting_bit})
-@deftypefunx {mp_bitcnt_t} mpz_scan1 (mpz_t @var{op}, mp_bitcnt_t @var{starting_bit})
+@deftypefun {mp_bitcnt_t} mpz_scan0 (const mpz_t @var{op}, mp_bitcnt_t @var{starting_bit})
+@deftypefunx {mp_bitcnt_t} mpz_scan1 (const mpz_t @var{op}, mp_bitcnt_t @var{starting_bit})
@cindex Bit scanning functions
@cindex Scan bit functions
Scan @var{op}, starting from bit @var{starting_bit}, towards more significant
Complement bit @var{bit_index} in @var{rop}.
@end deftypefun
-@deftypefun int mpz_tstbit (mpz_t @var{op}, mp_bitcnt_t @var{bit_index})
+@deftypefun int mpz_tstbit (const mpz_t @var{op}, mp_bitcnt_t @var{bit_index})
Test bit @var{bit_index} in @var{op} and return 0 or 1 accordingly.
@end deftypefun
See also @ref{Formatted Output} and @ref{Formatted Input}.
-@deftypefun size_t mpz_out_str (FILE *@var{stream}, int @var{base}, mpz_t @var{op})
+@deftypefun size_t mpz_out_str (FILE *@var{stream}, int @var{base}, const mpz_t @var{op})
Output @var{op} on stdio stream @var{stream}, as a string of digits in base
@var{base}. The base argument may vary from 2 to 62 or from @minus{}2 to
@minus{}36.
Return the number of bytes read, or if an error occurred, return 0.
@end deftypefun
-@deftypefun size_t mpz_out_raw (FILE *@var{stream}, mpz_t @var{op})
+@deftypefun size_t mpz_out_raw (FILE *@var{stream}, const mpz_t @var{op})
Output @var{op} on stdio stream @var{stream}, in raw binary format. The
integer is written in a portable format, with 4 bytes of size information, and
that many bytes of limbs. Both the size and the limbs are written in
invoking this function.
@end deftypefun
-@deftypefun void mpz_urandomm (mpz_t @var{rop}, gmp_randstate_t @var{state}, mpz_t @var{n})
+@deftypefun void mpz_urandomm (mpz_t @var{rop}, gmp_randstate_t @var{state}, const mpz_t @var{n})
Generate a uniform random integer in the range 0 to @math{@var{n}-1},
inclusive.
@code{8*sizeof(int)-INT_BIT}.
@end deftypefun
-@deftypefun {void *} mpz_export (void *@var{rop}, size_t *@var{countp}, int @var{order}, size_t @var{size}, int @var{endian}, size_t @var{nails}, mpz_t @var{op})
+@deftypefun {void *} mpz_export (void *@var{rop}, size_t *@var{countp}, int @var{order}, size_t @var{size}, int @var{endian}, size_t @var{nails}, const mpz_t @var{op})
@cindex Integer export
@cindex Export
Fill @var{rop} with word data from @var{op}.
@cindex Miscellaneous integer functions
@cindex Integer miscellaneous functions
-@deftypefun int mpz_fits_ulong_p (mpz_t @var{op})
-@deftypefunx int mpz_fits_slong_p (mpz_t @var{op})
-@deftypefunx int mpz_fits_uint_p (mpz_t @var{op})
-@deftypefunx int mpz_fits_sint_p (mpz_t @var{op})
-@deftypefunx int mpz_fits_ushort_p (mpz_t @var{op})
-@deftypefunx int mpz_fits_sshort_p (mpz_t @var{op})
+@deftypefun int mpz_fits_ulong_p (const mpz_t @var{op})
+@deftypefunx int mpz_fits_slong_p (const mpz_t @var{op})
+@deftypefunx int mpz_fits_uint_p (const mpz_t @var{op})
+@deftypefunx int mpz_fits_sint_p (const mpz_t @var{op})
+@deftypefunx int mpz_fits_ushort_p (const mpz_t @var{op})
+@deftypefunx int mpz_fits_sshort_p (const mpz_t @var{op})
Return non-zero iff the value of @var{op} fits in an @code{unsigned long int},
@code{signed long int}, @code{unsigned int}, @code{signed int}, @code{unsigned
short int}, or @code{signed short int}, respectively. Otherwise, return zero.
@end deftypefun
-@deftypefn Macro int mpz_odd_p (mpz_t @var{op})
-@deftypefnx Macro int mpz_even_p (mpz_t @var{op})
+@deftypefn Macro int mpz_odd_p (const mpz_t @var{op})
+@deftypefnx Macro int mpz_even_p (const mpz_t @var{op})
Determine whether @var{op} is odd or even, respectively. Return non-zero if
yes, zero if no. These macros evaluate their argument more than once.
@end deftypefn
-@deftypefun size_t mpz_sizeinbase (mpz_t @var{op}, int @var{base})
+@deftypefun size_t mpz_sizeinbase (const mpz_t @var{op}, int @var{base})
@cindex Size in digits
@cindex Digits in an integer
Return the size of @var{op} measured in number of digits in the given
@code{_mpz_realloc} takes its size in limbs.
@end deftypefun
-@deftypefun mp_limb_t mpz_getlimbn (mpz_t @var{op}, mp_size_t @var{n})
+@deftypefun mp_limb_t mpz_getlimbn (const mpz_t @var{op}, mp_size_t @var{n})
Return limb number @var{n} from @var{op}. The sign of @var{op} is ignored,
just the absolute value is used. The least significant limb is number 0.
@code{mpz_size(@var{op})-1}.
@end deftypefun
-@deftypefun size_t mpz_size (mpz_t @var{op})
+@deftypefun size_t mpz_size (const mpz_t @var{op})
Return the size of @var{op} measured in number of limbs. If @var{op} is zero,
the returned value will be zero.
@c (@xref{Nomenclature}, for an explanation of the concept @dfn{limb}.)
Free the space occupied by a NULL-terminated list of @code{mpq_t} variables.
@end deftypefun
-@deftypefun void mpq_set (mpq_t @var{rop}, mpq_t @var{op})
-@deftypefunx void mpq_set_z (mpq_t @var{rop}, mpz_t @var{op})
+@deftypefun void mpq_set (mpq_t @var{rop}, const mpq_t @var{op})
+@deftypefunx void mpq_set_z (mpq_t @var{rop}, const mpz_t @var{op})
Assign @var{rop} from @var{op}.
@end deftypefun
@code{mpq_canonicalize} before any operations are performed on @var{rop}.
@end deftypefun
-@deftypefun int mpq_set_str (mpq_t @var{rop}, char *@var{str}, int @var{base})
+@deftypefun int mpq_set_str (mpq_t @var{rop}, const char *@var{str}, int @var{base})
Set @var{rop} from a null-terminated string @var{str} in the given @var{base}.
The string can be an integer like ``41'' or a fraction like ``41/152''. The
@cindex Rational conversion functions
@cindex Conversion functions
-@deftypefun double mpq_get_d (mpq_t @var{op})
+@deftypefun double mpq_get_d (const mpq_t @var{op})
Convert @var{op} to a @code{double}, truncating if necessary (i.e.@: rounding
towards zero).
@end deftypefun
@deftypefun void mpq_set_d (mpq_t @var{rop}, double @var{op})
-@deftypefunx void mpq_set_f (mpq_t @var{rop}, mpf_t @var{op})
+@deftypefunx void mpq_set_f (mpq_t @var{rop}, const mpf_t @var{op})
Set @var{rop} to the value of @var{op}. There is no rounding, this conversion
is exact.
@end deftypefun
-@deftypefun {char *} mpq_get_str (char *@var{str}, int @var{base}, mpq_t @var{op})
+@deftypefun {char *} mpq_get_str (char *@var{str}, int @var{base}, const mpq_t @var{op})
Convert @var{op} to a string of digits in base @var{base}. The base may vary
from 2 to 36. The string will be of the form @samp{num/den}, or if the
denominator is 1 then just @samp{num}.
@cindex Rational arithmetic functions
@cindex Arithmetic functions
-@deftypefun void mpq_add (mpq_t @var{sum}, mpq_t @var{addend1}, mpq_t @var{addend2})
+@deftypefun void mpq_add (mpq_t @var{sum}, const mpq_t @var{addend1}, const mpq_t @var{addend2})
Set @var{sum} to @var{addend1} + @var{addend2}.
@end deftypefun
-@deftypefun void mpq_sub (mpq_t @var{difference}, mpq_t @var{minuend}, mpq_t @var{subtrahend})
+@deftypefun void mpq_sub (mpq_t @var{difference}, const mpq_t @var{minuend}, const mpq_t @var{subtrahend})
Set @var{difference} to @var{minuend} @minus{} @var{subtrahend}.
@end deftypefun
-@deftypefun void mpq_mul (mpq_t @var{product}, mpq_t @var{multiplier}, mpq_t @var{multiplicand})
+@deftypefun void mpq_mul (mpq_t @var{product}, const mpq_t @var{multiplier}, const mpq_t @var{multiplicand})
Set @var{product} to @math{@var{multiplier} @GMPtimes{} @var{multiplicand}}.
@end deftypefun
-@deftypefun void mpq_mul_2exp (mpq_t @var{rop}, mpq_t @var{op1}, mp_bitcnt_t @var{op2})
+@deftypefun void mpq_mul_2exp (mpq_t @var{rop}, const mpq_t @var{op1}, mp_bitcnt_t @var{op2})
Set @var{rop} to @m{@var{op1} \times 2^{op2}, @var{op1} times 2 raised to
@var{op2}}.
@end deftypefun
-@deftypefun void mpq_div (mpq_t @var{quotient}, mpq_t @var{dividend}, mpq_t @var{divisor})
+@deftypefun void mpq_div (mpq_t @var{quotient}, const mpq_t @var{dividend}, const mpq_t @var{divisor})
@cindex Division functions
Set @var{quotient} to @var{dividend}/@var{divisor}.
@end deftypefun
-@deftypefun void mpq_div_2exp (mpq_t @var{rop}, mpq_t @var{op1}, mp_bitcnt_t @var{op2})
+@deftypefun void mpq_div_2exp (mpq_t @var{rop}, const mpq_t @var{op1}, mp_bitcnt_t @var{op2})
Set @var{rop} to @m{@var{op1}/2^{op2}, @var{op1} divided by 2 raised to
@var{op2}}.
@end deftypefun
-@deftypefun void mpq_neg (mpq_t @var{negated_operand}, mpq_t @var{operand})
+@deftypefun void mpq_neg (mpq_t @var{negated_operand}, const mpq_t @var{operand})
Set @var{negated_operand} to @minus{}@var{operand}.
@end deftypefun
-@deftypefun void mpq_abs (mpq_t @var{rop}, mpq_t @var{op})
+@deftypefun void mpq_abs (mpq_t @var{rop}, const mpq_t @var{op})
Set @var{rop} to the absolute value of @var{op}.
@end deftypefun
-@deftypefun void mpq_inv (mpq_t @var{inverted_number}, mpq_t @var{number})
+@deftypefun void mpq_inv (mpq_t @var{inverted_number}, const mpq_t @var{number})
Set @var{inverted_number} to 1/@var{number}. If the new denominator is
zero, this routine will divide by zero.
@end deftypefun
@cindex Rational comparison functions
@cindex Comparison functions
-@deftypefun int mpq_cmp (mpq_t @var{op1}, mpq_t @var{op2})
+@deftypefun int mpq_cmp (const mpq_t @var{op1}, const mpq_t @var{op2})
Compare @var{op1} and @var{op2}. Return a positive value if @math{@var{op1} >
@var{op2}}, zero if @math{@var{op1} = @var{op2}}, and a negative value if
@math{@var{op1} < @var{op2}}.
@code{mpq_cmp}.
@end deftypefun
-@deftypefn Macro int mpq_cmp_ui (mpq_t @var{op1}, unsigned long int @var{num2}, unsigned long int @var{den2})
-@deftypefnx Macro int mpq_cmp_si (mpq_t @var{op1}, long int @var{num2}, unsigned long int @var{den2})
+@deftypefn Macro int mpq_cmp_ui (const mpq_t @var{op1}, unsigned long int @var{num2}, unsigned long int @var{den2})
+@deftypefnx Macro int mpq_cmp_si (const mpq_t @var{op1}, long int @var{num2}, unsigned long int @var{den2})
Compare @var{op1} and @var{num2}/@var{den2}. Return a positive value if
@math{@var{op1} > @var{num2}/@var{den2}}, zero if @math{@var{op1} =
@var{num2}/@var{den2}}, and a negative value if @math{@var{op1} <
multiple times.
@end deftypefn
-@deftypefn Macro int mpq_sgn (mpq_t @var{op})
+@deftypefn Macro int mpq_sgn (const mpq_t @var{op})
@cindex Sign tests
@cindex Rational sign tests
Return @math{+1} if @math{@var{op} > 0}, 0 if @math{@var{op} = 0}, and
@math{-1} if @math{@var{op} < 0}.
This function is actually implemented as a macro. It evaluates its
-arguments multiple times.
+argument multiple times.
@end deftypefn
-@deftypefun int mpq_equal (mpq_t @var{op1}, mpq_t @var{op2})
+@deftypefun int mpq_equal (const mpq_t @var{op1}, const mpq_t @var{op2})
Return non-zero if @var{op1} and @var{op2} are equal, zero if they are
non-equal. Although @code{mpq_cmp} can be used for the same purpose, this
function is much faster.
(@pxref{Rational Number Functions}) then @code{mpq_canonicalize} must be
called before any other @code{mpq} functions are applied to that @code{mpq_t}.
-@deftypefn Macro mpz_t mpq_numref (mpq_t @var{op})
-@deftypefnx Macro mpz_t mpq_denref (mpq_t @var{op})
+@deftypefn Macro mpz_t mpq_numref (const mpq_t @var{op})
+@deftypefnx Macro mpz_t mpq_denref (const mpq_t @var{op})
Return a reference to the numerator and denominator of @var{op}, respectively.
The @code{mpz} functions can be used on the result of these macros.
@end deftypefn
-@deftypefun void mpq_get_num (mpz_t @var{numerator}, mpq_t @var{rational})
-@deftypefunx void mpq_get_den (mpz_t @var{denominator}, mpq_t @var{rational})
-@deftypefunx void mpq_set_num (mpq_t @var{rational}, mpz_t @var{numerator})
-@deftypefunx void mpq_set_den (mpq_t @var{rational}, mpz_t @var{denominator})
+@deftypefun void mpq_get_num (mpz_t @var{numerator}, const mpq_t @var{rational})
+@deftypefunx void mpq_get_den (mpz_t @var{denominator}, const mpq_t @var{rational})
+@deftypefunx void mpq_set_num (mpq_t @var{rational}, const mpz_t @var{numerator})
+@deftypefunx void mpq_set_den (mpq_t @var{rational}, const mpz_t @var{denominator})
Get or set the numerator or denominator of a rational. These functions are
equivalent to calling @code{mpz_set} with an appropriate @code{mpq_numref} or
@code{mpq_denref}. Direct use of @code{mpq_numref} or @code{mpq_denref} is
See also @ref{Formatted Output} and @ref{Formatted Input}.
-@deftypefun size_t mpq_out_str (FILE *@var{stream}, int @var{base}, mpq_t @var{op})
+@deftypefun size_t mpq_out_str (FILE *@var{stream}, int @var{base}, const mpq_t @var{op})
Output @var{op} on stdio stream @var{stream}, as a string of digits in base
@var{base}. The base may vary from 2 to 36. Output is in the form
@samp{num/den} or if the denominator is 1 then just @samp{num}.
iterative algorithms like Newton-Raphson, making the computation precision
closely match the actual accurate part of the numbers.
-@deftypefun {mp_bitcnt_t} mpf_get_prec (mpf_t @var{op})
+@deftypefun {mp_bitcnt_t} mpf_get_prec (const mpf_t @var{op})
Return the current precision of @var{op}, in bits.
@end deftypefun
These functions assign new values to already initialized floats
(@pxref{Initializing Floats}).
-@deftypefun void mpf_set (mpf_t @var{rop}, mpf_t @var{op})
+@deftypefun void mpf_set (mpf_t @var{rop}, const mpf_t @var{op})
@deftypefunx void mpf_set_ui (mpf_t @var{rop}, unsigned long int @var{op})
@deftypefunx void mpf_set_si (mpf_t @var{rop}, signed long int @var{op})
@deftypefunx void mpf_set_d (mpf_t @var{rop}, double @var{op})
-@deftypefunx void mpf_set_z (mpf_t @var{rop}, mpz_t @var{op})
-@deftypefunx void mpf_set_q (mpf_t @var{rop}, mpq_t @var{op})
+@deftypefunx void mpf_set_z (mpf_t @var{rop}, const mpz_t @var{op})
+@deftypefunx void mpf_set_q (mpf_t @var{rop}, const mpq_t @var{op})
Set the value of @var{rop} from @var{op}.
@end deftypefun
-@deftypefun int mpf_set_str (mpf_t @var{rop}, char *@var{str}, int @var{base})
+@deftypefun int mpf_set_str (mpf_t @var{rop}, const char *@var{str}, int @var{base})
Set the value of @var{rop} from the string in @var{str}. The string is of the
form @samp{M@@N} or, if the base is 10 or less, alternatively @samp{MeN}.
@samp{M} is the mantissa and @samp{N} is the exponent. The mantissa is always
float functions. Don't use an initialize-and-set function on a variable
already initialized!
-@deftypefun void mpf_init_set (mpf_t @var{rop}, mpf_t @var{op})
+@deftypefun void mpf_init_set (mpf_t @var{rop}, const mpf_t @var{op})
@deftypefunx void mpf_init_set_ui (mpf_t @var{rop}, unsigned long int @var{op})
@deftypefunx void mpf_init_set_si (mpf_t @var{rop}, signed long int @var{op})
@deftypefunx void mpf_init_set_d (mpf_t @var{rop}, double @var{op})
set by @code{mpf_set_default_prec}.
@end deftypefun
-@deftypefun int mpf_init_set_str (mpf_t @var{rop}, char *@var{str}, int @var{base})
+@deftypefun int mpf_init_set_str (mpf_t @var{rop}, const char *@var{str}, int @var{base})
Initialize @var{rop} and set its value from the string in @var{str}. See
@code{mpf_set_str} above for details on the assignment operation.
@cindex Float conversion functions
@cindex Conversion functions
-@deftypefun double mpf_get_d (mpf_t @var{op})
+@deftypefun double mpf_get_d (const mpf_t @var{op})
Convert @var{op} to a @code{double}, truncating if necessary (i.e.@: rounding
towards zero).
underflow and denorm traps may or may not occur.
@end deftypefun
-@deftypefun double mpf_get_d_2exp (signed long int *@var{exp}, mpf_t @var{op})
+@deftypefun double mpf_get_d_2exp (signed long int *@var{exp}, const mpf_t @var{op})
Convert @var{op} to a @code{double}, truncating if necessary (i.e.@: rounding
towards zero), and with an exponent returned separately.
Functions,,, libc, The GNU C Library Reference Manual}).
@end deftypefun
-@deftypefun long mpf_get_si (mpf_t @var{op})
-@deftypefunx {unsigned long} mpf_get_ui (mpf_t @var{op})
+@deftypefun long mpf_get_si (const mpf_t @var{op})
+@deftypefunx {unsigned long} mpf_get_ui (const mpf_t @var{op})
Convert @var{op} to a @code{long} or @code{unsigned long}, truncating any
fraction part. If @var{op} is too big for the return type, the result is
undefined.
(@pxref{Miscellaneous Float Functions}).
@end deftypefun
-@deftypefun {char *} mpf_get_str (char *@var{str}, mp_exp_t *@var{expptr}, int @var{base}, size_t @var{n_digits}, mpf_t @var{op})
+@deftypefun {char *} mpf_get_str (char *@var{str}, mp_exp_t *@var{expptr}, int @var{base}, size_t @var{n_digits}, const mpf_t @var{op})
Convert @var{op} to a string of digits in base @var{base}. The base argument
may vary from 2 to 62 or from @minus{}2 to @minus{}36. Up to @var{n_digits}
digits will be generated. Trailing zeros are not returned. No more digits
@cindex Float arithmetic functions
@cindex Arithmetic functions
-@deftypefun void mpf_add (mpf_t @var{rop}, mpf_t @var{op1}, mpf_t @var{op2})
-@deftypefunx void mpf_add_ui (mpf_t @var{rop}, mpf_t @var{op1}, unsigned long int @var{op2})
+@deftypefun void mpf_add (mpf_t @var{rop}, const mpf_t @var{op1}, const mpf_t @var{op2})
+@deftypefunx void mpf_add_ui (mpf_t @var{rop}, const mpf_t @var{op1}, unsigned long int @var{op2})
Set @var{rop} to @math{@var{op1} + @var{op2}}.
@end deftypefun
-@deftypefun void mpf_sub (mpf_t @var{rop}, mpf_t @var{op1}, mpf_t @var{op2})
-@deftypefunx void mpf_ui_sub (mpf_t @var{rop}, unsigned long int @var{op1}, mpf_t @var{op2})
-@deftypefunx void mpf_sub_ui (mpf_t @var{rop}, mpf_t @var{op1}, unsigned long int @var{op2})
+@deftypefun void mpf_sub (mpf_t @var{rop}, const mpf_t @var{op1}, const mpf_t @var{op2})
+@deftypefunx void mpf_ui_sub (mpf_t @var{rop}, unsigned long int @var{op1}, const mpf_t @var{op2})
+@deftypefunx void mpf_sub_ui (mpf_t @var{rop}, const mpf_t @var{op1}, unsigned long int @var{op2})
Set @var{rop} to @var{op1} @minus{} @var{op2}.
@end deftypefun
-@deftypefun void mpf_mul (mpf_t @var{rop}, mpf_t @var{op1}, mpf_t @var{op2})
-@deftypefunx void mpf_mul_ui (mpf_t @var{rop}, mpf_t @var{op1}, unsigned long int @var{op2})
+@deftypefun void mpf_mul (mpf_t @var{rop}, const mpf_t @var{op1}, const mpf_t @var{op2})
+@deftypefunx void mpf_mul_ui (mpf_t @var{rop}, const mpf_t @var{op1}, unsigned long int @var{op2})
Set @var{rop} to @math{@var{op1} @GMPtimes{} @var{op2}}.
@end deftypefun
lets the user handle arithmetic exceptions in these functions in the same
manner as other arithmetic exceptions.
-@deftypefun void mpf_div (mpf_t @var{rop}, mpf_t @var{op1}, mpf_t @var{op2})
-@deftypefunx void mpf_ui_div (mpf_t @var{rop}, unsigned long int @var{op1}, mpf_t @var{op2})
-@deftypefunx void mpf_div_ui (mpf_t @var{rop}, mpf_t @var{op1}, unsigned long int @var{op2})
+@deftypefun void mpf_div (mpf_t @var{rop}, const mpf_t @var{op1}, const mpf_t @var{op2})
+@deftypefunx void mpf_ui_div (mpf_t @var{rop}, unsigned long int @var{op1}, const mpf_t @var{op2})
+@deftypefunx void mpf_div_ui (mpf_t @var{rop}, const mpf_t @var{op1}, unsigned long int @var{op2})
@cindex Division functions
Set @var{rop} to @var{op1}/@var{op2}.
@end deftypefun
-@deftypefun void mpf_sqrt (mpf_t @var{rop}, mpf_t @var{op})
+@deftypefun void mpf_sqrt (mpf_t @var{rop}, const mpf_t @var{op})
@deftypefunx void mpf_sqrt_ui (mpf_t @var{rop}, unsigned long int @var{op})
@cindex Root extraction functions
Set @var{rop} to @m{\sqrt{@var{op}}, the square root of @var{op}}.
@end deftypefun
-@deftypefun void mpf_pow_ui (mpf_t @var{rop}, mpf_t @var{op1}, unsigned long int @var{op2})
+@deftypefun void mpf_pow_ui (mpf_t @var{rop}, const mpf_t @var{op1}, unsigned long int @var{op2})
@cindex Exponentiation functions
@cindex Powering functions
Set @var{rop} to @m{@var{op1}^{op2}, @var{op1} raised to the power @var{op2}}.
@end deftypefun
-@deftypefun void mpf_neg (mpf_t @var{rop}, mpf_t @var{op})
+@deftypefun void mpf_neg (mpf_t @var{rop}, const mpf_t @var{op})
Set @var{rop} to @minus{}@var{op}.
@end deftypefun
-@deftypefun void mpf_abs (mpf_t @var{rop}, mpf_t @var{op})
+@deftypefun void mpf_abs (mpf_t @var{rop}, const mpf_t @var{op})
Set @var{rop} to the absolute value of @var{op}.
@end deftypefun
-@deftypefun void mpf_mul_2exp (mpf_t @var{rop}, mpf_t @var{op1}, mp_bitcnt_t @var{op2})
+@deftypefun void mpf_mul_2exp (mpf_t @var{rop}, const mpf_t @var{op1}, mp_bitcnt_t @var{op2})
Set @var{rop} to @m{@var{op1} \times 2^{op2}, @var{op1} times 2 raised to
@var{op2}}.
@end deftypefun
-@deftypefun void mpf_div_2exp (mpf_t @var{rop}, mpf_t @var{op1}, mp_bitcnt_t @var{op2})
+@deftypefun void mpf_div_2exp (mpf_t @var{rop}, const mpf_t @var{op1}, mp_bitcnt_t @var{op2})
Set @var{rop} to @m{@var{op1}/2^{op2}, @var{op1} divided by 2 raised to
@var{op2}}.
@end deftypefun
@cindex Float comparison functions
@cindex Comparison functions
-@deftypefun int mpf_cmp (mpf_t @var{op1}, mpf_t @var{op2})
-@deftypefunx int mpf_cmp_d (mpf_t @var{op1}, double @var{op2})
-@deftypefunx int mpf_cmp_ui (mpf_t @var{op1}, unsigned long int @var{op2})
-@deftypefunx int mpf_cmp_si (mpf_t @var{op1}, signed long int @var{op2})
+@deftypefun int mpf_cmp (const mpf_t @var{op1}, const mpf_t @var{op2})
+@deftypefunx int mpf_cmp_d (const mpf_t @var{op1}, double @var{op2})
+@deftypefunx int mpf_cmp_ui (const mpf_t @var{op1}, unsigned long int @var{op2})
+@deftypefunx int mpf_cmp_si (const mpf_t @var{op1}, signed long int @var{op2})
Compare @var{op1} and @var{op2}. Return a positive value if @math{@var{op1} >
@var{op2}}, zero if @math{@var{op1} = @var{op2}}, and a negative value if
@math{@var{op1} < @var{op2}}.
a NaN.
@end deftypefun
-@deftypefun int mpf_eq (mpf_t @var{op1}, mpf_t @var{op2}, mp_bitcnt_t op3)
+@deftypefun int mpf_eq (const mpf_t @var{op1}, const mpf_t @var{op2}, mp_bitcnt_t @var{op3})
Return non-zero if the first @var{op3} bits of @var{op1} and @var{op2} are
equal, zero otherwise. I.e., test if @var{op1} and @var{op2} are approximately
equal.
really just one ulp off, and should be considered equal.
@end deftypefun
-@deftypefun void mpf_reldiff (mpf_t @var{rop}, mpf_t @var{op1}, mpf_t @var{op2})
+@deftypefun void mpf_reldiff (mpf_t @var{rop}, const mpf_t @var{op1}, const mpf_t @var{op2})
Compute the relative difference between @var{op1} and @var{op2} and store the
result in @var{rop}. This is @math{@GMPabs{@var{op1}-@var{op2}}/@var{op1}}.
@end deftypefun
-@deftypefn Macro int mpf_sgn (mpf_t @var{op})
+@deftypefn Macro int mpf_sgn (const mpf_t @var{op})
@cindex Sign tests
@cindex Float sign tests
Return @math{+1} if @math{@var{op} > 0}, 0 if @math{@var{op} = 0}, and
@math{-1} if @math{@var{op} < 0}.
-This function is actually implemented as a macro. It evaluates its arguments
+This function is actually implemented as a macro. It evaluates its argument
multiple times.
@end deftypefn
See also @ref{Formatted Output} and @ref{Formatted Input}.
-@deftypefun size_t mpf_out_str (FILE *@var{stream}, int @var{base}, size_t @var{n_digits}, mpf_t @var{op})
+@deftypefun size_t mpf_out_str (FILE *@var{stream}, int @var{base}, size_t @var{n_digits}, const mpf_t @var{op})
Print @var{op} to @var{stream}, as a string of digits. Return the number of
bytes written, or if an error occurred, return 0.
Return the number of bytes read, or if an error occurred, return 0.
@end deftypefun
-@c @deftypefun void mpf_out_raw (FILE *@var{stream}, mpf_t @var{float})
+@c @deftypefun void mpf_out_raw (FILE *@var{stream}, const mpf_t @var{float})
@c Output @var{float} on stdio stream @var{stream}, in raw binary
@c format. The float is written in a portable format, with 4 bytes of
@c size information, and that many bytes of limbs. Both the size and the
@cindex Miscellaneous float functions
@cindex Float miscellaneous functions
-@deftypefun void mpf_ceil (mpf_t @var{rop}, mpf_t @var{op})
-@deftypefunx void mpf_floor (mpf_t @var{rop}, mpf_t @var{op})
-@deftypefunx void mpf_trunc (mpf_t @var{rop}, mpf_t @var{op})
+@deftypefun void mpf_ceil (mpf_t @var{rop}, const mpf_t @var{op})
+@deftypefunx void mpf_floor (mpf_t @var{rop}, const mpf_t @var{op})
+@deftypefunx void mpf_trunc (mpf_t @var{rop}, const mpf_t @var{op})
@cindex Rounding functions
@cindex Float rounding functions
Set @var{rop} to @var{op} rounded to an integer. @code{mpf_ceil} rounds to the
to the integer towards zero.
@end deftypefun
-@deftypefun int mpf_integer_p (mpf_t @var{op})
+@deftypefun int mpf_integer_p (const mpf_t @var{op})
Return non-zero if @var{op} is an integer.
@end deftypefun
-@deftypefun int mpf_fits_ulong_p (mpf_t @var{op})
-@deftypefunx int mpf_fits_slong_p (mpf_t @var{op})
-@deftypefunx int mpf_fits_uint_p (mpf_t @var{op})
-@deftypefunx int mpf_fits_sint_p (mpf_t @var{op})
-@deftypefunx int mpf_fits_ushort_p (mpf_t @var{op})
-@deftypefunx int mpf_fits_sshort_p (mpf_t @var{op})
+@deftypefun int mpf_fits_ulong_p (const mpf_t @var{op})
+@deftypefunx int mpf_fits_slong_p (const mpf_t @var{op})
+@deftypefunx int mpf_fits_uint_p (const mpf_t @var{op})
+@deftypefunx int mpf_fits_sint_p (const mpf_t @var{op})
+@deftypefunx int mpf_fits_ushort_p (const mpf_t @var{op})
+@deftypefunx int mpf_fits_sshort_p (const mpf_t @var{op})
Return non-zero if @var{op} would fit in the respective C data type, when
truncated to an integer.
@end deftypefun
random numbers are generated when @var{max_size} is negative.
@end deftypefun
-@c @deftypefun size_t mpf_size (mpf_t @var{op})
+@c @deftypefun size_t mpf_size (const mpf_t @var{op})
@c Return the size of @var{op} measured in number of limbs. If @var{op} is
@c zero, the returned value will be zero. (@xref{Nomenclature}, for an
@c explanation of the concept @dfn{limb}.)
@var{s2n}.
@end deftypefun
-@deftypefun void mpn_neg (mp_limb_t *@var{rp}, const mp_limb_t *@var{sp}, mp_size_t @var{n})
+@deftypefun mp_limb_t mpn_neg (mp_limb_t *@var{rp}, const mp_limb_t *@var{sp}, mp_size_t @var{n})
Perform the negation of @{@var{sp}, @var{n}@}, and write the result to
@{@var{rp}, @var{n}@}. Return carry-out.
@end deftypefun
the return value is the actual number produced. Both source operands are
destroyed.
-@{@var{xp}, @var{xn}@} must have at least as many bits as @{@var{yp},
-@var{yn}@}. @{@var{yp}, @var{yn}@} must be odd. Both operands must have
-non-zero most significant limbs. No overlap is permitted between @{@var{xp},
-@var{xn}@} and @{@var{yp}, @var{yn}@}.
+It is required that @math{@var{xn} @ge @var{yn} > 0}, and the most significant
+limb of @{@var{yp}, @var{yn}@} must be non-zero. No overlap is permitted
+between @{@var{xp}, @var{xn}@} and @{@var{yp}, @var{yn}@}.
@end deftypefun
@deftypefun mp_limb_t mpn_gcd_1 (const mp_limb_t *@var{xp}, mp_size_t @var{xn}, mp_limb_t @var{ylimb})
would have been zero or non-zero.
A return value of zero indicates a perfect square. See also
-@code{mpz_perfect_square_p}.
+@code{mpn_perfect_square_p}.
@end deftypefun
@deftypefun mp_size_t mpn_get_str (unsigned char *@var{str}, int @var{base}, mp_limb_t *@var{s1p}, mp_size_t @var{s1n})
fast and has good randomness properties.
@end deftypefun
-@deftypefun void gmp_randinit_lc_2exp (gmp_randstate_t @var{state}, mpz_t @var{a}, @w{unsigned long @var{c}}, @w{mp_bitcnt_t @var{m2exp}})
+@deftypefun void gmp_randinit_lc_2exp (gmp_randstate_t @var{state}, const mpz_t @var{a}, @w{unsigned long @var{c}}, @w{mp_bitcnt_t @var{m2exp}})
@cindex Linear congruential random numbers
Initialize @var{state} with a linear congruential algorithm @m{X = (@var{a}X +
@var{c}) @bmod 2^{m2exp}, X = (@var{a}*X + @var{c}) mod 2^@var{m2exp}}.
@cindex Random number seeding
@cindex Seeding random numbers
-@deftypefun void gmp_randseed (gmp_randstate_t @var{state}, mpz_t @var{seed})
+@deftypefun void gmp_randseed (gmp_randstate_t @var{state}, const mpz_t @var{seed})
@deftypefunx void gmp_randseed_ui (gmp_randstate_t @var{state}, @w{unsigned long int @var{seed}})
Set an initial seed value into @var{state}.
-@node C++ Class Interface, BSD Compatible Functions, Formatted Input, Top
+@node C++ Class Interface, Custom Allocation, Formatted Input, Top
@chapter C++ Class Interface
@cindex C++ interface
@deftypefun {} mpz_class::mpz_class (type @var{n})
Construct an @code{mpz_class}. All the standard C++ types may be used, except
@code{long long} and @code{long double}, and all the GMP C++ classes can be
-used. Any necessary conversion follows the corresponding C function, for
-example @code{double} follows @code{mpz_set_d} (@pxref{Assigning Integers}).
+used, although conversions from @code{mpq_class} and @code{mpf_class} are
+@code{explicit}. Any necessary conversion follows the corresponding C
+function, for example @code{double} follows @code{mpz_set_d}
+(@pxref{Assigning Integers}).
@end deftypefun
@deftypefun explicit mpz_class::mpz_class (mpz_t @var{z})
exception is thrown. The same applies to @code{operator=}.
@end deftypefun
+@deftypefun mpz_class operator"" _mpz (const char *@var{str})
+With C++11 compilers, integers can be constructed with the syntax
+@code{123_mpz} which is equivalent to @code{mpz_class("123")}.
+@end deftypefun
+
@deftypefun mpz_class operator/ (mpz_class @var{a}, mpz_class @var{d})
@deftypefunx mpz_class operator% (mpz_class @var{a}, mpz_class @var{d})
Divisions involving @code{mpz_class} round towards zero, as per the
@end example
@end deftypefun
-@deftypefun mpz_class abs (mpz_class @var{op1})
+@deftypefun mpz_class abs (mpz_class @var{op})
@deftypefunx int cmp (mpz_class @var{op1}, type @var{op2})
@deftypefunx int cmp (type @var{op1}, mpz_class @var{op2})
@maybepagebreak
@deftypefunx int mpz_class::set_str (const string& @var{str}, int @var{base})
@deftypefunx int sgn (mpz_class @var{op})
@deftypefunx mpz_class sqrt (mpz_class @var{op})
+@maybepagebreak
+@deftypefunx void mpz_class::swap (mpz_class& @var{op})
+@deftypefunx void swap (mpz_class& @var{op1}, mpz_class& @var{op2})
These functions provide a C++ class interface to the corresponding GMP C
routines.
@deftypefun {} mpq_class::mpq_class (type @var{op})
@deftypefunx {} mpq_class::mpq_class (integer @var{num}, integer @var{den})
Construct an @code{mpq_class}. The initial value can be a single value of any
-type, or a pair of integers (@code{mpz_class} or standard C++ integer types)
-representing a fraction, except that @code{long long} and @code{long double}
-are not supported. For example,
+type (conversion from @code{mpf_class} is @code{explicit}), or a pair of
+integers (@code{mpz_class} or standard C++ integer types) representing a
+fraction, except that @code{long long} and @code{long double} are not
+supported. For example,
@example
mpq_class q (99);
exception is thrown. The same applies to @code{operator=}.
@end deftypefun
+@deftypefun mpq_class operator"" _mpq (const char *@var{str})
+With C++11 compilers, integral rationals can be constructed with the syntax
+@code{123_mpq} which is equivalent to @code{mpq_class(123_mpz)}. Other
+rationals can be built as @code{-1_mpq/2} or @code{0xb_mpq/123456_mpz}.
+@end deftypefun
+
@deftypefun void mpq_class::canonicalize ()
Put an @code{mpq_class} into canonical form, as per @ref{Rational Number
Functions}. All arithmetic operators require their operands in canonical
@deftypefunx int mpq_class::set_str (const char *@var{str}, int @var{base})
@deftypefunx int mpq_class::set_str (const string& @var{str}, int @var{base})
@deftypefunx int sgn (mpq_class @var{op})
+@maybepagebreak
+@deftypefunx void mpq_class::swap (mpq_class& @var{op})
+@deftypefunx void swap (mpq_class& @var{op1}, mpq_class& @var{op2})
These functions provide a C++ class interface to the corresponding GMP C
routines.
is thrown. The same applies to @code{operator=}.
@end deftypefun
+@deftypefun mpf_class operator"" _mpf (const char *@var{str})
+With C++11 compilers, floats can be constructed with the syntax
+@code{1.23e-1_mpf} which is equivalent to @code{mpf_class("1.23e-1")}.
+@end deftypefun
+
@deftypefun {mpf_class&} mpf_class::operator= (type @var{op})
Convert and store the given @var{op} value to an @code{mpf_class} object. The
same types are accepted as for the constructors above.
@deftypefunx int mpf_class::set_str (const string& @var{str}, int @var{base})
@deftypefunx int sgn (mpf_class @var{op})
@deftypefunx mpf_class sqrt (mpf_class @var{op})
+@maybepagebreak
+@deftypefunx void mpf_class::swap (mpf_class& @var{op})
+@deftypefunx void swap (mpf_class& @var{op1}, mpf_class& @var{op2})
@deftypefunx mpf_class trunc (mpf_class @var{op})
These functions provide a C++ class interface to the corresponding GMP C
routines.
to choose a good seed.
@end deftypefun
-@deftypefun mpz_class gmp_randclass::get_z_bits (unsigned long @var{bits})
+@deftypefun mpz_class gmp_randclass::get_z_bits (mp_bitcnt_t @var{bits})
@deftypefunx mpz_class gmp_randclass::get_z_bits (mpz_class @var{bits})
Generate a random integer with a specified number of bits.
@end deftypefun
@end table
-@node BSD Compatible Functions, Custom Allocation, C++ Class Interface, Top
-@comment node-name, next, previous, up
-@chapter Berkeley MP Compatible Functions
-@cindex Berkeley MP compatible functions
-@cindex BSD MP compatible functions
-
-These functions are intended to be fully compatible with the Berkeley MP
-library which is available on many BSD derived U*ix systems. The
-@samp{--enable-mpbsd} option must be used when building GNU MP to make these
-available (@pxref{Installing GMP}).
-
-The original Berkeley MP library has a usage restriction: you cannot use the
-same variable as both source and destination in a single function call. The
-compatible functions in GNU MP do not share this restriction---inputs and
-outputs may overlap.
-
-It is not recommended that new programs are written using these functions.
-Apart from the incomplete set of functions, the interface for initializing
-@code{MINT} objects is more error prone, and the @code{pow} function collides
-with @code{pow} in @file{libm.a}.
-
-@cindex @code{mp.h}
-@tindex MINT
-Include the header @file{mp.h} to get the definition of the necessary types and
-functions. If you are on a BSD derived system, make sure to include GNU
-@file{mp.h} if you are going to link the GNU @file{libmp.a} to your program.
-This means that you probably need to give the @samp{-I<dir>} option to the
-compiler, where @samp{<dir>} is the directory where you have GNU @file{mp.h}.
-
-@deftypefun {MINT *} itom (signed short int @var{initial_value})
-Allocate an integer consisting of a @code{MINT} object and dynamic limb space.
-Initialize the integer to @var{initial_value}. Return a pointer to the
-@code{MINT} object.
-@end deftypefun
-
-@deftypefun {MINT *} xtom (char *@var{initial_value})
-Allocate an integer consisting of a @code{MINT} object and dynamic limb space.
-Initialize the integer from @var{initial_value}, a hexadecimal,
-null-terminated C string. Return a pointer to the @code{MINT} object.
-@end deftypefun
-
-@deftypefun void move (MINT *@var{src}, MINT *@var{dest})
-Set @var{dest} to @var{src} by copying. Both variables must be previously
-initialized.
-@end deftypefun
-
-@deftypefun void madd (MINT *@var{src_1}, MINT *@var{src_2}, MINT *@var{destination})
-Add @var{src_1} and @var{src_2} and put the sum in @var{destination}.
-@end deftypefun
-
-@deftypefun void msub (MINT *@var{src_1}, MINT *@var{src_2}, MINT *@var{destination})
-Subtract @var{src_2} from @var{src_1} and put the difference in
-@var{destination}.
-@end deftypefun
-
-@deftypefun void mult (MINT *@var{src_1}, MINT *@var{src_2}, MINT *@var{destination})
-Multiply @var{src_1} and @var{src_2} and put the product in @var{destination}.
-@end deftypefun
-
-@deftypefun void mdiv (MINT *@var{dividend}, MINT *@var{divisor}, MINT *@var{quotient}, MINT *@var{remainder})
-@deftypefunx void sdiv (MINT *@var{dividend}, signed short int @var{divisor}, MINT *@var{quotient}, signed short int *@var{remainder})
-Set @var{quotient} to @var{dividend}/@var{divisor}, and @var{remainder} to
-@var{dividend} mod @var{divisor}. The quotient is rounded towards zero; the
-remainder has the same sign as the dividend unless it is zero.
-
-Some implementations of these functions work differently---or not at all---for
-negative arguments.
-@end deftypefun
-
-@deftypefun void msqrt (MINT *@var{op}, MINT *@var{root}, MINT *@var{remainder})
-Set @var{root} to @m{\lfloor\sqrt{@var{op}}\rfloor, the truncated integer part
-of the square root of @var{op}}, like @code{mpz_sqrt}. Set @var{remainder} to
-@m{(@var{op} - @var{root}^2), @var{op}@minus{}@var{root}*@var{root}}, i.e.
-zero if @var{op} is a perfect square.
-
-If @var{root} and @var{remainder} are the same variable, the results are
-undefined.
-@end deftypefun
-
-@deftypefun void pow (MINT *@var{base}, MINT *@var{exp}, MINT *@var{mod}, MINT *@var{dest})
-Set @var{dest} to (@var{base} raised to @var{exp}) modulo @var{mod}.
-
-Note that the name @code{pow} clashes with @code{pow} from the standard C math
-library (@pxref{Exponents and Logarithms,, Exponentiation and Logarithms,
-libc, The GNU C Library Reference Manual}). An application will only be able
-to use one or the other.
-@end deftypefun
-
-@deftypefun void rpow (MINT *@var{base}, signed short int @var{exp}, MINT *@var{dest})
-Set @var{dest} to @var{base} raised to @var{exp}.
-@end deftypefun
-
-@deftypefun void gcd (MINT *@var{op1}, MINT *@var{op2}, MINT *@var{res})
-Set @var{res} to the greatest common divisor of @var{op1} and @var{op2}.
-@end deftypefun
-
-@deftypefun int mcmp (MINT *@var{op1}, MINT *@var{op2})
-Compare @var{op1} and @var{op2}. Return a positive value if @var{op1} >
-@var{op2}, zero if @var{op1} = @var{op2}, and a negative value if @var{op1} <
-@var{op2}.
-@end deftypefun
-
-@deftypefun void min (MINT *@var{dest})
-Input a decimal string from @code{stdin}, and put the read integer in
-@var{dest}. SPC and TAB are allowed in the number string, and are ignored.
-@end deftypefun
-
-@deftypefun void mout (MINT *@var{src})
-Output @var{src} to @code{stdout}, as a decimal string. Also output a newline.
-@end deftypefun
-
-@deftypefun {char *} mtox (MINT *@var{op})
-Convert @var{op} to a hexadecimal string, and return a pointer to the string.
-The returned string is allocated using the default memory allocation function,
-@code{malloc} by default. It will be @code{strlen(str)+1} bytes, that being
-exactly enough for the string and null-terminator.
-@end deftypefun
-
-@deftypefun void mfree (MINT *@var{op})
-De-allocate, the space used by @var{op}. @strong{This function should only be
-passed a value returned by @code{itom} or @code{xtom}.}
-@end deftypefun
-
-
-@node Custom Allocation, Language Bindings, BSD Compatible Functions, Top
+@node Custom Allocation, Language Bindings, C++ Class Interface, Top
@comment node-name, next, previous, up
@chapter Custom Allocation
@cindex Custom allocation
Alternate functions can be specified, to allocate memory in a different way or
to have a different error action on running out of memory.
-This feature is available in the Berkeley compatibility library (@pxref{BSD
-Compatible Functions}) as well as the main GMP library.
-
@deftypefun void mp_set_memory_functions (@* void *(*@var{alloc_func_ptr}) (size_t), @* void *(*@var{realloc_func_ptr}) (void *, size_t, size_t), @* void (*@var{free_func_ptr}) (void *, size_t))
Replace the current allocation functions from the arguments. If an argument
is @code{NULL}, the corresponding default function is used.
divisions saved. When @math{d} is a single limb some simplifications arise,
providing good speedups on a number of processors.
-@code{mpn_divexact_by3}, @code{mpn_modexact_1_odd} and the @code{mpn_redc_X}
-functions differ subtly in how they return @math{r}, leading to some negations
-in the above formula, but all are essentially the same.
+The functions @code{mpn_divexact_by3}, @code{mpn_modexact_1_odd} and the
+internal @code{mpn_redc_X} functions differ subtly in how they return @math{r},
+leading to some negations in the above formula, but all are essentially the
+same.
@cindex Divisibility algorithm
@cindex Congruence algorithm
@subsection Jacobi Symbol
@cindex Jacobi symbol algorithm
+[This section is obsolete. The current Jacobi code actually uses a very
+efficient algorithm.]
+
@code{mpz_jacobi} and @code{mpz_kronecker} are currently implemented with a
simple binary algorithm similar to that described for the GCDs (@pxref{Binary
GCD}). They're not very fast when both inputs are large. Lehmer's multi-step
made to minimize the average number of multiplications that must supplement
the squaring.
-The modular multiplies and squares use either a simple division or the REDC
+The modular multiplies and squarings use either a simple division or the REDC
method by Montgomery (@pxref{References}). REDC is a little faster,
essentially saving N single limb divisions in a fashion similar to an exact
remainder (@pxref{Exact Remainder}).
@subsection Factorial
@cindex Factorial algorithm
-Factorials are calculated by a combination of removal of twos, powering, and
-binary splitting. The procedure can be best illustrated with an example,
+Factorials are calculated by a combination of two algorithms. An idea is
+shared among them: to compute the odd part of the factorial; a final step
+takes account of the power of @math{2} term, by shifting.
+
+For small @math{n}, the odd factor of @math{n!} is computed with the simple
+observation that it is equal to the product of all positive odd numbers
+not larger than @math{n} times the odd factor of @m{\lfloor n/2\rfloor!, [n/2]!},
+where @m{\lfloor x\rfloor, [x]} is the integer part of @math{x}, and so on
+recursively. The procedure can be best illustrated with an example,
@quotation
-@math{23! = 1.2.3.4.5.6.7.8.9.10.11.12.13.14.15.16.17.18.19.20.21.22.23}
+@math{23! = (23.21.19.17.15.13.11.9.7.5.3)(11.9.7.5.3)(5.3)2^{19}}
@end quotation
-@noindent
-has factors of two removed,
+The current code collects all the factors in a single list, with a loop and no
+recursion, and computes the product, with no special care for repeated chunks.
-@quotation
-@math{23! = 2^{19}.1.1.3.1.5.3.7.1.9.5.11.3.13.7.15.1.17.9.19.5.21.11.23}
-@end quotation
+When @math{n} is larger, the computation passes through prime sieving. A helper
+function is used, as suggested by Peter Luschny:
+@tex
+$$\mathop{\rm msf}(n) = {n!\over\lfloor n/2\rfloor!^2\cdot2^k} = \prod_{p=3}^{n}
+p^{\mathop{\rm L}(p,n)} $$
+@end tex
+@ifnottex
-@noindent
-and the resulting terms collected up according to their multiplicity,
+@example
+                            n
+                          -----
+               n!          | |   L(p,n)
+msf(n) = -------------- =  | |  p
+          [n/2]!^2.2^k     p=3
+@end example
+@end ifnottex
-@quotation
-@math{23! = 2^{19}.(3.5)^3.(7.9.11)^2.(13.15.17.19.21.23)}
-@end quotation
+Here @math{p} ranges over the odd prime numbers. The exponent @math{k} is chosen to
+obtain an odd integer number: @math{k} is the number of 1 bits in the binary
+representation of @m{\lfloor n/2\rfloor, [n/2]}. The function L@math{(p,n)}
+can be defined as zero when @math{p} is composite, and, for any prime
+@math{p}, it is computed with:
+@tex
+$$\mathop{\rm L}(p,n) = \sum_{i>0}\left\lfloor{n\over p^i}\right\rfloor\bmod2
+\leq\log_p(n)$$
+@end tex
+@ifnottex
-Each sequence such as @math{13.15.17.19.21.23} is evaluated by splitting into
-every second term, as for instance @math{(13.17.21).(15.19.23)}, and the same
-recursively on each half. This is implemented iteratively using some bit
-twiddling.
+@example
+         ---
+          \    n
+L(p,n) =  /  [---] mod 2 <= log (n) .
+         ---  p^i              p
+         i>0
+@end example
+@end ifnottex
+
+With this helper function, we are able to compute the odd part of @math{n!}
+using the recursion implied by @m{n!=\lfloor n/2\rfloor!^2\cdot\mathop{\rm
+msf}(n)\cdot2^k , n!=[n/2]!^2*msf(n)*2^k}. The recursion stops using the
+small-@math{n} algorithm on some @m{\lfloor n/2^i\rfloor, [n/2^i]}.
+
+Both the above algorithms use binary splitting to compute the product of many
+small factors. At first as many products as possible are accumulated in a
+single register, generating a list of factors that fit in a machine word. This
+list is then split into halves, and the product is computed recursively.
Such splitting is more efficient than repeated N@cross{}1 multiplies since it
forms big multiplies, allowing Karatsuba and higher algorithms to be used.
And even below the Karatsuba threshold a big block of work can be more
efficient for the basecase algorithm.
-Splitting into subsequences of every second term keeps the resulting products
-more nearly equal in size than would the simpler approach of say taking the
-first half and second half of the sequence. Nearly equal products are more
-efficient for the current multiply implementation.
-
@node Binomial Coefficients Algorithm, Fibonacci Numbers Algorithm, Factorial Algorithm, Other Algorithms
@subsection Binomial Coefficients
@cindex Linear congruential algorithm
Linear congruential generators are described in many text books, for instance
Knuth volume 2 (@pxref{References}). With a modulus @math{M} and parameters
-@math{A} and @math{C}, a integer state @math{S} is iterated by the formula
+@math{A} and @math{C}, an integer state @math{S} is iterated by the formula
@math{S @leftarrow{} A@GMPmultiply{}S+C @bmod{} M}. At each step the new
state is a linear function of the previous, mod @math{M}, hence the name of
the generator.
Pedro Gimeno implemented the Mersenne Twister and made other random number
improvements.
-Niels M@"oller wrote the sub-quadratic GCD and extended GCD code, the
+Niels M@"oller wrote the sub-quadratic GCD, extended GCD and Jacobi code, the
quadratic Hensel division code, and (with Torbj@"orn) the new divide and
conquer division code for GMP 4.3. Niels also helped implement the new Toom
multiply code for GMP 4.3 and implemented helper functions to simplify Toom
-evaluations for GMP 5.0. He wrote the original version of mpn_mulmod_bnm1.
+evaluations for GMP 5.0. He wrote the original version of mpn_mulmod_bnm1, and
+he is the main author of the mini-gmp package used for GMP bootstrapping.
Alberto Zanoni and Marco Bodrato suggested the unbalanced multiply strategy,
and found the optimal strategies for evaluation and interpolation in Toom
Marco Bodrato helped implement the new Toom multiply code for GMP 4.3 and
implemented most of the new Toom multiply and squaring code for 5.0.
He is the main author of the current mpn_mulmod_bnm1 and mpn_mullo_n. Marco
-also wrote the functions mpn_invert and mpn_invertappr.
+also wrote the functions mpn_invert and mpn_invertappr. He is the author of
+the current combinatorial functions: binomial, factorial, multifactorial,
+primorial.
David Harvey suggested the internal function @code{mpn_bdiv_dbm1}, implementing
division relevant to Toom multiplication. He also worked on fast assembly
-sequences, in particular on a fast AMD64 @code{mpn_mul_basecase}.
+sequences, in particular on a fast AMD64 @code{mpn_mul_basecase}. He wrote
+the internal middle product functions @code{mpn_mulmid_basecase},
+@code{mpn_toom42_mulmid}, @code{mpn_mulmid_n} and related helper routines.
Martin Boij wrote @code{mpn_perfect_power_p}.
+Marc Glisse improved @file{gmpxx.h}: use fewer temporaries (faster),
+specializations of @code{numeric_limits} and @code{common_type}, C++11
+features (move constructors, explicit bool conversion, UDL), make the
+conversion from @code{mpq_class} to @code{mpz_class} explicit, optimize
+operations where one argument is a small compile-time constant, replace
+some heap allocations by stack allocations. He also fixed the eofbit
+handling of C++ streams, and removed one division from @file{mpq/aors.c}.
+
(This list is chronological, not ordered after significance. If you have
contributed to GMP but are not listed above, please tell
@email{gmp-devel@@gmplib.org} about the omission!)
<font size=-1>
<pre>
-Copyright 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2008, 2009 Free Software
-Foundation, Inc.
+Copyright 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2008, 2009, 2010, 2011
+Free Software Foundation, Inc.
This file is part of the GNU MP Library.
<hr>
<!-- NB. timestamp updated automatically by emacs -->
- This file current as of 15 Nov 2009. An up-to-date version is available at
+ This file current as of 5 Dec 2011. An up-to-date version is available at
<a href="http://gmplib.org/projects.html">http://gmplib.org/projects.html</a>.
Please send comments about this page to gmp-devel<font>@</font>gmplib.org.
<ul>
<li> <strong>Faster multiplication</strong>
- <p> The current multiplication code uses Karatsuba, 3-way and 4-way Toom, and
- Fermat FFT. Several new developments are desirable:
-
<ol>
- <li> Write more toom multiply functions for unbalanced operands. We now have
- toom22, toom32, toom42, toom62, toom33, toom53, and toom44. Most
- desirable is toom43, which will require a new toom_interpolate_6pts
- function. Writing toom52 will then be straightforward. See also
- <a href="http://bodrato.it/software/toom.html">Marco Bodrato's
- site</a>
-
- <li> Perhaps consider N-way Toom, N > 4. See Knuth's Seminumerical
- Algorithms for details on the method, as well as Bodrato's site. Code
- implementing it exists. This is asymptotically inferior to FFTs, but
- is finer grained.
-
- <li> The mpn_mul call now (from GMP 4.3) uses toom22, toom32, and toom42
- for unbalanced operations. We don't use any of the other new toom
- functions currently. Write new clever code for choosing the best toom
- function from an m-limb and an n-limb operand.
+ <li> Work on the algorithm selection code for unbalanced multiplication.
<li> Implement an FFT variant computing the coefficients mod m different
limb size primes of the form l*2^k+1. i.e., compute m separate FFTs.
<p> [We now have two implementations of this algorithm, one by Tommy
Färnqvist and one by Niels Möller.]
- <li> Add support for short products, either a given number of low limbs, a
- given number of high limbs, or perhaps the middle limbs of the result.
- High short product can be used by <code>mpf_mul</code>, by
- left-to-right Newton approximations, and for quotient approximation.
- Low half short product can be of use in sub-quadratic REDC and for
- right-to-left Newton approximations. On small sizes a short product
- will be faster simply through fewer cross-products, similar to the way
- squaring is faster. But work by Thom Mulders shows that for Karatsuba
- and higher order algorithms the advantage is progressively lost, so
- for large sizes shows products turn out to be no faster.
+ <li> Work on short products. Our mullo and mulmid are probably OK, but we
+ lack mulhi.
</ol>
<p> Please make sure your new routines are fast for these three situations:
<ol>
- <li> Operands that fit into the cache.
<li> Small operands of less than, say, 10 limbs.
+ <li> Medium size operands, that fit into the cache.
<li> Huge operands that does not fit into the cache.
</ol>
21-bit pieces if one allows the split operands to be negative!)
-<li> <strong>Math functions for the mpf layer</strong>
-
- <p> Implement the functions of math.h for the GMP mpf layer! Check the book
- "Pi and the AGM" by Borwein and Borwein for ideas how to do this. These
- functions are desirable: acos, acosh, asin, asinh, atan, atanh, atan2,
- cos, cosh, exp, log, log10, pow, sin, sinh, tan, tanh.
-
- <p> Note that the <a href="http://mpfr.org">mpfr</a> functions already
- provide these functions, and that we usually recommend new programs to use
- mpfr instead of mpf.
-
-
<li> <strong>Faster sqrt</strong>
<p> The current code uses divisions, which are reasonably fast, but it'd be
<li> <strong>Nth root</strong>
- <p> Improve mpn_rootrem. The current code is not too bad, but its average
- time complexity is a function of the input, while it is possible to
- make it a function of the output.
+ <p> Improve mpn_rootrem. The current code is not too bad, but its time
+ complexity is a function of the input, while it is possible to make
+ the <i>average</i> complexity a function of the output.
+
+
+<li> <strong>Fat binaries</strong>
+
+ <p> Add more functions to the set of fat functions.
+
+ <p> The speed of multiplication is today highly dependent on combination
+ functions like <code>addlsh1_n</code>. A fat binary will never use any such
+ functions, since they are classified as optional. Ideally, we should use
+ them, by making the current compile-time selections of optional functions
+ become run-time selections for fat binaries.
+
+ <p> If we make fat binaries work really well, we should move away from the
+ current configure scheme (at least by default) and instead include all code
+ always.
<li> <strong>Exceptions</strong>
<code>gmp_restrict</code>.
-<li> <strong>Nx1 Division</strong>
-
- <p> The limb-by-limb dependencies in the existing Nx1 division (and
- remainder) code means that chips with multiple execution units or
- pipelined multipliers are not fully utilized.
-
- <p> One possibility is to follow the current preinv method but taking two
- limbs at a time. That means a 2x2->4 and a 2x1->2 multiply for
- each two limbs processed, and because the 2x2 and 2x1 can each be done in
- parallel the latency will be not much more than 2 multiplies for two
- limbs, whereas the single limb method has a 2 multiply latency for just
- one limb. A version of <code>mpn_divrem_1</code> doing this has been
- written in C, but not yet tested on likely chips. Clearly this scheme
- would extend to 3x3->9 and 3x1->3 etc, though with diminishing
- returns.
-
- <p> For <code>mpn_mod_1</code>, Peter L. Montgomery proposes the following
- scheme. For a limb R=2^<code>bits_per_mp_limb</code>, pre-calculate
- values R mod N, R^2 mod N, R^3 mod N, R^4 mod N. Then take dividend
- limbs and multiply them by those values, thereby reducing them (moving
- them down) by the corresponding factor. The products can be added to
- produce an intermediate remainder of 2 or 3 limbs to be similarly
- included in the next step. The point is that such multiplies can be done
- in parallel, meaning as little as 1 multiply worth of latency for 4
- limbs. If the modulus N is less than R/4 (or is it R/5?) the summed
- products will fit in 2 limbs, otherwise 3 will be required, but with the
- high only being small. Clearly this extends to as many factors of R as a
- chip can efficiently apply.
-
- <p> The logical conclusion for powers R^i is a whole array "p[i] = R^i mod N"
- for i up to k, the size of the dividend. This could then be applied at
- multiplier throughput speed like an inner product. If the powers took
- roughly k divide steps to calculate then there'd be an advantage any time
- the same N was used three or more times. Suggested by Victor Shoup in
- connection with chinese-remainder style decompositions, but perhaps with
- other uses.
-
- <p> <code>mpn_modexact_1_odd</code> calculates an x in the range 0<=x<d
- satisfying a = q*d + x*b^n, where b=2^bits_per_limb. The factor b^n
- needed to get the true remainder r could be calculated by a powering
- algorithm, allowing <code>mpn_modexact_1_odd</code> to be pressed into
- service for an <code>mpn_mod_1</code>. <code>modexact_1</code> is
- simpler and on some chips can run noticeably faster than plain
- <code>mod_1</code>, on Athlon for instance 11 cycles/limb instead of 17.
- Such a difference could soon overcome the time to calculate b^n. The
- requirement for an odd divisor in <code>modexact</code> can be handled by
- some shifting on-the-fly, or perhaps by an extra partial-limb step at the
- end.
-
<li> <strong>Factorial</strong>
- <p> The removal of twos in the current code could be extended to factors of 3
- or 5. Taking this to its logical conclusion would be a complete
- decomposition into powers of primes. The power for a prime p is of
- course floor(n/p)+floor(n/p^2)+... Conrad Curry found this is quite fast
- (using simultaneous powering as per Handbook of Applied Cryptography
- algorithm 14.88).
-
- <p> A difficulty with using all primes is that quite large n can be
- calculated on a system with enough memory, larger than we'd probably want
- for a table of primes, so some sort of sieving would be wanted. Perhaps
- just taking out the factors of 3 and 5 would give most of the speedup
- that a prime decomposition can offer.
+ <p> Rewrite for simplicity and speed. Work is in progress.
<li> <strong>Binomial Coefficients</strong>
- <p> An obvious improvement to the current code would be to strip factors of 2
- from each multiplier and divisor and count them separately, to be applied
- with a bit shift at the end. Factors of 3 and perhaps 5 could even be
- handled similarly.
-
- <p> Conrad Curry reports a big speedup for binomial coefficients using a
- prime powering scheme, at least for k near n/2. Of course this is only
- practical for moderate size n since again it requires primes up to n.
-
- <p> When k is small the current (n-k+1)...n/1...k will be fastest. Some sort
- of rule would be needed for when to use this or when to use prime
- powering. Such a rule will be a function of both n and k. Some
- investigation is needed to see what sort of shape the crossover line will
- have, the usual parameter tuning can of course find machine dependent
- constants to fill in where necessary.
-
- <p> An easier possibility also reported by Conrad Curry is that it may be
- faster not to divide out the denominator (1...k) one-limb at a time, but
- do one big division at the end. Is this because a big divisor in
- <code>mpn_bdivmod</code> trades the latency of
- <code>mpn_divexact_1</code> for the throughput of
- <code>mpn_submul_1</code>? Overheads must hurt though.
-
- <p> Another reason a big divisor might help is that
- <code>mpn_divexact_1</code> won't be getting a full limb in
- <code>mpz_bin_uiui</code>. It's called when the n accumulator is full
- but the k may be far from full. Perhaps the two could be decoupled so k
- is applied when full. It'd be necessary to delay consideration of k
- terms until the corresponding n terms had been applied though, since
- otherwise the division won't be exact.
-
-
-<li> <strong>Perfect Power Testing</strong>
-
- <p> <code>mpz_perfect_power_p</code> could be improved in a number of ways,
- for instance p-adic arithmetic to find possible roots.
-
- <p> Non-powers can be quickly identified by checking for Nth power residues
- modulo small primes, like <code>mpn_perfect_square_p</code> does for
- squares. The residues to each power N for a given remainder could be
- grouped into a bit mask, the masks for the remainders to each divisor
- would then be "and"ed together to hopefully leave only a few candidate
- powers. Need to think about how wide to make such masks, ie. how many
- powers to examine in this way.
-
- <p> Any zero remainders found in residue testing reveal factors which can be
- divided out, with the multiplicity restricting the powers that need to be
- considered, as per the current code. Further prime dividing should be
- grouped into limbs like <code>PP</code>. Need to think about how much
- dividing to do like that, probably more for bigger inputs, less for
- smaller inputs.
-
- <p> <code>mpn_gcd_1</code> would probably be better than the current private
- GCD routine. The use it's put to isn't time-critical, and it might help
- ensure correctness to just use the main GCD routine.
-
- <p> [There is work-in-progress with a very fast function.]
+ <p> Rewrite for simplicity and speed. Work is in progress.
<li> <strong>Prime Testing</strong>
selecting public symbols (used now for libmp).
+<li> <strong>Math functions for the mpf layer</strong>
+
+ <p> Implement the functions of math.h for the GMP mpf layer! Check the book
+ "Pi and the AGM" by Borwein and Borwein for ideas how to do this. These
+ functions are desirable: acos, acosh, asin, asinh, atan, atanh, atan2,
+ cos, cosh, exp, log, log10, pow, sin, sinh, tan, tanh.
+
+ <p> Note that the <a href="http://mpfr.org">mpfr</a> functions already
+ provide these functions, and that we usually recommend new programs to use
+ mpfr instead of mpf.
</ul>
<hr>
-@set UPDATED 6 May 2012
-@set UPDATED-MONTH May 2012
-@set EDITION 5.0.5
-@set VERSION 5.0.5
+@set UPDATED 30 September 2013
+@set UPDATED-MONTH September 2013
+@set EDITION 5.1.3
+@set VERSION 5.1.3
<hr>
<!-- NB. timestamp updated automatically by emacs -->
- This file current as of 28 Dec 2009. An up-to-date version is available at
+ This file current as of 5 Dec 2011. An up-to-date version is available at
<a href="http://gmplib.org/tasks.html">http://gmplib.org/tasks.html</a>.
Please send comments about this page to gmp-devel<font>@</font>gmplib.org.
<code>_mpz_realloc</code> with a small (1 limb) size.
<li> One reuse case is missing from mpX/tests/reuse.c:
<code>mpz_XXX(a,a,a)</code>.
-<li> When printing <code>mpf_t</code> numbers with exponents >2^53 on
- machines with 64-bit <code>mp_exp_t</code>, the precision of
- <code>__mp_bases[base].chars_per_bit_exactly</code> is insufficient and
- <code>mpf_get_str</code> aborts. Detect and compensate. Alternately,
- think seriously about using some sort of fixed-point integer value.
- Avoiding unnecessary floating point is probably a good thing in general,
- and it might be faster on some CPUs.
<li> Make the string reading functions allow the `0x' prefix when the base is
explicitly 16. They currently only allow that prefix when the base is
unspecified (zero).
subsequent operations, especially if the value is otherwise only small.
If low bits of the low limb are zero, use <code>mpn_rshift</code> so as
to not increase the size.
-<li> <code>mpn_dc_sqrtrem</code>: Don't use <code>mpn_addmul_1</code> with
- multiplier==2, instead either <code>mpn_addlsh1_n</code> when available,
- or <code>mpn_lshift</code>+<code>mpn_add_n</code> if not.
<li> <code>mpn_dc_sqrtrem</code>, <code>mpn_sqrtrem2</code>: Don't use
<code>mpn_add_1</code> and <code>mpn_sub_1</code> for 1 limb operations,
instead <code>ADDC_LIMB</code> and <code>SUBC_LIMB</code>.
aliasing between <code>sp</code> and <code>rp</code>.
<li> <code>mpn_sqrtrem</code>: Some work can be saved in the last step when
the remainder is not required, as noted in Paul's paper.
-<li> <code>mpq_add</code>, <code>mpq_add</code>: The division "op1.den / gcd"
- is done twice, where of course only once is necessary. Reported by Larry
- Lambe.
<li> <code>mpq_add</code>, <code>mpq_sub</code>: The gcd fits a single limb
- with high probability and in this case <code>modlimb_invert</code> could
+ with high probability and in this case <code>binvert_limb</code> could
be used to calculate the inverse just once for the two exact divisions
"op1.den / gcd" and "op2.den / gcd", rather than letting
- <code>mpn_divexact_1</code> do it each time. This would require a new
- <code>mpn_preinv_divexact_1</code> interface. Not sure if it'd be worth
- the trouble.
-<li> <code>mpq_add</code>, <code>mpq_sub</code>: The use of
- <code>mpz_mul(x,y,x)</code> causes temp allocation or copying in
- <code>mpz_mul</code> which can probably be avoided. A rewrite using
- <code>mpn</code> might be best.
+ <code>mpn_bdiv_q_1</code> do it each time. This would require calling
+ <code>mpn_pi1_bdiv_q_1</code>.
<li> <code>mpn_gcdext</code>: Don't test <code>count_leading_zeros</code> for
zero, instead check the high bit of the operand and avoid invoking
<code>count_leading_zeros</code>. This is an optimization on all
since there's no apparent way to get <code>SHRT_MAX</code> with an
expression (since <code>short</code> and <code>unsigned short</code> can
be different sizes).
-<li> <code>mpz_powm</code> and <code>mpz_powm_ui</code> aren't very
- fast on one or two limb moduli, due to a lot of function call
- overheads. These could perhaps be handled as special cases.
-<li> <code>mpz_powm</code> and <code>mpz_powm_ui</code> want better
- algorithm selection, and the latter should use REDC. Both could
- change to use an <code>mpn_powm</code> and <code>mpn_redc</code>.
+<li> <code>mpz_powm</code> and <code>mpz_powm_ui</code> aren't very fast on one
+ or two limb moduli, due to a lot of function call overheads. These could
+ perhaps be handled as special cases.
+<li> Make sure <code>mpz_powm_ui</code> is never slower than the corresponding
+ computation using <code>mpz_powm</code>.
<li> <code>mpz_powm</code> REDC should do multiplications by <code>g[]</code>
using the division method when they're small, since the REDC form of a
small multiplier is normally a full size product. Probably would need a
new tuned parameter to say what size multiplier is "small", as a function
of the size of the modulus.
-<li> <code>mpz_powm</code> REDC should handle even moduli if possible. Maybe
- this would mean for m=n*2^k doing mod n using REDC and an auxiliary
- calculation mod 2^k, then putting them together at the end.
-<li> <code>mpn_gcd</code> might be able to be sped up on small to
- moderate sizes by improving <code>find_a</code>, possibly just by
- providing an alternate implementation for CPUs with slowish
+<li> <code>mpn_gcd</code> might be able to be sped up on small to moderate
+ sizes by improving <code>find_a</code>, possibly just by providing an
+ alternate implementation for CPUs with slowish
<code>count_leading_zeros</code>.
-<li> Toom3 could use a low to high cache localized evaluate and interpolate.
- The necessary <code>mpn_divexact_by3c</code> exists.
<li> <code>mpf_set_str</code> produces low zero limbs when a string has a
fraction but is exactly representable, eg. 0.5 in decimal. These could be
stripped to save work in later operations.
<li> UltraSPARC/32: <code>mpn_divexact_by3c</code> can work 64-bits at a time
using <code>mulx</code>, in assembler. This would be the same as for
sparc64.
-<li> UltraSPARC: <code>modlimb_invert</code> might save a few cycles from
+<li> UltraSPARC: <code>binvert_limb</code> might save a few cycles from
masking down to just the useful bits at each point in the calculation,
since <code>mulx</code> speed depends on the highest bit set. Either
explicit masks or small types like <code>short</code> and
-@set UPDATED 6 May 2012
-@set UPDATED-MONTH May 2012
-@set EDITION 5.0.5
-@set VERSION 5.0.5
+@set UPDATED 30 September 2013
+@set UPDATED-MONTH September 2013
+@set EDITION 5.1.3
+@set VERSION 5.1.3
+++ /dev/null
-/* dumbmp mini GMP compatible library.
-
-Copyright 2001, 2002, 2004 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
-
-
-/* The code here implements a subset (a very limited subset) of the main GMP
- functions. It's designed for use in a few build-time calculations and
- will be slow, but highly portable.
-
- None of the normal GMP configure things are used, nor any of the normal
- gmp.h or gmp-impl.h. To use this file in a program just #include
- "dumbmp.c".
-
- ANSI function definitions can be used here, since ansi2knr is run if
- necessary. But other ANSI-isms like "const" should be avoided.
-
- mp_limb_t here is an unsigned long, since that's a sensible type
- everywhere we know of, with 8*sizeof(unsigned long) giving the number of
- bits in the type (that not being true for instance with int or short on
- Cray vector systems.)
-
- Only the low half of each mp_limb_t is used, so as to make carry handling
- and limb multiplies easy. GMP_LIMB_BITS is the number of bits used. */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-
-typedef unsigned long mp_limb_t;
-
-typedef struct {
- int _mp_alloc;
- int _mp_size;
- mp_limb_t *_mp_d;
-} mpz_t[1];
-
-#define GMP_LIMB_BITS (sizeof (mp_limb_t) * 8 / 2)
-
-#define ABS(x) ((x) >= 0 ? (x) : -(x))
-#define MIN(l,o) ((l) < (o) ? (l) : (o))
-#define MAX(h,i) ((h) > (i) ? (h) : (i))
-
-#define ALLOC(x) ((x)->_mp_alloc)
-#define PTR(x) ((x)->_mp_d)
-#define SIZ(x) ((x)->_mp_size)
-#define ABSIZ(x) ABS (SIZ (x))
-#define LOMASK ((1L << GMP_LIMB_BITS) - 1)
-#define LO(x) ((x) & LOMASK)
-#define HI(x) ((x) >> GMP_LIMB_BITS)
-
-#define ASSERT(cond) \
- do { \
- if (! (cond)) \
- { \
- fprintf (stderr, "Assertion failure\n"); \
- abort (); \
- } \
- } while (0)
-
-
-char *
-xmalloc (int n)
-{
- char *p;
- p = malloc (n);
- if (p == NULL)
- {
- fprintf (stderr, "Out of memory (alloc %d bytes)\n", n);
- abort ();
- }
- return p;
-}
-
-mp_limb_t *
-xmalloc_limbs (int n)
-{
- return (mp_limb_t *) xmalloc (n * sizeof (mp_limb_t));
-}
-
-void
-mem_copyi (char *dst, char *src, int size)
-{
- int i;
- for (i = 0; i < size; i++)
- dst[i] = src[i];
-}
-
-static int
-isprime (unsigned long int t)
-{
- unsigned long int q, r, d;
-
- if (t < 32)
- return (0xa08a28acUL >> t) & 1;
- if ((t & 1) == 0)
- return 0;
-
- if (t % 3 == 0)
- return 0;
- if (t % 5 == 0)
- return 0;
- if (t % 7 == 0)
- return 0;
-
- for (d = 11;;)
- {
- q = t / d;
- r = t - q * d;
- if (q < d)
- return 1;
- if (r == 0)
- break;
- d += 2;
- q = t / d;
- r = t - q * d;
- if (q < d)
- return 1;
- if (r == 0)
- break;
- d += 4;
- }
- return 0;
-}
-
-int
-log2_ceil (int n)
-{
- int e;
- ASSERT (n >= 1);
- for (e = 0; ; e++)
- if ((1 << e) >= n)
- break;
- return e;
-}
-
-void
-mpz_realloc (mpz_t r, int n)
-{
- if (n <= ALLOC(r))
- return;
-
- ALLOC(r) = n;
- PTR(r) = (mp_limb_t *) realloc (PTR(r), n * sizeof (mp_limb_t));
- if (PTR(r) == NULL)
- {
- fprintf (stderr, "Out of memory (realloc to %d)\n", n);
- abort ();
- }
-}
-
-void
-mpn_normalize (mp_limb_t *rp, int *rnp)
-{
- int rn = *rnp;
- while (rn > 0 && rp[rn-1] == 0)
- rn--;
- *rnp = rn;
-}
-
-void
-mpn_copyi (mp_limb_t *dst, mp_limb_t *src, int n)
-{
- int i;
- for (i = 0; i < n; i++)
- dst[i] = src[i];
-}
-
-void
-mpn_zero (mp_limb_t *rp, int rn)
-{
- int i;
- for (i = 0; i < rn; i++)
- rp[i] = 0;
-}
-
-void
-mpz_init (mpz_t r)
-{
- ALLOC(r) = 1;
- PTR(r) = xmalloc_limbs (ALLOC(r));
- PTR(r)[0] = 0;
- SIZ(r) = 0;
-}
-
-void
-mpz_clear (mpz_t r)
-{
- free (PTR (r));
- ALLOC(r) = -1;
- SIZ (r) = 0xbadcafeL;
- PTR (r) = (mp_limb_t *) 0xdeadbeefL;
-}
-
-int
-mpz_sgn (mpz_t a)
-{
- return (SIZ(a) > 0 ? 1 : SIZ(a) == 0 ? 0 : -1);
-}
-
-int
-mpz_odd_p (mpz_t a)
-{
- if (SIZ(a) == 0)
- return 0;
- else
- return (PTR(a)[0] & 1) != 0;
-}
-
-int
-mpz_even_p (mpz_t a)
-{
- if (SIZ(a) == 0)
- return 1;
- else
- return (PTR(a)[0] & 1) == 0;
-}
-
-size_t
-mpz_sizeinbase (mpz_t a, int base)
-{
- int an = ABSIZ (a);
- mp_limb_t *ap = PTR (a);
- int cnt;
- mp_limb_t hi;
-
- if (base != 2)
- abort ();
-
- if (an == 0)
- return 1;
-
- cnt = 0;
- for (hi = ap[an - 1]; hi != 0; hi >>= 1)
- cnt += 1;
- return (an - 1) * GMP_LIMB_BITS + cnt;
-}
-
-void
-mpz_set (mpz_t r, mpz_t a)
-{
- mpz_realloc (r, ABSIZ (a));
- SIZ(r) = SIZ(a);
- mpn_copyi (PTR(r), PTR(a), ABSIZ (a));
-}
-
-void
-mpz_init_set (mpz_t r, mpz_t a)
-{
- mpz_init (r);
- mpz_set (r, a);
-}
-
-void
-mpz_set_ui (mpz_t r, unsigned long ui)
-{
- int rn;
- mpz_realloc (r, 2);
- PTR(r)[0] = LO(ui);
- PTR(r)[1] = HI(ui);
- rn = 2;
- mpn_normalize (PTR(r), &rn);
- SIZ(r) = rn;
-}
-
-void
-mpz_init_set_ui (mpz_t r, unsigned long ui)
-{
- mpz_init (r);
- mpz_set_ui (r, ui);
-}
-
-void
-mpz_setbit (mpz_t r, unsigned long bit)
-{
- int limb, rn, extend;
- mp_limb_t *rp;
-
- rn = SIZ(r);
- if (rn < 0)
- abort (); /* only r>=0 */
-
- limb = bit / GMP_LIMB_BITS;
- bit %= GMP_LIMB_BITS;
-
- mpz_realloc (r, limb+1);
- rp = PTR(r);
- extend = (limb+1) - rn;
- if (extend > 0)
- mpn_zero (rp + rn, extend);
-
- rp[limb] |= (mp_limb_t) 1 << bit;
- SIZ(r) = MAX (rn, limb+1);
-}
-
-int
-mpz_tstbit (mpz_t r, unsigned long bit)
-{
- int limb;
-
- if (SIZ(r) < 0)
- abort (); /* only r>=0 */
-
- limb = bit / GMP_LIMB_BITS;
- if (SIZ(r) <= limb)
- return 0;
-
- bit %= GMP_LIMB_BITS;
- return (PTR(r)[limb] >> bit) & 1;
-}
-
-int
-popc_limb (mp_limb_t a)
-{
- int ret = 0;
- while (a != 0)
- {
- ret += (a & 1);
- a >>= 1;
- }
- return ret;
-}
-
-unsigned long
-mpz_popcount (mpz_t a)
-{
- unsigned long ret;
- int i;
-
- if (SIZ(a) < 0)
- abort ();
-
- ret = 0;
- for (i = 0; i < SIZ(a); i++)
- ret += popc_limb (PTR(a)[i]);
- return ret;
-}
-
-void
-mpz_add (mpz_t r, mpz_t a, mpz_t b)
-{
- int an = ABSIZ (a), bn = ABSIZ (b), rn;
- mp_limb_t *rp, *ap, *bp;
- int i;
- mp_limb_t t, cy;
-
- if ((SIZ (a) ^ SIZ (b)) < 0)
- abort (); /* really subtraction */
- if (SIZ (a) < 0)
- abort ();
-
- mpz_realloc (r, MAX (an, bn) + 1);
- ap = PTR (a); bp = PTR (b); rp = PTR (r);
- if (an < bn)
- {
- mp_limb_t *tp; int tn;
- tn = an; an = bn; bn = tn;
- tp = ap; ap = bp; bp = tp;
- }
-
- cy = 0;
- for (i = 0; i < bn; i++)
- {
- t = ap[i] + bp[i] + cy;
- rp[i] = LO (t);
- cy = HI (t);
- }
- for (i = bn; i < an; i++)
- {
- t = ap[i] + cy;
- rp[i] = LO (t);
- cy = HI (t);
- }
- rp[an] = cy;
- rn = an + 1;
-
- mpn_normalize (rp, &rn);
- SIZ (r) = rn;
-}
-
-void
-mpz_add_ui (mpz_t r, mpz_t a, unsigned long int ui)
-{
- mpz_t b;
-
- mpz_init (b);
- mpz_set_ui (b, ui);
- mpz_add (r, a, b);
- mpz_clear (b);
-}
-
-void
-mpz_sub (mpz_t r, mpz_t a, mpz_t b)
-{
- int an = ABSIZ (a), bn = ABSIZ (b), rn;
- mp_limb_t *rp, *ap, *bp;
- int i;
- mp_limb_t t, cy;
-
- if ((SIZ (a) ^ SIZ (b)) < 0)
- abort (); /* really addition */
- if (SIZ (a) < 0)
- abort ();
-
- mpz_realloc (r, MAX (an, bn) + 1);
- ap = PTR (a); bp = PTR (b); rp = PTR (r);
- if (an < bn)
- {
- mp_limb_t *tp; int tn;
- tn = an; an = bn; bn = tn;
- tp = ap; ap = bp; bp = tp;
- }
-
- cy = 0;
- for (i = 0; i < bn; i++)
- {
- t = ap[i] - bp[i] - cy;
- rp[i] = LO (t);
- cy = LO (-HI (t));
- }
- for (i = bn; i < an; i++)
- {
- t = ap[i] - cy;
- rp[i] = LO (t);
- cy = LO (-HI (t));
- }
- rp[an] = cy;
- rn = an + 1;
-
- if (cy != 0)
- {
- cy = 0;
- for (i = 0; i < rn; i++)
- {
- t = -rp[i] - cy;
- rp[i] = LO (t);
- cy = LO (-HI (t));
- }
- SIZ (r) = -rn;
- return;
- }
-
- mpn_normalize (rp, &rn);
- SIZ (r) = rn;
-}
-
-void
-mpz_sub_ui (mpz_t r, mpz_t a, unsigned long int ui)
-{
- mpz_t b;
-
- mpz_init (b);
- mpz_set_ui (b, ui);
- mpz_sub (r, a, b);
- mpz_clear (b);
-}
-
-void
-mpz_mul (mpz_t r, mpz_t a, mpz_t b)
-{
- int an = ABSIZ (a), bn = ABSIZ (b), rn;
- mp_limb_t *scratch, *tmp, *ap = PTR (a), *bp = PTR (b);
- int ai, bi;
- mp_limb_t t, cy;
-
- scratch = xmalloc_limbs (an + bn);
- tmp = scratch;
-
- for (bi = 0; bi < bn; bi++)
- tmp[bi] = 0;
-
- for (ai = 0; ai < an; ai++)
- {
- tmp = scratch + ai;
- cy = 0;
- for (bi = 0; bi < bn; bi++)
- {
- t = ap[ai] * bp[bi] + tmp[bi] + cy;
- tmp[bi] = LO (t);
- cy = HI (t);
- }
- tmp[bn] = cy;
- }
-
- rn = an + bn;
- mpn_normalize (scratch, &rn);
- free (PTR (r));
- PTR (r) = scratch;
- SIZ (r) = (SIZ (a) ^ SIZ (b)) >= 0 ? rn : -rn;
- ALLOC (r) = an + bn;
-}
-
-void
-mpz_mul_ui (mpz_t r, mpz_t a, unsigned long int ui)
-{
- mpz_t b;
-
- mpz_init (b);
- mpz_set_ui (b, ui);
- mpz_mul (r, a, b);
- mpz_clear (b);
-}
-
-void
-mpz_mul_2exp (mpz_t r, mpz_t a, unsigned long int bcnt)
-{
- mpz_set (r, a);
- while (bcnt)
- {
- mpz_add (r, r, r);
- bcnt -= 1;
- }
-}
-
-void
-mpz_ui_pow_ui (mpz_t r, unsigned long b, unsigned long e)
-{
- unsigned long i;
- mpz_t bz;
-
- mpz_init (bz);
- mpz_set_ui (bz, b);
-
- mpz_set_ui (r, 1L);
- for (i = 0; i < e; i++)
- mpz_mul (r, r, bz);
-
- mpz_clear (bz);
-}
-
-void
-mpz_tdiv_q_2exp (mpz_t r, mpz_t a, unsigned long int bcnt)
-{
- int as, rn;
- int cnt, tnc;
- int lcnt;
- mp_limb_t high_limb, low_limb;
- int i;
-
- as = SIZ (a);
- lcnt = bcnt / GMP_LIMB_BITS;
- rn = ABS (as) - lcnt;
- if (rn <= 0)
- SIZ (r) = 0;
- else
- {
- mp_limb_t *rp, *ap;
-
- mpz_realloc (r, rn);
-
- rp = PTR (r);
- ap = PTR (a);
-
- cnt = bcnt % GMP_LIMB_BITS;
- if (cnt != 0)
- {
- ap += lcnt;
- tnc = GMP_LIMB_BITS - cnt;
- high_limb = *ap++;
- low_limb = high_limb >> cnt;
-
- for (i = rn - 1; i != 0; i--)
- {
- high_limb = *ap++;
- *rp++ = low_limb | LO (high_limb << tnc);
- low_limb = high_limb >> cnt;
- }
- *rp = low_limb;
- rn -= low_limb == 0;
- }
- else
- {
- ap += lcnt;
- mpn_copyi (rp, ap, rn);
- }
-
- SIZ (r) = as >= 0 ? rn : -rn;
- }
-}
-
-void
-mpz_tdiv_r_2exp (mpz_t r, mpz_t a, unsigned long int bcnt)
-{
- int rn, bwhole;
-
- mpz_set (r, a);
- rn = ABSIZ(r);
-
- bwhole = bcnt / GMP_LIMB_BITS;
- bcnt %= GMP_LIMB_BITS;
- if (rn > bwhole)
- {
- rn = bwhole+1;
- PTR(r)[rn-1] &= ((mp_limb_t) 1 << bcnt) - 1;
- mpn_normalize (PTR(r), &rn);
- SIZ(r) = (SIZ(r) >= 0 ? rn : -rn);
- }
-}
-
-int
-mpz_cmp (mpz_t a, mpz_t b)
-{
- mp_limb_t *ap, *bp, al, bl;
- int as = SIZ (a), bs = SIZ (b);
- int i;
- int sign;
-
- if (as != bs)
- return as > bs ? 1 : -1;
-
- sign = as > 0 ? 1 : -1;
-
- ap = PTR (a);
- bp = PTR (b);
- for (i = ABS (as) - 1; i >= 0; i--)
- {
- al = ap[i];
- bl = bp[i];
- if (al != bl)
- return al > bl ? sign : -sign;
- }
- return 0;
-}
-
-int
-mpz_cmp_ui (mpz_t a, unsigned long b)
-{
- mpz_t bz;
- int ret;
- mpz_init_set_ui (bz, b);
- ret = mpz_cmp (a, bz);
- mpz_clear (bz);
- return ret;
-}
-
-void
-mpz_tdiv_qr (mpz_t q, mpz_t r, mpz_t a, mpz_t b)
-{
- mpz_t tmpr, tmpb;
- unsigned long cnt;
-
- ASSERT (mpz_sgn(a) >= 0);
- ASSERT (mpz_sgn(b) > 0);
-
- mpz_init_set (tmpr, a);
- mpz_init_set (tmpb, b);
- mpz_set_ui (q, 0L);
-
- if (mpz_cmp (tmpr, tmpb) > 0)
- {
- cnt = mpz_sizeinbase (tmpr, 2) - mpz_sizeinbase (tmpb, 2) + 1;
- mpz_mul_2exp (tmpb, tmpb, cnt);
-
- for ( ; cnt > 0; cnt--)
- {
- mpz_mul_2exp (q, q, 1);
- mpz_tdiv_q_2exp (tmpb, tmpb, 1L);
- if (mpz_cmp (tmpr, tmpb) >= 0)
- {
- mpz_sub (tmpr, tmpr, tmpb);
- mpz_add_ui (q, q, 1L);
- ASSERT (mpz_cmp (tmpr, tmpb) < 0);
- }
- }
- }
-
- mpz_set (r, tmpr);
- mpz_clear (tmpr);
- mpz_clear (tmpb);
-}
-
-void
-mpz_tdiv_qr_ui (mpz_t q, mpz_t r, mpz_t a, unsigned long b)
-{
- mpz_t bz;
- mpz_init_set_ui (bz, b);
- mpz_tdiv_qr (q, r, a, bz);
- mpz_clear (bz);
-}
-
-void
-mpz_tdiv_q (mpz_t q, mpz_t a, mpz_t b)
-{
- mpz_t r;
-
- mpz_init (r);
- mpz_tdiv_qr (q, r, a, b);
- mpz_clear (r);
-}
-
-void
-mpz_tdiv_r (mpz_t r, mpz_t a, mpz_t b)
-{
- mpz_t q;
-
- mpz_init (q);
- mpz_tdiv_qr (q, r, a, b);
- mpz_clear (q);
-}
-
-void
-mpz_tdiv_q_ui (mpz_t q, mpz_t n, unsigned long d)
-{
- mpz_t dz;
- mpz_init_set_ui (dz, d);
- mpz_tdiv_q (q, n, dz);
- mpz_clear (dz);
-}
-
-/* Set inv to the inverse of d, in the style of invert_limb, ie. for
- udiv_qrnnd_preinv. */
-void
-mpz_preinv_invert (mpz_t inv, mpz_t d, int numb_bits)
-{
- mpz_t t;
- int norm;
- ASSERT (SIZ(d) > 0);
-
- norm = numb_bits - mpz_sizeinbase (d, 2);
- ASSERT (norm >= 0);
- mpz_init_set_ui (t, 1L);
- mpz_mul_2exp (t, t, 2*numb_bits - norm);
- mpz_tdiv_q (inv, t, d);
- mpz_set_ui (t, 1L);
- mpz_mul_2exp (t, t, numb_bits);
- mpz_sub (inv, inv, t);
-
- mpz_clear (t);
-}
-
-/* Remove leading '0' characters from the start of a string, by copying the
- remainder down. */
-void
-strstrip_leading_zeros (char *s)
-{
- char c, *p;
-
- p = s;
- while (*s == '0')
- s++;
-
- do
- {
- c = *s++;
- *p++ = c;
- }
- while (c != '\0');
-}
-
-char *
-mpz_get_str (char *buf, int base, mpz_t a)
-{
- static char tohex[] = "0123456789abcdef";
-
- mp_limb_t alimb, *ap;
- int an, bn, i, j;
- char *bp;
-
- if (base != 16)
- abort ();
- if (SIZ (a) < 0)
- abort ();
-
- if (buf == 0)
- buf = xmalloc (ABSIZ (a) * (GMP_LIMB_BITS / 4) + 3);
-
- an = ABSIZ (a);
- if (an == 0)
- {
- buf[0] = '0';
- buf[1] = '\0';
- return buf;
- }
-
- ap = PTR (a);
- bn = an * (GMP_LIMB_BITS / 4);
- bp = buf + bn;
-
- for (i = 0; i < an; i++)
- {
- alimb = ap[i];
- for (j = 0; j < GMP_LIMB_BITS / 4; j++)
- {
- bp--;
- *bp = tohex [alimb & 0xF];
- alimb >>= 4;
- }
- ASSERT (alimb == 0);
- }
- ASSERT (bp == buf);
-
- buf[bn] = '\0';
-
- strstrip_leading_zeros (buf);
- return buf;
-}
-
-void
-mpz_out_str (FILE *file, int base, mpz_t a)
-{
- char *str;
-
- if (file == 0)
- file = stdout;
-
- str = mpz_get_str (0, 16, a);
- fputs (str, file);
- free (str);
-}
-
-/* Calculate r satisfying r*d == 1 mod 2^n. */
-void
-mpz_invert_2exp (mpz_t r, mpz_t a, unsigned long n)
-{
- unsigned long i;
- mpz_t inv, prod;
-
- ASSERT (mpz_odd_p (a));
-
- mpz_init_set_ui (inv, 1L);
- mpz_init (prod);
-
- for (i = 1; i < n; i++)
- {
- mpz_mul (prod, inv, a);
- if (mpz_tstbit (prod, i) != 0)
- mpz_setbit (inv, i);
- }
-
- mpz_mul (prod, inv, a);
- mpz_tdiv_r_2exp (prod, prod, n);
- ASSERT (mpz_cmp_ui (prod, 1L) == 0);
-
- mpz_set (r, inv);
-
- mpz_clear (inv);
- mpz_clear (prod);
-}
-
-/* Calculate inv satisfying r*a == 1 mod 2^n. */
-void
-mpz_invert_ui_2exp (mpz_t r, unsigned long a, unsigned long n)
-{
- mpz_t az;
- mpz_init_set_ui (az, a);
- mpz_invert_2exp (r, az, n);
- mpz_clear (az);
-}
-
-/* x=y^z */
-void
-mpz_pow_ui (mpz_t x, mpz_t y, unsigned long z)
-{
- mpz_t t;
-
- mpz_init_set_ui (t, 1);
- for (; z != 0; z--)
- mpz_mul (t, t, y);
- mpz_set (x, t);
- mpz_clear (t);
-}
-
-/* x=x+y*z */
-void
-mpz_addmul_ui (mpz_t x, mpz_t y, unsigned long z)
-{
- mpz_t t;
-
- mpz_init (t);
- mpz_mul_ui (t, y, z);
- mpz_add (x, x, t);
- mpz_clear (t);
-}
-
-/* x=floor(y^(1/z)) */
-void
-mpz_root (mpz_t x, mpz_t y, unsigned long z)
-{
- mpz_t t, u;
-
- if (mpz_sgn (y) < 0)
- {
- fprintf (stderr, "mpz_root does not accept negative values\n");
- abort ();
- }
- if (mpz_cmp_ui (y, 1) <= 0)
- {
- mpz_set (x, y);
- return;
- }
- mpz_init (t);
- mpz_init_set (u, y);
- do
- {
- mpz_pow_ui (t, u, z - 1);
- mpz_tdiv_q (t, y, t);
- mpz_addmul_ui (t, u, z - 1);
- mpz_tdiv_q_ui (t, t, z);
- if (mpz_cmp (t, u) >= 0)
- break;
- mpz_set (u, t);
- }
- while (1);
- mpz_set (x, u);
- mpz_clear (t);
- mpz_clear (u);
-}
/* __gmp_extract_double -- convert from double to array of mp_limb_t.
-Copyright 1996, 1999, 2000, 2001, 2002, 2006 Free Software Foundation, Inc.
+Copyright 1996, 1999, 2000, 2001, 2002, 2006, 2012 Free Software Foundation,
+Inc.
This file is part of the GNU MP Library.
#define _GMP_IEEE_FLOATS 0
#endif
-#define BITS_IN_MANTISSA 53
-
/* Extract a non-negative double in d. */
int
/* Generate mp_bases data.
-Copyright 1991, 1993, 1994, 1996, 2000, 2002, 2004 Free Software Foundation,
-Inc.
+Copyright 1991, 1993, 1994, 1996, 2000, 2002, 2004, 2011, 2012 Free Software
+Foundation, Inc.
This file is part of the GNU MP Library.
You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
-#include <math.h>
-
-#include "dumbmp.c"
+#include "bootstrap.c"
int chars_per_limb;
-double chars_per_bit_exactly;
mpz_t big_base;
int normalization_steps;
mpz_t big_base_inverted;
chars_per_limb++;
}
- chars_per_bit_exactly = 0.69314718055994530942 / log ((double) base);
-
mpz_ui_pow_ui (big_base, (long) base, (long) chars_per_limb);
normalization_steps = limb_bits - mpz_sizeinbase (big_base, 2);
printf ("#define MP_BASES_NORMALIZATION_STEPS_10 %d\n", normalization_steps);
}
+
+#define EXTRA 16
+
+/* Compute log(2)/log(bi) as a fixed-point number with PREC fraction bits
+   and store it in R, which the caller must already have initialised.
+
+   The working values carry PREC+EXTRA fraction bits.  Each pass replaces
+   the scaled base by its square root and tentatively multiplies it into
+   the running product; the corresponding result bit is set only while the
+   product stays below 2.  */
+void
+mp_2logb (mpz_t r, int bi, int prec)
+{
+  const int scale = prec + EXTRA;
+  mpz_t acc, cand, limit, root;
+  int bit;
+
+  mpz_set_ui (r, 0);
+
+  /* acc = 1.0, limit = 2.0, root = bi, all scaled by 2^scale.  */
+  mpz_init_set_ui (acc, 1);
+  mpz_mul_2exp (acc, acc, scale);
+
+  mpz_init_set_ui (limit, 2);
+  mpz_mul_2exp (limit, limit, scale);
+
+  mpz_init_set_ui (root, bi);
+  mpz_mul_2exp (root, root, scale);
+
+  mpz_init (cand);
+
+  bit = prec;
+  while (bit-- > 0)
+    {
+      /* root = sqrt (root), keeping the fixed-point scale.  */
+      mpz_mul_2exp (root, root, scale);
+      mpz_sqrt (root, root);
+
+      /* cand = acc * root, rescaled.  */
+      mpz_mul (cand, acc, root);
+      mpz_tdiv_q_2exp (cand, cand, scale);
+
+      if (mpz_cmp (cand, limit) >= 0)
+        continue;               /* too large: leave this bit clear */
+
+      mpz_setbit (r, bit);      /* set next less significant bit */
+      mpz_set (acc, cand);      /* new value acceptable */
+    }
+
+  mpz_clear (acc);
+  mpz_clear (cand);
+  mpz_clear (limit);
+  mpz_clear (root);
+}
+
void
table (int limb_bits, int nail_bits)
{
int numb_bits = limb_bits - nail_bits;
int base;
+ mpz_t r, t, logb2, log2b;
+
+ mpz_init (r);
+ mpz_init (t);
+ mpz_init (logb2);
+ mpz_init (log2b);
printf ("/* This file generated by gen-bases.c - DO NOT EDIT. */\n");
printf ("\n");
printf ("#endif\n");
printf ("\n");
puts ("const struct bases mp_bases[257] =\n{");
- puts (" /* 0 */ { 0, 0.0, 0 },");
- puts (" /* 1 */ { 0, 1e37, 0 },");
+ puts (" /* 0 */ { 0, 0, 0, 0, 0 },");
+ puts (" /* 1 */ { 0, 0, 0, 0, 0 },");
for (base = 2; base <= 256; base++)
{
generate (limb_bits, nail_bits, base);
+ mp_2logb (r, base, limb_bits + 8);
+ mpz_tdiv_q_2exp (logb2, r, 8);
+ mpz_set_ui (t, 1);
+ mpz_mul_2exp (t, t, 2*limb_bits + 5);
+ mpz_sub_ui (t, t, 1);
+ mpz_add_ui (r, r, 1);
+ mpz_tdiv_q (log2b, t, r);
printf (" /* %3u */ { ", base);
if (POW2_P (base))
{
- printf ("%u, %.16f, 0x%x },\n",
- chars_per_limb, chars_per_bit_exactly, ulog2 (base) - 1);
- }
- else
- {
- printf ("%u, %.16f, CNST_LIMB(0x",
- chars_per_limb, chars_per_bit_exactly);
- mpz_out_str (stdout, 16, big_base);
- printf ("), CNST_LIMB(0x");
- mpz_out_str (stdout, 16, big_base_inverted);
- printf (") },\n");
+ mpz_set_ui (big_base, ulog2 (base) - 1);
+ mpz_set_ui (big_base_inverted, 0);
}
+
+ printf ("%u,", chars_per_limb);
+ printf (" CNST_LIMB(0x");
+ mpz_out_str (stdout, 16, logb2);
+ printf ("), CNST_LIMB(0x");
+ mpz_out_str (stdout, 16, log2b);
+ printf ("), CNST_LIMB(0x");
+ mpz_out_str (stdout, 16, big_base);
+ printf ("), CNST_LIMB(0x");
+ mpz_out_str (stdout, 16, big_base_inverted);
+ printf (") },\n");
}
puts ("};");
+
+ mpz_clear (r);
+ mpz_clear (t);
+ mpz_clear (logb2);
+ mpz_clear (log2b);
+
}
int
--- /dev/null
+/* Generate data for combinatorics: fac_ui, bin_uiui, ...
+
+Copyright 2002, 2011, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "bootstrap.c"
+
+/* Divide X in place by 2 until it is odd, and return the number of
+   halvings performed (the exponent of 2 in the original X).
+
+   Zero stays even no matter how often it is halved, so it is left
+   unchanged and 0 is returned instead of looping forever.  */
+int
+mpz_remove_twos (mpz_t x)
+{
+  int r = 0;
+
+  if (mpz_sgn (x) == 0)
+    return 0;
+
+  for (; mpz_even_p (x); r++)
+    mpz_tdiv_q_2exp (x, x, 1);
+  return r;
+}
+
+/* Print to stdout the preprocessor definitions of the factorial, double
+   factorial, n-th root and central binomial lookup tables for a limb
+   with NUMB usable bits.
+
+   NAIL is not used.  LIMB is referenced only by code that is currently
+   disabled with "#if 0".
+
+   Returns 0 on success.  */
+int
+gen_consts (int numb, int nail, int limb)
+{
+  mpz_t x, mask, y, last;
+  unsigned long a, b;
+  unsigned long ofl, ofe;
+
+  printf ("/* This file is automatically generated by gen-fac.c */\n\n");
+  printf ("#if GMP_NUMB_BITS != %d\n", numb);
+  printf ("Error , error this data is for %d GMP_NUMB_BITS only\n", numb);
+  printf ("#endif\n");
+#if 0
+  printf ("#if GMP_LIMB_BITS != %d\n", limb);
+  printf ("Error , error this data is for %d GMP_LIMB_BITS only\n", limb);
+  printf ("#endif\n");
+#endif
+
+  /* Plain factorials, for as long as they fit in NUMB bits.  */
+  printf
+    ("/* This table is 0!,1!,2!,3!,...,n! where n! has <= GMP_NUMB_BITS bits */\n");
+  printf
+    ("#define ONE_LIMB_FACTORIAL_TABLE CNST_LIMB(0x1),CNST_LIMB(0x1");
+  mpz_init_set_ui (x, 1);
+  mpz_init (last);
+  for (b = 2;; b++)
+    {
+      mpz_mul_ui (x, x, b);     /* x = b! */
+      if (mpz_sizeinbase (x, 2) > numb)
+        break;
+      printf ("),CNST_LIMB(0x");
+      mpz_out_str (stdout, 16, x);
+    }
+  printf (")\n");
+
+  /* Odd parts of the factorials.  LAST keeps the value from before each
+     multiplication, so after the loop it is the largest entry that fit.  */
+  printf
+    ("\n/* This table is 0!,1!,2!/2,3!/2,...,n!/2^sn where n!/2^sn is an */\n");
+  printf
+    ("/* odd integer for each n, and n!/2^sn has <= GMP_NUMB_BITS bits */\n");
+  printf
+    ("#define ONE_LIMB_ODD_FACTORIAL_TABLE CNST_LIMB(0x1),CNST_LIMB(0x1),CNST_LIMB(0x1");
+  mpz_set_ui (x, 1);
+  for (b = 3;; b++)
+    {
+      for (a = b; (a & 1) == 0; a >>= 1);       /* a = odd part of b */
+      mpz_set (last, x);
+      mpz_mul_ui (x, x, a);
+      if (mpz_sizeinbase (x, 2) > numb)
+        break;
+      printf ("),CNST_LIMB(0x");
+      mpz_out_str (stdout, 16, x);
+    }
+  printf (")\n");
+  printf
+    ("#define ODD_FACTORIAL_TABLE_MAX CNST_LIMB(0x");
+  mpz_out_str (stdout, 16, last);
+  printf (")\n");
+
+  ofl = b - 1;                  /* last index whose entry fit */
+  printf
+    ("#define ODD_FACTORIAL_TABLE_LIMIT (%lu)\n", ofl);
+
+  /* mask = 2^numb - 1.  */
+  mpz_init (mask);
+  mpz_setbit (mask, numb);
+  mpz_sub_ui (mask, mask, 1);
+
+  /* Continue the odd factorials reduced modulo 2^numb.  Y tracks
+     binomial (b, b/2), updated incrementally; the table stops as soon
+     as that binomial no longer fits in NUMB bits.  */
+  printf
+    ("\n/* Previous table, continued, values modulo 2^GMP_NUMB_BITS */\n");
+  printf
+    ("#define ONE_LIMB_ODD_FACTORIAL_EXTTABLE CNST_LIMB(0x");
+  mpz_and (x, x, mask);
+  mpz_out_str (stdout, 16, x);
+  mpz_init (y);
+  mpz_bin_uiui (y, b, b/2);
+  b++;
+  for (;; b++)
+    {
+      for (a = b; (a & 1) == 0; a >>= 1);       /* a = odd part of b */
+      if (a == b) {
+        /* b odd: binomial (b, b/2) = binomial (b-1, (b-1)/2) * b / (b/2+1) */
+        mpz_divexact_ui (y, y, a/2+1);
+        mpz_mul_ui (y, y, a);
+      } else
+        /* b even: binomial (b, b/2) = 2 * binomial (b-1, (b-1)/2) */
+        mpz_mul_2exp (y, y, 1);
+      if (mpz_sizeinbase (y, 2) > numb)
+        break;
+      mpz_mul_ui (x, x, a);
+      mpz_and (x, x, mask);
+      printf ("),CNST_LIMB(0x");
+      mpz_out_str (stdout, 16, x);
+    }
+  printf (")\n");
+  ofe = b - 1;
+  printf
+    ("#define ODD_FACTORIAL_EXTTABLE_LIMIT (%lu)\n", ofe);
+
+  /* Odd double factorials, for as long as they fit in NUMB bits.  */
+  printf
+    ("\n/* This table is 1!!,3!!,...,(2n+1)!! where (2n+1)!! has <= GMP_NUMB_BITS bits */\n");
+  printf
+    ("#define ONE_LIMB_ODD_DOUBLEFACTORIAL_TABLE CNST_LIMB(0x1");
+  mpz_set_ui (x, 1);
+  for (b = 3;; b+=2)
+    {
+      mpz_set (last, x);
+      mpz_mul_ui (x, x, b);
+      if (mpz_sizeinbase (x, 2) > numb)
+        break;
+      printf ("),CNST_LIMB(0x");
+      mpz_out_str (stdout, 16, x);
+    }
+  printf (")\n");
+  printf
+    ("#define ODD_DOUBLEFACTORIAL_TABLE_MAX CNST_LIMB(0x");
+  mpz_out_str (stdout, 16, last);
+  printf (")\n");
+
+  printf
+    ("#define ODD_DOUBLEFACTORIAL_TABLE_LIMIT (%lu)\n", b - 2);
+
+  /* Integer b-th roots of 2^numb - 1 for b = 2..8.  */
+  printf
+    ("\n/* This table x_1, x_2,... contains values s.t. x_n^n has <= GMP_NUMB_BITS bits */\n");
+  printf
+    ("#define NTH_ROOT_NUMB_MASK_TABLE (GMP_NUMB_MASK");
+  for (b = 2;b <= 8; b++)
+    {
+      mpz_root (x, mask, b);
+      printf ("),CNST_LIMB(0x");
+      mpz_out_str (stdout, 16, x);
+    }
+  printf (")\n");
+
+  /* From here on MASK holds 2^numb and serves as the modulus for the
+     inverses.  */
+  mpz_add_ui (mask, mask, 1);
+  printf
+    ("\n/* This table contains inverses of odd factorials, modulo 2^GMP_NUMB_BITS */\n");
+  printf
+    ("\n/* It begins with (2!/2)^-1=1 */\n");
+  printf
+    ("#define ONE_LIMB_ODD_FACTORIAL_INVERSES_TABLE CNST_LIMB(0x1");
+  mpz_set_ui (x, 1);
+  for (b = 3;b <= ofe - 2; b++)
+    {
+      for (a = b; (a & 1) == 0; a >>= 1);       /* a = odd part of b */
+      mpz_mul_ui (x, x, a);
+      mpz_invert (y, x, mask);
+      printf ("),CNST_LIMB(0x");
+      mpz_out_str (stdout, 16, y);
+    }
+  printf (")\n");
+
+  /* Round OFE up to the next multiple of 16 strictly above it.  */
+  ofe = (ofe / 16 + 1) * 16;
+
+  printf
+    ("\n/* This table contains 2n-popc(2n) for small n */\n");
+  printf
+    ("\n/* It begins with 2-1=1 (n=1) */\n");
+  printf
+    ("#define TABLE_2N_MINUS_POPC_2N 1");
+  for (b = 4; b <= ofe; b += 2)
+    {
+      mpz_set_ui (x, b);
+      printf (",%lu",b - mpz_popcount (x));
+    }
+  printf ("\n");
+  printf
+    ("#define TABLE_LIMIT_2N_MINUS_POPC_2N %lu\n", ofe + 1);
+
+
+  /* Odd parts of the central binomials binomial (2k, k), starting at
+     k = (ODD_FACTORIAL_TABLE_LIMIT + 1) / 2.  */
+  ofl = (ofl + 1) / 2;
+  printf
+    ("#define ODD_CENTRAL_BINOMIAL_OFFSET (%lu)\n", ofl);
+  printf
+    ("\n/* This table contains binomial(2k,k)/2^t */\n");
+  printf
+    ("\n/* It begins with ODD_CENTRAL_BINOMIAL_TABLE_MIN */\n");
+  printf
+    ("#define ONE_LIMB_ODD_CENTRAL_BINOMIAL_TABLE ");
+  for (b = ofl;; b++)
+    {
+      mpz_bin_uiui (x, 2 * b, b);
+      mpz_remove_twos (x);
+      if (mpz_sizeinbase (x, 2) > numb)
+        break;
+      if (b != ofl)
+        printf ("),");
+      printf("CNST_LIMB(0x");
+      mpz_out_str (stdout, 16, x);
+    }
+  printf (")\n");
+
+  ofe = b - 1;                  /* last k whose entry fit */
+  printf
+    ("#define ODD_CENTRAL_BINOMIAL_TABLE_LIMIT (%lu)\n", ofe);
+
+  printf
+    ("\n/* This table contains the inverses of elements in the previous table. */\n");
+  printf
+    ("#define ONE_LIMB_ODD_CENTRAL_BINOMIAL_INVERSE_TABLE CNST_LIMB(0x");
+  for (b = ofl; b <= ofe; b++)
+    {
+      mpz_bin_uiui (x, 2 * b, b);
+      mpz_remove_twos (x);
+      mpz_invert (x, x, mask);
+      mpz_out_str (stdout, 16, x);
+      if (b != ofe)
+        printf ("),CNST_LIMB(0x");
+    }
+  printf (")\n");
+
+  printf
+    ("\n/* This table contains the values t in the formula binomial(2k,k)/2^t */\n");
+  printf
+    ("#define CENTRAL_BINOMIAL_2FAC_TABLE ");
+  for (b = ofl; b <= ofe; b++)
+    {
+      mpz_bin_uiui (x, 2 * b, b);
+      printf ("%d", mpz_remove_twos (x));
+      if (b != ofe)
+        printf (",");
+    }
+  printf ("\n");
+
+#if 0
+  mpz_set_ui (x, 1);
+  mpz_mul_2exp (x, x, limb + 1);        /* x=2^(limb+1) */
+  mpz_init (y);
+  mpz_set_ui (y, 10000);
+  mpz_mul (x, x, y);            /* x=2^(limb+1)*10^4 */
+  mpz_set_ui (y, 27182);        /* exp(1)*10^4 */
+  mpz_tdiv_q (x, x, y);         /* x=2^(limb+1)/exp(1) */
+  printf ("\n/* is 2^(GMP_LIMB_BITS+1)/exp(1) */\n");
+  printf ("#define FAC2OVERE CNST_LIMB(0x");
+  mpz_out_str (stdout, 16, x);
+  printf (")\n");
+
+
+  printf
+    ("\n/* FACMULn is largest odd x such that x*(x+2)*...*(x+2(n-1))<=2^GMP_NUMB_BITS-1 */\n\n");
+  mpz_init (z);
+  mpz_init (t);
+  for (a = 2; a <= 4; a++)
+    {
+      mpz_set_ui (x, 1);
+      mpz_mul_2exp (x, x, numb);
+      mpz_root (x, x, a);
+      /* so x is approx sol */
+      if (mpz_even_p (x))
+        mpz_sub_ui (x, x, 1);
+      mpz_set_ui (y, 1);
+      mpz_mul_2exp (y, y, numb);
+      mpz_sub_ui (y, y, 1);
+      /* decrement x until we are <= real sol */
+      do
+        {
+          mpz_sub_ui (x, x, 2);
+          odd_products (t, x, a);
+          if (mpz_cmp (t, y) <= 0)
+            break;
+        }
+      while (1);
+      /* increment x until > real sol */
+      do
+        {
+          mpz_add_ui (x, x, 2);
+          odd_products (t, x, a);
+          if (mpz_cmp (t, y) > 0)
+            break;
+        }
+      while (1);
+      /* dec once to get real sol */
+      mpz_sub_ui (x, x, 2);
+      printf ("#define FACMUL%lu CNST_LIMB(0x", a);
+      mpz_out_str (stdout, 16, x);
+      printf (")\n");
+    }
+#endif
+
+  /* Release the temporaries initialised above.  */
+  mpz_clear (x);
+  mpz_clear (mask);
+  mpz_clear (y);
+  mpz_clear (last);
+
+  return 0;
+}
+
+/* Entry point.  Expects exactly two arguments, the limb size and the
+   nail size in bits, validates them, and prints the generated tables on
+   stdout.  Exits with status 1 on a usage error or out-of-range sizes.  */
+int
+main (int argc, char *argv[])
+{
+  int nail_bits, limb_bits, numb_bits;
+
+  if (argc != 3)
+    {
+      fprintf (stderr, "Usage: gen-fac limbbits nailbits\n");
+      exit (1);
+    }
+  limb_bits = atoi (argv[1]);
+  nail_bits = atoi (argv[2]);
+  numb_bits = limb_bits - nail_bits;
+  if (limb_bits < 2 || nail_bits < 0 || numb_bits < 1)
+    {
+      fprintf (stderr, "Invalid limb/nail bits %d,%d\n", limb_bits,
+               nail_bits);
+      exit (1);
+    }
+
+  /* gen_consts returns 0 on success; report anything else as failure.  */
+  return gen_consts (numb_bits, nail_bits, limb_bits) != 0;
+}
+++ /dev/null
-/* Generate mpz_fac_ui data.
-
-Copyright 2002 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
-
-#include <stdio.h>
-#include <stdlib.h>
-
-#include "dumbmp.c"
-
-
-/* sets x=y*(y+2)*(y+4)*....*(y+2*(z-1)) */
-void
-odd_products (mpz_t x, mpz_t y, int z)
-{
- mpz_t t;
-
- mpz_init_set (t, y);
- mpz_set_ui (x, 1);
- for (; z != 0; z--)
- {
- mpz_mul (x, x, t);
- mpz_add_ui (t, t, 2);
- }
- mpz_clear (t);
- return;
-}
-
-/* returns 0 on success */
-int
-gen_consts (int numb, int nail, int limb)
-{
- mpz_t x, y, z, t;
- unsigned long a, b, first = 1;
-
- printf ("/* This file is automatically generated by gen-fac_ui.c */\n\n");
- printf ("#if GMP_NUMB_BITS != %d\n", numb);
- printf ("Error , error this data is for %d GMP_NUMB_BITS only\n", numb);
- printf ("#endif\n");
- printf ("#if GMP_LIMB_BITS != %d\n", limb);
- printf ("Error , error this data is for %d GMP_LIMB_BITS only\n", limb);
- printf ("#endif\n");
-
- printf
- ("/* This table is 0!,1!,2!,3!,...,n! where n! has <= GMP_NUMB_BITS bits */\n");
- printf
- ("#define ONE_LIMB_FACTORIAL_TABLE CNST_LIMB(0x1),CNST_LIMB(0x1),CNST_LIMB(0x2),");
- mpz_init_set_ui (x, 2);
- for (b = 3;; b++)
- {
- mpz_mul_ui (x, x, b); /* so b!=a */
- if (mpz_sizeinbase (x, 2) > numb)
- break;
- if (first)
- {
- first = 0;
- }
- else
- {
- printf ("),");
- }
- printf ("CNST_LIMB(0x");
- mpz_out_str (stdout, 16, x);
- }
- printf (")\n");
-
-
- mpz_set_ui (x, 1);
- mpz_mul_2exp (x, x, limb + 1); /* x=2^(limb+1) */
- mpz_init (y);
- mpz_set_ui (y, 10000);
- mpz_mul (x, x, y); /* x=2^(limb+1)*10^4 */
- mpz_set_ui (y, 27182); /* exp(1)*10^4 */
- mpz_tdiv_q (x, x, y); /* x=2^(limb+1)/exp(1) */
- printf ("\n/* is 2^(GMP_LIMB_BITS+1)/exp(1) */\n");
- printf ("#define FAC2OVERE CNST_LIMB(0x");
- mpz_out_str (stdout, 16, x);
- printf (")\n");
-
-
- printf
- ("\n/* FACMULn is largest odd x such that x*(x+2)*...*(x+2(n-1))<=2^GMP_NUMB_BITS-1 */\n\n");
- mpz_init (z);
- mpz_init (t);
- for (a = 2; a <= 4; a++)
- {
- mpz_set_ui (x, 1);
- mpz_mul_2exp (x, x, numb);
- mpz_root (x, x, a);
- /* so x is approx sol */
- if (mpz_even_p (x))
- mpz_sub_ui (x, x, 1);
- mpz_set_ui (y, 1);
- mpz_mul_2exp (y, y, numb);
- mpz_sub_ui (y, y, 1);
- /* decrement x until we are <= real sol */
- do
- {
- mpz_sub_ui (x, x, 2);
- odd_products (t, x, a);
- if (mpz_cmp (t, y) <= 0)
- break;
- }
- while (1);
- /* increment x until > real sol */
- do
- {
- mpz_add_ui (x, x, 2);
- odd_products (t, x, a);
- if (mpz_cmp (t, y) > 0)
- break;
- }
- while (1);
- /* dec once to get real sol */
- mpz_sub_ui (x, x, 2);
- printf ("#define FACMUL%lu CNST_LIMB(0x", a);
- mpz_out_str (stdout, 16, x);
- printf (")\n");
- }
-
- return 0;
-}
-
-int
-main (int argc, char *argv[])
-{
- int nail_bits, limb_bits, numb_bits;
-
- if (argc != 3)
- {
- fprintf (stderr, "Usage: gen-fac_ui limbbits nailbits\n");
- exit (1);
- }
- limb_bits = atoi (argv[1]);
- nail_bits = atoi (argv[2]);
- numb_bits = limb_bits - nail_bits;
- if (limb_bits < 0 || nail_bits < 0 || numb_bits < 0)
- {
- fprintf (stderr, "Invalid limb/nail bits %d,%d\n", limb_bits,
- nail_bits);
- exit (1);
- }
- gen_consts (numb_bits, nail_bits, limb_bits);
- return 0;
-}
/* Generate Fibonacci table data.
-Copyright 2001, 2002, 2004 Free Software Foundation, Inc.
+Copyright 2001, 2002, 2004, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
-#include "dumbmp.c"
+#include "bootstrap.c"
mpz_t *f;
int fnum, fib_limit, luc_limit;
/* fib(2n) > 2^n, so use 2n as a limit for the table size */
falloc = 2 * numb_bits;
- f = (mpz_t *) xmalloc (falloc * sizeof (*f));
+ f = xmalloc (falloc * sizeof (*f));
mpz_init_set_ui (f[0], 1L); /* F[-1] */
mpz_init_set_ui (f[1], 0L); /* F[0] */
for (i = 2; ; i++)
{
- ASSERT (i < falloc);
+ assert (i < falloc);
/* F[i] = F[i-1] + F[i-2] */
mpz_init (f[i]);
--- /dev/null
+/* gen-jacobitab.c
+
+ Contributed to the GNU project by Niels Möller.
+
+Copyright 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+/* Generate the lookup table needed for fast left-to-right computation
+ of the Jacobi symbol. */
+
+#include <assert.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+/* Maps a state index (0..12) to a pair (a, b) of values in 0..3.  Every
+   pair has at least one odd component.  The pair (3, 3) occurs twice, at
+   index 7 and at index 12; the two entries differ only in the d flag
+   noted next to them (JACOBI_D below is 1 for index 7 only).  */
+static const struct
+{
+ unsigned char a;
+ unsigned char b;
+} decode_table[13] = {
+ /* 0 */ { 0, 1 },
+ /* 1 */ { 0, 3 },
+ /* 2 */ { 1, 1 },
+ /* 3 */ { 1, 3 },
+ /* 4 */ { 2, 1 },
+ /* 5 */ { 2, 3 },
+ /* 6 */ { 3, 1 },
+ /* 7 */ { 3, 3 }, /* d = 1 */
+ /* 8 */ { 1, 0 },
+ /* 9 */ { 1, 2 },
+ /* 10 */ { 3, 0 },
+ /* 11 */ { 3, 2 },
+ /* 12 */ { 3, 3 }, /* d = 0 */
+
+};
+/* Accessors for a packed state value: bit 0 is the e flag and the
+   remaining bits are the decode_table index.  */
+#define JACOBI_A(bits) (decode_table[(bits)>>1].a)
+#define JACOBI_B(bits) (decode_table[(bits)>>1].b)
+
+#define JACOBI_E(bits) ((bits) & 1)
+#define JACOBI_D(bits) (((bits)>>1) == 7) /* Gives 0 for don't care states. */
+
+/* Inverse of decode_table: return the index of the entry that holds the
+   pair (a, b).  The pair (3, 3) is stored twice, so for it D picks
+   index 7 (d = 1) or index 12 (d = 0).  Aborts when no entry matches.  */
+static unsigned
+encode (unsigned a, unsigned b, unsigned d)
+{
+  unsigned idx = 0;
+
+  assert (d < 2);
+  assert (a < 4);
+  assert (b < 4);
+  assert ( (a | b ) & 1);
+
+  if (a == 3 && b == 3)
+    {
+      if (d)
+        return 7;
+      return 12;
+    }
+
+  /* Linear scan over the first twelve entries; index 12 is only ever
+     produced by the special case above.  */
+  while (idx < 12)
+    {
+      if (decode_table[idx].a == a && decode_table[idx].b == b)
+        return idx;
+      idx++;
+    }
+
+  abort ();
+}
+
+/* Print the state transition table, 16 comma-terminated entries per
+   line, on stdout.
+
+   The loop index packs one table input:
+     bits 0-1   q
+     bit  2     d
+     bits 3..   previous state (bit 0 = e, the rest = decode_table index)
+   There are 13 decode_table entries, 2 values of e and 8 (d, q)
+   combinations, hence 13 * 2 * 8 = 208 inputs.  Each output entry is the
+   next state, packed as (encode (a, b, d) << 1) | e.  */
+int
+main (int argc, char **argv)
+{
+  unsigned bits;
+
+  for (bits = 0; bits < 208; bits++)
+    {
+      unsigned e, a, b, d_old, d, q;
+
+      /* Start a new output line before every 16th entry except the first.  */
+      if (bits && !(bits & 0xf))
+        printf("\n");
+
+      q = bits & 3;
+      d = (bits >> 2) & 1;
+
+      e = JACOBI_E (bits >> 3);
+      a = JACOBI_A (bits >> 3);
+      b = JACOBI_B (bits >> 3);
+      d_old = JACOBI_D (bits >> 3);
+
+      if (d != d_old && a == 3 && b == 3)
+        e ^= 1;
+
+      if (d == 1)
+        {
+          if (b == 2)
+            e ^= (q & (a >> 1)) ^ (q >> 1);
+          a = (a - q * b) & 3;
+        }
+      else
+        {
+          if (a == 2)
+            e ^= (q & (b >> 1)) ^ (q >> 1);
+          b = (b - q * a) & 3;
+        }
+
+      /* The packed state is unsigned, so print it with %u.  */
+      printf("%2u,", (encode (a, b, d) << 1) | e);
+    }
+  printf("\n");
+
+  return 0;
+}
/* Generate perfect square testing data.
-Copyright 2002, 2003, 2004 Free Software Foundation, Inc.
+Copyright 2002, 2003, 2004, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
#include <stdio.h>
#include <stdlib.h>
-#include "dumbmp.c"
+#include "bootstrap.c"
/* The aim of this program is to choose either mpn_mod_34lsub1 or mpn_mod_1
accordingly. */
#define COLLAPSE_ELEMENT(array, idx, narray) \
do { \
- mem_copyi ((char *) &(array)[idx], \
- (char *) &(array)[idx+1], \
- ((narray)-((idx)+1)) * sizeof (array[0])); \
+ memmove (&(array)[idx], \
+ &(array)[idx+1], \
+ ((narray)-((idx)+1)) * sizeof (array[0])); \
(narray)--; \
} while (0)
int
neg_mod (int n, int m)
{
- ASSERT (n >= 0 && n < m);
+ assert (n >= 0 && n < m);
return (n == 0 ? 0 : m-n);
}
int i, res;
nsq_res_0x100 = (0x100 + limb_bits - 1) / limb_bits;
- sq_res_0x100 = (mpz_t *) xmalloc (nsq_res_0x100 * sizeof (*sq_res_0x100));
+ sq_res_0x100 = xmalloc (nsq_res_0x100 * sizeof (*sq_res_0x100));
for (i = 0; i < nsq_res_0x100; i++)
mpz_init_set_ui (sq_res_0x100[i], 0L);
/* no more than limb_bits many factors in a one limb modulus (and of
course in reality nothing like that many) */
factor_alloc = limb_bits;
- factor = (struct factor_t *) xmalloc (factor_alloc * sizeof (*factor));
- rawfactor = (struct rawfactor_t *)
- xmalloc (factor_alloc * sizeof (*rawfactor));
+ factor = xmalloc (factor_alloc * sizeof (*factor));
+ rawfactor = xmalloc (factor_alloc * sizeof (*rawfactor));
if (numb_bits % 4 != 0)
{
}
while (mpz_sgn (r) == 0);
- ASSERT (nrawfactor < factor_alloc);
+ assert (nrawfactor < factor_alloc);
rawfactor[nrawfactor].divisor = i;
rawfactor[nrawfactor].multiplicity = multiplicity;
nrawfactor++;
break;
mpz_set (pp, new_pp);
- ASSERT (nrawfactor < factor_alloc);
+ assert (nrawfactor < factor_alloc);
rawfactor[nrawfactor].divisor = i;
rawfactor[nrawfactor].multiplicity = 1;
nrawfactor++;
for (i = 0; i < nrawfactor; i++)
{
int j;
- ASSERT (nfactor < factor_alloc);
+ assert (nfactor < factor_alloc);
factor[nfactor].divisor = 1;
for (j = 0; j < rawfactor[i].multiplicity; j++)
factor[nfactor].divisor *= rawfactor[i].divisor;
Contributed to the GNU project by Torbjorn Granlund.
-Copyright 2009 Free Software Foundation, Inc.
+Copyright 2009, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
License for more details.
You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
/*
Generate tables for fast, division-free trial division for GMP.
#include <stdlib.h>
#include <stdio.h>
-#include "dumbmp.c"
+#include "bootstrap.c"
int sumspills (mpz_t, mpz_t *, int);
void mpn_mod_1s_4p_cps (mpz_t [7], mpz_t);
unsigned long
mpz_log2 (mpz_t x)
{
- mpz_t y;
- unsigned long cnt;
-
- mpz_init (y);
- mpz_set (y, x);
- cnt = 0;
- while (mpz_sgn (y) != 0)
- {
- mpz_tdiv_q_2exp (y, y, 1);
- cnt++;
- }
- mpz_clear (y);
-
- return cnt;
+ return mpz_sgn (x) ? mpz_sizeinbase (x, 2) : 0;
}
void
/* Definitions for GNU multiple precision functions. -*- mode: c -*-
-Copyright 1991, 1993, 1994, 1995, 1996, 1997, 1999, 2000, 2001, 2002, 2003,
-2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation,
-Inc.
+Copyright 1991, 1993, 1994, 1995, 1996, 1997, 1999, 2000, 2001, 2002,
+2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Free
+Software Foundation, Inc.
This file is part of the GNU MP Library.
#endif
-/* __STDC__ - some ANSI compilers define this only to 0, hence the use of
- "defined" and not "__STDC__-0". In particular Sun workshop C 5.0
- sets __STDC__ to 0, but requires "##" for token pasting.
-
- _AIX - gnu ansidecl.h asserts that all known AIX compilers are ANSI but
- don't always define __STDC__.
-
- __DECC - current versions of DEC C (5.9 for instance) for alpha are ANSI,
- but don't define __STDC__ in their default mode. Don't know if old
- versions might have been K&R, but let's not worry about that unless
- someone is still using one.
-
- _mips - gnu ansidecl.h says the RISC/OS MIPS compiler is ANSI in SVR4
- mode, but doesn't define __STDC__.
-
- _MSC_VER - Microsoft C is ANSI, but __STDC__ is undefined unless the /Za
- option is given (in which case it's 1).
-
- _WIN32 - tested for by gnu ansidecl.h, no doubt on the assumption that
- all w32 compilers are ansi.
-
- Note: This same set of tests is used by gen-psqr.c and
- demos/expr/expr-impl.h, so if anything needs adding, then be sure to
- update those too. */
-
-#if defined (__STDC__) \
- || defined (__cplusplus) \
- || defined (_AIX) \
- || defined (__DECC) \
- || (defined (__mips) && defined (_SYSTYPE_SVR4)) \
- || defined (_MSC_VER) \
- || defined (_WIN32)
-#define __GMP_HAVE_CONST 1
-#define __GMP_HAVE_PROTOTYPES 1
-#define __GMP_HAVE_TOKEN_PASTE 1
-#else
-#define __GMP_HAVE_CONST 0
-#define __GMP_HAVE_PROTOTYPES 0
-#define __GMP_HAVE_TOKEN_PASTE 0
-#endif
-
-
-#if __GMP_HAVE_CONST
-#define __gmp_const const
-#define __gmp_signed signed
-#else
-#define __gmp_const
-#define __gmp_signed
-#endif
-
-
/* __GMP_DECLSPEC supports Windows DLL versions of libgmp, and is empty in
all other circumstances.
#endif
#if __GMP_LIBGMP_DLL
-#if __GMP_WITHIN_GMP
+#ifdef __GMP_WITHIN_GMP
/* compiling to go into a DLL libgmp */
#define __GMP_DECLSPEC __GMP_DECLSPEC_EXPORT
#else
typedef __mpz_struct mpz_t[1];
typedef mp_limb_t * mp_ptr;
-typedef __gmp_const mp_limb_t * mp_srcptr;
+typedef const mp_limb_t * mp_srcptr;
#if defined (_CRAY) && ! defined (_CRAYMPP)
/* plain `int' is much faster (48 bits) */
#define __GMP_MP_SIZE_T_INT 1
/* Types for function declarations in gmp files. */
/* ??? Should not pollute user name space with these ??? */
-typedef __gmp_const __mpz_struct *mpz_srcptr;
+typedef const __mpz_struct *mpz_srcptr;
typedef __mpz_struct *mpz_ptr;
-typedef __gmp_const __mpf_struct *mpf_srcptr;
+typedef const __mpf_struct *mpf_srcptr;
typedef __mpf_struct *mpf_ptr;
-typedef __gmp_const __mpq_struct *mpq_srcptr;
+typedef const __mpq_struct *mpq_srcptr;
typedef __mpq_struct *mpq_ptr;
/* This is not wanted in mp.h, so put it outside the __GNU_MP__ common
section. */
#if __GMP_LIBGMP_DLL
-#if __GMP_WITHIN_GMPXX
+#ifdef __GMP_WITHIN_GMPXX
/* compiling to go into a DLL libgmpxx */
#define __GMP_DECLSPEC_XX __GMP_DECLSPEC_EXPORT
#else
#endif
-#if __GMP_HAVE_PROTOTYPES
-#define __GMP_PROTO(x) x
-#else
-#define __GMP_PROTO(x) ()
-#endif
-
#ifndef __MPN
-#if __GMP_HAVE_TOKEN_PASTE
#define __MPN(x) __gmpn_##x
-#else
-#define __MPN(x) __gmpn_/**/x
-#endif
#endif
/* For reference, "defined(EOF)" cannot be used here. In g++ 2.95.4,
GCC 4.3 and above with -std=c99 or -std=gnu99 implements ISO C99
inline semantics, unless -fgnu89-inline is used. */
#ifdef __GNUC__
-#if (defined __GNUC_STDC_INLINE__) || (__GNUC__ == 4 && __GNUC_MINOR__ == 2)
+#if (defined __GNUC_STDC_INLINE__) || (__GNUC__ == 4 && __GNUC_MINOR__ == 2) \
+ || (defined __GNUC_GNU_INLINE__ && defined __cplusplus)
#define __GMP_EXTERN_INLINE extern __inline__ __attribute__ ((__gnu_inline__))
#else
#define __GMP_EXTERN_INLINE extern __inline__
#endif
#define mp_set_memory_functions __gmp_set_memory_functions
-__GMP_DECLSPEC void mp_set_memory_functions __GMP_PROTO ((void *(*) (size_t),
+__GMP_DECLSPEC void mp_set_memory_functions (void *(*) (size_t),
void *(*) (void *, size_t, size_t),
- void (*) (void *, size_t))) __GMP_NOTHROW;
+ void (*) (void *, size_t)) __GMP_NOTHROW;
#define mp_get_memory_functions __gmp_get_memory_functions
-__GMP_DECLSPEC void mp_get_memory_functions __GMP_PROTO ((void *(**) (size_t),
+__GMP_DECLSPEC void mp_get_memory_functions (void *(**) (size_t),
void *(**) (void *, size_t, size_t),
- void (**) (void *, size_t))) __GMP_NOTHROW;
+ void (**) (void *, size_t)) __GMP_NOTHROW;
#define mp_bits_per_limb __gmp_bits_per_limb
-__GMP_DECLSPEC extern __gmp_const int mp_bits_per_limb;
+__GMP_DECLSPEC extern const int mp_bits_per_limb;
#define gmp_errno __gmp_errno
__GMP_DECLSPEC extern int gmp_errno;
#define gmp_version __gmp_version
-__GMP_DECLSPEC extern __gmp_const char * __gmp_const gmp_version;
+__GMP_DECLSPEC extern const char * const gmp_version;
/**************** Random number routines. ****************/
/* obsolete */
#define gmp_randinit __gmp_randinit
-__GMP_DECLSPEC void gmp_randinit __GMP_PROTO ((gmp_randstate_t, gmp_randalg_t, ...));
+__GMP_DECLSPEC void gmp_randinit (gmp_randstate_t, gmp_randalg_t, ...);
#define gmp_randinit_default __gmp_randinit_default
-__GMP_DECLSPEC void gmp_randinit_default __GMP_PROTO ((gmp_randstate_t));
+__GMP_DECLSPEC void gmp_randinit_default (gmp_randstate_t);
#define gmp_randinit_lc_2exp __gmp_randinit_lc_2exp
-__GMP_DECLSPEC void gmp_randinit_lc_2exp __GMP_PROTO ((gmp_randstate_t,
- mpz_srcptr, unsigned long int,
- mp_bitcnt_t));
+__GMP_DECLSPEC void gmp_randinit_lc_2exp (gmp_randstate_t, mpz_srcptr, unsigned long int, mp_bitcnt_t);
#define gmp_randinit_lc_2exp_size __gmp_randinit_lc_2exp_size
-__GMP_DECLSPEC int gmp_randinit_lc_2exp_size __GMP_PROTO ((gmp_randstate_t, mp_bitcnt_t));
+__GMP_DECLSPEC int gmp_randinit_lc_2exp_size (gmp_randstate_t, mp_bitcnt_t);
#define gmp_randinit_mt __gmp_randinit_mt
-__GMP_DECLSPEC void gmp_randinit_mt __GMP_PROTO ((gmp_randstate_t));
+__GMP_DECLSPEC void gmp_randinit_mt (gmp_randstate_t);
#define gmp_randinit_set __gmp_randinit_set
-__GMP_DECLSPEC void gmp_randinit_set __GMP_PROTO ((gmp_randstate_t, __gmp_const __gmp_randstate_struct *));
+__GMP_DECLSPEC void gmp_randinit_set (gmp_randstate_t, const __gmp_randstate_struct *);
#define gmp_randseed __gmp_randseed
-__GMP_DECLSPEC void gmp_randseed __GMP_PROTO ((gmp_randstate_t, mpz_srcptr));
+__GMP_DECLSPEC void gmp_randseed (gmp_randstate_t, mpz_srcptr);
#define gmp_randseed_ui __gmp_randseed_ui
-__GMP_DECLSPEC void gmp_randseed_ui __GMP_PROTO ((gmp_randstate_t, unsigned long int));
+__GMP_DECLSPEC void gmp_randseed_ui (gmp_randstate_t, unsigned long int);
#define gmp_randclear __gmp_randclear
-__GMP_DECLSPEC void gmp_randclear __GMP_PROTO ((gmp_randstate_t));
+__GMP_DECLSPEC void gmp_randclear (gmp_randstate_t);
#define gmp_urandomb_ui __gmp_urandomb_ui
-__GMP_DECLSPEC unsigned long gmp_urandomb_ui __GMP_PROTO ((gmp_randstate_t, unsigned long));
+__GMP_DECLSPEC unsigned long gmp_urandomb_ui (gmp_randstate_t, unsigned long);
#define gmp_urandomm_ui __gmp_urandomm_ui
-__GMP_DECLSPEC unsigned long gmp_urandomm_ui __GMP_PROTO ((gmp_randstate_t, unsigned long));
+__GMP_DECLSPEC unsigned long gmp_urandomm_ui (gmp_randstate_t, unsigned long);
/**************** Formatted output routines. ****************/
#define gmp_asprintf __gmp_asprintf
-__GMP_DECLSPEC int gmp_asprintf __GMP_PROTO ((char **, __gmp_const char *, ...));
+__GMP_DECLSPEC int gmp_asprintf (char **, const char *, ...);
#define gmp_fprintf __gmp_fprintf
#ifdef _GMP_H_HAVE_FILE
-__GMP_DECLSPEC int gmp_fprintf __GMP_PROTO ((FILE *, __gmp_const char *, ...));
+__GMP_DECLSPEC int gmp_fprintf (FILE *, const char *, ...);
#endif
#define gmp_obstack_printf __gmp_obstack_printf
#if defined (_GMP_H_HAVE_OBSTACK)
-__GMP_DECLSPEC int gmp_obstack_printf __GMP_PROTO ((struct obstack *, __gmp_const char *, ...));
+__GMP_DECLSPEC int gmp_obstack_printf (struct obstack *, const char *, ...);
#endif
#define gmp_obstack_vprintf __gmp_obstack_vprintf
#if defined (_GMP_H_HAVE_OBSTACK) && defined (_GMP_H_HAVE_VA_LIST)
-__GMP_DECLSPEC int gmp_obstack_vprintf __GMP_PROTO ((struct obstack *, __gmp_const char *, va_list));
+__GMP_DECLSPEC int gmp_obstack_vprintf (struct obstack *, const char *, va_list);
#endif
#define gmp_printf __gmp_printf
-__GMP_DECLSPEC int gmp_printf __GMP_PROTO ((__gmp_const char *, ...));
+__GMP_DECLSPEC int gmp_printf (const char *, ...);
#define gmp_snprintf __gmp_snprintf
-__GMP_DECLSPEC int gmp_snprintf __GMP_PROTO ((char *, size_t, __gmp_const char *, ...));
+__GMP_DECLSPEC int gmp_snprintf (char *, size_t, const char *, ...);
#define gmp_sprintf __gmp_sprintf
-__GMP_DECLSPEC int gmp_sprintf __GMP_PROTO ((char *, __gmp_const char *, ...));
+__GMP_DECLSPEC int gmp_sprintf (char *, const char *, ...);
#define gmp_vasprintf __gmp_vasprintf
#if defined (_GMP_H_HAVE_VA_LIST)
-__GMP_DECLSPEC int gmp_vasprintf __GMP_PROTO ((char **, __gmp_const char *, va_list));
+__GMP_DECLSPEC int gmp_vasprintf (char **, const char *, va_list);
#endif
#define gmp_vfprintf __gmp_vfprintf
#if defined (_GMP_H_HAVE_FILE) && defined (_GMP_H_HAVE_VA_LIST)
-__GMP_DECLSPEC int gmp_vfprintf __GMP_PROTO ((FILE *, __gmp_const char *, va_list));
+__GMP_DECLSPEC int gmp_vfprintf (FILE *, const char *, va_list);
#endif
#define gmp_vprintf __gmp_vprintf
#if defined (_GMP_H_HAVE_VA_LIST)
-__GMP_DECLSPEC int gmp_vprintf __GMP_PROTO ((__gmp_const char *, va_list));
+__GMP_DECLSPEC int gmp_vprintf (const char *, va_list);
#endif
#define gmp_vsnprintf __gmp_vsnprintf
#if defined (_GMP_H_HAVE_VA_LIST)
-__GMP_DECLSPEC int gmp_vsnprintf __GMP_PROTO ((char *, size_t, __gmp_const char *, va_list));
+__GMP_DECLSPEC int gmp_vsnprintf (char *, size_t, const char *, va_list);
#endif
#define gmp_vsprintf __gmp_vsprintf
#if defined (_GMP_H_HAVE_VA_LIST)
-__GMP_DECLSPEC int gmp_vsprintf __GMP_PROTO ((char *, __gmp_const char *, va_list));
+__GMP_DECLSPEC int gmp_vsprintf (char *, const char *, va_list);
#endif
#define gmp_fscanf __gmp_fscanf
#ifdef _GMP_H_HAVE_FILE
-__GMP_DECLSPEC int gmp_fscanf __GMP_PROTO ((FILE *, __gmp_const char *, ...));
+__GMP_DECLSPEC int gmp_fscanf (FILE *, const char *, ...);
#endif
#define gmp_scanf __gmp_scanf
-__GMP_DECLSPEC int gmp_scanf __GMP_PROTO ((__gmp_const char *, ...));
+__GMP_DECLSPEC int gmp_scanf (const char *, ...);
#define gmp_sscanf __gmp_sscanf
-__GMP_DECLSPEC int gmp_sscanf __GMP_PROTO ((__gmp_const char *, __gmp_const char *, ...));
+__GMP_DECLSPEC int gmp_sscanf (const char *, const char *, ...);
#define gmp_vfscanf __gmp_vfscanf
#if defined (_GMP_H_HAVE_FILE) && defined (_GMP_H_HAVE_VA_LIST)
-__GMP_DECLSPEC int gmp_vfscanf __GMP_PROTO ((FILE *, __gmp_const char *, va_list));
+__GMP_DECLSPEC int gmp_vfscanf (FILE *, const char *, va_list);
#endif
#define gmp_vscanf __gmp_vscanf
#if defined (_GMP_H_HAVE_VA_LIST)
-__GMP_DECLSPEC int gmp_vscanf __GMP_PROTO ((__gmp_const char *, va_list));
+__GMP_DECLSPEC int gmp_vscanf (const char *, va_list);
#endif
#define gmp_vsscanf __gmp_vsscanf
#if defined (_GMP_H_HAVE_VA_LIST)
-__GMP_DECLSPEC int gmp_vsscanf __GMP_PROTO ((__gmp_const char *, __gmp_const char *, va_list));
+__GMP_DECLSPEC int gmp_vsscanf (const char *, const char *, va_list);
#endif
#define _mpz_realloc __gmpz_realloc
#define mpz_realloc __gmpz_realloc
-__GMP_DECLSPEC void *_mpz_realloc __GMP_PROTO ((mpz_ptr, mp_size_t));
+__GMP_DECLSPEC void *_mpz_realloc (mpz_ptr, mp_size_t);
#define mpz_abs __gmpz_abs
#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpz_abs)
-__GMP_DECLSPEC void mpz_abs __GMP_PROTO ((mpz_ptr, mpz_srcptr));
+__GMP_DECLSPEC void mpz_abs (mpz_ptr, mpz_srcptr);
#endif
#define mpz_add __gmpz_add
-__GMP_DECLSPEC void mpz_add __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+__GMP_DECLSPEC void mpz_add (mpz_ptr, mpz_srcptr, mpz_srcptr);
#define mpz_add_ui __gmpz_add_ui
-__GMP_DECLSPEC void mpz_add_ui __GMP_PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+__GMP_DECLSPEC void mpz_add_ui (mpz_ptr, mpz_srcptr, unsigned long int);
#define mpz_addmul __gmpz_addmul
-__GMP_DECLSPEC void mpz_addmul __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+__GMP_DECLSPEC void mpz_addmul (mpz_ptr, mpz_srcptr, mpz_srcptr);
#define mpz_addmul_ui __gmpz_addmul_ui
-__GMP_DECLSPEC void mpz_addmul_ui __GMP_PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+__GMP_DECLSPEC void mpz_addmul_ui (mpz_ptr, mpz_srcptr, unsigned long int);
#define mpz_and __gmpz_and
-__GMP_DECLSPEC void mpz_and __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+__GMP_DECLSPEC void mpz_and (mpz_ptr, mpz_srcptr, mpz_srcptr);
#define mpz_array_init __gmpz_array_init
-__GMP_DECLSPEC void mpz_array_init __GMP_PROTO ((mpz_ptr, mp_size_t, mp_size_t));
+__GMP_DECLSPEC void mpz_array_init (mpz_ptr, mp_size_t, mp_size_t);
#define mpz_bin_ui __gmpz_bin_ui
-__GMP_DECLSPEC void mpz_bin_ui __GMP_PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+__GMP_DECLSPEC void mpz_bin_ui (mpz_ptr, mpz_srcptr, unsigned long int);
#define mpz_bin_uiui __gmpz_bin_uiui
-__GMP_DECLSPEC void mpz_bin_uiui __GMP_PROTO ((mpz_ptr, unsigned long int, unsigned long int));
+__GMP_DECLSPEC void mpz_bin_uiui (mpz_ptr, unsigned long int, unsigned long int);
#define mpz_cdiv_q __gmpz_cdiv_q
-__GMP_DECLSPEC void mpz_cdiv_q __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+__GMP_DECLSPEC void mpz_cdiv_q (mpz_ptr, mpz_srcptr, mpz_srcptr);
#define mpz_cdiv_q_2exp __gmpz_cdiv_q_2exp
-__GMP_DECLSPEC void mpz_cdiv_q_2exp __GMP_PROTO ((mpz_ptr, mpz_srcptr, mp_bitcnt_t));
+__GMP_DECLSPEC void mpz_cdiv_q_2exp (mpz_ptr, mpz_srcptr, mp_bitcnt_t);
#define mpz_cdiv_q_ui __gmpz_cdiv_q_ui
-__GMP_DECLSPEC unsigned long int mpz_cdiv_q_ui __GMP_PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+__GMP_DECLSPEC unsigned long int mpz_cdiv_q_ui (mpz_ptr, mpz_srcptr, unsigned long int);
#define mpz_cdiv_qr __gmpz_cdiv_qr
-__GMP_DECLSPEC void mpz_cdiv_qr __GMP_PROTO ((mpz_ptr, mpz_ptr, mpz_srcptr, mpz_srcptr));
+__GMP_DECLSPEC void mpz_cdiv_qr (mpz_ptr, mpz_ptr, mpz_srcptr, mpz_srcptr);
#define mpz_cdiv_qr_ui __gmpz_cdiv_qr_ui
-__GMP_DECLSPEC unsigned long int mpz_cdiv_qr_ui __GMP_PROTO ((mpz_ptr, mpz_ptr, mpz_srcptr, unsigned long int));
+__GMP_DECLSPEC unsigned long int mpz_cdiv_qr_ui (mpz_ptr, mpz_ptr, mpz_srcptr, unsigned long int);
#define mpz_cdiv_r __gmpz_cdiv_r
-__GMP_DECLSPEC void mpz_cdiv_r __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+__GMP_DECLSPEC void mpz_cdiv_r (mpz_ptr, mpz_srcptr, mpz_srcptr);
#define mpz_cdiv_r_2exp __gmpz_cdiv_r_2exp
-__GMP_DECLSPEC void mpz_cdiv_r_2exp __GMP_PROTO ((mpz_ptr, mpz_srcptr, mp_bitcnt_t));
+__GMP_DECLSPEC void mpz_cdiv_r_2exp (mpz_ptr, mpz_srcptr, mp_bitcnt_t);
#define mpz_cdiv_r_ui __gmpz_cdiv_r_ui
-__GMP_DECLSPEC unsigned long int mpz_cdiv_r_ui __GMP_PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+__GMP_DECLSPEC unsigned long int mpz_cdiv_r_ui (mpz_ptr, mpz_srcptr, unsigned long int);
#define mpz_cdiv_ui __gmpz_cdiv_ui
-__GMP_DECLSPEC unsigned long int mpz_cdiv_ui __GMP_PROTO ((mpz_srcptr, unsigned long int)) __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC unsigned long int mpz_cdiv_ui (mpz_srcptr, unsigned long int) __GMP_ATTRIBUTE_PURE;
#define mpz_clear __gmpz_clear
-__GMP_DECLSPEC void mpz_clear __GMP_PROTO ((mpz_ptr));
+__GMP_DECLSPEC void mpz_clear (mpz_ptr);
#define mpz_clears __gmpz_clears
-__GMP_DECLSPEC void mpz_clears __GMP_PROTO ((mpz_ptr, ...));
+__GMP_DECLSPEC void mpz_clears (mpz_ptr, ...);
#define mpz_clrbit __gmpz_clrbit
-__GMP_DECLSPEC void mpz_clrbit __GMP_PROTO ((mpz_ptr, mp_bitcnt_t));
+__GMP_DECLSPEC void mpz_clrbit (mpz_ptr, mp_bitcnt_t);
#define mpz_cmp __gmpz_cmp
-__GMP_DECLSPEC int mpz_cmp __GMP_PROTO ((mpz_srcptr, mpz_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpz_cmp (mpz_srcptr, mpz_srcptr) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
#define mpz_cmp_d __gmpz_cmp_d
-__GMP_DECLSPEC int mpz_cmp_d __GMP_PROTO ((mpz_srcptr, double)) __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpz_cmp_d (mpz_srcptr, double) __GMP_ATTRIBUTE_PURE;
#define _mpz_cmp_si __gmpz_cmp_si
-__GMP_DECLSPEC int _mpz_cmp_si __GMP_PROTO ((mpz_srcptr, signed long int)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int _mpz_cmp_si (mpz_srcptr, signed long int) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
#define _mpz_cmp_ui __gmpz_cmp_ui
-__GMP_DECLSPEC int _mpz_cmp_ui __GMP_PROTO ((mpz_srcptr, unsigned long int)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int _mpz_cmp_ui (mpz_srcptr, unsigned long int) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
#define mpz_cmpabs __gmpz_cmpabs
-__GMP_DECLSPEC int mpz_cmpabs __GMP_PROTO ((mpz_srcptr, mpz_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpz_cmpabs (mpz_srcptr, mpz_srcptr) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
#define mpz_cmpabs_d __gmpz_cmpabs_d
-__GMP_DECLSPEC int mpz_cmpabs_d __GMP_PROTO ((mpz_srcptr, double)) __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpz_cmpabs_d (mpz_srcptr, double) __GMP_ATTRIBUTE_PURE;
#define mpz_cmpabs_ui __gmpz_cmpabs_ui
-__GMP_DECLSPEC int mpz_cmpabs_ui __GMP_PROTO ((mpz_srcptr, unsigned long int)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpz_cmpabs_ui (mpz_srcptr, unsigned long int) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
#define mpz_com __gmpz_com
-__GMP_DECLSPEC void mpz_com __GMP_PROTO ((mpz_ptr, mpz_srcptr));
+__GMP_DECLSPEC void mpz_com (mpz_ptr, mpz_srcptr);
#define mpz_combit __gmpz_combit
-__GMP_DECLSPEC void mpz_combit __GMP_PROTO ((mpz_ptr, mp_bitcnt_t));
+__GMP_DECLSPEC void mpz_combit (mpz_ptr, mp_bitcnt_t);
#define mpz_congruent_p __gmpz_congruent_p
-__GMP_DECLSPEC int mpz_congruent_p __GMP_PROTO ((mpz_srcptr, mpz_srcptr, mpz_srcptr)) __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpz_congruent_p (mpz_srcptr, mpz_srcptr, mpz_srcptr) __GMP_ATTRIBUTE_PURE;
#define mpz_congruent_2exp_p __gmpz_congruent_2exp_p
-__GMP_DECLSPEC int mpz_congruent_2exp_p __GMP_PROTO ((mpz_srcptr, mpz_srcptr, mp_bitcnt_t)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpz_congruent_2exp_p (mpz_srcptr, mpz_srcptr, mp_bitcnt_t) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
#define mpz_congruent_ui_p __gmpz_congruent_ui_p
-__GMP_DECLSPEC int mpz_congruent_ui_p __GMP_PROTO ((mpz_srcptr, unsigned long, unsigned long)) __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpz_congruent_ui_p (mpz_srcptr, unsigned long, unsigned long) __GMP_ATTRIBUTE_PURE;
#define mpz_divexact __gmpz_divexact
-__GMP_DECLSPEC void mpz_divexact __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+__GMP_DECLSPEC void mpz_divexact (mpz_ptr, mpz_srcptr, mpz_srcptr);
#define mpz_divexact_ui __gmpz_divexact_ui
-__GMP_DECLSPEC void mpz_divexact_ui __GMP_PROTO ((mpz_ptr, mpz_srcptr, unsigned long));
+__GMP_DECLSPEC void mpz_divexact_ui (mpz_ptr, mpz_srcptr, unsigned long);
#define mpz_divisible_p __gmpz_divisible_p
-__GMP_DECLSPEC int mpz_divisible_p __GMP_PROTO ((mpz_srcptr, mpz_srcptr)) __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpz_divisible_p (mpz_srcptr, mpz_srcptr) __GMP_ATTRIBUTE_PURE;
#define mpz_divisible_ui_p __gmpz_divisible_ui_p
-__GMP_DECLSPEC int mpz_divisible_ui_p __GMP_PROTO ((mpz_srcptr, unsigned long)) __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpz_divisible_ui_p (mpz_srcptr, unsigned long) __GMP_ATTRIBUTE_PURE;
#define mpz_divisible_2exp_p __gmpz_divisible_2exp_p
-__GMP_DECLSPEC int mpz_divisible_2exp_p __GMP_PROTO ((mpz_srcptr, mp_bitcnt_t)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpz_divisible_2exp_p (mpz_srcptr, mp_bitcnt_t) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
#define mpz_dump __gmpz_dump
-__GMP_DECLSPEC void mpz_dump __GMP_PROTO ((mpz_srcptr));
+__GMP_DECLSPEC void mpz_dump (mpz_srcptr);
#define mpz_export __gmpz_export
-__GMP_DECLSPEC void *mpz_export __GMP_PROTO ((void *, size_t *, int, size_t, int, size_t, mpz_srcptr));
+__GMP_DECLSPEC void *mpz_export (void *, size_t *, int, size_t, int, size_t, mpz_srcptr);
#define mpz_fac_ui __gmpz_fac_ui
-__GMP_DECLSPEC void mpz_fac_ui __GMP_PROTO ((mpz_ptr, unsigned long int));
+__GMP_DECLSPEC void mpz_fac_ui (mpz_ptr, unsigned long int);
+
+#define mpz_2fac_ui __gmpz_2fac_ui
+__GMP_DECLSPEC void mpz_2fac_ui (mpz_ptr, unsigned long int);
+
+#define mpz_mfac_uiui __gmpz_mfac_uiui
+__GMP_DECLSPEC void mpz_mfac_uiui (mpz_ptr, unsigned long int, unsigned long int);
+
+#define mpz_primorial_ui __gmpz_primorial_ui
+__GMP_DECLSPEC void mpz_primorial_ui (mpz_ptr, unsigned long int);
#define mpz_fdiv_q __gmpz_fdiv_q
-__GMP_DECLSPEC void mpz_fdiv_q __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+__GMP_DECLSPEC void mpz_fdiv_q (mpz_ptr, mpz_srcptr, mpz_srcptr);
#define mpz_fdiv_q_2exp __gmpz_fdiv_q_2exp
-__GMP_DECLSPEC void mpz_fdiv_q_2exp __GMP_PROTO ((mpz_ptr, mpz_srcptr, mp_bitcnt_t));
+__GMP_DECLSPEC void mpz_fdiv_q_2exp (mpz_ptr, mpz_srcptr, mp_bitcnt_t);
#define mpz_fdiv_q_ui __gmpz_fdiv_q_ui
-__GMP_DECLSPEC unsigned long int mpz_fdiv_q_ui __GMP_PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+__GMP_DECLSPEC unsigned long int mpz_fdiv_q_ui (mpz_ptr, mpz_srcptr, unsigned long int);
#define mpz_fdiv_qr __gmpz_fdiv_qr
-__GMP_DECLSPEC void mpz_fdiv_qr __GMP_PROTO ((mpz_ptr, mpz_ptr, mpz_srcptr, mpz_srcptr));
+__GMP_DECLSPEC void mpz_fdiv_qr (mpz_ptr, mpz_ptr, mpz_srcptr, mpz_srcptr);
#define mpz_fdiv_qr_ui __gmpz_fdiv_qr_ui
-__GMP_DECLSPEC unsigned long int mpz_fdiv_qr_ui __GMP_PROTO ((mpz_ptr, mpz_ptr, mpz_srcptr, unsigned long int));
+__GMP_DECLSPEC unsigned long int mpz_fdiv_qr_ui (mpz_ptr, mpz_ptr, mpz_srcptr, unsigned long int);
#define mpz_fdiv_r __gmpz_fdiv_r
-__GMP_DECLSPEC void mpz_fdiv_r __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+__GMP_DECLSPEC void mpz_fdiv_r (mpz_ptr, mpz_srcptr, mpz_srcptr);
#define mpz_fdiv_r_2exp __gmpz_fdiv_r_2exp
-__GMP_DECLSPEC void mpz_fdiv_r_2exp __GMP_PROTO ((mpz_ptr, mpz_srcptr, mp_bitcnt_t));
+__GMP_DECLSPEC void mpz_fdiv_r_2exp (mpz_ptr, mpz_srcptr, mp_bitcnt_t);
#define mpz_fdiv_r_ui __gmpz_fdiv_r_ui
-__GMP_DECLSPEC unsigned long int mpz_fdiv_r_ui __GMP_PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+__GMP_DECLSPEC unsigned long int mpz_fdiv_r_ui (mpz_ptr, mpz_srcptr, unsigned long int);
#define mpz_fdiv_ui __gmpz_fdiv_ui
-__GMP_DECLSPEC unsigned long int mpz_fdiv_ui __GMP_PROTO ((mpz_srcptr, unsigned long int)) __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC unsigned long int mpz_fdiv_ui (mpz_srcptr, unsigned long int) __GMP_ATTRIBUTE_PURE;
#define mpz_fib_ui __gmpz_fib_ui
-__GMP_DECLSPEC void mpz_fib_ui __GMP_PROTO ((mpz_ptr, unsigned long int));
+__GMP_DECLSPEC void mpz_fib_ui (mpz_ptr, unsigned long int);
#define mpz_fib2_ui __gmpz_fib2_ui
-__GMP_DECLSPEC void mpz_fib2_ui __GMP_PROTO ((mpz_ptr, mpz_ptr, unsigned long int));
+__GMP_DECLSPEC void mpz_fib2_ui (mpz_ptr, mpz_ptr, unsigned long int);
#define mpz_fits_sint_p __gmpz_fits_sint_p
-__GMP_DECLSPEC int mpz_fits_sint_p __GMP_PROTO ((mpz_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpz_fits_sint_p (mpz_srcptr) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
#define mpz_fits_slong_p __gmpz_fits_slong_p
-__GMP_DECLSPEC int mpz_fits_slong_p __GMP_PROTO ((mpz_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpz_fits_slong_p (mpz_srcptr) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
#define mpz_fits_sshort_p __gmpz_fits_sshort_p
-__GMP_DECLSPEC int mpz_fits_sshort_p __GMP_PROTO ((mpz_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpz_fits_sshort_p (mpz_srcptr) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
#define mpz_fits_uint_p __gmpz_fits_uint_p
#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpz_fits_uint_p)
-__GMP_DECLSPEC int mpz_fits_uint_p __GMP_PROTO ((mpz_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpz_fits_uint_p (mpz_srcptr) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
#endif
#define mpz_fits_ulong_p __gmpz_fits_ulong_p
#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpz_fits_ulong_p)
-__GMP_DECLSPEC int mpz_fits_ulong_p __GMP_PROTO ((mpz_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpz_fits_ulong_p (mpz_srcptr) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
#endif
#define mpz_fits_ushort_p __gmpz_fits_ushort_p
#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpz_fits_ushort_p)
-__GMP_DECLSPEC int mpz_fits_ushort_p __GMP_PROTO ((mpz_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpz_fits_ushort_p (mpz_srcptr) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
#endif
#define mpz_gcd __gmpz_gcd
-__GMP_DECLSPEC void mpz_gcd __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+__GMP_DECLSPEC void mpz_gcd (mpz_ptr, mpz_srcptr, mpz_srcptr);
#define mpz_gcd_ui __gmpz_gcd_ui
-__GMP_DECLSPEC unsigned long int mpz_gcd_ui __GMP_PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+__GMP_DECLSPEC unsigned long int mpz_gcd_ui (mpz_ptr, mpz_srcptr, unsigned long int);
#define mpz_gcdext __gmpz_gcdext
-__GMP_DECLSPEC void mpz_gcdext __GMP_PROTO ((mpz_ptr, mpz_ptr, mpz_ptr, mpz_srcptr, mpz_srcptr));
+__GMP_DECLSPEC void mpz_gcdext (mpz_ptr, mpz_ptr, mpz_ptr, mpz_srcptr, mpz_srcptr);
#define mpz_get_d __gmpz_get_d
-__GMP_DECLSPEC double mpz_get_d __GMP_PROTO ((mpz_srcptr)) __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC double mpz_get_d (mpz_srcptr) __GMP_ATTRIBUTE_PURE;
#define mpz_get_d_2exp __gmpz_get_d_2exp
-__GMP_DECLSPEC double mpz_get_d_2exp __GMP_PROTO ((signed long int *, mpz_srcptr));
+__GMP_DECLSPEC double mpz_get_d_2exp (signed long int *, mpz_srcptr);
#define mpz_get_si __gmpz_get_si
-__GMP_DECLSPEC /* signed */ long int mpz_get_si __GMP_PROTO ((mpz_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC /* signed */ long int mpz_get_si (mpz_srcptr) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
#define mpz_get_str __gmpz_get_str
-__GMP_DECLSPEC char *mpz_get_str __GMP_PROTO ((char *, int, mpz_srcptr));
+__GMP_DECLSPEC char *mpz_get_str (char *, int, mpz_srcptr);
#define mpz_get_ui __gmpz_get_ui
#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpz_get_ui)
-__GMP_DECLSPEC unsigned long int mpz_get_ui __GMP_PROTO ((mpz_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC unsigned long int mpz_get_ui (mpz_srcptr) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
#endif
#define mpz_getlimbn __gmpz_getlimbn
#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpz_getlimbn)
-__GMP_DECLSPEC mp_limb_t mpz_getlimbn __GMP_PROTO ((mpz_srcptr, mp_size_t)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC mp_limb_t mpz_getlimbn (mpz_srcptr, mp_size_t) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
#endif
#define mpz_hamdist __gmpz_hamdist
-__GMP_DECLSPEC mp_bitcnt_t mpz_hamdist __GMP_PROTO ((mpz_srcptr, mpz_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC mp_bitcnt_t mpz_hamdist (mpz_srcptr, mpz_srcptr) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
#define mpz_import __gmpz_import
-__GMP_DECLSPEC void mpz_import __GMP_PROTO ((mpz_ptr, size_t, int, size_t, int, size_t, __gmp_const void *));
+__GMP_DECLSPEC void mpz_import (mpz_ptr, size_t, int, size_t, int, size_t, const void *);
#define mpz_init __gmpz_init
-__GMP_DECLSPEC void mpz_init __GMP_PROTO ((mpz_ptr));
+__GMP_DECLSPEC void mpz_init (mpz_ptr);
#define mpz_init2 __gmpz_init2
-__GMP_DECLSPEC void mpz_init2 __GMP_PROTO ((mpz_ptr, mp_bitcnt_t));
+__GMP_DECLSPEC void mpz_init2 (mpz_ptr, mp_bitcnt_t);
#define mpz_inits __gmpz_inits
-__GMP_DECLSPEC void mpz_inits __GMP_PROTO ((mpz_ptr, ...));
+__GMP_DECLSPEC void mpz_inits (mpz_ptr, ...);
#define mpz_init_set __gmpz_init_set
-__GMP_DECLSPEC void mpz_init_set __GMP_PROTO ((mpz_ptr, mpz_srcptr));
+__GMP_DECLSPEC void mpz_init_set (mpz_ptr, mpz_srcptr);
#define mpz_init_set_d __gmpz_init_set_d
-__GMP_DECLSPEC void mpz_init_set_d __GMP_PROTO ((mpz_ptr, double));
+__GMP_DECLSPEC void mpz_init_set_d (mpz_ptr, double);
#define mpz_init_set_si __gmpz_init_set_si
-__GMP_DECLSPEC void mpz_init_set_si __GMP_PROTO ((mpz_ptr, signed long int));
+__GMP_DECLSPEC void mpz_init_set_si (mpz_ptr, signed long int);
#define mpz_init_set_str __gmpz_init_set_str
-__GMP_DECLSPEC int mpz_init_set_str __GMP_PROTO ((mpz_ptr, __gmp_const char *, int));
+__GMP_DECLSPEC int mpz_init_set_str (mpz_ptr, const char *, int);
#define mpz_init_set_ui __gmpz_init_set_ui
-__GMP_DECLSPEC void mpz_init_set_ui __GMP_PROTO ((mpz_ptr, unsigned long int));
+__GMP_DECLSPEC void mpz_init_set_ui (mpz_ptr, unsigned long int);
#define mpz_inp_raw __gmpz_inp_raw
#ifdef _GMP_H_HAVE_FILE
-__GMP_DECLSPEC size_t mpz_inp_raw __GMP_PROTO ((mpz_ptr, FILE *));
+__GMP_DECLSPEC size_t mpz_inp_raw (mpz_ptr, FILE *);
#endif
#define mpz_inp_str __gmpz_inp_str
#ifdef _GMP_H_HAVE_FILE
-__GMP_DECLSPEC size_t mpz_inp_str __GMP_PROTO ((mpz_ptr, FILE *, int));
+__GMP_DECLSPEC size_t mpz_inp_str (mpz_ptr, FILE *, int);
#endif
#define mpz_invert __gmpz_invert
-__GMP_DECLSPEC int mpz_invert __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+__GMP_DECLSPEC int mpz_invert (mpz_ptr, mpz_srcptr, mpz_srcptr);
#define mpz_ior __gmpz_ior
-__GMP_DECLSPEC void mpz_ior __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+__GMP_DECLSPEC void mpz_ior (mpz_ptr, mpz_srcptr, mpz_srcptr);
#define mpz_jacobi __gmpz_jacobi
-__GMP_DECLSPEC int mpz_jacobi __GMP_PROTO ((mpz_srcptr, mpz_srcptr)) __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpz_jacobi (mpz_srcptr, mpz_srcptr) __GMP_ATTRIBUTE_PURE;
#define mpz_kronecker mpz_jacobi /* alias */
#define mpz_kronecker_si __gmpz_kronecker_si
-__GMP_DECLSPEC int mpz_kronecker_si __GMP_PROTO ((mpz_srcptr, long)) __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpz_kronecker_si (mpz_srcptr, long) __GMP_ATTRIBUTE_PURE;
#define mpz_kronecker_ui __gmpz_kronecker_ui
-__GMP_DECLSPEC int mpz_kronecker_ui __GMP_PROTO ((mpz_srcptr, unsigned long)) __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpz_kronecker_ui (mpz_srcptr, unsigned long) __GMP_ATTRIBUTE_PURE;
#define mpz_si_kronecker __gmpz_si_kronecker
-__GMP_DECLSPEC int mpz_si_kronecker __GMP_PROTO ((long, mpz_srcptr)) __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpz_si_kronecker (long, mpz_srcptr) __GMP_ATTRIBUTE_PURE;
#define mpz_ui_kronecker __gmpz_ui_kronecker
-__GMP_DECLSPEC int mpz_ui_kronecker __GMP_PROTO ((unsigned long, mpz_srcptr)) __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpz_ui_kronecker (unsigned long, mpz_srcptr) __GMP_ATTRIBUTE_PURE;
#define mpz_lcm __gmpz_lcm
-__GMP_DECLSPEC void mpz_lcm __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+__GMP_DECLSPEC void mpz_lcm (mpz_ptr, mpz_srcptr, mpz_srcptr);
#define mpz_lcm_ui __gmpz_lcm_ui
-__GMP_DECLSPEC void mpz_lcm_ui __GMP_PROTO ((mpz_ptr, mpz_srcptr, unsigned long));
+__GMP_DECLSPEC void mpz_lcm_ui (mpz_ptr, mpz_srcptr, unsigned long);
#define mpz_legendre mpz_jacobi /* alias */
#define mpz_lucnum_ui __gmpz_lucnum_ui
-__GMP_DECLSPEC void mpz_lucnum_ui __GMP_PROTO ((mpz_ptr, unsigned long int));
+__GMP_DECLSPEC void mpz_lucnum_ui (mpz_ptr, unsigned long int);
#define mpz_lucnum2_ui __gmpz_lucnum2_ui
-__GMP_DECLSPEC void mpz_lucnum2_ui __GMP_PROTO ((mpz_ptr, mpz_ptr, unsigned long int));
+__GMP_DECLSPEC void mpz_lucnum2_ui (mpz_ptr, mpz_ptr, unsigned long int);
#define mpz_millerrabin __gmpz_millerrabin
-__GMP_DECLSPEC int mpz_millerrabin __GMP_PROTO ((mpz_srcptr, int)) __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpz_millerrabin (mpz_srcptr, int) __GMP_ATTRIBUTE_PURE;
#define mpz_mod __gmpz_mod
-__GMP_DECLSPEC void mpz_mod __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+__GMP_DECLSPEC void mpz_mod (mpz_ptr, mpz_srcptr, mpz_srcptr);
#define mpz_mod_ui mpz_fdiv_r_ui /* same as fdiv_r because divisor unsigned */
#define mpz_mul __gmpz_mul
-__GMP_DECLSPEC void mpz_mul __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+__GMP_DECLSPEC void mpz_mul (mpz_ptr, mpz_srcptr, mpz_srcptr);
#define mpz_mul_2exp __gmpz_mul_2exp
-__GMP_DECLSPEC void mpz_mul_2exp __GMP_PROTO ((mpz_ptr, mpz_srcptr, mp_bitcnt_t));
+__GMP_DECLSPEC void mpz_mul_2exp (mpz_ptr, mpz_srcptr, mp_bitcnt_t);
#define mpz_mul_si __gmpz_mul_si
-__GMP_DECLSPEC void mpz_mul_si __GMP_PROTO ((mpz_ptr, mpz_srcptr, long int));
+__GMP_DECLSPEC void mpz_mul_si (mpz_ptr, mpz_srcptr, long int);
#define mpz_mul_ui __gmpz_mul_ui
-__GMP_DECLSPEC void mpz_mul_ui __GMP_PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+__GMP_DECLSPEC void mpz_mul_ui (mpz_ptr, mpz_srcptr, unsigned long int);
#define mpz_neg __gmpz_neg
#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpz_neg)
-__GMP_DECLSPEC void mpz_neg __GMP_PROTO ((mpz_ptr, mpz_srcptr));
+__GMP_DECLSPEC void mpz_neg (mpz_ptr, mpz_srcptr);
#endif
#define mpz_nextprime __gmpz_nextprime
-__GMP_DECLSPEC void mpz_nextprime __GMP_PROTO ((mpz_ptr, mpz_srcptr));
+__GMP_DECLSPEC void mpz_nextprime (mpz_ptr, mpz_srcptr);
#define mpz_out_raw __gmpz_out_raw
#ifdef _GMP_H_HAVE_FILE
-__GMP_DECLSPEC size_t mpz_out_raw __GMP_PROTO ((FILE *, mpz_srcptr));
+__GMP_DECLSPEC size_t mpz_out_raw (FILE *, mpz_srcptr);
#endif
#define mpz_out_str __gmpz_out_str
#ifdef _GMP_H_HAVE_FILE
-__GMP_DECLSPEC size_t mpz_out_str __GMP_PROTO ((FILE *, int, mpz_srcptr));
+__GMP_DECLSPEC size_t mpz_out_str (FILE *, int, mpz_srcptr);
#endif
#define mpz_perfect_power_p __gmpz_perfect_power_p
-__GMP_DECLSPEC int mpz_perfect_power_p __GMP_PROTO ((mpz_srcptr)) __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpz_perfect_power_p (mpz_srcptr) __GMP_ATTRIBUTE_PURE;
#define mpz_perfect_square_p __gmpz_perfect_square_p
#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpz_perfect_square_p)
-__GMP_DECLSPEC int mpz_perfect_square_p __GMP_PROTO ((mpz_srcptr)) __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpz_perfect_square_p (mpz_srcptr) __GMP_ATTRIBUTE_PURE;
#endif
#define mpz_popcount __gmpz_popcount
#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpz_popcount)
-__GMP_DECLSPEC mp_bitcnt_t mpz_popcount __GMP_PROTO ((mpz_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC mp_bitcnt_t mpz_popcount (mpz_srcptr) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
#endif
#define mpz_pow_ui __gmpz_pow_ui
-__GMP_DECLSPEC void mpz_pow_ui __GMP_PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+__GMP_DECLSPEC void mpz_pow_ui (mpz_ptr, mpz_srcptr, unsigned long int);
#define mpz_powm __gmpz_powm
-__GMP_DECLSPEC void mpz_powm __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr, mpz_srcptr));
+__GMP_DECLSPEC void mpz_powm (mpz_ptr, mpz_srcptr, mpz_srcptr, mpz_srcptr);
#define mpz_powm_sec __gmpz_powm_sec
-__GMP_DECLSPEC void mpz_powm_sec __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr, mpz_srcptr));
+__GMP_DECLSPEC void mpz_powm_sec (mpz_ptr, mpz_srcptr, mpz_srcptr, mpz_srcptr);
#define mpz_powm_ui __gmpz_powm_ui
-__GMP_DECLSPEC void mpz_powm_ui __GMP_PROTO ((mpz_ptr, mpz_srcptr, unsigned long int, mpz_srcptr));
+__GMP_DECLSPEC void mpz_powm_ui (mpz_ptr, mpz_srcptr, unsigned long int, mpz_srcptr);
#define mpz_probab_prime_p __gmpz_probab_prime_p
-__GMP_DECLSPEC int mpz_probab_prime_p __GMP_PROTO ((mpz_srcptr, int)) __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpz_probab_prime_p (mpz_srcptr, int) __GMP_ATTRIBUTE_PURE;
#define mpz_random __gmpz_random
-__GMP_DECLSPEC void mpz_random __GMP_PROTO ((mpz_ptr, mp_size_t));
+__GMP_DECLSPEC void mpz_random (mpz_ptr, mp_size_t);
#define mpz_random2 __gmpz_random2
-__GMP_DECLSPEC void mpz_random2 __GMP_PROTO ((mpz_ptr, mp_size_t));
+__GMP_DECLSPEC void mpz_random2 (mpz_ptr, mp_size_t);
#define mpz_realloc2 __gmpz_realloc2
-__GMP_DECLSPEC void mpz_realloc2 __GMP_PROTO ((mpz_ptr, mp_bitcnt_t));
+__GMP_DECLSPEC void mpz_realloc2 (mpz_ptr, mp_bitcnt_t);
#define mpz_remove __gmpz_remove
-__GMP_DECLSPEC mp_bitcnt_t mpz_remove __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+__GMP_DECLSPEC mp_bitcnt_t mpz_remove (mpz_ptr, mpz_srcptr, mpz_srcptr);
#define mpz_root __gmpz_root
-__GMP_DECLSPEC int mpz_root __GMP_PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+__GMP_DECLSPEC int mpz_root (mpz_ptr, mpz_srcptr, unsigned long int);
#define mpz_rootrem __gmpz_rootrem
-__GMP_DECLSPEC void mpz_rootrem __GMP_PROTO ((mpz_ptr,mpz_ptr, mpz_srcptr, unsigned long int));
+__GMP_DECLSPEC void mpz_rootrem (mpz_ptr, mpz_ptr, mpz_srcptr, unsigned long int);
#define mpz_rrandomb __gmpz_rrandomb
-__GMP_DECLSPEC void mpz_rrandomb __GMP_PROTO ((mpz_ptr, gmp_randstate_t, mp_bitcnt_t));
+__GMP_DECLSPEC void mpz_rrandomb (mpz_ptr, gmp_randstate_t, mp_bitcnt_t);
#define mpz_scan0 __gmpz_scan0
-__GMP_DECLSPEC mp_bitcnt_t mpz_scan0 __GMP_PROTO ((mpz_srcptr, mp_bitcnt_t)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC mp_bitcnt_t mpz_scan0 (mpz_srcptr, mp_bitcnt_t) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
#define mpz_scan1 __gmpz_scan1
-__GMP_DECLSPEC mp_bitcnt_t mpz_scan1 __GMP_PROTO ((mpz_srcptr, mp_bitcnt_t)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC mp_bitcnt_t mpz_scan1 (mpz_srcptr, mp_bitcnt_t) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
#define mpz_set __gmpz_set
-__GMP_DECLSPEC void mpz_set __GMP_PROTO ((mpz_ptr, mpz_srcptr));
+__GMP_DECLSPEC void mpz_set (mpz_ptr, mpz_srcptr);
#define mpz_set_d __gmpz_set_d
-__GMP_DECLSPEC void mpz_set_d __GMP_PROTO ((mpz_ptr, double));
+__GMP_DECLSPEC void mpz_set_d (mpz_ptr, double);
#define mpz_set_f __gmpz_set_f
-__GMP_DECLSPEC void mpz_set_f __GMP_PROTO ((mpz_ptr, mpf_srcptr));
+__GMP_DECLSPEC void mpz_set_f (mpz_ptr, mpf_srcptr);
#define mpz_set_q __gmpz_set_q
#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpz_set_q)
-__GMP_DECLSPEC void mpz_set_q __GMP_PROTO ((mpz_ptr, mpq_srcptr));
+__GMP_DECLSPEC void mpz_set_q (mpz_ptr, mpq_srcptr);
#endif
#define mpz_set_si __gmpz_set_si
-__GMP_DECLSPEC void mpz_set_si __GMP_PROTO ((mpz_ptr, signed long int));
+__GMP_DECLSPEC void mpz_set_si (mpz_ptr, signed long int);
#define mpz_set_str __gmpz_set_str
-__GMP_DECLSPEC int mpz_set_str __GMP_PROTO ((mpz_ptr, __gmp_const char *, int));
+__GMP_DECLSPEC int mpz_set_str (mpz_ptr, const char *, int);
#define mpz_set_ui __gmpz_set_ui
-__GMP_DECLSPEC void mpz_set_ui __GMP_PROTO ((mpz_ptr, unsigned long int));
+__GMP_DECLSPEC void mpz_set_ui (mpz_ptr, unsigned long int);
#define mpz_setbit __gmpz_setbit
-__GMP_DECLSPEC void mpz_setbit __GMP_PROTO ((mpz_ptr, mp_bitcnt_t));
+__GMP_DECLSPEC void mpz_setbit (mpz_ptr, mp_bitcnt_t);
#define mpz_size __gmpz_size
#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpz_size)
-__GMP_DECLSPEC size_t mpz_size __GMP_PROTO ((mpz_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC size_t mpz_size (mpz_srcptr) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
#endif
#define mpz_sizeinbase __gmpz_sizeinbase
-__GMP_DECLSPEC size_t mpz_sizeinbase __GMP_PROTO ((mpz_srcptr, int)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC size_t mpz_sizeinbase (mpz_srcptr, int) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
#define mpz_sqrt __gmpz_sqrt
-__GMP_DECLSPEC void mpz_sqrt __GMP_PROTO ((mpz_ptr, mpz_srcptr));
+__GMP_DECLSPEC void mpz_sqrt (mpz_ptr, mpz_srcptr);
#define mpz_sqrtrem __gmpz_sqrtrem
-__GMP_DECLSPEC void mpz_sqrtrem __GMP_PROTO ((mpz_ptr, mpz_ptr, mpz_srcptr));
+__GMP_DECLSPEC void mpz_sqrtrem (mpz_ptr, mpz_ptr, mpz_srcptr);
#define mpz_sub __gmpz_sub
-__GMP_DECLSPEC void mpz_sub __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+__GMP_DECLSPEC void mpz_sub (mpz_ptr, mpz_srcptr, mpz_srcptr);
#define mpz_sub_ui __gmpz_sub_ui
-__GMP_DECLSPEC void mpz_sub_ui __GMP_PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+__GMP_DECLSPEC void mpz_sub_ui (mpz_ptr, mpz_srcptr, unsigned long int);
#define mpz_ui_sub __gmpz_ui_sub
-__GMP_DECLSPEC void mpz_ui_sub __GMP_PROTO ((mpz_ptr, unsigned long int, mpz_srcptr));
+__GMP_DECLSPEC void mpz_ui_sub (mpz_ptr, unsigned long int, mpz_srcptr);
#define mpz_submul __gmpz_submul
-__GMP_DECLSPEC void mpz_submul __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+__GMP_DECLSPEC void mpz_submul (mpz_ptr, mpz_srcptr, mpz_srcptr);
#define mpz_submul_ui __gmpz_submul_ui
-__GMP_DECLSPEC void mpz_submul_ui __GMP_PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+__GMP_DECLSPEC void mpz_submul_ui (mpz_ptr, mpz_srcptr, unsigned long int);
#define mpz_swap __gmpz_swap
-__GMP_DECLSPEC void mpz_swap __GMP_PROTO ((mpz_ptr, mpz_ptr)) __GMP_NOTHROW;
+__GMP_DECLSPEC void mpz_swap (mpz_ptr, mpz_ptr) __GMP_NOTHROW;
#define mpz_tdiv_ui __gmpz_tdiv_ui
-__GMP_DECLSPEC unsigned long int mpz_tdiv_ui __GMP_PROTO ((mpz_srcptr, unsigned long int)) __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC unsigned long int mpz_tdiv_ui (mpz_srcptr, unsigned long int) __GMP_ATTRIBUTE_PURE;
#define mpz_tdiv_q __gmpz_tdiv_q
-__GMP_DECLSPEC void mpz_tdiv_q __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+__GMP_DECLSPEC void mpz_tdiv_q (mpz_ptr, mpz_srcptr, mpz_srcptr);
#define mpz_tdiv_q_2exp __gmpz_tdiv_q_2exp
-__GMP_DECLSPEC void mpz_tdiv_q_2exp __GMP_PROTO ((mpz_ptr, mpz_srcptr, mp_bitcnt_t));
+__GMP_DECLSPEC void mpz_tdiv_q_2exp (mpz_ptr, mpz_srcptr, mp_bitcnt_t);
#define mpz_tdiv_q_ui __gmpz_tdiv_q_ui
-__GMP_DECLSPEC unsigned long int mpz_tdiv_q_ui __GMP_PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+__GMP_DECLSPEC unsigned long int mpz_tdiv_q_ui (mpz_ptr, mpz_srcptr, unsigned long int);
#define mpz_tdiv_qr __gmpz_tdiv_qr
-__GMP_DECLSPEC void mpz_tdiv_qr __GMP_PROTO ((mpz_ptr, mpz_ptr, mpz_srcptr, mpz_srcptr));
+__GMP_DECLSPEC void mpz_tdiv_qr (mpz_ptr, mpz_ptr, mpz_srcptr, mpz_srcptr);
#define mpz_tdiv_qr_ui __gmpz_tdiv_qr_ui
-__GMP_DECLSPEC unsigned long int mpz_tdiv_qr_ui __GMP_PROTO ((mpz_ptr, mpz_ptr, mpz_srcptr, unsigned long int));
+__GMP_DECLSPEC unsigned long int mpz_tdiv_qr_ui (mpz_ptr, mpz_ptr, mpz_srcptr, unsigned long int);
#define mpz_tdiv_r __gmpz_tdiv_r
-__GMP_DECLSPEC void mpz_tdiv_r __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+__GMP_DECLSPEC void mpz_tdiv_r (mpz_ptr, mpz_srcptr, mpz_srcptr);
#define mpz_tdiv_r_2exp __gmpz_tdiv_r_2exp
-__GMP_DECLSPEC void mpz_tdiv_r_2exp __GMP_PROTO ((mpz_ptr, mpz_srcptr, mp_bitcnt_t));
+__GMP_DECLSPEC void mpz_tdiv_r_2exp (mpz_ptr, mpz_srcptr, mp_bitcnt_t);
#define mpz_tdiv_r_ui __gmpz_tdiv_r_ui
-__GMP_DECLSPEC unsigned long int mpz_tdiv_r_ui __GMP_PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
+__GMP_DECLSPEC unsigned long int mpz_tdiv_r_ui (mpz_ptr, mpz_srcptr, unsigned long int);
#define mpz_tstbit __gmpz_tstbit
-__GMP_DECLSPEC int mpz_tstbit __GMP_PROTO ((mpz_srcptr, mp_bitcnt_t)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpz_tstbit (mpz_srcptr, mp_bitcnt_t) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
#define mpz_ui_pow_ui __gmpz_ui_pow_ui
-__GMP_DECLSPEC void mpz_ui_pow_ui __GMP_PROTO ((mpz_ptr, unsigned long int, unsigned long int));
+__GMP_DECLSPEC void mpz_ui_pow_ui (mpz_ptr, unsigned long int, unsigned long int);
#define mpz_urandomb __gmpz_urandomb
-__GMP_DECLSPEC void mpz_urandomb __GMP_PROTO ((mpz_ptr, gmp_randstate_t, mp_bitcnt_t));
+__GMP_DECLSPEC void mpz_urandomb (mpz_ptr, gmp_randstate_t, mp_bitcnt_t);
#define mpz_urandomm __gmpz_urandomm
-__GMP_DECLSPEC void mpz_urandomm __GMP_PROTO ((mpz_ptr, gmp_randstate_t, mpz_srcptr));
+__GMP_DECLSPEC void mpz_urandomm (mpz_ptr, gmp_randstate_t, mpz_srcptr);
#define mpz_xor __gmpz_xor
#define mpz_eor __gmpz_xor
-__GMP_DECLSPEC void mpz_xor __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+__GMP_DECLSPEC void mpz_xor (mpz_ptr, mpz_srcptr, mpz_srcptr);
/**************** Rational (i.e. Q) routines. ****************/
#define mpq_abs __gmpq_abs
#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpq_abs)
-__GMP_DECLSPEC void mpq_abs __GMP_PROTO ((mpq_ptr, mpq_srcptr));
+__GMP_DECLSPEC void mpq_abs (mpq_ptr, mpq_srcptr);
#endif
#define mpq_add __gmpq_add
-__GMP_DECLSPEC void mpq_add __GMP_PROTO ((mpq_ptr, mpq_srcptr, mpq_srcptr));
+__GMP_DECLSPEC void mpq_add (mpq_ptr, mpq_srcptr, mpq_srcptr);
#define mpq_canonicalize __gmpq_canonicalize
-__GMP_DECLSPEC void mpq_canonicalize __GMP_PROTO ((mpq_ptr));
+__GMP_DECLSPEC void mpq_canonicalize (mpq_ptr);
#define mpq_clear __gmpq_clear
-__GMP_DECLSPEC void mpq_clear __GMP_PROTO ((mpq_ptr));
+__GMP_DECLSPEC void mpq_clear (mpq_ptr);
#define mpq_clears __gmpq_clears
-__GMP_DECLSPEC void mpq_clears __GMP_PROTO ((mpq_ptr, ...));
+__GMP_DECLSPEC void mpq_clears (mpq_ptr, ...);
#define mpq_cmp __gmpq_cmp
-__GMP_DECLSPEC int mpq_cmp __GMP_PROTO ((mpq_srcptr, mpq_srcptr)) __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpq_cmp (mpq_srcptr, mpq_srcptr) __GMP_ATTRIBUTE_PURE;
#define _mpq_cmp_si __gmpq_cmp_si
-__GMP_DECLSPEC int _mpq_cmp_si __GMP_PROTO ((mpq_srcptr, long, unsigned long)) __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int _mpq_cmp_si (mpq_srcptr, long, unsigned long) __GMP_ATTRIBUTE_PURE;
#define _mpq_cmp_ui __gmpq_cmp_ui
-__GMP_DECLSPEC int _mpq_cmp_ui __GMP_PROTO ((mpq_srcptr, unsigned long int, unsigned long int)) __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int _mpq_cmp_ui (mpq_srcptr, unsigned long int, unsigned long int) __GMP_ATTRIBUTE_PURE;
#define mpq_div __gmpq_div
-__GMP_DECLSPEC void mpq_div __GMP_PROTO ((mpq_ptr, mpq_srcptr, mpq_srcptr));
+__GMP_DECLSPEC void mpq_div (mpq_ptr, mpq_srcptr, mpq_srcptr);
#define mpq_div_2exp __gmpq_div_2exp
-__GMP_DECLSPEC void mpq_div_2exp __GMP_PROTO ((mpq_ptr, mpq_srcptr, mp_bitcnt_t));
+__GMP_DECLSPEC void mpq_div_2exp (mpq_ptr, mpq_srcptr, mp_bitcnt_t);
#define mpq_equal __gmpq_equal
-__GMP_DECLSPEC int mpq_equal __GMP_PROTO ((mpq_srcptr, mpq_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpq_equal (mpq_srcptr, mpq_srcptr) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
#define mpq_get_num __gmpq_get_num
-__GMP_DECLSPEC void mpq_get_num __GMP_PROTO ((mpz_ptr, mpq_srcptr));
+__GMP_DECLSPEC void mpq_get_num (mpz_ptr, mpq_srcptr);
#define mpq_get_den __gmpq_get_den
-__GMP_DECLSPEC void mpq_get_den __GMP_PROTO ((mpz_ptr, mpq_srcptr));
+__GMP_DECLSPEC void mpq_get_den (mpz_ptr, mpq_srcptr);
#define mpq_get_d __gmpq_get_d
-__GMP_DECLSPEC double mpq_get_d __GMP_PROTO ((mpq_srcptr)) __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC double mpq_get_d (mpq_srcptr) __GMP_ATTRIBUTE_PURE;
#define mpq_get_str __gmpq_get_str
-__GMP_DECLSPEC char *mpq_get_str __GMP_PROTO ((char *, int, mpq_srcptr));
+__GMP_DECLSPEC char *mpq_get_str (char *, int, mpq_srcptr);
#define mpq_init __gmpq_init
-__GMP_DECLSPEC void mpq_init __GMP_PROTO ((mpq_ptr));
+__GMP_DECLSPEC void mpq_init (mpq_ptr);
#define mpq_inits __gmpq_inits
-__GMP_DECLSPEC void mpq_inits __GMP_PROTO ((mpq_ptr, ...));
+__GMP_DECLSPEC void mpq_inits (mpq_ptr, ...);
#define mpq_inp_str __gmpq_inp_str
#ifdef _GMP_H_HAVE_FILE
-__GMP_DECLSPEC size_t mpq_inp_str __GMP_PROTO ((mpq_ptr, FILE *, int));
+__GMP_DECLSPEC size_t mpq_inp_str (mpq_ptr, FILE *, int);
#endif
#define mpq_inv __gmpq_inv
-__GMP_DECLSPEC void mpq_inv __GMP_PROTO ((mpq_ptr, mpq_srcptr));
+__GMP_DECLSPEC void mpq_inv (mpq_ptr, mpq_srcptr);
#define mpq_mul __gmpq_mul
-__GMP_DECLSPEC void mpq_mul __GMP_PROTO ((mpq_ptr, mpq_srcptr, mpq_srcptr));
+__GMP_DECLSPEC void mpq_mul (mpq_ptr, mpq_srcptr, mpq_srcptr);
#define mpq_mul_2exp __gmpq_mul_2exp
-__GMP_DECLSPEC void mpq_mul_2exp __GMP_PROTO ((mpq_ptr, mpq_srcptr, mp_bitcnt_t));
+__GMP_DECLSPEC void mpq_mul_2exp (mpq_ptr, mpq_srcptr, mp_bitcnt_t);
#define mpq_neg __gmpq_neg
#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpq_neg)
-__GMP_DECLSPEC void mpq_neg __GMP_PROTO ((mpq_ptr, mpq_srcptr));
+__GMP_DECLSPEC void mpq_neg (mpq_ptr, mpq_srcptr);
#endif
#define mpq_out_str __gmpq_out_str
#ifdef _GMP_H_HAVE_FILE
-__GMP_DECLSPEC size_t mpq_out_str __GMP_PROTO ((FILE *, int, mpq_srcptr));
+__GMP_DECLSPEC size_t mpq_out_str (FILE *, int, mpq_srcptr);
#endif
#define mpq_set __gmpq_set
-__GMP_DECLSPEC void mpq_set __GMP_PROTO ((mpq_ptr, mpq_srcptr));
+__GMP_DECLSPEC void mpq_set (mpq_ptr, mpq_srcptr);
#define mpq_set_d __gmpq_set_d
-__GMP_DECLSPEC void mpq_set_d __GMP_PROTO ((mpq_ptr, double));
+__GMP_DECLSPEC void mpq_set_d (mpq_ptr, double);
#define mpq_set_den __gmpq_set_den
-__GMP_DECLSPEC void mpq_set_den __GMP_PROTO ((mpq_ptr, mpz_srcptr));
+__GMP_DECLSPEC void mpq_set_den (mpq_ptr, mpz_srcptr);
#define mpq_set_f __gmpq_set_f
-__GMP_DECLSPEC void mpq_set_f __GMP_PROTO ((mpq_ptr, mpf_srcptr));
+__GMP_DECLSPEC void mpq_set_f (mpq_ptr, mpf_srcptr);
#define mpq_set_num __gmpq_set_num
-__GMP_DECLSPEC void mpq_set_num __GMP_PROTO ((mpq_ptr, mpz_srcptr));
+__GMP_DECLSPEC void mpq_set_num (mpq_ptr, mpz_srcptr);
#define mpq_set_si __gmpq_set_si
-__GMP_DECLSPEC void mpq_set_si __GMP_PROTO ((mpq_ptr, signed long int, unsigned long int));
+__GMP_DECLSPEC void mpq_set_si (mpq_ptr, signed long int, unsigned long int);
#define mpq_set_str __gmpq_set_str
-__GMP_DECLSPEC int mpq_set_str __GMP_PROTO ((mpq_ptr, __gmp_const char *, int));
+__GMP_DECLSPEC int mpq_set_str (mpq_ptr, const char *, int);
#define mpq_set_ui __gmpq_set_ui
-__GMP_DECLSPEC void mpq_set_ui __GMP_PROTO ((mpq_ptr, unsigned long int, unsigned long int));
+__GMP_DECLSPEC void mpq_set_ui (mpq_ptr, unsigned long int, unsigned long int);
#define mpq_set_z __gmpq_set_z
-__GMP_DECLSPEC void mpq_set_z __GMP_PROTO ((mpq_ptr, mpz_srcptr));
+__GMP_DECLSPEC void mpq_set_z (mpq_ptr, mpz_srcptr);
#define mpq_sub __gmpq_sub
-__GMP_DECLSPEC void mpq_sub __GMP_PROTO ((mpq_ptr, mpq_srcptr, mpq_srcptr));
+__GMP_DECLSPEC void mpq_sub (mpq_ptr, mpq_srcptr, mpq_srcptr);
#define mpq_swap __gmpq_swap
-__GMP_DECLSPEC void mpq_swap __GMP_PROTO ((mpq_ptr, mpq_ptr)) __GMP_NOTHROW;
+__GMP_DECLSPEC void mpq_swap (mpq_ptr, mpq_ptr) __GMP_NOTHROW;
/**************** Float (i.e. F) routines. ****************/
#define mpf_abs __gmpf_abs
-__GMP_DECLSPEC void mpf_abs __GMP_PROTO ((mpf_ptr, mpf_srcptr));
+__GMP_DECLSPEC void mpf_abs (mpf_ptr, mpf_srcptr);
#define mpf_add __gmpf_add
-__GMP_DECLSPEC void mpf_add __GMP_PROTO ((mpf_ptr, mpf_srcptr, mpf_srcptr));
+__GMP_DECLSPEC void mpf_add (mpf_ptr, mpf_srcptr, mpf_srcptr);
#define mpf_add_ui __gmpf_add_ui
-__GMP_DECLSPEC void mpf_add_ui __GMP_PROTO ((mpf_ptr, mpf_srcptr, unsigned long int));
+__GMP_DECLSPEC void mpf_add_ui (mpf_ptr, mpf_srcptr, unsigned long int);
#define mpf_ceil __gmpf_ceil
-__GMP_DECLSPEC void mpf_ceil __GMP_PROTO ((mpf_ptr, mpf_srcptr));
+__GMP_DECLSPEC void mpf_ceil (mpf_ptr, mpf_srcptr);
#define mpf_clear __gmpf_clear
-__GMP_DECLSPEC void mpf_clear __GMP_PROTO ((mpf_ptr));
+__GMP_DECLSPEC void mpf_clear (mpf_ptr);
#define mpf_clears __gmpf_clears
-__GMP_DECLSPEC void mpf_clears __GMP_PROTO ((mpf_ptr, ...));
+__GMP_DECLSPEC void mpf_clears (mpf_ptr, ...);
#define mpf_cmp __gmpf_cmp
-__GMP_DECLSPEC int mpf_cmp __GMP_PROTO ((mpf_srcptr, mpf_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpf_cmp (mpf_srcptr, mpf_srcptr) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
#define mpf_cmp_d __gmpf_cmp_d
-__GMP_DECLSPEC int mpf_cmp_d __GMP_PROTO ((mpf_srcptr, double)) __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpf_cmp_d (mpf_srcptr, double) __GMP_ATTRIBUTE_PURE;
#define mpf_cmp_si __gmpf_cmp_si
-__GMP_DECLSPEC int mpf_cmp_si __GMP_PROTO ((mpf_srcptr, signed long int)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpf_cmp_si (mpf_srcptr, signed long int) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
#define mpf_cmp_ui __gmpf_cmp_ui
-__GMP_DECLSPEC int mpf_cmp_ui __GMP_PROTO ((mpf_srcptr, unsigned long int)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpf_cmp_ui (mpf_srcptr, unsigned long int) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
#define mpf_div __gmpf_div
-__GMP_DECLSPEC void mpf_div __GMP_PROTO ((mpf_ptr, mpf_srcptr, mpf_srcptr));
+__GMP_DECLSPEC void mpf_div (mpf_ptr, mpf_srcptr, mpf_srcptr);
#define mpf_div_2exp __gmpf_div_2exp
-__GMP_DECLSPEC void mpf_div_2exp __GMP_PROTO ((mpf_ptr, mpf_srcptr, mp_bitcnt_t));
+__GMP_DECLSPEC void mpf_div_2exp (mpf_ptr, mpf_srcptr, mp_bitcnt_t);
#define mpf_div_ui __gmpf_div_ui
-__GMP_DECLSPEC void mpf_div_ui __GMP_PROTO ((mpf_ptr, mpf_srcptr, unsigned long int));
+__GMP_DECLSPEC void mpf_div_ui (mpf_ptr, mpf_srcptr, unsigned long int);
#define mpf_dump __gmpf_dump
-__GMP_DECLSPEC void mpf_dump __GMP_PROTO ((mpf_srcptr));
+__GMP_DECLSPEC void mpf_dump (mpf_srcptr);
#define mpf_eq __gmpf_eq
-__GMP_DECLSPEC int mpf_eq __GMP_PROTO ((mpf_srcptr, mpf_srcptr, mp_bitcnt_t)) __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpf_eq (mpf_srcptr, mpf_srcptr, mp_bitcnt_t) __GMP_ATTRIBUTE_PURE;
#define mpf_fits_sint_p __gmpf_fits_sint_p
-__GMP_DECLSPEC int mpf_fits_sint_p __GMP_PROTO ((mpf_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpf_fits_sint_p (mpf_srcptr) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
#define mpf_fits_slong_p __gmpf_fits_slong_p
-__GMP_DECLSPEC int mpf_fits_slong_p __GMP_PROTO ((mpf_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpf_fits_slong_p (mpf_srcptr) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
#define mpf_fits_sshort_p __gmpf_fits_sshort_p
-__GMP_DECLSPEC int mpf_fits_sshort_p __GMP_PROTO ((mpf_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpf_fits_sshort_p (mpf_srcptr) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
#define mpf_fits_uint_p __gmpf_fits_uint_p
-__GMP_DECLSPEC int mpf_fits_uint_p __GMP_PROTO ((mpf_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpf_fits_uint_p (mpf_srcptr) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
#define mpf_fits_ulong_p __gmpf_fits_ulong_p
-__GMP_DECLSPEC int mpf_fits_ulong_p __GMP_PROTO ((mpf_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpf_fits_ulong_p (mpf_srcptr) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
#define mpf_fits_ushort_p __gmpf_fits_ushort_p
-__GMP_DECLSPEC int mpf_fits_ushort_p __GMP_PROTO ((mpf_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpf_fits_ushort_p (mpf_srcptr) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
#define mpf_floor __gmpf_floor
-__GMP_DECLSPEC void mpf_floor __GMP_PROTO ((mpf_ptr, mpf_srcptr));
+__GMP_DECLSPEC void mpf_floor (mpf_ptr, mpf_srcptr);
#define mpf_get_d __gmpf_get_d
-__GMP_DECLSPEC double mpf_get_d __GMP_PROTO ((mpf_srcptr)) __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC double mpf_get_d (mpf_srcptr) __GMP_ATTRIBUTE_PURE;
#define mpf_get_d_2exp __gmpf_get_d_2exp
-__GMP_DECLSPEC double mpf_get_d_2exp __GMP_PROTO ((signed long int *, mpf_srcptr));
+__GMP_DECLSPEC double mpf_get_d_2exp (signed long int *, mpf_srcptr);
#define mpf_get_default_prec __gmpf_get_default_prec
-__GMP_DECLSPEC mp_bitcnt_t mpf_get_default_prec __GMP_PROTO ((void)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC mp_bitcnt_t mpf_get_default_prec (void) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
#define mpf_get_prec __gmpf_get_prec
-__GMP_DECLSPEC mp_bitcnt_t mpf_get_prec __GMP_PROTO ((mpf_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC mp_bitcnt_t mpf_get_prec (mpf_srcptr) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
#define mpf_get_si __gmpf_get_si
-__GMP_DECLSPEC long mpf_get_si __GMP_PROTO ((mpf_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC long mpf_get_si (mpf_srcptr) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
#define mpf_get_str __gmpf_get_str
-__GMP_DECLSPEC char *mpf_get_str __GMP_PROTO ((char *, mp_exp_t *, int, size_t, mpf_srcptr));
+__GMP_DECLSPEC char *mpf_get_str (char *, mp_exp_t *, int, size_t, mpf_srcptr);
#define mpf_get_ui __gmpf_get_ui
-__GMP_DECLSPEC unsigned long mpf_get_ui __GMP_PROTO ((mpf_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC unsigned long mpf_get_ui (mpf_srcptr) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
#define mpf_init __gmpf_init
-__GMP_DECLSPEC void mpf_init __GMP_PROTO ((mpf_ptr));
+__GMP_DECLSPEC void mpf_init (mpf_ptr);
#define mpf_init2 __gmpf_init2
-__GMP_DECLSPEC void mpf_init2 __GMP_PROTO ((mpf_ptr, mp_bitcnt_t));
+__GMP_DECLSPEC void mpf_init2 (mpf_ptr, mp_bitcnt_t);
#define mpf_inits __gmpf_inits
-__GMP_DECLSPEC void mpf_inits __GMP_PROTO ((mpf_ptr, ...));
+__GMP_DECLSPEC void mpf_inits (mpf_ptr, ...);
#define mpf_init_set __gmpf_init_set
-__GMP_DECLSPEC void mpf_init_set __GMP_PROTO ((mpf_ptr, mpf_srcptr));
+__GMP_DECLSPEC void mpf_init_set (mpf_ptr, mpf_srcptr);
#define mpf_init_set_d __gmpf_init_set_d
-__GMP_DECLSPEC void mpf_init_set_d __GMP_PROTO ((mpf_ptr, double));
+__GMP_DECLSPEC void mpf_init_set_d (mpf_ptr, double);
#define mpf_init_set_si __gmpf_init_set_si
-__GMP_DECLSPEC void mpf_init_set_si __GMP_PROTO ((mpf_ptr, signed long int));
+__GMP_DECLSPEC void mpf_init_set_si (mpf_ptr, signed long int);
#define mpf_init_set_str __gmpf_init_set_str
-__GMP_DECLSPEC int mpf_init_set_str __GMP_PROTO ((mpf_ptr, __gmp_const char *, int));
+__GMP_DECLSPEC int mpf_init_set_str (mpf_ptr, const char *, int);
#define mpf_init_set_ui __gmpf_init_set_ui
-__GMP_DECLSPEC void mpf_init_set_ui __GMP_PROTO ((mpf_ptr, unsigned long int));
+__GMP_DECLSPEC void mpf_init_set_ui (mpf_ptr, unsigned long int);
#define mpf_inp_str __gmpf_inp_str
#ifdef _GMP_H_HAVE_FILE
-__GMP_DECLSPEC size_t mpf_inp_str __GMP_PROTO ((mpf_ptr, FILE *, int));
+__GMP_DECLSPEC size_t mpf_inp_str (mpf_ptr, FILE *, int);
#endif
#define mpf_integer_p __gmpf_integer_p
-__GMP_DECLSPEC int mpf_integer_p __GMP_PROTO ((mpf_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpf_integer_p (mpf_srcptr) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
#define mpf_mul __gmpf_mul
-__GMP_DECLSPEC void mpf_mul __GMP_PROTO ((mpf_ptr, mpf_srcptr, mpf_srcptr));
+__GMP_DECLSPEC void mpf_mul (mpf_ptr, mpf_srcptr, mpf_srcptr);
#define mpf_mul_2exp __gmpf_mul_2exp
-__GMP_DECLSPEC void mpf_mul_2exp __GMP_PROTO ((mpf_ptr, mpf_srcptr, mp_bitcnt_t));
+__GMP_DECLSPEC void mpf_mul_2exp (mpf_ptr, mpf_srcptr, mp_bitcnt_t);
#define mpf_mul_ui __gmpf_mul_ui
-__GMP_DECLSPEC void mpf_mul_ui __GMP_PROTO ((mpf_ptr, mpf_srcptr, unsigned long int));
+__GMP_DECLSPEC void mpf_mul_ui (mpf_ptr, mpf_srcptr, unsigned long int);
#define mpf_neg __gmpf_neg
-__GMP_DECLSPEC void mpf_neg __GMP_PROTO ((mpf_ptr, mpf_srcptr));
+__GMP_DECLSPEC void mpf_neg (mpf_ptr, mpf_srcptr);
#define mpf_out_str __gmpf_out_str
#ifdef _GMP_H_HAVE_FILE
-__GMP_DECLSPEC size_t mpf_out_str __GMP_PROTO ((FILE *, int, size_t, mpf_srcptr));
+__GMP_DECLSPEC size_t mpf_out_str (FILE *, int, size_t, mpf_srcptr);
#endif
#define mpf_pow_ui __gmpf_pow_ui
-__GMP_DECLSPEC void mpf_pow_ui __GMP_PROTO ((mpf_ptr, mpf_srcptr, unsigned long int));
+__GMP_DECLSPEC void mpf_pow_ui (mpf_ptr, mpf_srcptr, unsigned long int);
#define mpf_random2 __gmpf_random2
-__GMP_DECLSPEC void mpf_random2 __GMP_PROTO ((mpf_ptr, mp_size_t, mp_exp_t));
+__GMP_DECLSPEC void mpf_random2 (mpf_ptr, mp_size_t, mp_exp_t);
#define mpf_reldiff __gmpf_reldiff
-__GMP_DECLSPEC void mpf_reldiff __GMP_PROTO ((mpf_ptr, mpf_srcptr, mpf_srcptr));
+__GMP_DECLSPEC void mpf_reldiff (mpf_ptr, mpf_srcptr, mpf_srcptr);
#define mpf_set __gmpf_set
-__GMP_DECLSPEC void mpf_set __GMP_PROTO ((mpf_ptr, mpf_srcptr));
+__GMP_DECLSPEC void mpf_set (mpf_ptr, mpf_srcptr);
#define mpf_set_d __gmpf_set_d
-__GMP_DECLSPEC void mpf_set_d __GMP_PROTO ((mpf_ptr, double));
+__GMP_DECLSPEC void mpf_set_d (mpf_ptr, double);
#define mpf_set_default_prec __gmpf_set_default_prec
-__GMP_DECLSPEC void mpf_set_default_prec __GMP_PROTO ((mp_bitcnt_t)) __GMP_NOTHROW;
+__GMP_DECLSPEC void mpf_set_default_prec (mp_bitcnt_t) __GMP_NOTHROW;
#define mpf_set_prec __gmpf_set_prec
-__GMP_DECLSPEC void mpf_set_prec __GMP_PROTO ((mpf_ptr, mp_bitcnt_t));
+__GMP_DECLSPEC void mpf_set_prec (mpf_ptr, mp_bitcnt_t);
#define mpf_set_prec_raw __gmpf_set_prec_raw
-__GMP_DECLSPEC void mpf_set_prec_raw __GMP_PROTO ((mpf_ptr, mp_bitcnt_t)) __GMP_NOTHROW;
+__GMP_DECLSPEC void mpf_set_prec_raw (mpf_ptr, mp_bitcnt_t) __GMP_NOTHROW;
#define mpf_set_q __gmpf_set_q
-__GMP_DECLSPEC void mpf_set_q __GMP_PROTO ((mpf_ptr, mpq_srcptr));
+__GMP_DECLSPEC void mpf_set_q (mpf_ptr, mpq_srcptr);
#define mpf_set_si __gmpf_set_si
-__GMP_DECLSPEC void mpf_set_si __GMP_PROTO ((mpf_ptr, signed long int));
+__GMP_DECLSPEC void mpf_set_si (mpf_ptr, signed long int);
#define mpf_set_str __gmpf_set_str
-__GMP_DECLSPEC int mpf_set_str __GMP_PROTO ((mpf_ptr, __gmp_const char *, int));
+__GMP_DECLSPEC int mpf_set_str (mpf_ptr, const char *, int);
#define mpf_set_ui __gmpf_set_ui
-__GMP_DECLSPEC void mpf_set_ui __GMP_PROTO ((mpf_ptr, unsigned long int));
+__GMP_DECLSPEC void mpf_set_ui (mpf_ptr, unsigned long int);
#define mpf_set_z __gmpf_set_z
-__GMP_DECLSPEC void mpf_set_z __GMP_PROTO ((mpf_ptr, mpz_srcptr));
+__GMP_DECLSPEC void mpf_set_z (mpf_ptr, mpz_srcptr);
#define mpf_size __gmpf_size
-__GMP_DECLSPEC size_t mpf_size __GMP_PROTO ((mpf_srcptr)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC size_t mpf_size (mpf_srcptr) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
#define mpf_sqrt __gmpf_sqrt
-__GMP_DECLSPEC void mpf_sqrt __GMP_PROTO ((mpf_ptr, mpf_srcptr));
+__GMP_DECLSPEC void mpf_sqrt (mpf_ptr, mpf_srcptr);
#define mpf_sqrt_ui __gmpf_sqrt_ui
-__GMP_DECLSPEC void mpf_sqrt_ui __GMP_PROTO ((mpf_ptr, unsigned long int));
+__GMP_DECLSPEC void mpf_sqrt_ui (mpf_ptr, unsigned long int);
#define mpf_sub __gmpf_sub
-__GMP_DECLSPEC void mpf_sub __GMP_PROTO ((mpf_ptr, mpf_srcptr, mpf_srcptr));
+__GMP_DECLSPEC void mpf_sub (mpf_ptr, mpf_srcptr, mpf_srcptr);
#define mpf_sub_ui __gmpf_sub_ui
-__GMP_DECLSPEC void mpf_sub_ui __GMP_PROTO ((mpf_ptr, mpf_srcptr, unsigned long int));
+__GMP_DECLSPEC void mpf_sub_ui (mpf_ptr, mpf_srcptr, unsigned long int);
#define mpf_swap __gmpf_swap
-__GMP_DECLSPEC void mpf_swap __GMP_PROTO ((mpf_ptr, mpf_ptr)) __GMP_NOTHROW;
+__GMP_DECLSPEC void mpf_swap (mpf_ptr, mpf_ptr) __GMP_NOTHROW;
#define mpf_trunc __gmpf_trunc
-__GMP_DECLSPEC void mpf_trunc __GMP_PROTO ((mpf_ptr, mpf_srcptr));
+__GMP_DECLSPEC void mpf_trunc (mpf_ptr, mpf_srcptr);
#define mpf_ui_div __gmpf_ui_div
-__GMP_DECLSPEC void mpf_ui_div __GMP_PROTO ((mpf_ptr, unsigned long int, mpf_srcptr));
+__GMP_DECLSPEC void mpf_ui_div (mpf_ptr, unsigned long int, mpf_srcptr);
#define mpf_ui_sub __gmpf_ui_sub
-__GMP_DECLSPEC void mpf_ui_sub __GMP_PROTO ((mpf_ptr, unsigned long int, mpf_srcptr));
+__GMP_DECLSPEC void mpf_ui_sub (mpf_ptr, unsigned long int, mpf_srcptr);
#define mpf_urandomb __gmpf_urandomb
-__GMP_DECLSPEC void mpf_urandomb __GMP_PROTO ((mpf_t, gmp_randstate_t, mp_bitcnt_t));
+__GMP_DECLSPEC void mpf_urandomb (mpf_t, gmp_randstate_t, mp_bitcnt_t);
/************ Low level positive-integer (i.e. N) routines. ************/
#define mpn_add __MPN(add)
#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpn_add)
-__GMP_DECLSPEC mp_limb_t mpn_add __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr,mp_size_t));
+__GMP_DECLSPEC mp_limb_t mpn_add (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
#endif
#define mpn_add_1 __MPN(add_1)
#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpn_add_1)
-__GMP_DECLSPEC mp_limb_t mpn_add_1 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)) __GMP_NOTHROW;
+__GMP_DECLSPEC mp_limb_t mpn_add_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t) __GMP_NOTHROW;
#endif
#define mpn_add_n __MPN(add_n)
-__GMP_DECLSPEC mp_limb_t mpn_add_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+__GMP_DECLSPEC mp_limb_t mpn_add_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
#define mpn_addmul_1 __MPN(addmul_1)
-__GMP_DECLSPEC mp_limb_t mpn_addmul_1 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));
+__GMP_DECLSPEC mp_limb_t mpn_addmul_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
#define mpn_cmp __MPN(cmp)
#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpn_cmp)
-__GMP_DECLSPEC int mpn_cmp __GMP_PROTO ((mp_srcptr, mp_srcptr, mp_size_t)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpn_cmp (mp_srcptr, mp_srcptr, mp_size_t) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
#endif
#define mpn_divexact_by3(dst,src,size) \
mpn_divexact_by3c (dst, src, size, __GMP_CAST (mp_limb_t, 0))
#define mpn_divexact_by3c __MPN(divexact_by3c)
-__GMP_DECLSPEC mp_limb_t mpn_divexact_by3c __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));
+__GMP_DECLSPEC mp_limb_t mpn_divexact_by3c (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
#define mpn_divmod_1(qp,np,nsize,dlimb) \
mpn_divrem_1 (qp, __GMP_CAST (mp_size_t, 0), np, nsize, dlimb)
#define mpn_divrem __MPN(divrem)
-__GMP_DECLSPEC mp_limb_t mpn_divrem __GMP_PROTO ((mp_ptr, mp_size_t, mp_ptr, mp_size_t, mp_srcptr, mp_size_t));
+__GMP_DECLSPEC mp_limb_t mpn_divrem (mp_ptr, mp_size_t, mp_ptr, mp_size_t, mp_srcptr, mp_size_t);
#define mpn_divrem_1 __MPN(divrem_1)
-__GMP_DECLSPEC mp_limb_t mpn_divrem_1 __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t));
+__GMP_DECLSPEC mp_limb_t mpn_divrem_1 (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
#define mpn_divrem_2 __MPN(divrem_2)
-__GMP_DECLSPEC mp_limb_t mpn_divrem_2 __GMP_PROTO ((mp_ptr, mp_size_t, mp_ptr, mp_size_t, mp_srcptr));
+__GMP_DECLSPEC mp_limb_t mpn_divrem_2 (mp_ptr, mp_size_t, mp_ptr, mp_size_t, mp_srcptr);
+
+#define mpn_div_qr_2 __MPN(div_qr_2)
+__GMP_DECLSPEC mp_limb_t mpn_div_qr_2 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
#define mpn_gcd __MPN(gcd)
-__GMP_DECLSPEC mp_size_t mpn_gcd __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_ptr, mp_size_t));
+__GMP_DECLSPEC mp_size_t mpn_gcd (mp_ptr, mp_ptr, mp_size_t, mp_ptr, mp_size_t);
#define mpn_gcd_1 __MPN(gcd_1)
-__GMP_DECLSPEC mp_limb_t mpn_gcd_1 __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t)) __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC mp_limb_t mpn_gcd_1 (mp_srcptr, mp_size_t, mp_limb_t) __GMP_ATTRIBUTE_PURE;
#define mpn_gcdext_1 __MPN(gcdext_1)
-__GMP_DECLSPEC mp_limb_t mpn_gcdext_1 __GMP_PROTO ((mp_limb_signed_t *, mp_limb_signed_t *, mp_limb_t, mp_limb_t));
+__GMP_DECLSPEC mp_limb_t mpn_gcdext_1 (mp_limb_signed_t *, mp_limb_signed_t *, mp_limb_t, mp_limb_t);
#define mpn_gcdext __MPN(gcdext)
-__GMP_DECLSPEC mp_size_t mpn_gcdext __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t));
+__GMP_DECLSPEC mp_size_t mpn_gcdext (mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t);
#define mpn_get_str __MPN(get_str)
-__GMP_DECLSPEC size_t mpn_get_str __GMP_PROTO ((unsigned char *, int, mp_ptr, mp_size_t));
+__GMP_DECLSPEC size_t mpn_get_str (unsigned char *, int, mp_ptr, mp_size_t);
#define mpn_hamdist __MPN(hamdist)
-__GMP_DECLSPEC mp_bitcnt_t mpn_hamdist __GMP_PROTO ((mp_srcptr, mp_srcptr, mp_size_t)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC mp_bitcnt_t mpn_hamdist (mp_srcptr, mp_srcptr, mp_size_t) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
#define mpn_lshift __MPN(lshift)
-__GMP_DECLSPEC mp_limb_t mpn_lshift __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, unsigned int));
+__GMP_DECLSPEC mp_limb_t mpn_lshift (mp_ptr, mp_srcptr, mp_size_t, unsigned int);
#define mpn_mod_1 __MPN(mod_1)
-__GMP_DECLSPEC mp_limb_t mpn_mod_1 __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t)) __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC mp_limb_t mpn_mod_1 (mp_srcptr, mp_size_t, mp_limb_t) __GMP_ATTRIBUTE_PURE;
#define mpn_mul __MPN(mul)
-__GMP_DECLSPEC mp_limb_t mpn_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t));
+__GMP_DECLSPEC mp_limb_t mpn_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
#define mpn_mul_1 __MPN(mul_1)
-__GMP_DECLSPEC mp_limb_t mpn_mul_1 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));
+__GMP_DECLSPEC mp_limb_t mpn_mul_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
#define mpn_mul_n __MPN(mul_n)
-__GMP_DECLSPEC void mpn_mul_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+__GMP_DECLSPEC void mpn_mul_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
#define mpn_sqr __MPN(sqr)
-__GMP_DECLSPEC void mpn_sqr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
+__GMP_DECLSPEC void mpn_sqr (mp_ptr, mp_srcptr, mp_size_t);
#define mpn_neg __MPN(neg)
#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpn_neg)
-__GMP_DECLSPEC mp_limb_t mpn_neg __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
+__GMP_DECLSPEC mp_limb_t mpn_neg (mp_ptr, mp_srcptr, mp_size_t);
#endif
#define mpn_com __MPN(com)
#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpn_com)
-__GMP_DECLSPEC void mpn_com __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
+__GMP_DECLSPEC void mpn_com (mp_ptr, mp_srcptr, mp_size_t);
#endif
#define mpn_perfect_square_p __MPN(perfect_square_p)
-__GMP_DECLSPEC int mpn_perfect_square_p __GMP_PROTO ((mp_srcptr, mp_size_t)) __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpn_perfect_square_p (mp_srcptr, mp_size_t) __GMP_ATTRIBUTE_PURE;
#define mpn_perfect_power_p __MPN(perfect_power_p)
-__GMP_DECLSPEC int mpn_perfect_power_p __GMP_PROTO ((mp_srcptr, mp_size_t)) __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpn_perfect_power_p (mp_srcptr, mp_size_t) __GMP_ATTRIBUTE_PURE;
#define mpn_popcount __MPN(popcount)
-__GMP_DECLSPEC mp_bitcnt_t mpn_popcount __GMP_PROTO ((mp_srcptr, mp_size_t)) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC mp_bitcnt_t mpn_popcount (mp_srcptr, mp_size_t) __GMP_NOTHROW __GMP_ATTRIBUTE_PURE;
#define mpn_pow_1 __MPN(pow_1)
-__GMP_DECLSPEC mp_size_t mpn_pow_1 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_ptr));
+__GMP_DECLSPEC mp_size_t mpn_pow_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_ptr);
/* undocumented now, but retained here for upward compatibility */
#define mpn_preinv_mod_1 __MPN(preinv_mod_1)
-__GMP_DECLSPEC mp_limb_t mpn_preinv_mod_1 __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t)) __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC mp_limb_t mpn_preinv_mod_1 (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t) __GMP_ATTRIBUTE_PURE;
#define mpn_random __MPN(random)
-__GMP_DECLSPEC void mpn_random __GMP_PROTO ((mp_ptr, mp_size_t));
+__GMP_DECLSPEC void mpn_random (mp_ptr, mp_size_t);
#define mpn_random2 __MPN(random2)
-__GMP_DECLSPEC void mpn_random2 __GMP_PROTO ((mp_ptr, mp_size_t));
+__GMP_DECLSPEC void mpn_random2 (mp_ptr, mp_size_t);
#define mpn_rshift __MPN(rshift)
-__GMP_DECLSPEC mp_limb_t mpn_rshift __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, unsigned int));
+__GMP_DECLSPEC mp_limb_t mpn_rshift (mp_ptr, mp_srcptr, mp_size_t, unsigned int);
#define mpn_scan0 __MPN(scan0)
-__GMP_DECLSPEC mp_bitcnt_t mpn_scan0 __GMP_PROTO ((mp_srcptr, mp_bitcnt_t)) __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC mp_bitcnt_t mpn_scan0 (mp_srcptr, mp_bitcnt_t) __GMP_ATTRIBUTE_PURE;
#define mpn_scan1 __MPN(scan1)
-__GMP_DECLSPEC mp_bitcnt_t mpn_scan1 __GMP_PROTO ((mp_srcptr, mp_bitcnt_t)) __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC mp_bitcnt_t mpn_scan1 (mp_srcptr, mp_bitcnt_t) __GMP_ATTRIBUTE_PURE;
#define mpn_set_str __MPN(set_str)
-__GMP_DECLSPEC mp_size_t mpn_set_str __GMP_PROTO ((mp_ptr, __gmp_const unsigned char *, size_t, int));
+__GMP_DECLSPEC mp_size_t mpn_set_str (mp_ptr, const unsigned char *, size_t, int);
#define mpn_sqrtrem __MPN(sqrtrem)
-__GMP_DECLSPEC mp_size_t mpn_sqrtrem __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t));
+__GMP_DECLSPEC mp_size_t mpn_sqrtrem (mp_ptr, mp_ptr, mp_srcptr, mp_size_t);
#define mpn_sub __MPN(sub)
#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpn_sub)
-__GMP_DECLSPEC mp_limb_t mpn_sub __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr,mp_size_t));
+__GMP_DECLSPEC mp_limb_t mpn_sub (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
#endif
#define mpn_sub_1 __MPN(sub_1)
#if __GMP_INLINE_PROTOTYPES || defined (__GMP_FORCE_mpn_sub_1)
-__GMP_DECLSPEC mp_limb_t mpn_sub_1 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)) __GMP_NOTHROW;
+__GMP_DECLSPEC mp_limb_t mpn_sub_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t) __GMP_NOTHROW;
#endif
#define mpn_sub_n __MPN(sub_n)
-__GMP_DECLSPEC mp_limb_t mpn_sub_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+__GMP_DECLSPEC mp_limb_t mpn_sub_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
#define mpn_submul_1 __MPN(submul_1)
-__GMP_DECLSPEC mp_limb_t mpn_submul_1 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));
+__GMP_DECLSPEC mp_limb_t mpn_submul_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
#define mpn_tdiv_qr __MPN(tdiv_qr)
-__GMP_DECLSPEC void mpn_tdiv_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t));
+__GMP_DECLSPEC void mpn_tdiv_qr (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
#define mpn_and_n __MPN(and_n)
-__GMP_DECLSPEC void mpn_and_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+__GMP_DECLSPEC void mpn_and_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
#define mpn_andn_n __MPN(andn_n)
-__GMP_DECLSPEC void mpn_andn_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+__GMP_DECLSPEC void mpn_andn_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
#define mpn_nand_n __MPN(nand_n)
-__GMP_DECLSPEC void mpn_nand_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+__GMP_DECLSPEC void mpn_nand_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
#define mpn_ior_n __MPN(ior_n)
-__GMP_DECLSPEC void mpn_ior_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+__GMP_DECLSPEC void mpn_ior_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
#define mpn_iorn_n __MPN(iorn_n)
-__GMP_DECLSPEC void mpn_iorn_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+__GMP_DECLSPEC void mpn_iorn_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
#define mpn_nior_n __MPN(nior_n)
-__GMP_DECLSPEC void mpn_nior_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+__GMP_DECLSPEC void mpn_nior_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
#define mpn_xor_n __MPN(xor_n)
-__GMP_DECLSPEC void mpn_xor_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+__GMP_DECLSPEC void mpn_xor_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
#define mpn_xnor_n __MPN(xnor_n)
-__GMP_DECLSPEC void mpn_xnor_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+__GMP_DECLSPEC void mpn_xnor_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
#define mpn_copyi __MPN(copyi)
-__GMP_DECLSPEC void mpn_copyi __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
+__GMP_DECLSPEC void mpn_copyi (mp_ptr, mp_srcptr, mp_size_t);
#define mpn_copyd __MPN(copyd)
-__GMP_DECLSPEC void mpn_copyd __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
+__GMP_DECLSPEC void mpn_copyd (mp_ptr, mp_srcptr, mp_size_t);
#define mpn_zero __MPN(zero)
-__GMP_DECLSPEC void mpn_zero __GMP_PROTO ((mp_ptr, mp_size_t));
+__GMP_DECLSPEC void mpn_zero (mp_ptr, mp_size_t);
/**************** mpz inlines ****************/
#define __GMP_CFLAGS "@CFLAGS@"
/* Major version number is the value of __GNU_MP__ too, above and in mp.h. */
-#define __GNU_MP_VERSION 5
-#define __GNU_MP_VERSION_MINOR 0
-#define __GNU_MP_VERSION_PATCHLEVEL 5
+#define __GNU_MP_VERSION 5
+#define __GNU_MP_VERSION_MINOR 1
+#define __GNU_MP_VERSION_PATCHLEVEL 3
#define __GNU_MP_RELEASE (__GNU_MP_VERSION * 10000 + __GNU_MP_VERSION_MINOR * 100 + __GNU_MP_VERSION_PATCHLEVEL)
#define __GMP_H__
BE SUBJECT TO INCOMPATIBLE CHANGES IN FUTURE GNU MP RELEASES.
Copyright 1991, 1993, 1994, 1995, 1996, 1997, 1999, 2000, 2001, 2002, 2003,
-2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation,
-Inc.
+2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 Free Software
+Foundation, Inc.
This file is part of the GNU MP Library.
/* For fat.h and other fat binary stuff.
No need for __GMP_ATTRIBUTE_PURE or __GMP_NOTHROW, since functions
- declared this way are only used to set function pointers in __gmp_cpuvec,
+ declared this way are only used to set function pointers in __gmpn_cpuvec,
they're not called directly. */
#define DECL_add_n(name) \
- __GMP_DECLSPEC mp_limb_t name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t))
+ __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)
+#define DECL_addlsh1_n(name) \
+ DECL_add_n (name)
+#define DECL_addlsh2_n(name) \
+ DECL_add_n (name)
#define DECL_addmul_1(name) \
- __GMP_DECLSPEC mp_limb_t name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t))
+ __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)
+#define DECL_addmul_2(name) \
+ __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr)
+#define DECL_bdiv_dbm1c(name) \
+ __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t)
+#define DECL_com(name) \
+ __GMP_DECLSPEC void name (mp_ptr, mp_srcptr, mp_size_t)
#define DECL_copyd(name) \
- __GMP_DECLSPEC void name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t))
+ __GMP_DECLSPEC void name (mp_ptr, mp_srcptr, mp_size_t)
#define DECL_copyi(name) \
DECL_copyd (name)
#define DECL_divexact_1(name) \
- __GMP_DECLSPEC void name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t))
+ __GMP_DECLSPEC void name (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)
#define DECL_divexact_by3c(name) \
- __GMP_DECLSPEC mp_limb_t name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t))
+ __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)
#define DECL_divrem_1(name) \
- __GMP_DECLSPEC mp_limb_t name __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t))
+ __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t)
#define DECL_gcd_1(name) \
- __GMP_DECLSPEC mp_limb_t name __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t))
+ __GMP_DECLSPEC mp_limb_t name (mp_srcptr, mp_size_t, mp_limb_t)
#define DECL_lshift(name) \
- __GMP_DECLSPEC mp_limb_t name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, unsigned))
+ __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_srcptr, mp_size_t, unsigned)
+#define DECL_lshiftc(name) \
+ DECL_lshift (name)
#define DECL_mod_1(name) \
- __GMP_DECLSPEC mp_limb_t name __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t))
+ __GMP_DECLSPEC mp_limb_t name (mp_srcptr, mp_size_t, mp_limb_t)
+#define DECL_mod_1_1p(name) \
+ __GMP_DECLSPEC mp_limb_t name (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [])
+#define DECL_mod_1_1p_cps(name) \
+ __GMP_DECLSPEC void name (mp_limb_t cps[], mp_limb_t b)
+#define DECL_mod_1s_2p(name) \
+ DECL_mod_1_1p (name)
+#define DECL_mod_1s_2p_cps(name) \
+ DECL_mod_1_1p_cps (name)
+#define DECL_mod_1s_4p(name) \
+ DECL_mod_1_1p (name)
+#define DECL_mod_1s_4p_cps(name) \
+ DECL_mod_1_1p_cps (name)
#define DECL_mod_34lsub1(name) \
- __GMP_DECLSPEC mp_limb_t name __GMP_PROTO ((mp_srcptr, mp_size_t))
+ __GMP_DECLSPEC mp_limb_t name (mp_srcptr, mp_size_t)
#define DECL_modexact_1c_odd(name) \
- __GMP_DECLSPEC mp_limb_t name __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t))
+ __GMP_DECLSPEC mp_limb_t name (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t)
#define DECL_mul_1(name) \
DECL_addmul_1 (name)
#define DECL_mul_basecase(name) \
- __GMP_DECLSPEC void name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t))
+ __GMP_DECLSPEC void name (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t)
+#define DECL_mullo_basecase(name) \
+ __GMP_DECLSPEC void name (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)
#define DECL_preinv_divrem_1(name) \
- __GMP_DECLSPEC mp_limb_t name __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, int))
+ __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, int)
#define DECL_preinv_mod_1(name) \
- __GMP_DECLSPEC mp_limb_t name __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t))
+ __GMP_DECLSPEC mp_limb_t name (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t)
+#define DECL_redc_1(name) \
+ __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t)
+#define DECL_redc_2(name) \
+ __GMP_DECLSPEC mp_limb_t name (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr)
#define DECL_rshift(name) \
DECL_lshift (name)
#define DECL_sqr_basecase(name) \
- __GMP_DECLSPEC void name __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t))
+ __GMP_DECLSPEC void name (mp_ptr, mp_srcptr, mp_size_t)
#define DECL_sub_n(name) \
DECL_add_n (name)
+#define DECL_sublsh1_n(name) \
+ DECL_add_n (name)
#define DECL_submul_1(name) \
DECL_addmul_1 (name)
-#if ! __GMP_WITHIN_CONFIGURE
+#if ! defined (__GMP_WITHIN_CONFIGURE)
#include "config.h"
#include "gmp-mparam.h"
#include "fib_table.h"
+#include "fac_table.h"
#include "mp_bases.h"
#if WANT_FAT_BINARY
#include "fat.h"
typedef struct {mp_limb_t inv21, inv32, inv53;} gmp_pi2_t;
-/* const and signed must match __gmp_const and __gmp_signed, so follow the
- decision made for those in gmp.h. */
-#if ! __GMP_HAVE_CONST
-#define const /* empty */
-#define signed /* empty */
-#endif
-
/* "const" basically means a function does nothing but examine its arguments
and give a return value, it doesn't read or write any memory (neither
global nor pointed to by arguments), and has no other side-effects. This
#endif
#if ! HAVE_MEMSET
-#define memset(p, c, n) \
- do { \
- ASSERT ((n) >= 0); \
- char *__memset__p = (p); \
- int __i; \
- for (__i = 0; __i < (n); __i++) \
- __memset__p[__i] = (c); \
+#define memset(p, c, n) \
+ do { \
+ ASSERT ((n) >= 0); \
+ char *__memset__p = (p); \
+ int __i; \
+ for (__i = 0; __i < (n); __i++) \
+ __memset__p[__i] = (c); \
} while (0)
#endif
/* Usage: TMP_DECL;
- TMP_MARK;
- ptr = TMP_ALLOC (bytes);
- TMP_FREE;
+ TMP_MARK;
+ ptr = TMP_ALLOC (bytes);
+ TMP_FREE;
Small allocations should use TMP_SALLOC, big allocations should use
TMP_BALLOC. Allocations that might be small or big should use TMP_ALLOC.
struct tmp_reentrant_t *next;
size_t size; /* bytes, including header */
};
-__GMP_DECLSPEC void *__gmp_tmp_reentrant_alloc __GMP_PROTO ((struct tmp_reentrant_t **, size_t)) ATTRIBUTE_MALLOC;
-__GMP_DECLSPEC void __gmp_tmp_reentrant_free __GMP_PROTO ((struct tmp_reentrant_t *));
+__GMP_DECLSPEC void *__gmp_tmp_reentrant_alloc (struct tmp_reentrant_t **, size_t) ATTRIBUTE_MALLOC;
+__GMP_DECLSPEC void __gmp_tmp_reentrant_free (struct tmp_reentrant_t *);
#endif
#if WANT_TMP_ALLOCA
#define TMP_ALLOC(n) \
(LIKELY ((n) < 65536) ? TMP_SALLOC(n) : TMP_BALLOC(n))
#define TMP_SFREE
-#define TMP_FREE \
- do { \
- if (UNLIKELY (__tmp_marker != 0)) __gmp_tmp_reentrant_free (__tmp_marker); \
+#define TMP_FREE \
+ do { \
+ if (UNLIKELY (__tmp_marker != 0)) \
+ __gmp_tmp_reentrant_free (__tmp_marker); \
} while (0)
#endif
struct tmp_stack *which_chunk;
void *alloc_point;
};
-__GMP_DECLSPEC void *__gmp_tmp_alloc __GMP_PROTO ((unsigned long)) ATTRIBUTE_MALLOC;
-__GMP_DECLSPEC void __gmp_tmp_mark __GMP_PROTO ((struct tmp_marker *));
-__GMP_DECLSPEC void __gmp_tmp_free __GMP_PROTO ((struct tmp_marker *));
+__GMP_DECLSPEC void *__gmp_tmp_alloc (unsigned long) ATTRIBUTE_MALLOC;
+__GMP_DECLSPEC void __gmp_tmp_mark (struct tmp_marker *);
+__GMP_DECLSPEC void __gmp_tmp_free (struct tmp_marker *);
#define TMP_SDECL TMP_DECL
#define TMP_DECL struct tmp_marker __tmp_marker
#define TMP_SMARK TMP_MARK
char *block;
size_t size;
};
-__GMP_DECLSPEC void __gmp_tmp_debug_mark __GMP_PROTO ((const char *, int, struct tmp_debug_t **,
- struct tmp_debug_t *,
- const char *, const char *));
-__GMP_DECLSPEC void *__gmp_tmp_debug_alloc __GMP_PROTO ((const char *, int, int,
- struct tmp_debug_t **, const char *,
- size_t)) ATTRIBUTE_MALLOC;
-__GMP_DECLSPEC void __gmp_tmp_debug_free __GMP_PROTO ((const char *, int, int,
- struct tmp_debug_t **,
- const char *, const char *));
+__GMP_DECLSPEC void __gmp_tmp_debug_mark (const char *, int, struct tmp_debug_t **,
+ struct tmp_debug_t *,
+ const char *, const char *);
+__GMP_DECLSPEC void *__gmp_tmp_debug_alloc (const char *, int, int,
+ struct tmp_debug_t **, const char *,
+ size_t) ATTRIBUTE_MALLOC;
+__GMP_DECLSPEC void __gmp_tmp_debug_free (const char *, int, int,
+ struct tmp_debug_t **,
+ const char *, const char *);
#define TMP_SDECL TMP_DECL_NAME(__tmp_xmarker, "__tmp_marker")
#define TMP_DECL TMP_DECL_NAME(__tmp_xmarker, "__tmp_marker")
#define TMP_SMARK TMP_MARK_NAME(__tmp_xmarker, "__tmp_marker")
warning from the compiler if TMP_FREE is used without a TMP_MARK.
__tmp_marker_inscope does the same for TMP_ALLOC. Runtime tests pick
these things up too. */
-#define TMP_DECL_NAME(marker, marker_name) \
- int marker; \
- int __tmp_marker_inscope; \
- const char *__tmp_marker_name = marker_name; \
- struct tmp_debug_t __tmp_marker_struct; \
- /* don't demand NULL, just cast a zero */ \
+#define TMP_DECL_NAME(marker, marker_name) \
+ int marker; \
+ int __tmp_marker_inscope; \
+ const char *__tmp_marker_name = marker_name; \
+ struct tmp_debug_t __tmp_marker_struct; \
+ /* don't demand NULL, just cast a zero */ \
struct tmp_debug_t *__tmp_marker = (struct tmp_debug_t *) 0
-#define TMP_MARK_NAME(marker, marker_name) \
- do { \
- marker = 1; \
- __tmp_marker_inscope = 1; \
- __gmp_tmp_debug_mark (ASSERT_FILE, ASSERT_LINE, \
- &__tmp_marker, &__tmp_marker_struct, \
- __tmp_marker_name, marker_name); \
+#define TMP_MARK_NAME(marker, marker_name) \
+ do { \
+ marker = 1; \
+ __tmp_marker_inscope = 1; \
+ __gmp_tmp_debug_mark (ASSERT_FILE, ASSERT_LINE, \
+ &__tmp_marker, &__tmp_marker_struct, \
+ __tmp_marker_name, marker_name); \
} while (0)
#define TMP_SALLOC(n) TMP_ALLOC(n)
#define TMP_BALLOC(n) TMP_ALLOC(n)
-#define TMP_ALLOC(size) \
- __gmp_tmp_debug_alloc (ASSERT_FILE, ASSERT_LINE, \
- __tmp_marker_inscope, \
- &__tmp_marker, __tmp_marker_name, size)
-#define TMP_FREE_NAME(marker, marker_name) \
- do { \
- __gmp_tmp_debug_free (ASSERT_FILE, ASSERT_LINE, \
- marker, &__tmp_marker, \
- __tmp_marker_name, marker_name); \
+#define TMP_ALLOC(size) \
+ __gmp_tmp_debug_alloc (ASSERT_FILE, ASSERT_LINE, \
+ __tmp_marker_inscope, \
+ &__tmp_marker, __tmp_marker_name, size)
+#define TMP_FREE_NAME(marker, marker_name) \
+ do { \
+ __gmp_tmp_debug_free (ASSERT_FILE, ASSERT_LINE, \
+ marker, &__tmp_marker, \
+ __tmp_marker_name, marker_name); \
} while (0)
#endif /* WANT_TMP_DEBUG */
involves copying a chunk of stack (various RISCs), or a call to a stack
bounds check (mingw). In any case, when debugging keep separate blocks
so a redzoning malloc debugger can protect each individually. */
-#define TMP_ALLOC_LIMBS_2(xp,xsize, yp,ysize) \
- do { \
- if (WANT_TMP_DEBUG) \
- { \
- (xp) = TMP_ALLOC_LIMBS (xsize); \
- (yp) = TMP_ALLOC_LIMBS (ysize); \
- } \
- else \
- { \
- (xp) = TMP_ALLOC_LIMBS ((xsize) + (ysize)); \
- (yp) = (xp) + (xsize); \
- } \
+#define TMP_ALLOC_LIMBS_2(xp,xsize, yp,ysize) \
+ do { \
+ if (WANT_TMP_DEBUG) \
+ { \
+ (xp) = TMP_ALLOC_LIMBS (xsize); \
+ (yp) = TMP_ALLOC_LIMBS (ysize); \
+ } \
+ else \
+ { \
+ (xp) = TMP_ALLOC_LIMBS ((xsize) + (ysize)); \
+ (yp) = (xp) + (xsize); \
+ } \
} while (0)
#define SIZ(x) ((x)->_mp_size)
#define ABSIZ(x) ABS (SIZ (x))
#define PTR(x) ((x)->_mp_d)
-#define LIMBS(x) ((x)->_mp_d)
#define EXP(x) ((x)->_mp_exp)
#define PREC(x) ((x)->_mp_prec)
#define ALLOC(x) ((x)->_mp_alloc)
+#define NUM(x) mpq_numref(x)
+#define DEN(x) mpq_denref(x)
/* n-1 inverts any low zeros and the lowest one bit. If n&(n-1) leaves zero
then that lowest one bit must have been the only bit set. n==0 will
return true though, so avoid that. */
#define POW2_P(n) (((n) & ((n) - 1)) == 0)
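+/* LOG2C(n) counts the significant bits of n: 0 for n==0, otherwise
+ floor(log2(n))+1, saturating at 16 for n >= 0x8000. This is intended for
+ constant THRESHOLDs only, where the compiler can completely fold the
+ result. */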
+#define LOG2C(n) \
+ (((n) >= 0x1) + ((n) >= 0x2) + ((n) >= 0x4) + ((n) >= 0x8) + \
+ ((n) >= 0x10) + ((n) >= 0x20) + ((n) >= 0x40) + ((n) >= 0x80) + \
+ ((n) >= 0x100) + ((n) >= 0x200) + ((n) >= 0x400) + ((n) >= 0x800) + \
+ ((n) >= 0x1000) + ((n) >= 0x2000) + ((n) >= 0x4000) + ((n) >= 0x8000))
/* The "short" defines are a bit different because shorts are promoted to
ints by ~ or >> etc.
/* Swap macros. */
-#define MP_LIMB_T_SWAP(x, y) \
- do { \
- mp_limb_t __mp_limb_t_swap__tmp = (x); \
- (x) = (y); \
- (y) = __mp_limb_t_swap__tmp; \
+#define MP_LIMB_T_SWAP(x, y) \
+ do { \
+ mp_limb_t __mp_limb_t_swap__tmp = (x); \
+ (x) = (y); \
+ (y) = __mp_limb_t_swap__tmp; \
} while (0)
-#define MP_SIZE_T_SWAP(x, y) \
- do { \
- mp_size_t __mp_size_t_swap__tmp = (x); \
- (x) = (y); \
- (y) = __mp_size_t_swap__tmp; \
+#define MP_SIZE_T_SWAP(x, y) \
+ do { \
+ mp_size_t __mp_size_t_swap__tmp = (x); \
+ (x) = (y); \
+ (y) = __mp_size_t_swap__tmp; \
} while (0)
-#define MP_PTR_SWAP(x, y) \
- do { \
- mp_ptr __mp_ptr_swap__tmp = (x); \
- (x) = (y); \
- (y) = __mp_ptr_swap__tmp; \
+#define MP_PTR_SWAP(x, y) \
+ do { \
+ mp_ptr __mp_ptr_swap__tmp = (x); \
+ (x) = (y); \
+ (y) = __mp_ptr_swap__tmp; \
} while (0)
-#define MP_SRCPTR_SWAP(x, y) \
- do { \
- mp_srcptr __mp_srcptr_swap__tmp = (x); \
- (x) = (y); \
- (y) = __mp_srcptr_swap__tmp; \
+#define MP_SRCPTR_SWAP(x, y) \
+ do { \
+ mp_srcptr __mp_srcptr_swap__tmp = (x); \
+ (x) = (y); \
+ (y) = __mp_srcptr_swap__tmp; \
} while (0)
-#define MPN_PTR_SWAP(xp,xs, yp,ys) \
- do { \
- MP_PTR_SWAP (xp, yp); \
- MP_SIZE_T_SWAP (xs, ys); \
+#define MPN_PTR_SWAP(xp,xs, yp,ys) \
+ do { \
+ MP_PTR_SWAP (xp, yp); \
+ MP_SIZE_T_SWAP (xs, ys); \
} while(0)
-#define MPN_SRCPTR_SWAP(xp,xs, yp,ys) \
- do { \
- MP_SRCPTR_SWAP (xp, yp); \
- MP_SIZE_T_SWAP (xs, ys); \
+#define MPN_SRCPTR_SWAP(xp,xs, yp,ys) \
+ do { \
+ MP_SRCPTR_SWAP (xp, yp); \
+ MP_SIZE_T_SWAP (xs, ys); \
} while(0)
-#define MPZ_PTR_SWAP(x, y) \
- do { \
- mpz_ptr __mpz_ptr_swap__tmp = (x); \
- (x) = (y); \
- (y) = __mpz_ptr_swap__tmp; \
+#define MPZ_PTR_SWAP(x, y) \
+ do { \
+ mpz_ptr __mpz_ptr_swap__tmp = (x); \
+ (x) = (y); \
+ (y) = __mpz_ptr_swap__tmp; \
} while (0)
-#define MPZ_SRCPTR_SWAP(x, y) \
- do { \
- mpz_srcptr __mpz_srcptr_swap__tmp = (x); \
- (x) = (y); \
- (y) = __mpz_srcptr_swap__tmp; \
+#define MPZ_SRCPTR_SWAP(x, y) \
+ do { \
+ mpz_srcptr __mpz_srcptr_swap__tmp = (x); \
+ (x) = (y); \
+ (y) = __mpz_srcptr_swap__tmp; \
} while (0)
/* Enhancement: __gmp_allocate_func could have "__attribute__ ((malloc))",
but current gcc (3.0) doesn't seem to support that. */
-__GMP_DECLSPEC extern void * (*__gmp_allocate_func) __GMP_PROTO ((size_t));
-__GMP_DECLSPEC extern void * (*__gmp_reallocate_func) __GMP_PROTO ((void *, size_t, size_t));
-__GMP_DECLSPEC extern void (*__gmp_free_func) __GMP_PROTO ((void *, size_t));
+__GMP_DECLSPEC extern void * (*__gmp_allocate_func) (size_t);
+__GMP_DECLSPEC extern void * (*__gmp_reallocate_func) (void *, size_t, size_t);
+__GMP_DECLSPEC extern void (*__gmp_free_func) (void *, size_t);
-__GMP_DECLSPEC void *__gmp_default_allocate __GMP_PROTO ((size_t));
-__GMP_DECLSPEC void *__gmp_default_reallocate __GMP_PROTO ((void *, size_t, size_t));
-__GMP_DECLSPEC void __gmp_default_free __GMP_PROTO ((void *, size_t));
+__GMP_DECLSPEC void *__gmp_default_allocate (size_t);
+__GMP_DECLSPEC void *__gmp_default_reallocate (void *, size_t, size_t);
+__GMP_DECLSPEC void __gmp_default_free (void *, size_t);
#define __GMP_ALLOCATE_FUNC_TYPE(n,type) \
((type *) (*__gmp_allocate_func) ((n) * sizeof (type)))
#define __GMP_ALLOCATE_FUNC_LIMBS(n) __GMP_ALLOCATE_FUNC_TYPE (n, mp_limb_t)
-#define __GMP_REALLOCATE_FUNC_TYPE(p, old_size, new_size, type) \
- ((type *) (*__gmp_reallocate_func) \
+#define __GMP_REALLOCATE_FUNC_TYPE(p, old_size, new_size, type) \
+ ((type *) (*__gmp_reallocate_func) \
(p, (old_size) * sizeof (type), (new_size) * sizeof (type)))
-#define __GMP_REALLOCATE_FUNC_LIMBS(p, old_size, new_size) \
+#define __GMP_REALLOCATE_FUNC_LIMBS(p, old_size, new_size) \
__GMP_REALLOCATE_FUNC_TYPE(p, old_size, new_size, mp_limb_t)
#define __GMP_FREE_FUNC_TYPE(p,n,type) (*__gmp_free_func) (p, (n) * sizeof (type))
#define __GMP_FREE_FUNC_LIMBS(p,n) __GMP_FREE_FUNC_TYPE (p, n, mp_limb_t)
-#define __GMP_REALLOCATE_FUNC_MAYBE(ptr, oldsize, newsize) \
- do { \
- if ((oldsize) != (newsize)) \
- (ptr) = (*__gmp_reallocate_func) (ptr, oldsize, newsize); \
+#define __GMP_REALLOCATE_FUNC_MAYBE(ptr, oldsize, newsize) \
+ do { \
+ if ((oldsize) != (newsize)) \
+ (ptr) = (*__gmp_reallocate_func) (ptr, oldsize, newsize); \
} while (0)
-#define __GMP_REALLOCATE_FUNC_MAYBE_TYPE(ptr, oldsize, newsize, type) \
- do { \
- if ((oldsize) != (newsize)) \
- (ptr) = (type *) (*__gmp_reallocate_func) \
- (ptr, (oldsize) * sizeof (type), (newsize) * sizeof (type)); \
+#define __GMP_REALLOCATE_FUNC_MAYBE_TYPE(ptr, oldsize, newsize, type) \
+ do { \
+ if ((oldsize) != (newsize)) \
+ (ptr) = (type *) (*__gmp_reallocate_func) \
+ (ptr, (oldsize) * sizeof (type), (newsize) * sizeof (type)); \
} while (0)
#endif
-__GMP_DECLSPEC void __gmpz_aorsmul_1 __GMP_PROTO ((REGPARM_3_1 (mpz_ptr, mpz_srcptr, mp_limb_t, mp_size_t))) REGPARM_ATTR(1);
+__GMP_DECLSPEC void __gmpz_aorsmul_1 (REGPARM_3_1 (mpz_ptr, mpz_srcptr, mp_limb_t, mp_size_t)) REGPARM_ATTR(1);
#define mpz_aorsmul_1(w,u,v,sub) __gmpz_aorsmul_1 (REGPARM_3_1 (w, u, v, sub))
#define mpz_n_pow_ui __gmpz_n_pow_ui
-__GMP_DECLSPEC void mpz_n_pow_ui __GMP_PROTO ((mpz_ptr, mp_srcptr, mp_size_t, unsigned long));
+__GMP_DECLSPEC void mpz_n_pow_ui (mpz_ptr, mp_srcptr, mp_size_t, unsigned long);
#define mpn_addmul_1c __MPN(addmul_1c)
-__GMP_DECLSPEC mp_limb_t mpn_addmul_1c __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));
+__GMP_DECLSPEC mp_limb_t mpn_addmul_1c (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t);
+#ifndef mpn_addmul_2 /* if not done with cpuvec in a fat binary */
#define mpn_addmul_2 __MPN(addmul_2)
-__GMP_DECLSPEC mp_limb_t mpn_addmul_2 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
+__GMP_DECLSPEC mp_limb_t mpn_addmul_2 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
+#endif
#define mpn_addmul_3 __MPN(addmul_3)
-__GMP_DECLSPEC mp_limb_t mpn_addmul_3 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
+__GMP_DECLSPEC mp_limb_t mpn_addmul_3 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
#define mpn_addmul_4 __MPN(addmul_4)
-__GMP_DECLSPEC mp_limb_t mpn_addmul_4 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
+__GMP_DECLSPEC mp_limb_t mpn_addmul_4 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
#define mpn_addmul_5 __MPN(addmul_5)
-__GMP_DECLSPEC mp_limb_t mpn_addmul_5 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
+__GMP_DECLSPEC mp_limb_t mpn_addmul_5 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
#define mpn_addmul_6 __MPN(addmul_6)
-__GMP_DECLSPEC mp_limb_t mpn_addmul_6 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
+__GMP_DECLSPEC mp_limb_t mpn_addmul_6 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
#define mpn_addmul_7 __MPN(addmul_7)
-__GMP_DECLSPEC mp_limb_t mpn_addmul_7 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
+__GMP_DECLSPEC mp_limb_t mpn_addmul_7 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
#define mpn_addmul_8 __MPN(addmul_8)
-__GMP_DECLSPEC mp_limb_t mpn_addmul_8 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
+__GMP_DECLSPEC mp_limb_t mpn_addmul_8 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
+
+/* Alternative entry point in mpn_addmul_2 for the benefit of mpn_sqr_basecase. */
+#define mpn_addmul_2s __MPN(addmul_2s)
+__GMP_DECLSPEC mp_limb_t mpn_addmul_2s (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
/* mpn_addlsh1_n(c,a,b,n), when it exists, sets {c,n} to {a,n}+2*{b,n}, and
- returns the carry out (0, 1 or 2). */
+ returns the carry out (0, 1 or 2). Use _ip1 when a=c. */
+#ifndef mpn_addlsh1_n /* if not done with cpuvec in a fat binary */
#define mpn_addlsh1_n __MPN(addlsh1_n)
-__GMP_DECLSPEC mp_limb_t mpn_addlsh1_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+__GMP_DECLSPEC mp_limb_t mpn_addlsh1_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+#endif
+#define mpn_addlsh1_nc __MPN(addlsh1_nc)
+__GMP_DECLSPEC mp_limb_t mpn_addlsh1_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
+#if HAVE_NATIVE_mpn_addlsh1_n && ! HAVE_NATIVE_mpn_addlsh1_n_ip1
+#define mpn_addlsh1_n_ip1(dst,src,n) mpn_addlsh1_n(dst,dst,src,n)
+#define HAVE_NATIVE_mpn_addlsh1_n_ip1 1
+#else
+#define mpn_addlsh1_n_ip1 __MPN(addlsh1_n_ip1)
+__GMP_DECLSPEC mp_limb_t mpn_addlsh1_n_ip1 (mp_ptr, mp_srcptr, mp_size_t);
+#endif
+#if HAVE_NATIVE_mpn_addlsh1_nc && ! HAVE_NATIVE_mpn_addlsh1_nc_ip1
+#define mpn_addlsh1_nc_ip1(dst,src,n,c) mpn_addlsh1_nc(dst,dst,src,n,c)
+#define HAVE_NATIVE_mpn_addlsh1_nc_ip1 1
+#else
+#define mpn_addlsh1_nc_ip1 __MPN(addlsh1_nc_ip1)
+__GMP_DECLSPEC mp_limb_t mpn_addlsh1_nc_ip1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+#endif
+#ifndef mpn_addlsh2_n /* if not done with cpuvec in a fat binary */
/* mpn_addlsh2_n(c,a,b,n), when it exists, sets {c,n} to {a,n}+4*{b,n}, and
- returns the carry out (0, ..., 4). */
+ returns the carry out (0, ..., 4). Use _ip1 when a=c. */
#define mpn_addlsh2_n __MPN(addlsh2_n)
-__GMP_DECLSPEC mp_limb_t mpn_addlsh2_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+__GMP_DECLSPEC mp_limb_t mpn_addlsh2_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+#endif
+#define mpn_addlsh2_nc __MPN(addlsh2_nc)
+__GMP_DECLSPEC mp_limb_t mpn_addlsh2_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
+#if HAVE_NATIVE_mpn_addlsh2_n && ! HAVE_NATIVE_mpn_addlsh2_n_ip1
+#define mpn_addlsh2_n_ip1(dst,src,n) mpn_addlsh2_n(dst,dst,src,n)
+#define HAVE_NATIVE_mpn_addlsh2_n_ip1 1
+#else
+#define mpn_addlsh2_n_ip1 __MPN(addlsh2_n_ip1)
+__GMP_DECLSPEC mp_limb_t mpn_addlsh2_n_ip1 (mp_ptr, mp_srcptr, mp_size_t);
+#endif
+#if HAVE_NATIVE_mpn_addlsh2_nc && ! HAVE_NATIVE_mpn_addlsh2_nc_ip1
+#define mpn_addlsh2_nc_ip1(dst,src,n,c) mpn_addlsh2_nc(dst,dst,src,n,c)
+#define HAVE_NATIVE_mpn_addlsh2_nc_ip1 1
+#else
+#define mpn_addlsh2_nc_ip1 __MPN(addlsh2_nc_ip1)
+__GMP_DECLSPEC mp_limb_t mpn_addlsh2_nc_ip1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+#endif
/* mpn_addlsh_n(c,a,b,n,k), when it exists, sets {c,n} to {a,n}+2^k*{b,n}, and
- returns the carry out (0, ..., 2^k). */
+ returns the carry out (0, ..., 2^k). Use _ip1 when a=c. */
#define mpn_addlsh_n __MPN(addlsh_n)
- __GMP_DECLSPEC mp_limb_t mpn_addlsh_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int));
+__GMP_DECLSPEC mp_limb_t mpn_addlsh_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int);
+#define mpn_addlsh_nc __MPN(addlsh_nc)
+__GMP_DECLSPEC mp_limb_t mpn_addlsh_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int, mp_limb_t);
+#if HAVE_NATIVE_mpn_addlsh_n && ! HAVE_NATIVE_mpn_addlsh_n_ip1
+#define mpn_addlsh_n_ip1(dst,src,n,s) mpn_addlsh_n(dst,dst,src,n,s)
+#define HAVE_NATIVE_mpn_addlsh_n_ip1 1
+#else
+#define mpn_addlsh_n_ip1 __MPN(addlsh_n_ip1)
+ __GMP_DECLSPEC mp_limb_t mpn_addlsh_n_ip1 (mp_ptr, mp_srcptr, mp_size_t, unsigned int);
+#endif
+#if HAVE_NATIVE_mpn_addlsh_nc && ! HAVE_NATIVE_mpn_addlsh_nc_ip1
+#define mpn_addlsh_nc_ip1(dst,src,n,s,c) mpn_addlsh_nc(dst,dst,src,n,s,c)
+#define HAVE_NATIVE_mpn_addlsh_nc_ip1 1
+#else
+#define mpn_addlsh_nc_ip1 __MPN(addlsh_nc_ip1)
+__GMP_DECLSPEC mp_limb_t mpn_addlsh_nc_ip1 (mp_ptr, mp_srcptr, mp_size_t, unsigned int, mp_limb_t);
+#endif
+#ifndef mpn_sublsh1_n /* if not done with cpuvec in a fat binary */
/* mpn_sublsh1_n(c,a,b,n), when it exists, sets {c,n} to {a,n}-2*{b,n}, and
- returns the borrow out (0, 1 or 2). */
+ returns the borrow out (0, 1 or 2). Use _ip1 when a=c. */
#define mpn_sublsh1_n __MPN(sublsh1_n)
-__GMP_DECLSPEC mp_limb_t mpn_sublsh1_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+__GMP_DECLSPEC mp_limb_t mpn_sublsh1_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+#endif
+#define mpn_sublsh1_nc __MPN(sublsh1_nc)
+__GMP_DECLSPEC mp_limb_t mpn_sublsh1_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
+#if HAVE_NATIVE_mpn_sublsh1_n && ! HAVE_NATIVE_mpn_sublsh1_n_ip1
+#define mpn_sublsh1_n_ip1(dst,src,n) mpn_sublsh1_n(dst,dst,src,n)
+#define HAVE_NATIVE_mpn_sublsh1_n_ip1 1
+#else
+#define mpn_sublsh1_n_ip1 __MPN(sublsh1_n_ip1)
+__GMP_DECLSPEC mp_limb_t mpn_sublsh1_n_ip1 (mp_ptr, mp_srcptr, mp_size_t);
+#endif
+#if HAVE_NATIVE_mpn_sublsh1_nc && ! HAVE_NATIVE_mpn_sublsh1_nc_ip1
+#define mpn_sublsh1_nc_ip1(dst,src,n,c) mpn_sublsh1_nc(dst,dst,src,n,c)
+#define HAVE_NATIVE_mpn_sublsh1_nc_ip1 1
+#else
+#define mpn_sublsh1_nc_ip1 __MPN(sublsh1_nc_ip1)
+__GMP_DECLSPEC mp_limb_t mpn_sublsh1_nc_ip1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+#endif
/* mpn_rsblsh1_n(c,a,b,n), when it exists, sets {c,n} to 2*{b,n}-{a,n}, and
returns the carry out (-1, 0, 1). */
#define mpn_rsblsh1_n __MPN(rsblsh1_n)
-__GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh1_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+__GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh1_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+#define mpn_rsblsh1_nc __MPN(rsblsh1_nc)
+__GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh1_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
/* mpn_sublsh2_n(c,a,b,n), when it exists, sets {c,n} to {a,n}-4*{b,n}, and
- returns the borrow out (FIXME 0, 1, 2 or 3). */
+ returns the borrow out (0, ..., 4). Use _ip1 when a=c. */
#define mpn_sublsh2_n __MPN(sublsh2_n)
-__GMP_DECLSPEC mp_limb_t mpn_sublsh2_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+__GMP_DECLSPEC mp_limb_t mpn_sublsh2_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+#define mpn_sublsh2_nc __MPN(sublsh2_nc)
+__GMP_DECLSPEC mp_limb_t mpn_sublsh2_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
+#if HAVE_NATIVE_mpn_sublsh2_n && ! HAVE_NATIVE_mpn_sublsh2_n_ip1
+#define mpn_sublsh2_n_ip1(dst,src,n) mpn_sublsh2_n(dst,dst,src,n)
+#define HAVE_NATIVE_mpn_sublsh2_n_ip1 1
+#else
+#define mpn_sublsh2_n_ip1 __MPN(sublsh2_n_ip1)
+__GMP_DECLSPEC mp_limb_t mpn_sublsh2_n_ip1 (mp_ptr, mp_srcptr, mp_size_t);
+#endif
+#if HAVE_NATIVE_mpn_sublsh2_nc && ! HAVE_NATIVE_mpn_sublsh2_nc_ip1
+#define mpn_sublsh2_nc_ip1(dst,src,n,c) mpn_sublsh2_nc(dst,dst,src,n,c)
+#define HAVE_NATIVE_mpn_sublsh2_nc_ip1 1
+#else
+#define mpn_sublsh2_nc_ip1 __MPN(sublsh2_nc_ip1)
+__GMP_DECLSPEC mp_limb_t mpn_sublsh2_nc_ip1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+#endif
+
+/* mpn_sublsh_n(c,a,b,n,k), when it exists, sets {c,n} to {a,n}-2^k*{b,n}, and
+ returns the borrow out (0, ..., 2^k). Use _ip1 when a=c. */
+#define mpn_sublsh_n __MPN(sublsh_n)
+__GMP_DECLSPEC mp_limb_t mpn_sublsh_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int);
+#if HAVE_NATIVE_mpn_sublsh_n && ! HAVE_NATIVE_mpn_sublsh_n_ip1
+#define mpn_sublsh_n_ip1(dst,src,n,s) mpn_sublsh_n(dst,dst,src,n,s)
+#define HAVE_NATIVE_mpn_sublsh_n_ip1 1
+#else
+#define mpn_sublsh_n_ip1 __MPN(sublsh_n_ip1)
+__GMP_DECLSPEC mp_limb_t mpn_sublsh_n_ip1 (mp_ptr, mp_srcptr, mp_size_t, unsigned int);
+#endif
+#if HAVE_NATIVE_mpn_sublsh_nc && ! HAVE_NATIVE_mpn_sublsh_nc_ip1
+#define mpn_sublsh_nc_ip1(dst,src,n,s,c) mpn_sublsh_nc(dst,dst,src,n,s,c)
+#define HAVE_NATIVE_mpn_sublsh_nc_ip1 1
+#else
+#define mpn_sublsh_nc_ip1 __MPN(sublsh_nc_ip1)
+__GMP_DECLSPEC mp_limb_t mpn_sublsh_nc_ip1 (mp_ptr, mp_srcptr, mp_size_t, unsigned int, mp_limb_t);
+#endif
/* mpn_rsblsh2_n(c,a,b,n), when it exists, sets {c,n} to 4*{b,n}-{a,n}, and
returns the carry out (-1, ..., 3). */
#define mpn_rsblsh2_n __MPN(rsblsh2_n)
-__GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh2_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+__GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh2_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+#define mpn_rsblsh2_nc __MPN(rsblsh2_nc)
+__GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh2_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
/* mpn_rsblsh_n(c,a,b,n,k), when it exists, sets {c,n} to 2^k*{b,n}-{a,n}, and
returns the carry out (-1, 0, ..., 2^k-1). */
#define mpn_rsblsh_n __MPN(rsblsh_n)
-__GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int));
+__GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int);
+#define mpn_rsblsh_nc __MPN(rsblsh_nc)
+__GMP_DECLSPEC mp_limb_signed_t mpn_rsblsh_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int, mp_limb_t);
/* mpn_rsh1add_n(c,a,b,n), when it exists, sets {c,n} to ({a,n} + {b,n}) >> 1,
and returns the bit rshifted out (0 or 1). */
#define mpn_rsh1add_n __MPN(rsh1add_n)
-__GMP_DECLSPEC mp_limb_t mpn_rsh1add_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+__GMP_DECLSPEC mp_limb_t mpn_rsh1add_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
#define mpn_rsh1add_nc __MPN(rsh1add_nc)
-__GMP_DECLSPEC mp_limb_t mpn_rsh1add_nc __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));
+__GMP_DECLSPEC mp_limb_t mpn_rsh1add_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
/* mpn_rsh1sub_n(c,a,b,n), when it exists, sets {c,n} to ({a,n} - {b,n}) >> 1,
and returns the bit rshifted out (0 or 1). If there's a borrow from the
subtract, it's stored as a 1 in the high bit of c[n-1], like a twos
complement negative. */
#define mpn_rsh1sub_n __MPN(rsh1sub_n)
-__GMP_DECLSPEC mp_limb_t mpn_rsh1sub_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+__GMP_DECLSPEC mp_limb_t mpn_rsh1sub_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
#define mpn_rsh1sub_nc __MPN(rsh1sub_nc)
-__GMP_DECLSPEC mp_limb_t mpn_rsh1sub_nc __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));
+__GMP_DECLSPEC mp_limb_t mpn_rsh1sub_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
+#ifndef mpn_lshiftc /* if not done with cpuvec in a fat binary */
#define mpn_lshiftc __MPN(lshiftc)
-__GMP_DECLSPEC mp_limb_t mpn_lshiftc __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, unsigned int));
+__GMP_DECLSPEC mp_limb_t mpn_lshiftc (mp_ptr, mp_srcptr, mp_size_t, unsigned int);
+#endif
+
+#define mpn_add_err1_n __MPN(add_err1_n)
+__GMP_DECLSPEC mp_limb_t mpn_add_err1_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+
+#define mpn_add_err2_n __MPN(add_err2_n)
+__GMP_DECLSPEC mp_limb_t mpn_add_err2_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
+
+#define mpn_add_err3_n __MPN(add_err3_n)
+__GMP_DECLSPEC mp_limb_t mpn_add_err3_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
+
+#define mpn_sub_err1_n __MPN(sub_err1_n)
+__GMP_DECLSPEC mp_limb_t mpn_sub_err1_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+
+#define mpn_sub_err2_n __MPN(sub_err2_n)
+__GMP_DECLSPEC mp_limb_t mpn_sub_err2_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
+
+#define mpn_sub_err3_n __MPN(sub_err3_n)
+__GMP_DECLSPEC mp_limb_t mpn_sub_err3_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
#define mpn_add_n_sub_n __MPN(add_n_sub_n)
-__GMP_DECLSPEC mp_limb_t mpn_add_n_sub_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+__GMP_DECLSPEC mp_limb_t mpn_add_n_sub_n (mp_ptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
#define mpn_add_n_sub_nc __MPN(add_n_sub_nc)
-__GMP_DECLSPEC mp_limb_t mpn_add_n_sub_nc __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));
+__GMP_DECLSPEC mp_limb_t mpn_add_n_sub_nc (mp_ptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
#define mpn_addaddmul_1msb0 __MPN(addaddmul_1msb0)
-__GMP_DECLSPEC mp_limb_t mpn_addaddmul_1msb0 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));
+__GMP_DECLSPEC mp_limb_t mpn_addaddmul_1msb0 (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t);
#define mpn_divrem_1c __MPN(divrem_1c)
-__GMP_DECLSPEC mp_limb_t mpn_divrem_1c __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));
+__GMP_DECLSPEC mp_limb_t mpn_divrem_1c (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t);
#define mpn_dump __MPN(dump)
-__GMP_DECLSPEC void mpn_dump __GMP_PROTO ((mp_srcptr, mp_size_t));
+__GMP_DECLSPEC void mpn_dump (mp_srcptr, mp_size_t);
#define mpn_fib2_ui __MPN(fib2_ui)
-__GMP_DECLSPEC mp_size_t mpn_fib2_ui __GMP_PROTO ((mp_ptr, mp_ptr, unsigned long));
+__GMP_DECLSPEC mp_size_t mpn_fib2_ui (mp_ptr, mp_ptr, unsigned long);
/* Remap names of internal mpn functions. */
#define __clz_tab __MPN(clz_tab)
#define mpn_udiv_w_sdiv __MPN(udiv_w_sdiv)
#define mpn_jacobi_base __MPN(jacobi_base)
-__GMP_DECLSPEC int mpn_jacobi_base __GMP_PROTO ((mp_limb_t, mp_limb_t, int)) ATTRIBUTE_CONST;
+__GMP_DECLSPEC int mpn_jacobi_base (mp_limb_t, mp_limb_t, int) ATTRIBUTE_CONST;
+
+#define mpn_jacobi_2 __MPN(jacobi_2)
+__GMP_DECLSPEC int mpn_jacobi_2 (mp_srcptr, mp_srcptr, unsigned);
+
+#define mpn_jacobi_n __MPN(jacobi_n)
+__GMP_DECLSPEC int mpn_jacobi_n (mp_ptr, mp_ptr, mp_size_t, unsigned);
#define mpn_mod_1c __MPN(mod_1c)
-__GMP_DECLSPEC mp_limb_t mpn_mod_1c __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t)) __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC mp_limb_t mpn_mod_1c (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t) __GMP_ATTRIBUTE_PURE;
#define mpn_mul_1c __MPN(mul_1c)
-__GMP_DECLSPEC mp_limb_t mpn_mul_1c __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));
+__GMP_DECLSPEC mp_limb_t mpn_mul_1c (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t);
#define mpn_mul_2 __MPN(mul_2)
-__GMP_DECLSPEC mp_limb_t mpn_mul_2 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
+__GMP_DECLSPEC mp_limb_t mpn_mul_2 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
#define mpn_mul_3 __MPN(mul_3)
-__GMP_DECLSPEC mp_limb_t mpn_mul_3 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
+__GMP_DECLSPEC mp_limb_t mpn_mul_3 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
#define mpn_mul_4 __MPN(mul_4)
-__GMP_DECLSPEC mp_limb_t mpn_mul_4 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
+__GMP_DECLSPEC mp_limb_t mpn_mul_4 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
+
+#define mpn_mul_5 __MPN(mul_5)
+__GMP_DECLSPEC mp_limb_t mpn_mul_5 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
+
+#define mpn_mul_6 __MPN(mul_6)
+__GMP_DECLSPEC mp_limb_t mpn_mul_6 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
#ifndef mpn_mul_basecase /* if not done with cpuvec in a fat binary */
#define mpn_mul_basecase __MPN(mul_basecase)
-__GMP_DECLSPEC void mpn_mul_basecase __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t));
+__GMP_DECLSPEC void mpn_mul_basecase (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
#endif
#define mpn_mullo_n __MPN(mullo_n)
-__GMP_DECLSPEC void mpn_mullo_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+__GMP_DECLSPEC void mpn_mullo_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+#ifndef mpn_mullo_basecase /* if not done with cpuvec in a fat binary */
#define mpn_mullo_basecase __MPN(mullo_basecase)
-__GMP_DECLSPEC void mpn_mullo_basecase __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
+__GMP_DECLSPEC void mpn_mullo_basecase (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+#endif
#define mpn_sqr __MPN(sqr)
-__GMP_DECLSPEC void mpn_sqr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
+__GMP_DECLSPEC void mpn_sqr (mp_ptr, mp_srcptr, mp_size_t);
#ifndef mpn_sqr_basecase /* if not done with cpuvec in a fat binary */
#define mpn_sqr_basecase __MPN(sqr_basecase)
-__GMP_DECLSPEC void mpn_sqr_basecase __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
+__GMP_DECLSPEC void mpn_sqr_basecase (mp_ptr, mp_srcptr, mp_size_t);
#endif
+#define mpn_mulmid_basecase __MPN(mulmid_basecase)
+__GMP_DECLSPEC void mpn_mulmid_basecase (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
+
+#define mpn_mulmid_n __MPN(mulmid_n)
+__GMP_DECLSPEC void mpn_mulmid_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+
+#define mpn_mulmid __MPN(mulmid)
+__GMP_DECLSPEC void mpn_mulmid (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
+
#define mpn_submul_1c __MPN(submul_1c)
-__GMP_DECLSPEC mp_limb_t mpn_submul_1c __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));
+__GMP_DECLSPEC mp_limb_t mpn_submul_1c (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t);
+#ifndef mpn_redc_1 /* if not done with cpuvec in a fat binary */
#define mpn_redc_1 __MPN(redc_1)
-__GMP_DECLSPEC void mpn_redc_1 __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));
+__GMP_DECLSPEC mp_limb_t mpn_redc_1 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+#endif
+#ifndef mpn_redc_2 /* if not done with cpuvec in a fat binary */
#define mpn_redc_2 __MPN(redc_2)
-__GMP_DECLSPEC void mpn_redc_2 __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
+__GMP_DECLSPEC mp_limb_t mpn_redc_2 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
+#endif
+
#define mpn_redc_n __MPN(redc_n)
-__GMP_DECLSPEC void mpn_redc_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr));
+__GMP_DECLSPEC void mpn_redc_n (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
+#ifndef mpn_mod_1_1p_cps /* if not done with cpuvec in a fat binary */
#define mpn_mod_1_1p_cps __MPN(mod_1_1p_cps)
-__GMP_DECLSPEC void mpn_mod_1_1p_cps __GMP_PROTO ((mp_limb_t [4], mp_limb_t));
+__GMP_DECLSPEC void mpn_mod_1_1p_cps (mp_limb_t [4], mp_limb_t);
+#endif
+#ifndef mpn_mod_1_1p /* if not done with cpuvec in a fat binary */
#define mpn_mod_1_1p __MPN(mod_1_1p)
-__GMP_DECLSPEC mp_limb_t mpn_mod_1_1p __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [4])) __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC mp_limb_t mpn_mod_1_1p (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [4]) __GMP_ATTRIBUTE_PURE;
+#endif
+#ifndef mpn_mod_1s_2p_cps /* if not done with cpuvec in a fat binary */
#define mpn_mod_1s_2p_cps __MPN(mod_1s_2p_cps)
-__GMP_DECLSPEC void mpn_mod_1s_2p_cps __GMP_PROTO ((mp_limb_t [5], mp_limb_t));
+__GMP_DECLSPEC void mpn_mod_1s_2p_cps (mp_limb_t [5], mp_limb_t);
+#endif
+#ifndef mpn_mod_1s_2p /* if not done with cpuvec in a fat binary */
#define mpn_mod_1s_2p __MPN(mod_1s_2p)
-__GMP_DECLSPEC mp_limb_t mpn_mod_1s_2p __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [5])) __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC mp_limb_t mpn_mod_1s_2p (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [5]) __GMP_ATTRIBUTE_PURE;
+#endif
+#ifndef mpn_mod_1s_3p_cps /* if not done with cpuvec in a fat binary */
#define mpn_mod_1s_3p_cps __MPN(mod_1s_3p_cps)
-__GMP_DECLSPEC void mpn_mod_1s_3p_cps __GMP_PROTO ((mp_limb_t [6], mp_limb_t));
+__GMP_DECLSPEC void mpn_mod_1s_3p_cps (mp_limb_t [6], mp_limb_t);
+#endif
+#ifndef mpn_mod_1s_3p /* if not done with cpuvec in a fat binary */
#define mpn_mod_1s_3p __MPN(mod_1s_3p)
-__GMP_DECLSPEC mp_limb_t mpn_mod_1s_3p __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [6])) __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC mp_limb_t mpn_mod_1s_3p (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [6]) __GMP_ATTRIBUTE_PURE;
+#endif
+#ifndef mpn_mod_1s_4p_cps /* if not done with cpuvec in a fat binary */
#define mpn_mod_1s_4p_cps __MPN(mod_1s_4p_cps)
-__GMP_DECLSPEC void mpn_mod_1s_4p_cps __GMP_PROTO ((mp_limb_t [7], mp_limb_t));
+__GMP_DECLSPEC void mpn_mod_1s_4p_cps (mp_limb_t [7], mp_limb_t);
+#endif
+#ifndef mpn_mod_1s_4p /* if not done with cpuvec in a fat binary */
#define mpn_mod_1s_4p __MPN(mod_1s_4p)
-__GMP_DECLSPEC mp_limb_t mpn_mod_1s_4p __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [7])) __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC mp_limb_t mpn_mod_1s_4p (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [7]) __GMP_ATTRIBUTE_PURE;
+#endif
#define mpn_bc_mulmod_bnm1 __MPN(bc_mulmod_bnm1)
-__GMP_DECLSPEC void mpn_bc_mulmod_bnm1 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr));
+__GMP_DECLSPEC void mpn_bc_mulmod_bnm1 (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr);
#define mpn_mulmod_bnm1 __MPN(mulmod_bnm1)
-__GMP_DECLSPEC void mpn_mulmod_bnm1 __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
+__GMP_DECLSPEC void mpn_mulmod_bnm1 (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
#define mpn_mulmod_bnm1_next_size __MPN(mulmod_bnm1_next_size)
-__GMP_DECLSPEC mp_size_t mpn_mulmod_bnm1_next_size __GMP_PROTO ((mp_size_t)) ATTRIBUTE_CONST;
+__GMP_DECLSPEC mp_size_t mpn_mulmod_bnm1_next_size (mp_size_t) ATTRIBUTE_CONST;
static inline mp_size_t
mpn_mulmod_bnm1_itch (mp_size_t rn, mp_size_t an, mp_size_t bn) {
mp_size_t n, itch;
}
#define mpn_sqrmod_bnm1 __MPN(sqrmod_bnm1)
-__GMP_DECLSPEC void mpn_sqrmod_bnm1 __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
+__GMP_DECLSPEC void mpn_sqrmod_bnm1 (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
#define mpn_sqrmod_bnm1_next_size __MPN(sqrmod_bnm1_next_size)
-__GMP_DECLSPEC mp_size_t mpn_sqrmod_bnm1_next_size __GMP_PROTO ((mp_size_t)) ATTRIBUTE_CONST;
+__GMP_DECLSPEC mp_size_t mpn_sqrmod_bnm1_next_size (mp_size_t) ATTRIBUTE_CONST;
static inline mp_size_t
mpn_sqrmod_bnm1_itch (mp_size_t rn, mp_size_t an) {
mp_size_t n, itch;
/* Pseudo-random number generator function pointers structure. */
typedef struct {
- void (*randseed_fn) __GMP_PROTO ((gmp_randstate_t, mpz_srcptr));
- void (*randget_fn) __GMP_PROTO ((gmp_randstate_t, mp_ptr, unsigned long int));
- void (*randclear_fn) __GMP_PROTO ((gmp_randstate_t));
- void (*randiset_fn) __GMP_PROTO ((gmp_randstate_ptr, gmp_randstate_srcptr));
+ void (*randseed_fn) (gmp_randstate_t, mpz_srcptr);
+ /* randget_fn is the member _gmp_rand dispatches through, passing
+    (state, rp, bits) in that order.  */
+ void (*randget_fn) (gmp_randstate_t, mp_ptr, unsigned long int);
+ void (*randclear_fn) (gmp_randstate_t);
+ void (*randiset_fn) (gmp_randstate_ptr, gmp_randstate_srcptr);
} gmp_randfnptr_t;
/* Macro to obtain a void pointer to the function pointers structure. */
#define RNG_STATE(rstate) ((rstate)->_mp_seed->_mp_d)
/* Write a given number of random bits to rp. */
-#define _gmp_rand(rp, state, bits) \
- do { \
- gmp_randstate_ptr __rstate = (state); \
- (*((gmp_randfnptr_t *) RNG_FNPTR (__rstate))->randget_fn) \
- (__rstate, rp, bits); \
+#define _gmp_rand(rp, state, bits) \
+ do { \
+ gmp_randstate_ptr __rstate = (state); \
+ (*((gmp_randfnptr_t *) RNG_FNPTR (__rstate))->randget_fn) \
+ (__rstate, rp, bits); \
} while (0)
-__GMP_DECLSPEC void __gmp_randinit_mt_noseed __GMP_PROTO ((gmp_randstate_t));
+__GMP_DECLSPEC void __gmp_randinit_mt_noseed (gmp_randstate_t);
/* __gmp_rands is the global state for the old-style random functions, and
__GMP_DECLSPEC extern char __gmp_rands_initialized;
__GMP_DECLSPEC extern gmp_randstate_t __gmp_rands;
-#define RANDS \
- ((__gmp_rands_initialized ? 0 \
- : (__gmp_rands_initialized = 1, \
- __gmp_randinit_mt_noseed (__gmp_rands), 0)), \
+#define RANDS \
+ ((__gmp_rands_initialized ? 0 \
+ : (__gmp_rands_initialized = 1, \
+ __gmp_randinit_mt_noseed (__gmp_rands), 0)), \
__gmp_rands)
/* this is used by the test programs, to free memory */
-#define RANDS_CLEAR() \
- do { \
- if (__gmp_rands_initialized) \
- { \
- __gmp_rands_initialized = 0; \
- gmp_randclear (__gmp_rands); \
- } \
+#define RANDS_CLEAR() \
+ do { \
+ if (__gmp_rands_initialized) \
+ { \
+ __gmp_rands_initialized = 0; \
+ gmp_randclear (__gmp_rands); \
+ } \
} while (0)
be compile-time constants, so the compiler should be able to eliminate
the code for the unwanted algorithm. */
-#define ABOVE_THRESHOLD(size,thresh) \
- ((thresh) == 0 \
- || ((thresh) != MP_SIZE_T_MAX \
+#if ! defined (__GNUC__) || __GNUC__ < 2
+/* Portable form: thresh == 0 always counts as above, thresh ==
+   MP_SIZE_T_MAX never does, otherwise size is compared against thresh.  */
+#define ABOVE_THRESHOLD(size,thresh) \
+ ((thresh) == 0 \
+ || ((thresh) != MP_SIZE_T_MAX \
+ && (size) >= (thresh)))
+#else
+/* GCC form: the 0 and MP_SIZE_T_MAX special cases are applied only when
+   __builtin_constant_p (thresh) holds; a non-constant thresh falls
+   through to the plain (size) >= (thresh) comparison.  */
+#define ABOVE_THRESHOLD(size,thresh) \
+ ((__builtin_constant_p (thresh) && (thresh) == 0) \
+ || (!(__builtin_constant_p (thresh) && (thresh) == MP_SIZE_T_MAX) \
&& (size) >= (thresh)))
+#endif
#define BELOW_THRESHOLD(size,thresh) (! ABOVE_THRESHOLD (size, thresh))
#define MPN_TOOM22_MUL_MINSIZE 4
#define MPN_TOOM32_MUL_MINSIZE 10
#define MPN_TOOM42_MUL_MINSIZE 10
-#define MPN_TOOM43_MUL_MINSIZE 49 /* ??? */
-#define MPN_TOOM53_MUL_MINSIZE 49 /* ??? */
+#define MPN_TOOM43_MUL_MINSIZE 25
+#define MPN_TOOM53_MUL_MINSIZE 17
+#define MPN_TOOM54_MUL_MINSIZE 31
#define MPN_TOOM63_MUL_MINSIZE 49
+#define MPN_TOOM42_MULMID_MINSIZE 4
+
#define mpn_sqr_diagonal __MPN(sqr_diagonal)
-__GMP_DECLSPEC void mpn_sqr_diagonal __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
+__GMP_DECLSPEC void mpn_sqr_diagonal (mp_ptr, mp_srcptr, mp_size_t);
+
+#define mpn_sqr_diag_addlsh1 __MPN(sqr_diag_addlsh1)
+__GMP_DECLSPEC void mpn_sqr_diag_addlsh1 (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
#define mpn_toom_interpolate_5pts __MPN(toom_interpolate_5pts)
-__GMP_DECLSPEC void mpn_toom_interpolate_5pts __GMP_PROTO ((mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_size_t, int, mp_limb_t));
+__GMP_DECLSPEC void mpn_toom_interpolate_5pts (mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_size_t, int, mp_limb_t);
enum toom6_flags {toom6_all_pos = 0, toom6_vm1_neg = 1, toom6_vm2_neg = 2};
#define mpn_toom_interpolate_6pts __MPN(toom_interpolate_6pts)
-__GMP_DECLSPEC void mpn_toom_interpolate_6pts __GMP_PROTO ((mp_ptr, mp_size_t, enum toom6_flags, mp_ptr, mp_ptr, mp_ptr, mp_size_t));
+__GMP_DECLSPEC void mpn_toom_interpolate_6pts (mp_ptr, mp_size_t, enum toom6_flags, mp_ptr, mp_ptr, mp_ptr, mp_size_t);
enum toom7_flags { toom7_w1_neg = 1, toom7_w3_neg = 2 };
#define mpn_toom_interpolate_7pts __MPN(toom_interpolate_7pts)
-__GMP_DECLSPEC void mpn_toom_interpolate_7pts __GMP_PROTO ((mp_ptr, mp_size_t, enum toom7_flags, mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_ptr));
+__GMP_DECLSPEC void mpn_toom_interpolate_7pts (mp_ptr, mp_size_t, enum toom7_flags, mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_ptr);
#define mpn_toom_interpolate_8pts __MPN(toom_interpolate_8pts)
-__GMP_DECLSPEC void mpn_toom_interpolate_8pts __GMP_PROTO ((mp_ptr, mp_size_t, mp_ptr, mp_ptr, mp_size_t, mp_ptr));
+__GMP_DECLSPEC void mpn_toom_interpolate_8pts (mp_ptr, mp_size_t, mp_ptr, mp_ptr, mp_size_t, mp_ptr);
#define mpn_toom_interpolate_12pts __MPN(toom_interpolate_12pts)
-__GMP_DECLSPEC void mpn_toom_interpolate_12pts __GMP_PROTO ((mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_size_t, int, mp_ptr));
+__GMP_DECLSPEC void mpn_toom_interpolate_12pts (mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_size_t, int, mp_ptr);
#define mpn_toom_interpolate_16pts __MPN(toom_interpolate_16pts)
-__GMP_DECLSPEC void mpn_toom_interpolate_16pts __GMP_PROTO ((mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_size_t, int, mp_ptr));
+__GMP_DECLSPEC void mpn_toom_interpolate_16pts (mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_size_t, int, mp_ptr);
#define mpn_toom_couple_handling __MPN(toom_couple_handling)
-__GMP_DECLSPEC void mpn_toom_couple_handling __GMP_PROTO ((mp_ptr, mp_size_t, mp_ptr, int, mp_size_t, int, int));
+__GMP_DECLSPEC void mpn_toom_couple_handling (mp_ptr, mp_size_t, mp_ptr, int, mp_size_t, int, int);
#define mpn_toom_eval_dgr3_pm1 __MPN(toom_eval_dgr3_pm1)
-__GMP_DECLSPEC int mpn_toom_eval_dgr3_pm1 __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_size_t, mp_ptr));
+__GMP_DECLSPEC int mpn_toom_eval_dgr3_pm1 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_size_t, mp_ptr);
#define mpn_toom_eval_dgr3_pm2 __MPN(toom_eval_dgr3_pm2)
-__GMP_DECLSPEC int mpn_toom_eval_dgr3_pm2 __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_size_t, mp_ptr));
+__GMP_DECLSPEC int mpn_toom_eval_dgr3_pm2 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_size_t, mp_ptr);
#define mpn_toom_eval_pm1 __MPN(toom_eval_pm1)
-__GMP_DECLSPEC int mpn_toom_eval_pm1 __GMP_PROTO ((mp_ptr, mp_ptr, unsigned, mp_srcptr, mp_size_t, mp_size_t, mp_ptr));
+__GMP_DECLSPEC int mpn_toom_eval_pm1 (mp_ptr, mp_ptr, unsigned, mp_srcptr, mp_size_t, mp_size_t, mp_ptr);
#define mpn_toom_eval_pm2 __MPN(toom_eval_pm2)
-__GMP_DECLSPEC int mpn_toom_eval_pm2 __GMP_PROTO ((mp_ptr, mp_ptr, unsigned, mp_srcptr, mp_size_t, mp_size_t, mp_ptr));
+__GMP_DECLSPEC int mpn_toom_eval_pm2 (mp_ptr, mp_ptr, unsigned, mp_srcptr, mp_size_t, mp_size_t, mp_ptr);
#define mpn_toom_eval_pm2exp __MPN(toom_eval_pm2exp)
-__GMP_DECLSPEC int mpn_toom_eval_pm2exp __GMP_PROTO ((mp_ptr, mp_ptr, unsigned, mp_srcptr, mp_size_t, mp_size_t, unsigned, mp_ptr));
+__GMP_DECLSPEC int mpn_toom_eval_pm2exp (mp_ptr, mp_ptr, unsigned, mp_srcptr, mp_size_t, mp_size_t, unsigned, mp_ptr);
#define mpn_toom_eval_pm2rexp __MPN(toom_eval_pm2rexp)
-__GMP_DECLSPEC int mpn_toom_eval_pm2rexp __GMP_PROTO ((mp_ptr, mp_ptr, unsigned, mp_srcptr, mp_size_t, mp_size_t, unsigned, mp_ptr));
+__GMP_DECLSPEC int mpn_toom_eval_pm2rexp (mp_ptr, mp_ptr, unsigned, mp_srcptr, mp_size_t, mp_size_t, unsigned, mp_ptr);
#define mpn_toom22_mul __MPN(toom22_mul)
-__GMP_DECLSPEC void mpn_toom22_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
+__GMP_DECLSPEC void mpn_toom22_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
#define mpn_toom32_mul __MPN(toom32_mul)
-__GMP_DECLSPEC void mpn_toom32_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
+__GMP_DECLSPEC void mpn_toom32_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
#define mpn_toom42_mul __MPN(toom42_mul)
-__GMP_DECLSPEC void mpn_toom42_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
+__GMP_DECLSPEC void mpn_toom42_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
#define mpn_toom52_mul __MPN(toom52_mul)
-__GMP_DECLSPEC void mpn_toom52_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
+__GMP_DECLSPEC void mpn_toom52_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
#define mpn_toom62_mul __MPN(toom62_mul)
-__GMP_DECLSPEC void mpn_toom62_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
+__GMP_DECLSPEC void mpn_toom62_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
#define mpn_toom2_sqr __MPN(toom2_sqr)
-__GMP_DECLSPEC void mpn_toom2_sqr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
+__GMP_DECLSPEC void mpn_toom2_sqr (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);
#define mpn_toom33_mul __MPN(toom33_mul)
-__GMP_DECLSPEC void mpn_toom33_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
+__GMP_DECLSPEC void mpn_toom33_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
#define mpn_toom43_mul __MPN(toom43_mul)
-__GMP_DECLSPEC void mpn_toom43_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
+__GMP_DECLSPEC void mpn_toom43_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
#define mpn_toom53_mul __MPN(toom53_mul)
-__GMP_DECLSPEC void mpn_toom53_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
+__GMP_DECLSPEC void mpn_toom53_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
+
+#define mpn_toom54_mul __MPN(toom54_mul)
+__GMP_DECLSPEC void mpn_toom54_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
#define mpn_toom63_mul __MPN(toom63_mul)
-__GMP_DECLSPEC void mpn_toom63_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
+__GMP_DECLSPEC void mpn_toom63_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
#define mpn_toom3_sqr __MPN(toom3_sqr)
-__GMP_DECLSPEC void mpn_toom3_sqr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
+__GMP_DECLSPEC void mpn_toom3_sqr (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);
#define mpn_toom44_mul __MPN(toom44_mul)
-__GMP_DECLSPEC void mpn_toom44_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
+__GMP_DECLSPEC void mpn_toom44_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
#define mpn_toom4_sqr __MPN(toom4_sqr)
-__GMP_DECLSPEC void mpn_toom4_sqr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
+__GMP_DECLSPEC void mpn_toom4_sqr (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);
#define mpn_toom6h_mul __MPN(toom6h_mul)
-__GMP_DECLSPEC void mpn_toom6h_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
+__GMP_DECLSPEC void mpn_toom6h_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
#define mpn_toom6_sqr __MPN(toom6_sqr)
-__GMP_DECLSPEC void mpn_toom6_sqr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
+__GMP_DECLSPEC void mpn_toom6_sqr (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);
#define mpn_toom8h_mul __MPN(toom8h_mul)
-__GMP_DECLSPEC void mpn_toom8h_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
+__GMP_DECLSPEC void mpn_toom8h_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
#define mpn_toom8_sqr __MPN(toom8_sqr)
-__GMP_DECLSPEC void mpn_toom8_sqr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
+__GMP_DECLSPEC void mpn_toom8_sqr (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);
+
+#define mpn_toom42_mulmid __MPN(toom42_mulmid)
+__GMP_DECLSPEC void mpn_toom42_mulmid (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr);
#define mpn_fft_best_k __MPN(fft_best_k)
-__GMP_DECLSPEC int mpn_fft_best_k __GMP_PROTO ((mp_size_t, int)) ATTRIBUTE_CONST;
+__GMP_DECLSPEC int mpn_fft_best_k (mp_size_t, int) ATTRIBUTE_CONST;
#define mpn_mul_fft __MPN(mul_fft)
-__GMP_DECLSPEC mp_limb_t mpn_mul_fft __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, int));
+__GMP_DECLSPEC mp_limb_t mpn_mul_fft (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, int);
#define mpn_mul_fft_full __MPN(mul_fft_full)
-__GMP_DECLSPEC void mpn_mul_fft_full __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t));
+__GMP_DECLSPEC void mpn_mul_fft_full (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
#define mpn_nussbaumer_mul __MPN(nussbaumer_mul)
-__GMP_DECLSPEC void mpn_nussbaumer_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t));
+__GMP_DECLSPEC void mpn_nussbaumer_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
#define mpn_fft_next_size __MPN(fft_next_size)
-__GMP_DECLSPEC mp_size_t mpn_fft_next_size __GMP_PROTO ((mp_size_t, int)) ATTRIBUTE_CONST;
+__GMP_DECLSPEC mp_size_t mpn_fft_next_size (mp_size_t, int) ATTRIBUTE_CONST;
+
+#define mpn_div_qr_2n_pi1 __MPN(div_qr_2n_pi1)
+ __GMP_DECLSPEC mp_limb_t mpn_div_qr_2n_pi1 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, mp_limb_t);
+
+#define mpn_div_qr_2u_pi1 __MPN(div_qr_2u_pi1)
+ __GMP_DECLSPEC mp_limb_t mpn_div_qr_2u_pi1 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, int, mp_limb_t);
#define mpn_sbpi1_div_qr __MPN(sbpi1_div_qr)
-__GMP_DECLSPEC mp_limb_t mpn_sbpi1_div_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t));
+__GMP_DECLSPEC mp_limb_t mpn_sbpi1_div_qr (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
#define mpn_sbpi1_div_q __MPN(sbpi1_div_q)
-__GMP_DECLSPEC mp_limb_t mpn_sbpi1_div_q __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t));
+__GMP_DECLSPEC mp_limb_t mpn_sbpi1_div_q (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
#define mpn_sbpi1_divappr_q __MPN(sbpi1_divappr_q)
-__GMP_DECLSPEC mp_limb_t mpn_sbpi1_divappr_q __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t));
+__GMP_DECLSPEC mp_limb_t mpn_sbpi1_divappr_q (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
#define mpn_dcpi1_div_qr __MPN(dcpi1_div_qr)
-__GMP_DECLSPEC mp_limb_t mpn_dcpi1_div_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, gmp_pi1_t *));
+__GMP_DECLSPEC mp_limb_t mpn_dcpi1_div_qr (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, gmp_pi1_t *);
#define mpn_dcpi1_div_qr_n __MPN(dcpi1_div_qr_n)
-__GMP_DECLSPEC mp_limb_t mpn_dcpi1_div_qr_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, gmp_pi1_t *, mp_ptr));
+__GMP_DECLSPEC mp_limb_t mpn_dcpi1_div_qr_n (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, gmp_pi1_t *, mp_ptr);
#define mpn_dcpi1_div_q __MPN(dcpi1_div_q)
-__GMP_DECLSPEC mp_limb_t mpn_dcpi1_div_q __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, gmp_pi1_t *));
+__GMP_DECLSPEC mp_limb_t mpn_dcpi1_div_q (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, gmp_pi1_t *);
#define mpn_dcpi1_divappr_q __MPN(dcpi1_divappr_q)
-__GMP_DECLSPEC mp_limb_t mpn_dcpi1_divappr_q __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, gmp_pi1_t *));
+__GMP_DECLSPEC mp_limb_t mpn_dcpi1_divappr_q (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, gmp_pi1_t *);
#define mpn_dcpi1_divappr_q_n __MPN(dcpi1_divappr_q_n)
-__GMP_DECLSPEC mp_limb_t mpn_dcpi1_divappr_q_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, gmp_pi1_t *, mp_ptr));
+__GMP_DECLSPEC mp_limb_t mpn_dcpi1_divappr_q_n (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, gmp_pi1_t *, mp_ptr);
#define mpn_mu_div_qr __MPN(mu_div_qr)
-__GMP_DECLSPEC mp_limb_t mpn_mu_div_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
+__GMP_DECLSPEC mp_limb_t mpn_mu_div_qr (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
#define mpn_mu_div_qr_itch __MPN(mu_div_qr_itch)
-__GMP_DECLSPEC mp_size_t mpn_mu_div_qr_itch __GMP_PROTO ((mp_size_t, mp_size_t, int));
+__GMP_DECLSPEC mp_size_t mpn_mu_div_qr_itch (mp_size_t, mp_size_t, int);
#define mpn_mu_div_qr_choose_in __MPN(mu_div_qr_choose_in)
-__GMP_DECLSPEC mp_size_t mpn_mu_div_qr_choose_in __GMP_PROTO ((mp_size_t, mp_size_t, int));
+__GMP_DECLSPEC mp_size_t mpn_mu_div_qr_choose_in (mp_size_t, mp_size_t, int);
#define mpn_preinv_mu_div_qr __MPN(preinv_mu_div_qr)
-__GMP_DECLSPEC mp_limb_t mpn_preinv_mu_div_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
+__GMP_DECLSPEC mp_limb_t mpn_preinv_mu_div_qr (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
#define mpn_preinv_mu_div_qr_itch __MPN(preinv_mu_div_qr_itch)
-__GMP_DECLSPEC mp_size_t mpn_preinv_mu_div_qr_itch __GMP_PROTO ((mp_size_t, mp_size_t, mp_size_t));
+__GMP_DECLSPEC mp_size_t mpn_preinv_mu_div_qr_itch (mp_size_t, mp_size_t, mp_size_t);
#define mpn_mu_divappr_q __MPN(mu_divappr_q)
-__GMP_DECLSPEC mp_limb_t mpn_mu_divappr_q __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
+__GMP_DECLSPEC mp_limb_t mpn_mu_divappr_q (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
#define mpn_mu_divappr_q_itch __MPN(mu_divappr_q_itch)
-__GMP_DECLSPEC mp_size_t mpn_mu_divappr_q_itch __GMP_PROTO ((mp_size_t, mp_size_t, int));
+__GMP_DECLSPEC mp_size_t mpn_mu_divappr_q_itch (mp_size_t, mp_size_t, int);
#define mpn_mu_divappr_q_choose_in __MPN(mu_divappr_q_choose_in)
-__GMP_DECLSPEC mp_size_t mpn_mu_divappr_q_choose_in __GMP_PROTO ((mp_size_t, mp_size_t, int));
+__GMP_DECLSPEC mp_size_t mpn_mu_divappr_q_choose_in (mp_size_t, mp_size_t, int);
#define mpn_preinv_mu_divappr_q __MPN(preinv_mu_divappr_q)
-__GMP_DECLSPEC mp_limb_t mpn_preinv_mu_divappr_q __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
+__GMP_DECLSPEC mp_limb_t mpn_preinv_mu_divappr_q (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
#define mpn_mu_div_q __MPN(mu_div_q)
-__GMP_DECLSPEC mp_limb_t mpn_mu_div_q __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
+__GMP_DECLSPEC mp_limb_t mpn_mu_div_q (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
#define mpn_mu_div_q_itch __MPN(mu_div_q_itch)
-__GMP_DECLSPEC mp_size_t mpn_mu_div_q_itch __GMP_PROTO ((mp_size_t, mp_size_t, int));
+__GMP_DECLSPEC mp_size_t mpn_mu_div_q_itch (mp_size_t, mp_size_t, int);
#define mpn_div_q __MPN(div_q)
-__GMP_DECLSPEC void mpn_div_q __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
+__GMP_DECLSPEC void mpn_div_q (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
#define mpn_invert __MPN(invert)
-__GMP_DECLSPEC void mpn_invert __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
+__GMP_DECLSPEC void mpn_invert (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);
#define mpn_invert_itch(n) mpn_invertappr_itch(n)
#define mpn_ni_invertappr __MPN(ni_invertappr)
-__GMP_DECLSPEC mp_limb_t mpn_ni_invertappr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
+__GMP_DECLSPEC mp_limb_t mpn_ni_invertappr (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);
#define mpn_invertappr __MPN(invertappr)
-__GMP_DECLSPEC mp_limb_t mpn_invertappr __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
+__GMP_DECLSPEC mp_limb_t mpn_invertappr (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);
#define mpn_invertappr_itch(n) (3 * (n) + 2)
#define mpn_binvert __MPN(binvert)
-__GMP_DECLSPEC void mpn_binvert __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
+__GMP_DECLSPEC void mpn_binvert (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);
#define mpn_binvert_itch __MPN(binvert_itch)
-__GMP_DECLSPEC mp_size_t mpn_binvert_itch __GMP_PROTO ((mp_size_t));
+__GMP_DECLSPEC mp_size_t mpn_binvert_itch (mp_size_t);
#define mpn_bdiv_q_1 __MPN(bdiv_q_1)
-__GMP_DECLSPEC mp_limb_t mpn_bdiv_q_1 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));
+__GMP_DECLSPEC mp_limb_t mpn_bdiv_q_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
#define mpn_pi1_bdiv_q_1 __MPN(pi1_bdiv_q_1)
-__GMP_DECLSPEC mp_limb_t mpn_pi1_bdiv_q_1 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, int));
+__GMP_DECLSPEC mp_limb_t mpn_pi1_bdiv_q_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, int);
#define mpn_sbpi1_bdiv_qr __MPN(sbpi1_bdiv_qr)
-__GMP_DECLSPEC mp_limb_t mpn_sbpi1_bdiv_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t));
+__GMP_DECLSPEC mp_limb_t mpn_sbpi1_bdiv_qr (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
#define mpn_sbpi1_bdiv_q __MPN(sbpi1_bdiv_q)
-__GMP_DECLSPEC void mpn_sbpi1_bdiv_q __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t));
+__GMP_DECLSPEC void mpn_sbpi1_bdiv_q (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
#define mpn_dcpi1_bdiv_qr __MPN(dcpi1_bdiv_qr)
-__GMP_DECLSPEC mp_limb_t mpn_dcpi1_bdiv_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t));
+__GMP_DECLSPEC mp_limb_t mpn_dcpi1_bdiv_qr (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
#define mpn_dcpi1_bdiv_qr_n_itch __MPN(dcpi1_bdiv_qr_n_itch)
-__GMP_DECLSPEC mp_size_t mpn_dcpi1_bdiv_qr_n_itch __GMP_PROTO ((mp_size_t));
+__GMP_DECLSPEC mp_size_t mpn_dcpi1_bdiv_qr_n_itch (mp_size_t);
#define mpn_dcpi1_bdiv_qr_n __MPN(dcpi1_bdiv_qr_n)
-__GMP_DECLSPEC mp_limb_t mpn_dcpi1_bdiv_qr_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_ptr));
+__GMP_DECLSPEC mp_limb_t mpn_dcpi1_bdiv_qr_n (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_ptr);
#define mpn_dcpi1_bdiv_q __MPN(dcpi1_bdiv_q)
-__GMP_DECLSPEC void mpn_dcpi1_bdiv_q __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t));
+__GMP_DECLSPEC void mpn_dcpi1_bdiv_q (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
#define mpn_dcpi1_bdiv_q_n_itch __MPN(dcpi1_bdiv_q_n_itch)
-__GMP_DECLSPEC mp_size_t mpn_dcpi1_bdiv_q_n_itch __GMP_PROTO ((mp_size_t));
+__GMP_DECLSPEC mp_size_t mpn_dcpi1_bdiv_q_n_itch (mp_size_t);
#define mpn_dcpi1_bdiv_q_n __MPN(dcpi1_bdiv_q_n)
-__GMP_DECLSPEC void mpn_dcpi1_bdiv_q_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_ptr));
+__GMP_DECLSPEC void mpn_dcpi1_bdiv_q_n (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_ptr);
#define mpn_mu_bdiv_qr __MPN(mu_bdiv_qr)
-__GMP_DECLSPEC mp_limb_t mpn_mu_bdiv_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
+__GMP_DECLSPEC mp_limb_t mpn_mu_bdiv_qr (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
#define mpn_mu_bdiv_qr_itch __MPN(mu_bdiv_qr_itch)
-__GMP_DECLSPEC mp_size_t mpn_mu_bdiv_qr_itch __GMP_PROTO ((mp_size_t, mp_size_t));
+__GMP_DECLSPEC mp_size_t mpn_mu_bdiv_qr_itch (mp_size_t, mp_size_t);
#define mpn_mu_bdiv_q __MPN(mu_bdiv_q)
-__GMP_DECLSPEC void mpn_mu_bdiv_q __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
+__GMP_DECLSPEC void mpn_mu_bdiv_q (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
#define mpn_mu_bdiv_q_itch __MPN(mu_bdiv_q_itch)
-__GMP_DECLSPEC mp_size_t mpn_mu_bdiv_q_itch __GMP_PROTO ((mp_size_t, mp_size_t));
+__GMP_DECLSPEC mp_size_t mpn_mu_bdiv_q_itch (mp_size_t, mp_size_t);
#define mpn_bdiv_qr __MPN(bdiv_qr)
-__GMP_DECLSPEC mp_limb_t mpn_bdiv_qr __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
+__GMP_DECLSPEC mp_limb_t mpn_bdiv_qr (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
#define mpn_bdiv_qr_itch __MPN(bdiv_qr_itch)
-__GMP_DECLSPEC mp_size_t mpn_bdiv_qr_itch __GMP_PROTO ((mp_size_t, mp_size_t));
+__GMP_DECLSPEC mp_size_t mpn_bdiv_qr_itch (mp_size_t, mp_size_t);
#define mpn_bdiv_q __MPN(bdiv_q)
-__GMP_DECLSPEC void mpn_bdiv_q __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
+__GMP_DECLSPEC void mpn_bdiv_q (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
#define mpn_bdiv_q_itch __MPN(bdiv_q_itch)
-__GMP_DECLSPEC mp_size_t mpn_bdiv_q_itch __GMP_PROTO ((mp_size_t, mp_size_t));
+__GMP_DECLSPEC mp_size_t mpn_bdiv_q_itch (mp_size_t, mp_size_t);
#define mpn_divexact __MPN(divexact)
-__GMP_DECLSPEC void mpn_divexact __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t));
+__GMP_DECLSPEC void mpn_divexact (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
#define mpn_divexact_itch __MPN(divexact_itch)
-__GMP_DECLSPEC mp_size_t mpn_divexact_itch __GMP_PROTO ((mp_size_t, mp_size_t));
+__GMP_DECLSPEC mp_size_t mpn_divexact_itch (mp_size_t, mp_size_t);
+#ifndef mpn_bdiv_dbm1c /* if not done with cpuvec in a fat binary */
#define mpn_bdiv_dbm1c __MPN(bdiv_dbm1c)
-__GMP_DECLSPEC mp_limb_t mpn_bdiv_dbm1c __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t));
+__GMP_DECLSPEC mp_limb_t mpn_bdiv_dbm1c (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t);
+#endif
+
#define mpn_bdiv_dbm1(dst, src, size, divisor) \
mpn_bdiv_dbm1c (dst, src, size, divisor, __GMP_CAST (mp_limb_t, 0))
#define mpn_powm __MPN(powm)
-__GMP_DECLSPEC void mpn_powm __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
+__GMP_DECLSPEC void mpn_powm (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
#define mpn_powlo __MPN(powlo)
-__GMP_DECLSPEC void mpn_powlo __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_size_t, mp_ptr));
+__GMP_DECLSPEC void mpn_powlo (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_size_t, mp_ptr);
#define mpn_powm_sec __MPN(powm_sec)
-__GMP_DECLSPEC void mpn_powm_sec __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr));
+__GMP_DECLSPEC void mpn_powm_sec (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
#define mpn_powm_sec_itch __MPN(powm_sec_itch)
-__GMP_DECLSPEC mp_size_t mpn_powm_sec_itch __GMP_PROTO ((mp_size_t, mp_size_t, mp_size_t));
-#define mpn_subcnd_n __MPN(subcnd_n)
-__GMP_DECLSPEC mp_limb_t mpn_subcnd_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));
+__GMP_DECLSPEC mp_size_t mpn_powm_sec_itch (mp_size_t, mp_size_t, mp_size_t);
#define mpn_tabselect __MPN(tabselect)
-__GMP_DECLSPEC void mpn_tabselect __GMP_PROTO ((volatile mp_limb_t *, volatile mp_limb_t *, mp_size_t, mp_size_t, mp_size_t));
-#define mpn_redc_1_sec __MPN(redc_1_sec)
-__GMP_DECLSPEC void mpn_redc_1_sec __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));
+__GMP_DECLSPEC void mpn_tabselect (volatile mp_limb_t *, volatile mp_limb_t *, mp_size_t, mp_size_t, mp_size_t);
+#define mpn_addcnd_n __MPN(addcnd_n)
+__GMP_DECLSPEC mp_limb_t mpn_addcnd_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
+#define mpn_subcnd_n __MPN(subcnd_n)
+__GMP_DECLSPEC mp_limb_t mpn_subcnd_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
+
+#define mpn_sb_div_qr_sec __MPN(sb_div_qr_sec)
+__GMP_DECLSPEC void mpn_sb_div_qr_sec (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
+#define mpn_sbpi1_div_qr_sec __MPN(sbpi1_div_qr_sec)
+__GMP_DECLSPEC mp_limb_t mpn_sbpi1_div_qr_sec (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_ptr);
+#define mpn_sb_div_r_sec __MPN(sb_div_r_sec)
+__GMP_DECLSPEC void mpn_sb_div_r_sec (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_ptr);
+#define mpn_sbpi1_div_r_sec __MPN(sbpi1_div_r_sec)
+__GMP_DECLSPEC void mpn_sbpi1_div_r_sec (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_ptr);
+
#ifndef DIVEXACT_BY3_METHOD
#if GMP_NUMB_BITS % 2 == 0 && ! defined (HAVE_NATIVE_mpn_divexact_by3c)
(7 & 3 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 5)))
#endif
-#if GMP_NUMB_BITS % 6 == 0
+#if GMP_NUMB_BITS % 3 == 0
#define mpn_divexact_by7(dst,src,size) \
(7 & 1 * mpn_bdiv_dbm1 (dst, src, size, __GMP_CAST (mp_limb_t, GMP_NUMB_MASK / 7)))
#endif
#endif
#define mpz_divexact_gcd __gmpz_divexact_gcd
-__GMP_DECLSPEC void mpz_divexact_gcd __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
+__GMP_DECLSPEC void mpz_divexact_gcd (mpz_ptr, mpz_srcptr, mpz_srcptr);
+
+#define mpz_prodlimbs __gmpz_prodlimbs
+__GMP_DECLSPEC mp_size_t mpz_prodlimbs (mpz_ptr, mp_ptr, mp_size_t);
+
+#define mpz_oddfac_1 __gmpz_oddfac_1
+__GMP_DECLSPEC void mpz_oddfac_1 (mpz_ptr, mp_limb_t, unsigned);
#define mpz_inp_str_nowhite __gmpz_inp_str_nowhite
#ifdef _GMP_H_HAVE_FILE
-__GMP_DECLSPEC size_t mpz_inp_str_nowhite __GMP_PROTO ((mpz_ptr, FILE *, int, int, size_t));
+__GMP_DECLSPEC size_t mpz_inp_str_nowhite (mpz_ptr, FILE *, int, int, size_t);
#endif
#define mpn_divisible_p __MPN(divisible_p)
-__GMP_DECLSPEC int mpn_divisible_p __GMP_PROTO ((mp_srcptr, mp_size_t, mp_srcptr, mp_size_t)) __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC int mpn_divisible_p (mp_srcptr, mp_size_t, mp_srcptr, mp_size_t) __GMP_ATTRIBUTE_PURE;
#define mpn_rootrem __MPN(rootrem)
-__GMP_DECLSPEC mp_size_t mpn_rootrem __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));
+__GMP_DECLSPEC mp_size_t mpn_rootrem (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+
+#define mpn_broot __MPN(broot)
+__GMP_DECLSPEC void mpn_broot (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+
+#define mpn_broot_invm1 __MPN(broot_invm1)
+__GMP_DECLSPEC void mpn_broot_invm1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+
+#define mpn_brootinv __MPN(brootinv)
+__GMP_DECLSPEC void mpn_brootinv (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_ptr);
+#define mpn_bsqrt __MPN(bsqrt)
+__GMP_DECLSPEC void mpn_bsqrt (mp_ptr, mp_srcptr, mp_bitcnt_t, mp_ptr);
+
+#define mpn_bsqrtinv __MPN(bsqrtinv)
+__GMP_DECLSPEC int mpn_bsqrtinv (mp_ptr, mp_srcptr, mp_bitcnt_t, mp_ptr);
#if defined (_CRAY)
#define MPN_COPY_INCR(dst, src, n) \
/* used by test programs, hence __GMP_DECLSPEC */
#ifndef mpn_copyi /* if not done with cpuvec in a fat binary */
#define mpn_copyi __MPN(copyi)
-__GMP_DECLSPEC void mpn_copyi __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
+__GMP_DECLSPEC void mpn_copyi (mp_ptr, mp_srcptr, mp_size_t);
#endif
#if ! defined (MPN_COPY_INCR) && HAVE_NATIVE_mpn_copyi
-#define MPN_COPY_INCR(dst, src, size) \
- do { \
- ASSERT ((size) >= 0); \
- ASSERT (MPN_SAME_OR_INCR_P (dst, src, size)); \
- mpn_copyi (dst, src, size); \
+#define MPN_COPY_INCR(dst, src, size) \
+ do { \
+ ASSERT ((size) >= 0); \
+ ASSERT (MPN_SAME_OR_INCR_P (dst, src, size)); \
+ mpn_copyi (dst, src, size); \
} while (0)
#endif
/* Copy N limbs from SRC to DST incrementing, N==0 allowed. */
#if ! defined (MPN_COPY_INCR)
-#define MPN_COPY_INCR(dst, src, n) \
- do { \
- ASSERT ((n) >= 0); \
- ASSERT (MPN_SAME_OR_INCR_P (dst, src, n)); \
- if ((n) != 0) \
- { \
- mp_size_t __n = (n) - 1; \
- mp_ptr __dst = (dst); \
- mp_srcptr __src = (src); \
- mp_limb_t __x; \
- __x = *__src++; \
- if (__n != 0) \
- { \
- do \
- { \
- *__dst++ = __x; \
- __x = *__src++; \
- } \
- while (--__n); \
- } \
- *__dst++ = __x; \
- } \
+#define MPN_COPY_INCR(dst, src, n) \
+ do { \
+ ASSERT ((n) >= 0); \
+ ASSERT (MPN_SAME_OR_INCR_P (dst, src, n)); \
+ if ((n) != 0) \
+ { \
+ mp_size_t __n = (n) - 1; \
+ mp_ptr __dst = (dst); \
+ mp_srcptr __src = (src); \
+ mp_limb_t __x; \
+ __x = *__src++; \
+ if (__n != 0) \
+ { \
+ do \
+ { \
+ *__dst++ = __x; \
+ __x = *__src++; \
+ } \
+ while (--__n); \
+ } \
+ *__dst++ = __x; \
+ } \
} while (0)
#endif
/* used by test programs, hence __GMP_DECLSPEC */
#ifndef mpn_copyd /* if not done with cpuvec in a fat binary */
#define mpn_copyd __MPN(copyd)
-__GMP_DECLSPEC void mpn_copyd __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
+__GMP_DECLSPEC void mpn_copyd (mp_ptr, mp_srcptr, mp_size_t);
#endif
#if ! defined (MPN_COPY_DECR) && HAVE_NATIVE_mpn_copyd
-#define MPN_COPY_DECR(dst, src, size) \
- do { \
- ASSERT ((size) >= 0); \
- ASSERT (MPN_SAME_OR_DECR_P (dst, src, size)); \
- mpn_copyd (dst, src, size); \
+#define MPN_COPY_DECR(dst, src, size) \
+ do { \
+ ASSERT ((size) >= 0); \
+ ASSERT (MPN_SAME_OR_DECR_P (dst, src, size)); \
+ mpn_copyd (dst, src, size); \
} while (0)
#endif
/* Copy N limbs from SRC to DST decrementing, N==0 allowed. */
#if ! defined (MPN_COPY_DECR)
-#define MPN_COPY_DECR(dst, src, n) \
- do { \
- ASSERT ((n) >= 0); \
- ASSERT (MPN_SAME_OR_DECR_P (dst, src, n)); \
- if ((n) != 0) \
- { \
- mp_size_t __n = (n) - 1; \
- mp_ptr __dst = (dst) + __n; \
- mp_srcptr __src = (src) + __n; \
- mp_limb_t __x; \
- __x = *__src--; \
- if (__n != 0) \
- { \
- do \
- { \
- *__dst-- = __x; \
- __x = *__src--; \
- } \
- while (--__n); \
- } \
- *__dst-- = __x; \
- } \
+#define MPN_COPY_DECR(dst, src, n) \
+ do { \
+ ASSERT ((n) >= 0); \
+ ASSERT (MPN_SAME_OR_DECR_P (dst, src, n)); \
+ if ((n) != 0) \
+ { \
+ mp_size_t __n = (n) - 1; \
+ mp_ptr __dst = (dst) + __n; \
+ mp_srcptr __src = (src) + __n; \
+ mp_limb_t __x; \
+ __x = *__src--; \
+ if (__n != 0) \
+ { \
+ do \
+ { \
+ *__dst-- = __x; \
+ __x = *__src--; \
+ } \
+ while (--__n); \
+ } \
+ *__dst-- = __x; \
+ } \
} while (0)
#endif
#ifndef MPN_COPY
-#define MPN_COPY(d,s,n) \
- do { \
- ASSERT (MPN_SAME_OR_SEPARATE_P (d, s, n)); \
- MPN_COPY_INCR (d, s, n); \
+#define MPN_COPY(d,s,n) \
+ do { \
+ ASSERT (MPN_SAME_OR_SEPARATE_P (d, s, n)); \
+ MPN_COPY_INCR (d, s, n); \
} while (0)
#endif
/* Set {dst,size} to the limbs of {src,size} in reverse order. */
-#define MPN_REVERSE(dst, src, size) \
- do { \
- mp_ptr __dst = (dst); \
- mp_size_t __size = (size); \
- mp_srcptr __src = (src) + __size - 1; \
- mp_size_t __i; \
- ASSERT ((size) >= 0); \
- ASSERT (! MPN_OVERLAP_P (dst, size, src, size)); \
- CRAY_Pragma ("_CRI ivdep"); \
- for (__i = 0; __i < __size; __i++) \
- { \
- *__dst = *__src; \
- __dst++; \
- __src--; \
- } \
+#define MPN_REVERSE(dst, src, size) \
+ do { \
+ mp_ptr __dst = (dst); \
+ mp_size_t __size = (size); \
+ mp_srcptr __src = (src) + __size - 1; \
+ mp_size_t __i; \
+ ASSERT ((size) >= 0); \
+ ASSERT (! MPN_OVERLAP_P (dst, size, src, size)); \
+ CRAY_Pragma ("_CRI ivdep"); \
+ for (__i = 0; __i < __size; __i++) \
+ { \
+ *__dst = *__src; \
+ __dst++; \
+ __src--; \
+ } \
} while (0)
would be good when on a GNU system. */
#if HAVE_HOST_CPU_FAMILY_power || HAVE_HOST_CPU_FAMILY_powerpc
-#define MPN_ZERO(dst, n) \
- do { \
- ASSERT ((n) >= 0); \
- if ((n) != 0) \
- { \
- mp_ptr __dst = (dst) - 1; \
- mp_size_t __n = (n); \
- do \
- *++__dst = 0; \
- while (--__n); \
- } \
+#define MPN_ZERO(dst, n) \
+ do { \
+ ASSERT ((n) >= 0); \
+ if ((n) != 0) \
+ { \
+ mp_ptr __dst = (dst) - 1; \
+ mp_size_t __n = (n); \
+ do \
+ *++__dst = 0; \
+ while (--__n); \
+ } \
} while (0)
#endif
#ifndef MPN_ZERO
-#define MPN_ZERO(dst, n) \
- do { \
- ASSERT ((n) >= 0); \
- if ((n) != 0) \
- { \
- mp_ptr __dst = (dst); \
- mp_size_t __n = (n); \
- do \
- *__dst++ = 0; \
- while (--__n); \
- } \
+#define MPN_ZERO(dst, n) \
+ do { \
+ ASSERT ((n) >= 0); \
+ if ((n) != 0) \
+ { \
+ mp_ptr __dst = (dst); \
+ mp_size_t __n = (n); \
+ do \
+ *__dst++ = 0; \
+ while (--__n); \
+ } \
} while (0)
#endif
std/repe/scasl/cld and cld/repe/scasl (the latter would be for stripping
low zeros).
- std cld
- P5 18 16
- P6 46 38
- K6 36 13
- K7 21 20
+ std cld
+ P5 18 16
+ P6 46 38
+ K6 36 13
+ K7 21 20
*/
#ifndef MPN_NORMALIZE
#define MPN_NORMALIZE(DST, NLIMBS) \
do { \
- while ((NLIMBS) > 0) \
+ while ((NLIMBS) > 0) \
{ \
if ((DST)[(NLIMBS) - 1] != 0) \
break; \
} while (0)
#endif
#ifndef MPN_NORMALIZE_NOT_ZERO
-#define MPN_NORMALIZE_NOT_ZERO(DST, NLIMBS) \
- do { \
- ASSERT ((NLIMBS) >= 1); \
- while (1) \
- { \
- if ((DST)[(NLIMBS) - 1] != 0) \
- break; \
- (NLIMBS)--; \
- } \
+#define MPN_NORMALIZE_NOT_ZERO(DST, NLIMBS) \
+ do { \
+ while (1) \
+ { \
+ ASSERT ((NLIMBS) >= 1); \
+ if ((DST)[(NLIMBS) - 1] != 0) \
+ break; \
+ (NLIMBS)--; \
+ } \
} while (0)
#endif
and decrementing size. low should be ptr[0], and will be the new ptr[0]
on returning. The number in {ptr,size} must be non-zero, ie. size!=0 and
somewhere a non-zero limb. */
-#define MPN_STRIP_LOW_ZEROS_NOT_ZERO(ptr, size, low) \
- do { \
- ASSERT ((size) >= 1); \
- ASSERT ((low) == (ptr)[0]); \
- \
- while ((low) == 0) \
- { \
- (size)--; \
- ASSERT ((size) >= 1); \
- (ptr)++; \
- (low) = *(ptr); \
- } \
+#define MPN_STRIP_LOW_ZEROS_NOT_ZERO(ptr, size, low) \
+ do { \
+ ASSERT ((size) >= 1); \
+ ASSERT ((low) == (ptr)[0]); \
+ \
+ while ((low) == 0) \
+ { \
+ (size)--; \
+ ASSERT ((size) >= 1); \
+ (ptr)++; \
+ (low) = *(ptr); \
+ } \
} while (0)
/* Initialize X of type mpz_t with space for NLIMBS limbs. X should be a
temporary variable; it will be automatically cleared out at function
return. We use __x here to make it possible to accept both mpz_ptr and
mpz_t arguments. */
-#define MPZ_TMP_INIT(X, NLIMBS) \
- do { \
- mpz_ptr __x = (X); \
- ASSERT ((NLIMBS) >= 1); \
- __x->_mp_alloc = (NLIMBS); \
+#define MPZ_TMP_INIT(X, NLIMBS) \
+ do { \
+ mpz_ptr __x = (X); \
+ ASSERT ((NLIMBS) >= 1); \
+ __x->_mp_alloc = (NLIMBS); \
__x->_mp_d = TMP_ALLOC_LIMBS (NLIMBS); \
} while (0)
+#if WANT_ASSERT
+static inline void *
+_mpz_newalloc (mpz_ptr z, mp_size_t n)
+{
+ void * res = _mpz_realloc(z,n);
+ /* If we are checking the code, force a random change to limbs: limb 0 of
+    the area returned by _mpz_realloc is overwritten with the bitwise
+    complement of limb ALLOC (z) - 1.  NOTE(review): presumably this is
+    meant to expose callers that wrongly expect the old value to survive a
+    "new" allocation -- confirm the intent.  */
+ ((mp_ptr) res)[0] = ~ ((mp_ptr) res)[ALLOC (z) - 1];
+ return res;
+}
+#else /* ! WANT_ASSERT: _mpz_newalloc is simply another name for _mpz_realloc */
+#define _mpz_newalloc _mpz_realloc
+#endif /* WANT_ASSERT */
/* Realloc for an mpz_t WHAT if it has less than NEEDED limbs. */
-#define MPZ_REALLOC(z,n) (UNLIKELY ((n) > ALLOC(z)) \
- ? (mp_ptr) _mpz_realloc(z,n) \
- : PTR(z))
+#define MPZ_REALLOC(z,n) (UNLIKELY ((n) > ALLOC(z)) \
+ ? (mp_ptr) _mpz_realloc(z,n) \
+ : PTR(z))
+#define MPZ_NEWALLOC(z,n) (UNLIKELY ((n) > ALLOC(z)) \
+ ? (mp_ptr) _mpz_newalloc(z,n) \
+ : PTR(z))
#define MPZ_EQUAL_1_P(z) (SIZ(z)==1 && PTR(z)[0] == 1)
__GMP_DECLSPEC extern const mp_limb_t __gmp_fib_table[];
#define FIB_TABLE(n) (__gmp_fib_table[(n)+1])
+extern const mp_limb_t __gmp_oddfac_table[];
+extern const mp_limb_t __gmp_odd2fac_table[];
+extern const unsigned char __gmp_fac2cnt_table[];
+extern const mp_limb_t __gmp_limbroots_table[];
+
+/* n^log <= GMP_NUMB_MAX, a limb can store log factors less than n.
+   The value is found by starting at log = 8 and decreasing log for as long
+   as n > __gmp_limbroots_table[log - 1]; the first log that fails this test
+   is returned.
+   NOTE(review): the loop has no explicit lower bound on log, so it
+   presumably relies on the low entries of __gmp_limbroots_table being at
+   least as large as any n passed in -- confirm against the table
+   definition.  */
+static inline unsigned
+log_n_max (mp_limb_t n)
+{
+ unsigned log;
+ for (log = 8; n > __gmp_limbroots_table[log - 1]; log--);
+ return log;
+}
+
#define SIEVESIZE 512 /* FIXME: Allow gmp_init_primesieve to choose */
typedef struct
{
#define gmp_nextprime __gmp_nextprime
__GMP_DECLSPEC unsigned long int gmp_nextprime (gmp_primesieve_t *);
+#define gmp_primesieve __gmp_primesieve
+__GMP_DECLSPEC mp_limb_t gmp_primesieve (mp_ptr, mp_limb_t);
+
#ifndef MUL_TOOM22_THRESHOLD
#define MUL_TOOM22_THRESHOLD 30
#define MUL_TOOM42_TO_TOOM63_THRESHOLD 110
#endif
+#ifndef MUL_TOOM43_TO_TOOM54_THRESHOLD
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD 150
+#endif
+
/* MUL_TOOM22_THRESHOLD_LIMIT is the maximum for MUL_TOOM22_THRESHOLD. In a
normal build MUL_TOOM22_THRESHOLD is a constant and we use that. In a fat
binary or tune program build MUL_TOOM22_THRESHOLD is a variable and a
#define SQR_TOOM3_THRESHOLD_LIMIT SQR_TOOM3_THRESHOLD
#endif
+#ifndef MULMID_TOOM42_THRESHOLD
+#define MULMID_TOOM42_THRESHOLD MUL_TOOM22_THRESHOLD
+#endif
+
#ifndef DC_DIV_QR_THRESHOLD
#define DC_DIV_QR_THRESHOLD 50
#endif
where FFT_FIRST_K+1 should be used, the second FFT_FIRST_K+2,
etc. See mpn_fft_best_k(). */
#ifndef MUL_FFT_TABLE
-#define MUL_FFT_TABLE \
- { MUL_TOOM33_THRESHOLD * 4, /* k=5 */ \
- MUL_TOOM33_THRESHOLD * 8, /* k=6 */ \
- MUL_TOOM33_THRESHOLD * 16, /* k=7 */ \
- MUL_TOOM33_THRESHOLD * 32, /* k=8 */ \
- MUL_TOOM33_THRESHOLD * 96, /* k=9 */ \
- MUL_TOOM33_THRESHOLD * 288, /* k=10 */ \
+#define MUL_FFT_TABLE \
+ { MUL_TOOM33_THRESHOLD * 4, /* k=5 */ \
+ MUL_TOOM33_THRESHOLD * 8, /* k=6 */ \
+ MUL_TOOM33_THRESHOLD * 16, /* k=7 */ \
+ MUL_TOOM33_THRESHOLD * 32, /* k=8 */ \
+ MUL_TOOM33_THRESHOLD * 96, /* k=9 */ \
+ MUL_TOOM33_THRESHOLD * 288, /* k=10 */ \
0 }
#endif
#ifndef SQR_FFT_TABLE
-#define SQR_FFT_TABLE \
- { SQR_TOOM3_THRESHOLD * 4, /* k=5 */ \
- SQR_TOOM3_THRESHOLD * 8, /* k=6 */ \
- SQR_TOOM3_THRESHOLD * 16, /* k=7 */ \
- SQR_TOOM3_THRESHOLD * 32, /* k=8 */ \
- SQR_TOOM3_THRESHOLD * 96, /* k=9 */ \
- SQR_TOOM3_THRESHOLD * 288, /* k=10 */ \
+#define SQR_FFT_TABLE \
+ { SQR_TOOM3_THRESHOLD * 4, /* k=5 */ \
+ SQR_TOOM3_THRESHOLD * 8, /* k=6 */ \
+ SQR_TOOM3_THRESHOLD * 16, /* k=7 */ \
+ SQR_TOOM3_THRESHOLD * 32, /* k=8 */ \
+ SQR_TOOM3_THRESHOLD * 96, /* k=9 */ \
+ SQR_TOOM3_THRESHOLD * 288, /* k=10 */ \
0 }
#endif
#define SET_STR_PRECOMPUTE_THRESHOLD 2000
#endif
+#ifndef FAC_ODD_THRESHOLD
+#define FAC_ODD_THRESHOLD 35
+#endif
+
+#ifndef FAC_DSC_THRESHOLD
+#define FAC_DSC_THRESHOLD 400
+#endif
+
/* Return non-zero if xp,xsize and yp,ysize overlap.
If xp+xsize<=yp there's no overlap, or if yp+ysize<=xp there's no
overlap. If both these are false, there's an overlap. */
-#define MPN_OVERLAP_P(xp, xsize, yp, ysize) \
+#define MPN_OVERLAP_P(xp, xsize, yp, ysize) \
((xp) + (xsize) > (yp) && (yp) + (ysize) > (xp))
-#define MEM_OVERLAP_P(xp, xsize, yp, ysize) \
- ( (char *) (xp) + (xsize) > (char *) (yp) \
+#define MEM_OVERLAP_P(xp, xsize, yp, ysize) \
+ ( (char *) (xp) + (xsize) > (char *) (yp) \
&& (char *) (yp) + (ysize) > (char *) (xp))
/* Return non-zero if xp,xsize and yp,ysize are either identical or not
overlapping. Return zero if they're partially overlapping. */
-#define MPN_SAME_OR_SEPARATE_P(xp, yp, size) \
+#define MPN_SAME_OR_SEPARATE_P(xp, yp, size) \
MPN_SAME_OR_SEPARATE2_P(xp, size, yp, size)
-#define MPN_SAME_OR_SEPARATE2_P(xp, xsize, yp, ysize) \
+#define MPN_SAME_OR_SEPARATE2_P(xp, xsize, yp, ysize) \
((xp) == (yp) || ! MPN_OVERLAP_P (xp, xsize, yp, ysize))
/* Return non-zero if dst,dsize and src,ssize are either identical or
overlapping in a way suitable for an incrementing/decrementing algorithm.
Return zero if they're partially overlapping in an unsuitable fashion. */
-#define MPN_SAME_OR_INCR2_P(dst, dsize, src, ssize) \
+#define MPN_SAME_OR_INCR2_P(dst, dsize, src, ssize) \
((dst) <= (src) || ! MPN_OVERLAP_P (dst, dsize, src, ssize))
-#define MPN_SAME_OR_INCR_P(dst, src, size) \
+#define MPN_SAME_OR_INCR_P(dst, src, size) \
MPN_SAME_OR_INCR2_P(dst, size, src, size)
-#define MPN_SAME_OR_DECR2_P(dst, dsize, src, ssize) \
+#define MPN_SAME_OR_DECR2_P(dst, dsize, src, ssize) \
((dst) >= (src) || ! MPN_OVERLAP_P (dst, dsize, src, ssize))
-#define MPN_SAME_OR_DECR_P(dst, src, size) \
+#define MPN_SAME_OR_DECR_P(dst, src, size) \
MPN_SAME_OR_DECR2_P(dst, size, src, size)
#define ASSERT_FILE ""
#endif
-__GMP_DECLSPEC void __gmp_assert_header __GMP_PROTO ((const char *, int));
-__GMP_DECLSPEC void __gmp_assert_fail __GMP_PROTO ((const char *, int, const char *)) ATTRIBUTE_NORETURN;
+__GMP_DECLSPEC void __gmp_assert_header (const char *, int);
+__GMP_DECLSPEC void __gmp_assert_fail (const char *, int, const char *) ATTRIBUTE_NORETURN;
#if HAVE_STRINGIZE
#define ASSERT_FAIL(expr) __gmp_assert_fail (ASSERT_FILE, ASSERT_LINE, #expr)
#define ASSERT_FAIL(expr) __gmp_assert_fail (ASSERT_FILE, ASSERT_LINE, "expr")
#endif
-#define ASSERT_ALWAYS(expr) \
- do { \
- if (!(expr)) \
- ASSERT_FAIL (expr); \
+#define ASSERT_ALWAYS(expr) \
+ do { \
+ if (UNLIKELY (!(expr))) \
+ ASSERT_FAIL (expr); \
} while (0)
#if WANT_ASSERT
protection on routines like mpq_equal which give wrong results on
non-canonical inputs. */
#if WANT_ASSERT
-#define ASSERT_MPQ_CANONICAL(q) \
- do { \
- ASSERT (q->_mp_den._mp_size > 0); \
- if (q->_mp_num._mp_size == 0) \
- { \
- /* zero should be 0/1 */ \
- ASSERT (mpz_cmp_ui (mpq_denref(q), 1L) == 0); \
- } \
- else \
- { \
- /* no common factors */ \
- mpz_t __g; \
- mpz_init (__g); \
- mpz_gcd (__g, mpq_numref(q), mpq_denref(q)); \
- ASSERT (mpz_cmp_ui (__g, 1) == 0); \
- mpz_clear (__g); \
- } \
+#define ASSERT_MPQ_CANONICAL(q) \
+ do { \
+ ASSERT (q->_mp_den._mp_size > 0); \
+ if (q->_mp_num._mp_size == 0) \
+ { \
+ /* zero should be 0/1 */ \
+ ASSERT (mpz_cmp_ui (mpq_denref(q), 1L) == 0); \
+ } \
+ else \
+ { \
+ /* no common factors */ \
+ mpz_t __g; \
+ mpz_init (__g); \
+ mpz_gcd (__g, mpq_numref(q), mpq_denref(q)); \
+ ASSERT (mpz_cmp_ui (__g, 1) == 0); \
+ mpz_clear (__g); \
+ } \
} while (0)
#else
-#define ASSERT_MPQ_CANONICAL(q) do {} while (0)
+#define ASSERT_MPQ_CANONICAL(q) do {} while (0)
#endif
/* Check that the nail parts are zero. */
-#define ASSERT_ALWAYS_LIMB(limb) \
- do { \
- mp_limb_t __nail = (limb) & GMP_NAIL_MASK; \
- ASSERT_ALWAYS (__nail == 0); \
+#define ASSERT_ALWAYS_LIMB(limb) \
+ do { \
+ mp_limb_t __nail = (limb) & GMP_NAIL_MASK; \
+ ASSERT_ALWAYS (__nail == 0); \
} while (0)
-#define ASSERT_ALWAYS_MPN(ptr, size) \
- do { \
- /* let whole loop go dead when no nails */ \
- if (GMP_NAIL_BITS != 0) \
- { \
- mp_size_t __i; \
- for (__i = 0; __i < (size); __i++) \
- ASSERT_ALWAYS_LIMB ((ptr)[__i]); \
- } \
+#define ASSERT_ALWAYS_MPN(ptr, size) \
+ do { \
+ /* let whole loop go dead when no nails */ \
+ if (GMP_NAIL_BITS != 0) \
+ { \
+ mp_size_t __i; \
+ for (__i = 0; __i < (size); __i++) \
+ ASSERT_ALWAYS_LIMB ((ptr)[__i]); \
+ } \
} while (0)
#if WANT_ASSERT
#define ASSERT_LIMB(limb) ASSERT_ALWAYS_LIMB (limb)
/* Assert that an mpn region {ptr,size} is zero, or non-zero.
size==0 is allowed, and in that case {ptr,size} considered to be zero. */
#if WANT_ASSERT
-#define ASSERT_MPN_ZERO_P(ptr,size) \
- do { \
- mp_size_t __i; \
- ASSERT ((size) >= 0); \
- for (__i = 0; __i < (size); __i++) \
- ASSERT ((ptr)[__i] == 0); \
+#define ASSERT_MPN_ZERO_P(ptr,size) \
+ do { \
+ mp_size_t __i; \
+ ASSERT ((size) >= 0); \
+ for (__i = 0; __i < (size); __i++) \
+ ASSERT ((ptr)[__i] == 0); \
} while (0)
-#define ASSERT_MPN_NONZERO_P(ptr,size) \
- do { \
- mp_size_t __i; \
- int __nonzero = 0; \
- ASSERT ((size) >= 0); \
- for (__i = 0; __i < (size); __i++) \
- if ((ptr)[__i] != 0) \
- { \
- __nonzero = 1; \
- break; \
- } \
- ASSERT (__nonzero); \
+#define ASSERT_MPN_NONZERO_P(ptr,size) \
+ do { \
+ mp_size_t __i; \
+ int __nonzero = 0; \
+ ASSERT ((size) >= 0); \
+ for (__i = 0; __i < (size); __i++) \
+ if ((ptr)[__i] != 0) \
+ { \
+ __nonzero = 1; \
+ break; \
+ } \
+ ASSERT (__nonzero); \
} while (0)
#else
#define ASSERT_MPN_ZERO_P(ptr,size) do {} while (0)
#if ! HAVE_NATIVE_mpn_com
#undef mpn_com
-#define mpn_com(d,s,n) \
- do { \
- mp_ptr __d = (d); \
- mp_srcptr __s = (s); \
- mp_size_t __n = (n); \
- ASSERT (__n >= 1); \
- ASSERT (MPN_SAME_OR_SEPARATE_P (__d, __s, __n)); \
- do \
- *__d++ = (~ *__s++) & GMP_NUMB_MASK; \
- while (--__n); \
+#define mpn_com(d,s,n) \
+ do { \
+ mp_ptr __d = (d); \
+ mp_srcptr __s = (s); \
+ mp_size_t __n = (n); \
+ ASSERT (__n >= 1); \
+ ASSERT (MPN_SAME_OR_SEPARATE_P (__d, __s, __n)); \
+ do \
+ *__d++ = (~ *__s++) & GMP_NUMB_MASK; \
+ while (--__n); \
} while (0)
#endif
#endif
#define mpn_trialdiv __MPN(trialdiv)
-__GMP_DECLSPEC mp_limb_t mpn_trialdiv __GMP_PROTO ((mp_srcptr, mp_size_t, mp_size_t, int *));
+__GMP_DECLSPEC mp_limb_t mpn_trialdiv (mp_srcptr, mp_size_t, mp_size_t, int *);
#define mpn_remove __MPN(remove)
-__GMP_DECLSPEC mp_bitcnt_t mpn_remove __GMP_PROTO ((mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t, mp_bitcnt_t));
+__GMP_DECLSPEC mp_bitcnt_t mpn_remove (mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t, mp_bitcnt_t);
/* ADDC_LIMB sets w=x+y and cout to 0 or 1 for a carry from that addition. */
#if GMP_NAIL_BITS == 0
-#define ADDC_LIMB(cout, w, x, y) \
- do { \
- mp_limb_t __x = (x); \
- mp_limb_t __y = (y); \
- mp_limb_t __w = __x + __y; \
- (w) = __w; \
- (cout) = __w < __x; \
+#define ADDC_LIMB(cout, w, x, y) \
+ do { \
+ mp_limb_t __x = (x); \
+ mp_limb_t __y = (y); \
+ mp_limb_t __w = __x + __y; \
+ (w) = __w; \
+ (cout) = __w < __x; \
} while (0)
#else
-#define ADDC_LIMB(cout, w, x, y) \
- do { \
- mp_limb_t __w; \
- ASSERT_LIMB (x); \
- ASSERT_LIMB (y); \
- __w = (x) + (y); \
- (w) = __w & GMP_NUMB_MASK; \
- (cout) = __w >> GMP_NUMB_BITS; \
+#define ADDC_LIMB(cout, w, x, y) \
+ do { \
+ mp_limb_t __w; \
+ ASSERT_LIMB (x); \
+ ASSERT_LIMB (y); \
+ __w = (x) + (y); \
+ (w) = __w & GMP_NUMB_MASK; \
+ (cout) = __w >> GMP_NUMB_BITS; \
} while (0)
#endif
/* SUBC_LIMB sets w=x-y and cout to 0 or 1 for a borrow from that
subtract. */
#if GMP_NAIL_BITS == 0
-#define SUBC_LIMB(cout, w, x, y) \
- do { \
- mp_limb_t __x = (x); \
- mp_limb_t __y = (y); \
- mp_limb_t __w = __x - __y; \
- (w) = __w; \
- (cout) = __w > __x; \
+#define SUBC_LIMB(cout, w, x, y) \
+ do { \
+ mp_limb_t __x = (x); \
+ mp_limb_t __y = (y); \
+ mp_limb_t __w = __x - __y; \
+ (w) = __w; \
+ (cout) = __w > __x; \
} while (0)
#else
-#define SUBC_LIMB(cout, w, x, y) \
- do { \
- mp_limb_t __w = (x) - (y); \
- (w) = __w & GMP_NUMB_MASK; \
- (cout) = __w >> (GMP_LIMB_BITS-1); \
+#define SUBC_LIMB(cout, w, x, y) \
+ do { \
+ mp_limb_t __w = (x) - (y); \
+ (w) = __w & GMP_NUMB_MASK; \
+ (cout) = __w >> (GMP_LIMB_BITS-1); \
} while (0)
#endif
declaring their operand sizes, then remove the former. This is purely
for the benefit of assertion checking. */
-#if defined (__GNUC__) && HAVE_HOST_CPU_FAMILY_x86 && GMP_NAIL_BITS == 0 \
- && GMP_LIMB_BITS == 32 && ! defined (NO_ASM) && ! WANT_ASSERT
+#if defined (__GNUC__) && GMP_NAIL_BITS == 0 && ! defined (NO_ASM) \
+ && (defined(HAVE_HOST_CPU_FAMILY_x86) || defined(HAVE_HOST_CPU_FAMILY_x86_64)) \
+ && ! WANT_ASSERT
/* Better flags handling than the generic C gives on i386, saving a few
bytes of code and maybe a cycle or two. */
#define MPN_IORD_U(ptr, incr, aors) \
do { \
mp_ptr __ptr_dummy; \
- if (__builtin_constant_p (incr) && (incr) == 1) \
+ if (__builtin_constant_p (incr) && (incr) == 0) \
{ \
- __asm__ __volatile__ \
- ("\n" ASM_L(top) ":\n" \
- "\t" aors " $1, (%0)\n" \
- "\tleal 4(%0),%0\n" \
- "\tjc " ASM_L(top) \
- : "=r" (__ptr_dummy) \
- : "0" (ptr) \
- : "memory"); \
+ } \
+ else if (__builtin_constant_p (incr) && (incr) == 1) \
+ { \
+ __asm__ __volatile__ \
+ ("\n" ASM_L(top) ":\n" \
+ "\t" aors "\t$1, (%0)\n" \
+ "\tlea\t%c2(%0), %0\n" \
+ "\tjc\t" ASM_L(top) \
+ : "=r" (__ptr_dummy) \
+ : "0" (ptr), "n" (sizeof(mp_limb_t)) \
+ : "memory"); \
} \
else \
{ \
- __asm__ __volatile__ \
- ( aors " %2,(%0)\n" \
- "\tjnc " ASM_L(done) "\n" \
- ASM_L(top) ":\n" \
- "\t" aors " $1,4(%0)\n" \
- "\tleal 4(%0),%0\n" \
- "\tjc " ASM_L(top) "\n" \
- ASM_L(done) ":\n" \
- : "=r" (__ptr_dummy) \
- : "0" (ptr), \
- "ri" (incr) \
- : "memory"); \
+ __asm__ __volatile__ \
+ ( aors "\t%2, (%0)\n" \
+ "\tjnc\t" ASM_L(done) "\n" \
+ ASM_L(top) ":\n" \
+ "\t" aors "\t$1, %c3(%0)\n" \
+ "\tlea\t%c3(%0), %0\n" \
+ "\tjc\t" ASM_L(top) "\n" \
+ ASM_L(done) ":\n" \
+ : "=r" (__ptr_dummy) \
+ : "0" (ptr), \
+ "ri" ((mp_limb_t) (incr)), "n" (sizeof(mp_limb_t)) \
+ : "memory"); \
} \
} while (0)
+#if GMP_LIMB_BITS == 32
#define MPN_INCR_U(ptr, size, incr) MPN_IORD_U (ptr, incr, "addl")
#define MPN_DECR_U(ptr, size, incr) MPN_IORD_U (ptr, incr, "subl")
+#endif
+#if GMP_LIMB_BITS == 64
+#define MPN_INCR_U(ptr, size, incr) MPN_IORD_U (ptr, incr, "addq")
+#define MPN_DECR_U(ptr, size, incr) MPN_IORD_U (ptr, incr, "subq")
+#endif
#define mpn_incr_u(ptr, incr) MPN_INCR_U (ptr, 0, incr)
#define mpn_decr_u(ptr, incr) MPN_DECR_U (ptr, 0, incr)
#endif
#if GMP_NAIL_BITS == 0
#ifndef mpn_incr_u
-#define mpn_incr_u(p,incr) \
- do { \
- mp_limb_t __x; \
- mp_ptr __p = (p); \
- if (__builtin_constant_p (incr) && (incr) == 1) \
- { \
- while (++(*(__p++)) == 0) \
- ; \
- } \
- else \
- { \
- __x = *__p + (incr); \
- *__p = __x; \
- if (__x < (incr)) \
- while (++(*(++__p)) == 0) \
- ; \
- } \
+#define mpn_incr_u(p,incr) \
+ do { \
+ mp_limb_t __x; \
+ mp_ptr __p = (p); \
+ if (__builtin_constant_p (incr) && (incr) == 1) \
+ { \
+ while (++(*(__p++)) == 0) \
+ ; \
+ } \
+ else \
+ { \
+ __x = *__p + (incr); \
+ *__p = __x; \
+ if (__x < (incr)) \
+ while (++(*(++__p)) == 0) \
+ ; \
+ } \
} while (0)
#endif
#ifndef mpn_decr_u
-#define mpn_decr_u(p,incr) \
- do { \
- mp_limb_t __x; \
- mp_ptr __p = (p); \
- if (__builtin_constant_p (incr) && (incr) == 1) \
- { \
- while ((*(__p++))-- == 0) \
- ; \
- } \
- else \
- { \
- __x = *__p; \
- *__p = __x - (incr); \
- if (__x < (incr)) \
- while ((*(++__p))-- == 0) \
- ; \
- } \
+#define mpn_decr_u(p,incr) \
+ do { \
+ mp_limb_t __x; \
+ mp_ptr __p = (p); \
+ if (__builtin_constant_p (incr) && (incr) == 1) \
+ { \
+ while ((*(__p++))-- == 0) \
+ ; \
+ } \
+ else \
+ { \
+ __x = *__p; \
+ *__p = __x - (incr); \
+ if (__x < (incr)) \
+ while ((*(++__p))-- == 0) \
+ ; \
+ } \
} while (0)
#endif
#endif
#if GMP_NAIL_BITS >= 1
#ifndef mpn_incr_u
-#define mpn_incr_u(p,incr) \
- do { \
- mp_limb_t __x; \
- mp_ptr __p = (p); \
- if (__builtin_constant_p (incr) && (incr) == 1) \
- { \
- do \
- { \
- __x = (*__p + 1) & GMP_NUMB_MASK; \
- *__p++ = __x; \
- } \
- while (__x == 0); \
- } \
- else \
- { \
- __x = (*__p + (incr)); \
- *__p++ = __x & GMP_NUMB_MASK; \
- if (__x >> GMP_NUMB_BITS != 0) \
- { \
- do \
- { \
- __x = (*__p + 1) & GMP_NUMB_MASK; \
- *__p++ = __x; \
- } \
- while (__x == 0); \
- } \
- } \
+#define mpn_incr_u(p,incr) \
+ do { \
+ mp_limb_t __x; \
+ mp_ptr __p = (p); \
+ if (__builtin_constant_p (incr) && (incr) == 1) \
+ { \
+ do \
+ { \
+ __x = (*__p + 1) & GMP_NUMB_MASK; \
+ *__p++ = __x; \
+ } \
+ while (__x == 0); \
+ } \
+ else \
+ { \
+ __x = (*__p + (incr)); \
+ *__p++ = __x & GMP_NUMB_MASK; \
+ if (__x >> GMP_NUMB_BITS != 0) \
+ { \
+ do \
+ { \
+ __x = (*__p + 1) & GMP_NUMB_MASK; \
+ *__p++ = __x; \
+ } \
+ while (__x == 0); \
+ } \
+ } \
} while (0)
#endif
#ifndef mpn_decr_u
-#define mpn_decr_u(p,incr) \
- do { \
- mp_limb_t __x; \
- mp_ptr __p = (p); \
- if (__builtin_constant_p (incr) && (incr) == 1) \
- { \
- do \
- { \
- __x = *__p; \
- *__p++ = (__x - 1) & GMP_NUMB_MASK; \
- } \
- while (__x == 0); \
- } \
- else \
- { \
- __x = *__p - (incr); \
- *__p++ = __x & GMP_NUMB_MASK; \
- if (__x >> GMP_NUMB_BITS != 0) \
- { \
- do \
- { \
- __x = *__p; \
- *__p++ = (__x - 1) & GMP_NUMB_MASK; \
- } \
- while (__x == 0); \
- } \
- } \
+#define mpn_decr_u(p,incr) \
+ do { \
+ mp_limb_t __x; \
+ mp_ptr __p = (p); \
+ if (__builtin_constant_p (incr) && (incr) == 1) \
+ { \
+ do \
+ { \
+ __x = *__p; \
+ *__p++ = (__x - 1) & GMP_NUMB_MASK; \
+ } \
+ while (__x == 0); \
+ } \
+ else \
+ { \
+ __x = *__p - (incr); \
+ *__p++ = __x & GMP_NUMB_MASK; \
+ if (__x >> GMP_NUMB_BITS != 0) \
+ { \
+ do \
+ { \
+ __x = *__p; \
+ *__p++ = (__x - 1) & GMP_NUMB_MASK; \
+ } \
+ while (__x == 0); \
+ } \
+ } \
} while (0)
#endif
#endif
#ifndef MPN_INCR_U
#if WANT_ASSERT
-#define MPN_INCR_U(ptr, size, n) \
- do { \
- ASSERT ((size) >= 1); \
- ASSERT_NOCARRY (mpn_add_1 (ptr, ptr, size, n)); \
+#define MPN_INCR_U(ptr, size, n) \
+ do { \
+ ASSERT ((size) >= 1); \
+ ASSERT_NOCARRY (mpn_add_1 (ptr, ptr, size, n)); \
} while (0)
#else
#define MPN_INCR_U(ptr, size, n) mpn_incr_u (ptr, n)
#ifndef MPN_DECR_U
#if WANT_ASSERT
-#define MPN_DECR_U(ptr, size, n) \
- do { \
- ASSERT ((size) >= 1); \
- ASSERT_NOCARRY (mpn_sub_1 (ptr, ptr, size, n)); \
+#define MPN_DECR_U(ptr, size, n) \
+ do { \
+ ASSERT ((size) >= 1); \
+ ASSERT_NOCARRY (mpn_sub_1 (ptr, ptr, size, n)); \
} while (0)
#else
#define MPN_DECR_U(ptr, size, n) mpn_decr_u (ptr, n)
#endif
-/* Structure for conversion between internal binary format and
- strings in base 2..36. */
+/* Structure for conversion between internal binary format and strings. */
struct bases
{
/* Number of digits in the conversion base that always fits in an mp_limb_t.
int chars_per_limb;
/* log(2)/log(conversion_base) */
- double chars_per_bit_exactly;
+ mp_limb_t logb2;
+
+ /* log(conversion_base)/log(2) */
+ mp_limb_t log2b;
/* base**chars_per_limb, i.e. the biggest number that fits a word, built by
factors of base. Exception: For 2, 4, 8, etc, big_base is log2(base),
__GMP_DECLSPEC extern const struct bases mp_bases[257];
+/* Compute the number of digits in base for nbits bits, making sure the result
+ is never too small. The two variants of the macro implement the same
+ function; the GT2 variant below works just for bases > 2.
+
+ res receives the digit count, nbits is evaluated once (it is copied into
+ _nbits), and b indexes mp_bases[]. Both variants take the first output
+ of umul_ppmm (presumably the high limb of the double-limb product --
+ confirm against longlong.h) and add 1 to it. The GT2 variant multiplies
+ by logb2 + 1 directly. The general variant multiplies by logb2 and then
+ adds a carry when _dummy + _nbits wraps around, which accounts for the
+ extra nbits term without ever forming logb2 + 1.
+ NOTE(review): this looks like it is needed because logb2 + 1 could
+ overflow a limb for base 2 -- confirm against the generated mp_bases
+ table. */
+#define DIGITS_IN_BASE_FROM_BITS(res, nbits, b) \
+ do { \
+ mp_limb_t _ph, _dummy; \
+ size_t _nbits = (nbits); \
+ umul_ppmm (_ph, _dummy, mp_bases[b].logb2, _nbits); \
+ _ph += (_dummy + _nbits < _dummy); \
+ res = _ph + 1; \
+ } while (0)
+#define DIGITS_IN_BASEGT2_FROM_BITS(res, nbits, b) \
+ do { \
+ mp_limb_t _ph, _dummy; \
+ size_t _nbits = (nbits); \
+ umul_ppmm (_ph, _dummy, mp_bases[b].logb2 + 1, _nbits); \
+ res = _ph + 1; \
+ } while (0)
+
/* For power of 2 bases this is exact. For other bases the result is either
exact or one too big.
limbs to increase the probability of being exact, but that doesn't seem
worth bothering with. */
-#define MPN_SIZEINBASE(result, ptr, size, base) \
- do { \
- int __lb_base, __cnt; \
- size_t __totbits; \
- \
- ASSERT ((size) >= 0); \
- ASSERT ((base) >= 2); \
- ASSERT ((base) < numberof (mp_bases)); \
- \
- /* Special case for X == 0. */ \
- if ((size) == 0) \
- (result) = 1; \
- else \
- { \
- /* Calculate the total number of significant bits of X. */ \
- count_leading_zeros (__cnt, (ptr)[(size)-1]); \
- __totbits = (size_t) (size) * GMP_NUMB_BITS - (__cnt - GMP_NAIL_BITS);\
- \
- if (POW2_P (base)) \
- { \
- __lb_base = mp_bases[base].big_base; \
- (result) = (__totbits + __lb_base - 1) / __lb_base; \
- } \
- else \
- (result) = (size_t) \
- (__totbits * mp_bases[base].chars_per_bit_exactly) + 1; \
- } \
+#define MPN_SIZEINBASE(result, ptr, size, base) \
+ do { \
+ int __lb_base, __cnt; \
+ size_t __totbits; \
+ \
+ ASSERT ((size) >= 0); \
+ ASSERT ((base) >= 2); \
+ ASSERT ((base) < numberof (mp_bases)); \
+ \
+ /* Special case for X == 0. */ \
+ if ((size) == 0) \
+ (result) = 1; \
+ else \
+ { \
+ /* Calculate the total number of significant bits of X. */ \
+ count_leading_zeros (__cnt, (ptr)[(size)-1]); \
+ __totbits = (size_t) (size) * GMP_NUMB_BITS - (__cnt - GMP_NAIL_BITS);\
+ \
+ if (POW2_P (base)) \
+ { \
+ __lb_base = mp_bases[base].big_base; \
+ (result) = (__totbits + __lb_base - 1) / __lb_base; \
+ } \
+ else \
+ { \
+ DIGITS_IN_BASEGT2_FROM_BITS (result, __totbits, base); \
+ } \
+ } \
} while (0)
-/* eliminate mp_bases lookups for base==16 */
-#define MPN_SIZEINBASE_16(result, ptr, size) \
- do { \
- int __cnt; \
- mp_size_t __totbits; \
- \
- ASSERT ((size) >= 0); \
- \
- /* Special case for X == 0. */ \
- if ((size) == 0) \
- (result) = 1; \
- else \
- { \
- /* Calculate the total number of significant bits of X. */ \
- count_leading_zeros (__cnt, (ptr)[(size)-1]); \
- __totbits = (size_t) (size) * GMP_NUMB_BITS - (__cnt - GMP_NAIL_BITS);\
- (result) = (__totbits + 4 - 1) / 4; \
- } \
+#define MPN_SIZEINBASE_2EXP(result, ptr, size, base2exp) \
+ do { /* result = ceil (significant bits of {ptr,size} / base2exp) */ \
+ int __cnt; \
+ mp_bitcnt_t __totbits; \
+ ASSERT ((size) > 0); /* no special case for a zero operand here */ \
+ ASSERT ((ptr)[(size)-1] != 0); /* top limb must be nonzero */ \
+ count_leading_zeros (__cnt, (ptr)[(size)-1]); \
+ __totbits = (mp_bitcnt_t) (size) * GMP_NUMB_BITS - (__cnt - GMP_NAIL_BITS); \
+ (result) = (__totbits + (base2exp)-1) / (base2exp); /* round up */ \
+ } while (0)
+
/* bit count to limb count, rounding up */
#define BITS_TO_LIMBS(n) (((n) + (GMP_NUMB_BITS - 1)) / GMP_NUMB_BITS)
#if BITS_PER_ULONG <= GMP_NUMB_BITS /* need one limb per ulong */
#define LIMBS_PER_ULONG 1
-#define MPN_SET_UI(zp, zn, u) \
- (zp)[0] = (u); \
+#define MPN_SET_UI(zp, zn, u) \
+ (zp)[0] = (u); \
(zn) = ((zp)[0] != 0);
-#define MPZ_FAKE_UI(z, zp, u) \
- (zp)[0] = (u); \
- PTR (z) = (zp); \
- SIZ (z) = ((zp)[0] != 0); \
+#define MPZ_FAKE_UI(z, zp, u) \
+ (zp)[0] = (u); \
+ PTR (z) = (zp); \
+ SIZ (z) = ((zp)[0] != 0); \
ASSERT_CODE (ALLOC (z) = 1);
#else /* need two limbs per ulong */
#define LIMBS_PER_ULONG 2
-#define MPN_SET_UI(zp, zn, u) \
- (zp)[0] = (u) & GMP_NUMB_MASK; \
- (zp)[1] = (u) >> GMP_NUMB_BITS; \
+#define MPN_SET_UI(zp, zn, u) \
+ (zp)[0] = (u) & GMP_NUMB_MASK; \
+ (zp)[1] = (u) >> GMP_NUMB_BITS; \
(zn) = ((zp)[1] != 0 ? 2 : (zp)[0] != 0 ? 1 : 0);
-#define MPZ_FAKE_UI(z, zp, u) \
- (zp)[0] = (u) & GMP_NUMB_MASK; \
- (zp)[1] = (u) >> GMP_NUMB_BITS; \
- SIZ (z) = ((zp)[1] != 0 ? 2 : (zp)[0] != 0 ? 1 : 0); \
- PTR (z) = (zp); \
+#define MPZ_FAKE_UI(z, zp, u) \
+ (zp)[0] = (u) & GMP_NUMB_MASK; \
+ (zp)[1] = (u) >> GMP_NUMB_BITS; \
+ SIZ (z) = ((zp)[1] != 0 ? 2 : (zp)[0] != 0 ? 1 : 0); \
+ PTR (z) = (zp); \
ASSERT_CODE (ALLOC (z) = 2);
#endif
shift on past versions too (in particular since an important use of
LIMB_HIGHBIT_TO_MASK is in udiv_qrnnd_preinv). */
-#define LIMB_HIGHBIT_TO_MASK(n) \
- (((mp_limb_signed_t) -1 >> 1) < 0 \
- ? (mp_limb_signed_t) (n) >> (GMP_LIMB_BITS - 1) \
+#define LIMB_HIGHBIT_TO_MASK(n) \
+ (((mp_limb_signed_t) -1 >> 1) < 0 \
+ ? (mp_limb_signed_t) (n) >> (GMP_LIMB_BITS - 1) \
: (n) & GMP_LIMB_HIGHBIT ? MP_LIMB_T_MAX : CNST_LIMB(0))
/* Use a library function for invert_limb, if available. */
-#define mpn_invert_limb __MPN(invert_limb)
-__GMP_DECLSPEC mp_limb_t mpn_invert_limb __GMP_PROTO ((mp_limb_t)) ATTRIBUTE_CONST;
+#define mpn_invert_limb __MPN(invert_limb)
+__GMP_DECLSPEC mp_limb_t mpn_invert_limb (mp_limb_t) ATTRIBUTE_CONST;
#if ! defined (invert_limb) && HAVE_NATIVE_mpn_invert_limb
-#define invert_limb(invxl,xl) \
- do { \
- (invxl) = mpn_invert_limb (xl); \
+#define invert_limb(invxl,xl) \
+ do { \
+ (invxl) = mpn_invert_limb (xl); \
} while (0)
#endif
#ifndef invert_limb
-#define invert_limb(invxl,xl) \
- do { \
- mp_limb_t dummy; \
- ASSERT ((xl) != 0); \
- udiv_qrnnd (invxl, dummy, ~(xl), ~CNST_LIMB(0), xl); \
- } while (0)
-#endif
-
-#define invert_pi1(dinv, d1, d0) \
- do { \
- mp_limb_t v, p, t1, t0, mask; \
- invert_limb (v, d1); \
- p = d1 * v; \
- p += d0; \
- if (p < d0) \
- { \
- v--; \
- mask = -(p >= d1); \
- p -= d1; \
- v += mask; \
- p -= mask & d1; \
- } \
- umul_ppmm (t1, t0, d0, v); \
- p += t1; \
- if (p < t1) \
- { \
- v--; \
- if (UNLIKELY (p >= d1)) \
- { \
- if (p > d1 || t0 >= d0) \
- v--; \
- } \
- } \
- (dinv).inv32 = v; \
+#define invert_limb(invxl,xl) \
+ do { \
+ mp_limb_t _dummy; \
+ ASSERT ((xl) != 0); \
+ udiv_qrnnd (invxl, _dummy, ~(xl), ~CNST_LIMB(0), xl); \
} while (0)
-
-
-#ifndef udiv_qrnnd_preinv
-#define udiv_qrnnd_preinv udiv_qrnnd_preinv3
#endif
-/* Divide the two-limb number in (NH,,NL) by D, with DI being the largest
- limb not larger than (2**(2*GMP_LIMB_BITS))/D - (2**GMP_LIMB_BITS).
- If this would yield overflow, DI should be the largest possible number
- (i.e., only ones). For correct operation, the most significant bit of D
- has to be set. Put the quotient in Q and the remainder in R. */
-#define udiv_qrnnd_preinv1(q, r, nh, nl, d, di) \
+#define invert_pi1(dinv, d1, d0) \
do { \
- mp_limb_t _q, _ql, _r; \
- mp_limb_t _xh, _xl; \
- ASSERT ((d) != 0); \
- umul_ppmm (_q, _ql, (nh), (di)); \
- _q += (nh); /* Compensate, di is 2**GMP_LIMB_BITS too small */ \
- umul_ppmm (_xh, _xl, _q, (d)); \
- sub_ddmmss (_xh, _r, (nh), (nl), _xh, _xl); \
- if (_xh != 0) \
+ mp_limb_t _v, _p, _t1, _t0, _mask; \
+ invert_limb (_v, d1); \
+ _p = (d1) * _v; \
+ _p += (d0); \
+ if (_p < (d0)) \
{ \
- sub_ddmmss (_xh, _r, _xh, _r, 0, (d)); \
- _q += 1; \
- if (_xh != 0) \
- { \
- _r -= (d); \
- _q += 1; \
- } \
+ _v--; \
+ _mask = -(mp_limb_t) (_p >= (d1)); \
+ _p -= (d1); \
+ _v += _mask; \
+ _p -= _mask & (d1); \
} \
- if (_r >= (d)) \
+ umul_ppmm (_t1, _t0, d0, _v); \
+ _p += _t1; \
+ if (_p < _t1) \
{ \
- _r -= (d); \
- _q += 1; \
+ _v--; \
+ if (UNLIKELY (_p >= (d1))) \
+ { \
+ if (_p > (d1) || _t0 >= (d0)) \
+ _v--; \
+ } \
} \
- (r) = _r; \
- (q) = _q; \
- } while (0)
-
-/* Like udiv_qrnnd_preinv, but branch-free. */
-#define udiv_qrnnd_preinv2(q, r, nh, nl, d, di) \
- do { \
- mp_limb_t _n2, _n10, _nmask, _nadj, _q1; \
- mp_limb_t _xh, _xl; \
- _n2 = (nh); \
- _n10 = (nl); \
- _nmask = LIMB_HIGHBIT_TO_MASK (_n10); \
- _nadj = _n10 + (_nmask & (d)); \
- umul_ppmm (_xh, _xl, di, _n2 - _nmask); \
- add_ssaaaa (_xh, _xl, _xh, _xl, _n2, _nadj); \
- _q1 = ~_xh; \
- umul_ppmm (_xh, _xl, _q1, d); \
- add_ssaaaa (_xh, _xl, _xh, _xl, nh, nl); \
- _xh -= (d); /* xh = 0 or -1 */ \
- (r) = _xl + ((d) & _xh); \
- (q) = _xh - _q1; \
- } while (0)
-
-/* Like udiv_qrnnd_preinv2, but for for any value D. DNORM is D shifted left
- so that its most significant bit is set. LGUP is ceil(log2(D)). */
-#define udiv_qrnnd_preinv2gen(q, r, nh, nl, d, di, dnorm, lgup) \
- do { \
- mp_limb_t _n2, _n10, _nmask, _nadj, _q1; \
- mp_limb_t _xh, _xl; \
- _n2 = ((nh) << (GMP_LIMB_BITS - (lgup))) + ((nl) >> 1 >> (l - 1)); \
- _n10 = (nl) << (GMP_LIMB_BITS - (lgup)); \
- _nmask = LIMB_HIGHBIT_TO_MASK (_n10); \
- _nadj = _n10 + (_nmask & (dnorm)); \
- umul_ppmm (_xh, _xl, di, _n2 - _nmask); \
- add_ssaaaa (_xh, _xl, _xh, _xl, _n2, _nadj); \
- _q1 = ~_xh; \
- umul_ppmm (_xh, _xl, _q1, d); \
- add_ssaaaa (_xh, _xl, _xh, _xl, nh, nl); \
- _xh -= (d); \
- (r) = _xl + ((d) & _xh); \
- (q) = _xh - _q1; \
+ (dinv).inv32 = _v; \
} while (0)
-/* udiv_qrnnd_preinv3 -- Based on work by Niels Möller and Torbjörn Granlund.
+/* udiv_qrnnd_preinv -- Based on work by Niels Möller and Torbjörn Granlund.
We write things strangely below, to help gcc. A more straightforward
version:
-
- _r = (nl) - _qh * (d);
- _t = _r + (d);
- if (_r >= _ql)
- {
- _qh--;
- _r = _t;
- }
-
+ _r = (nl) - _qh * (d);
+ _t = _r + (d);
+ if (_r >= _ql)
+ {
+ _qh--;
+ _r = _t;
+ }
For one operation shorter critical path, one may want to use this form:
-
- _p = _qh * (d)
- _s = (nl) + (d);
- _r = (nl) - _p;
- _t = _s - _p;
- if (_r >= _ql)
- {
- _qh--;
- _r = _t;
- }
+ _p = _qh * (d)
+ _s = (nl) + (d);
+ _r = (nl) - _p;
+ _t = _s - _p;
+ if (_r >= _ql)
+ {
+ _qh--;
+ _r = _t;
+ }
*/
-#define udiv_qrnnd_preinv3(q, r, nh, nl, d, di) \
+#define udiv_qrnnd_preinv(q, r, nh, nl, d, di) \
do { \
- mp_limb_t _qh, _ql, _r; \
+ mp_limb_t _qh, _ql, _r, _mask; \
umul_ppmm (_qh, _ql, (nh), (di)); \
if (__builtin_constant_p (nl) && (nl) == 0) \
- _qh += (nh) + 1; \
- else \
- add_ssaaaa (_qh, _ql, _qh, _ql, (nh) + 1, (nl)); \
- _r = (nl) - _qh * (d); \
- if (_r > _ql) /* both > and >= should be OK */ \
{ \
- _r += (d); \
- _qh--; \
+ _qh += (nh) + 1; \
+ _r = - _qh * (d); \
+ _mask = -(mp_limb_t) (_r > _ql); /* both > and >= are OK */ \
+ _qh += _mask; \
+ _r += _mask & (d); \
} \
- if (UNLIKELY (_r >= (d))) \
+ else \
{ \
- _r -= (d); \
- _qh++; \
+ add_ssaaaa (_qh, _ql, _qh, _ql, (nh) + 1, (nl)); \
+ _r = (nl) - _qh * (d); \
+ _mask = -(mp_limb_t) (_r > _ql); /* both > and >= are OK */ \
+ _qh += _mask; \
+ _r += _mask & (d); \
+ if (UNLIKELY (_r >= (d))) \
+ { \
+ _r -= (d); \
+ _qh++; \
+ } \
} \
(r) = _r; \
(q) = _qh; \
} while (0)
-/* Compute r = nh*B mod d, where di is the inverse of d. */
-#define udiv_rnd_preinv(r, nh, d, di) \
+/* Dividing (NH, NL) by D, returning the remainder only. Unlike
+ udiv_qrnnd_preinv, works also for the case NH == D, where the
+ quotient doesn't quite fit in a single limb. */
+#define udiv_rnnd_preinv(r, nh, nl, d, di) \
do { \
- mp_limb_t _qh, _ql, _r; \
+ mp_limb_t _qh, _ql, _r, _mask; \
umul_ppmm (_qh, _ql, (nh), (di)); \
- _qh += (nh) + 1; \
- _r = - _qh * (d); \
- if (_r > _ql) \
- _r += (d); \
+ if (__builtin_constant_p (nl) && (nl) == 0) \
+ { \
+ _r = ~(_qh + (nh)) * (d); \
+ _mask = -(mp_limb_t) (_r > _ql); /* both > and >= are OK */ \
+ _r += _mask & (d); \
+ } \
+ else \
+ { \
+ add_ssaaaa (_qh, _ql, _qh, _ql, (nh) + 1, (nl)); \
+ _r = (nl) - _qh * (d); \
+ _mask = -(mp_limb_t) (_r > _ql); /* both > and >= are OK */ \
+ _r += _mask & (d); \
+ if (UNLIKELY (_r >= (d))) \
+ _r -= (d); \
+ } \
(r) = _r; \
} while (0)
\
/* Compute the two most significant limbs of n - q'd */ \
(r1) = (n1) - (d1) * (q); \
- (r0) = (n0); \
- sub_ddmmss ((r1), (r0), (r1), (r0), (d1), (d0)); \
+ sub_ddmmss ((r1), (r0), (r1), (n0), (d1), (d0)); \
umul_ppmm (_t1, _t0, (d0), (q)); \
sub_ddmmss ((r1), (r0), (r1), (r0), _t1, _t0); \
(q)++; \
#ifndef mpn_preinv_divrem_1 /* if not done with cpuvec in a fat binary */
#define mpn_preinv_divrem_1 __MPN(preinv_divrem_1)
-__GMP_DECLSPEC mp_limb_t mpn_preinv_divrem_1 __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, int));
+__GMP_DECLSPEC mp_limb_t mpn_preinv_divrem_1 (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, int);
#endif
/* This selection may seem backwards. The reason mpn_mod_1 typically takes
over for larger sizes is that it uses the mod_1_1 function. */
-#define MPN_MOD_OR_PREINV_MOD_1(src,size,divisor,inverse) \
+#define MPN_MOD_OR_PREINV_MOD_1(src,size,divisor,inverse) \
(BELOW_THRESHOLD (size, PREINV_MOD_1_TO_MOD_1_THRESHOLD) \
? mpn_preinv_mod_1 (src, size, divisor, inverse) \
: mpn_mod_1 (src, size, divisor))
#ifndef mpn_mod_34lsub1 /* if not done with cpuvec in a fat binary */
-#define mpn_mod_34lsub1 __MPN(mod_34lsub1)
-__GMP_DECLSPEC mp_limb_t mpn_mod_34lsub1 __GMP_PROTO ((mp_srcptr, mp_size_t)) __GMP_ATTRIBUTE_PURE;
+#define mpn_mod_34lsub1 __MPN(mod_34lsub1)
+__GMP_DECLSPEC mp_limb_t mpn_mod_34lsub1 (mp_srcptr, mp_size_t) __GMP_ATTRIBUTE_PURE;
#endif
#ifndef mpn_divexact_1 /* if not done with cpuvec in a fat binary */
#define mpn_divexact_1 __MPN(divexact_1)
-__GMP_DECLSPEC void mpn_divexact_1 __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));
-#endif
-
-#define MPN_DIVREM_OR_DIVEXACT_1(dst, src, size, divisor) \
- do { \
- if (BELOW_THRESHOLD (size, DIVEXACT_1_THRESHOLD)) \
- ASSERT_NOCARRY (mpn_divrem_1 (dst, (mp_size_t) 0, src, size, divisor)); \
- else \
- { \
- ASSERT (mpn_mod_1 (src, size, divisor) == 0); \
- mpn_divexact_1 (dst, src, size, divisor); \
- } \
+__GMP_DECLSPEC void mpn_divexact_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+#endif
+
+#define MPN_DIVREM_OR_DIVEXACT_1(rp, up, n, d) \
+ do { \
+ if (BELOW_THRESHOLD (n, DIVEXACT_1_THRESHOLD)) \
+ ASSERT_NOCARRY (mpn_divrem_1 (rp, (mp_size_t) 0, up, n, d)); \
+ else \
+ { \
+ ASSERT (mpn_mod_1 (up, n, d) == 0); \
+ mpn_divexact_1 (rp, up, n, d); \
+ } \
} while (0)
#ifndef mpn_modexact_1c_odd /* if not done with cpuvec in a fat binary */
-#define mpn_modexact_1c_odd __MPN(modexact_1c_odd)
-__GMP_DECLSPEC mp_limb_t mpn_modexact_1c_odd __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t)) __GMP_ATTRIBUTE_PURE;
+#define mpn_modexact_1c_odd __MPN(modexact_1c_odd)
+__GMP_DECLSPEC mp_limb_t mpn_modexact_1c_odd (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t) __GMP_ATTRIBUTE_PURE;
#endif
#if HAVE_NATIVE_mpn_modexact_1_odd
#define mpn_modexact_1_odd __MPN(modexact_1_odd)
-__GMP_DECLSPEC mp_limb_t mpn_modexact_1_odd __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t)) __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC mp_limb_t mpn_modexact_1_odd (mp_srcptr, mp_size_t, mp_limb_t) __GMP_ATTRIBUTE_PURE;
#else
#define mpn_modexact_1_odd(src,size,divisor) \
mpn_modexact_1c_odd (src, size, divisor, CNST_LIMB(0))
\
if ((a) <= (d)) \
{ \
- /* small a is reasonably likely */ \
- (r) = (d) - (a); \
+ /* small a is reasonably likely */ \
+ (r) = (d) - (a); \
} \
else \
{ \
- unsigned __twos; \
- mp_limb_t __dnorm; \
- count_leading_zeros (__twos, d); \
- __twos -= GMP_NAIL_BITS; \
- __dnorm = (d) << __twos; \
- (r) = ((a) <= __dnorm ? __dnorm : 2*__dnorm) - (a); \
+ unsigned __twos; \
+ mp_limb_t __dnorm; \
+ count_leading_zeros (__twos, d); \
+ __twos -= GMP_NAIL_BITS; \
+ __dnorm = (d) << __twos; \
+ (r) = ((a) <= __dnorm ? __dnorm : 2*__dnorm) - (a); \
} \
\
ASSERT_LIMB (r); \
int __p = 0; \
do \
{ \
- __p ^= 0x96696996L >> (__n & 0x1F); \
- __n >>= 5; \
+ __p ^= 0x96696996L >> (__n & 0x1F); \
+ __n >>= 5; \
} \
while (__n != 0); \
\
#if ! defined (BSWAP_LIMB)
#if GMP_LIMB_BITS == 8
-#define BSWAP_LIMB(dst, src) \
+#define BSWAP_LIMB(dst, src) \
do { (dst) = (src); } while (0)
#endif
#if GMP_LIMB_BITS == 16
-#define BSWAP_LIMB(dst, src) \
- do { \
- (dst) = ((src) << 8) + ((src) >> 8); \
+#define BSWAP_LIMB(dst, src) \
+ do { \
+ (dst) = ((src) << 8) + ((src) >> 8); \
} while (0)
#endif
#if GMP_LIMB_BITS == 32
-#define BSWAP_LIMB(dst, src) \
- do { \
- (dst) = \
- ((src) << 24) \
- + (((src) & 0xFF00) << 8) \
- + (((src) >> 8) & 0xFF00) \
- + ((src) >> 24); \
+#define BSWAP_LIMB(dst, src) \
+ do { \
+ (dst) = \
+ ((src) << 24) \
+ + (((src) & 0xFF00) << 8) \
+ + (((src) >> 8) & 0xFF00) \
+ + ((src) >> 24); \
} while (0)
#endif
#if GMP_LIMB_BITS == 64
-#define BSWAP_LIMB(dst, src) \
- do { \
- (dst) = \
- ((src) << 56) \
- + (((src) & 0xFF00) << 40) \
- + (((src) & 0xFF0000) << 24) \
- + (((src) & 0xFF000000) << 8) \
- + (((src) >> 8) & 0xFF000000) \
- + (((src) >> 24) & 0xFF0000) \
- + (((src) >> 40) & 0xFF00) \
- + ((src) >> 56); \
+#define BSWAP_LIMB(dst, src) \
+ do { \
+ (dst) = \
+ ((src) << 56) \
+ + (((src) & 0xFF00) << 40) \
+ + (((src) & 0xFF0000) << 24) \
+ + (((src) & 0xFF000000) << 8) \
+ + (((src) >> 8) & 0xFF000000) \
+ + (((src) >> 24) & 0xFF0000) \
+ + (((src) >> 40) & 0xFF00) \
+ + ((src) >> 56); \
} while (0)
#endif
#endif
#if ! defined (BSWAP_LIMB)
-#define BSWAP_LIMB(dst, src) \
- do { \
- mp_limb_t __bswapl_src = (src); \
- mp_limb_t __dst = 0; \
- int __i; \
- for (__i = 0; __i < BYTES_PER_MP_LIMB; __i++) \
- { \
- __dst = (__dst << 8) | (__bswapl_src & 0xFF); \
- __bswapl_src >>= 8; \
- } \
- (dst) = __dst; \
+#define BSWAP_LIMB(dst, src) \
+ do { \
+ mp_limb_t __bswapl_src = (src); \
+ mp_limb_t __dstl = 0; \
+ int __i; \
+ for (__i = 0; __i < BYTES_PER_MP_LIMB; __i++) \
+ { \
+ __dstl = (__dstl << 8) | (__bswapl_src & 0xFF); \
+ __bswapl_src >>= 8; \
+ } \
+ (dst) = __dstl; \
} while (0)
#endif
/* Apparently lwbrx might be slow on some PowerPC chips, so restrict it to
those we know are fast. */
-#if defined (__GNUC__) && ! defined (NO_ASM) \
- && GMP_LIMB_BITS == 32 && HAVE_LIMB_BIG_ENDIAN \
- && (HAVE_HOST_CPU_powerpc604 \
- || HAVE_HOST_CPU_powerpc604e \
- || HAVE_HOST_CPU_powerpc750 \
+#if defined (__GNUC__) && ! defined (NO_ASM) \
+ && GMP_LIMB_BITS == 32 && HAVE_LIMB_BIG_ENDIAN \
+ && (HAVE_HOST_CPU_powerpc604 \
+ || HAVE_HOST_CPU_powerpc604e \
+ || HAVE_HOST_CPU_powerpc750 \
|| HAVE_HOST_CPU_powerpc7400)
#define BSWAP_LIMB_FETCH(limb, src) \
do { \
/* On the same basis that lwbrx might be slow, restrict stwbrx to those we
know are fast. FIXME: Is this necessary? */
-#if defined (__GNUC__) && ! defined (NO_ASM) \
- && GMP_LIMB_BITS == 32 && HAVE_LIMB_BIG_ENDIAN \
- && (HAVE_HOST_CPU_powerpc604 \
- || HAVE_HOST_CPU_powerpc604e \
- || HAVE_HOST_CPU_powerpc750 \
+#if defined (__GNUC__) && ! defined (NO_ASM) \
+ && GMP_LIMB_BITS == 32 && HAVE_LIMB_BIG_ENDIAN \
+ && (HAVE_HOST_CPU_powerpc604 \
+ || HAVE_HOST_CPU_powerpc604e \
+ || HAVE_HOST_CPU_powerpc750 \
|| HAVE_HOST_CPU_powerpc7400)
#define BSWAP_LIMB_STORE(dst, limb) \
do { \
/* Byte swap limbs from {src,size} and store at {dst,size}. */
-#define MPN_BSWAP(dst, src, size) \
- do { \
- mp_ptr __dst = (dst); \
- mp_srcptr __src = (src); \
- mp_size_t __size = (size); \
- mp_size_t __i; \
- ASSERT ((size) >= 0); \
- ASSERT (MPN_SAME_OR_SEPARATE_P (dst, src, size)); \
- CRAY_Pragma ("_CRI ivdep"); \
- for (__i = 0; __i < __size; __i++) \
- { \
- BSWAP_LIMB_FETCH (*__dst, __src); \
- __dst++; \
- __src++; \
- } \
+#define MPN_BSWAP(dst, src, size) \
+ do { \
+ mp_ptr __dst = (dst); \
+ mp_srcptr __src = (src); \
+ mp_size_t __size = (size); \
+ mp_size_t __i; \
+ ASSERT ((size) >= 0); \
+ ASSERT (MPN_SAME_OR_SEPARATE_P (dst, src, size)); \
+ CRAY_Pragma ("_CRI ivdep"); \
+ for (__i = 0; __i < __size; __i++) \
+ { \
+ BSWAP_LIMB_FETCH (*__dst, __src); \
+ __dst++; \
+ __src++; \
+ } \
} while (0)
/* Byte swap limbs from {src,size} and store in reverse order at {dst,size}. */
-#define MPN_BSWAP_REVERSE(dst, src, size) \
- do { \
- mp_ptr __dst = (dst); \
- mp_size_t __size = (size); \
- mp_srcptr __src = (src) + __size - 1; \
- mp_size_t __i; \
- ASSERT ((size) >= 0); \
- ASSERT (! MPN_OVERLAP_P (dst, size, src, size)); \
- CRAY_Pragma ("_CRI ivdep"); \
- for (__i = 0; __i < __size; __i++) \
- { \
- BSWAP_LIMB_FETCH (*__dst, __src); \
- __dst++; \
- __src--; \
- } \
+#define MPN_BSWAP_REVERSE(dst, src, size) \
+ do { \
+ mp_ptr __dst = (dst); \
+ mp_size_t __size = (size); \
+ mp_srcptr __src = (src) + __size - 1; \
+ mp_size_t __i; \
+ ASSERT ((size) >= 0); \
+ ASSERT (! MPN_OVERLAP_P (dst, size, src, size)); \
+ CRAY_Pragma ("_CRI ivdep"); \
+ for (__i = 0; __i < __size; __i++) \
+ { \
+ BSWAP_LIMB_FETCH (*__dst, __src); \
+ __dst++; \
+ __src--; \
+ } \
} while (0)
/* Cray intrinsic. */
#ifdef _CRAY
-#define popc_limb(result, input) \
- do { \
- (result) = _popcnt (input); \
+#define popc_limb(result, input) \
+ do { \
+ (result) = _popcnt (input); \
} while (0)
#endif
mp_limb_t __x = (input); \
__x -= (__x >> 1) & MP_LIMB_T_MAX/3; \
__x = ((__x >> 2) & MP_LIMB_T_MAX/5) + (__x & MP_LIMB_T_MAX/5); \
- __x = ((__x >> 4) + __x) & MP_LIMB_T_MAX/17; \
- (result) = __x & 0xff; \
+ __x = ((__x >> 4) + __x); \
+ (result) = __x & 0x0f; \
} while (0)
#endif
};
#endif
+#if HAVE_DOUBLE_VAX_D
+union double_extract /* bit-field view of a double when HAVE_DOUBLE_VAX_D */
+{
+ struct
+ {
+ gmp_uint_least32_t man3:7; /* highest 7 bits */
+ gmp_uint_least32_t exp:8; /* excess-128 exponent */
+ gmp_uint_least32_t sig:1; /* presumably the sign bit -- confirm */
+ gmp_uint_least32_t man2:16; /* mantissa bits between man3 and man1 */
+ gmp_uint_least32_t man1:16; /* mantissa bits between man2 and man0 */
+ gmp_uint_least32_t man0:16; /* lowest 16 bits */
+ } s; /* 7 + 16 + 16 + 16 = 55 mantissa bits in total */
+ double d; /* the same storage accessed as a double */
+};
+#endif
/* Use (4.0 * ...) instead of (2.0 * ...) to work around buggy compilers
that don't convert ulong->double correctly (eg. SunOS 4 native cc). */
We assume doubles have 53 mantissa bits. */
#define LIMBS_PER_DOUBLE ((53 + GMP_NUMB_BITS - 2) / GMP_NUMB_BITS + 1)
-__GMP_DECLSPEC int __gmp_extract_double __GMP_PROTO ((mp_ptr, double));
+__GMP_DECLSPEC int __gmp_extract_double (mp_ptr, double);
#define mpn_get_d __gmpn_get_d
-__GMP_DECLSPEC double mpn_get_d __GMP_PROTO ((mp_srcptr, mp_size_t, mp_size_t, long)) __GMP_ATTRIBUTE_PURE;
+__GMP_DECLSPEC double mpn_get_d (mp_srcptr, mp_size_t, mp_size_t, long) __GMP_ATTRIBUTE_PURE;
/* DOUBLE_NAN_INF_ACTION executes code a_nan if x is a NaN, or executes
branch prediction. */
#if _GMP_IEEE_FLOATS
-#define DOUBLE_NAN_INF_ACTION(x, a_nan, a_inf) \
- do { \
- union ieee_double_extract u; \
- u.d = (x); \
- if (UNLIKELY (u.s.exp == 0x7FF)) \
- { \
- if (u.s.manl == 0 && u.s.manh == 0) \
- { a_inf; } \
- else \
- { a_nan; } \
- } \
+#define DOUBLE_NAN_INF_ACTION(x, a_nan, a_inf) \
+ do { \
+ union ieee_double_extract u; \
+ u.d = (x); \
+ if (UNLIKELY (u.s.exp == 0x7FF)) \
+ { \
+ if (u.s.manl == 0 && u.s.manh == 0) \
+ { a_inf; } \
+ else \
+ { a_nan; } \
+ } \
} while (0)
#endif
/* Unknown format, try something generic.
NaN should be "unordered", so x!=x.
Inf should be bigger than DBL_MAX. */
-#define DOUBLE_NAN_INF_ACTION(x, a_nan, a_inf) \
- do { \
- { \
- if (UNLIKELY ((x) != (x))) \
- { a_nan; } \
- else if (UNLIKELY ((x) > DBL_MAX || (x) < -DBL_MAX)) \
- { a_inf; } \
- } \
+#define DOUBLE_NAN_INF_ACTION(x, a_nan, a_inf) \
+ do { \
+ { \
+ if (UNLIKELY ((x) != (x))) \
+ { a_nan; } \
+ else if (UNLIKELY ((x) > DBL_MAX || (x) < -DBL_MAX)) \
+ { a_inf; } \
+ } \
} while (0)
#endif
#endif
+__GMP_DECLSPEC extern const unsigned char __gmp_digit_value_tab[];
+
__GMP_DECLSPEC extern int __gmp_junk;
__GMP_DECLSPEC extern const int __gmp_0;
-__GMP_DECLSPEC void __gmp_exception __GMP_PROTO ((int)) ATTRIBUTE_NORETURN;
-__GMP_DECLSPEC void __gmp_divide_by_zero __GMP_PROTO ((void)) ATTRIBUTE_NORETURN;
-__GMP_DECLSPEC void __gmp_sqrt_of_negative __GMP_PROTO ((void)) ATTRIBUTE_NORETURN;
-__GMP_DECLSPEC void __gmp_invalid_operation __GMP_PROTO ((void)) ATTRIBUTE_NORETURN;
+__GMP_DECLSPEC void __gmp_exception (int) ATTRIBUTE_NORETURN;
+__GMP_DECLSPEC void __gmp_divide_by_zero (void) ATTRIBUTE_NORETURN;
+__GMP_DECLSPEC void __gmp_sqrt_of_negative (void) ATTRIBUTE_NORETURN;
+__GMP_DECLSPEC void __gmp_invalid_operation (void) ATTRIBUTE_NORETURN;
#define GMP_ERROR(code) __gmp_exception (code)
#define DIVIDE_BY_ZERO __gmp_divide_by_zero ()
#define SQRT_OF_NEGATIVE __gmp_sqrt_of_negative ()
#if defined _LONG_LONG_LIMB
-#if __GMP_HAVE_TOKEN_PASTE
#define CNST_LIMB(C) ((mp_limb_t) C##LL)
-#else
-#define CNST_LIMB(C) ((mp_limb_t) C/**/LL)
-#endif
#else /* not _LONG_LONG_LIMB */
-#if __GMP_HAVE_TOKEN_PASTE
#define CNST_LIMB(C) ((mp_limb_t) C##L)
-#else
-#define CNST_LIMB(C) ((mp_limb_t) C/**/L)
-#endif
#endif /* _LONG_LONG_LIMB */
/* Stuff used by mpn/generic/perfsqr.c and mpz/prime_p.c */
#define PP_FIRST_OMITTED 3
#endif
-
-
/* BIT1 means a result value in bit 1 (second least significant bit), with a
zero bit representing +1 and a one bit representing -1. Bits other than
bit 1 are garbage. These are meant to be kept in "int"s, and casts are
/* (a/0), with a unsigned; is 1 if a=+/-1, 0 otherwise */
#define JACOBI_U0(a) ((a) == 1)
+/* FIXME: JACOBI_LS0 and JACOBI_0LS are the same, so delete one and
+ come up with a better name. */
+
/* (a/0), with a given by low and size;
is 1 if a=+/-1, 0 otherwise */
#define JACOBI_LS0(alow,asize) \
decrementing b_size. b_low should be b_ptr[0] on entry, and will be
updated for the new b_ptr. result_bit1 is updated according to the
factors of 2 stripped, as per (a/2). */
-#define JACOBI_STRIP_LOW_ZEROS(result_bit1, a, b_ptr, b_size, b_low) \
- do { \
- ASSERT ((b_size) >= 1); \
- ASSERT ((b_low) == (b_ptr)[0]); \
- \
- while (UNLIKELY ((b_low) == 0)) \
- { \
- (b_size)--; \
- ASSERT ((b_size) >= 1); \
- (b_ptr)++; \
- (b_low) = *(b_ptr); \
- \
- ASSERT (((a) & 1) != 0); \
- if ((GMP_NUMB_BITS % 2) == 1) \
- (result_bit1) ^= JACOBI_TWO_U_BIT1(a); \
- } \
+#define JACOBI_STRIP_LOW_ZEROS(result_bit1, a, b_ptr, b_size, b_low) \
+ do { \
+ ASSERT ((b_size) >= 1); \
+ ASSERT ((b_low) == (b_ptr)[0]); \
+ \
+ while (UNLIKELY ((b_low) == 0)) \
+ { \
+ (b_size)--; \
+ ASSERT ((b_size) >= 1); \
+ (b_ptr)++; \
+ (b_low) = *(b_ptr); \
+ \
+ ASSERT (((a) & 1) != 0); \
+ if ((GMP_NUMB_BITS % 2) == 1) \
+ (result_bit1) ^= JACOBI_TWO_U_BIT1(a); \
+ } \
} while (0)
/* Set a_rem to {a_ptr,a_size} reduced modulo b, either using mod_1 or
or not skip a divide step, or something. */
#define JACOBI_MOD_OR_MODEXACT_1_ODD(result_bit1, a_rem, a_ptr, a_size, b) \
- do { \
- mp_srcptr __a_ptr = (a_ptr); \
- mp_size_t __a_size = (a_size); \
- mp_limb_t __b = (b); \
- \
- ASSERT (__a_size >= 1); \
- ASSERT (__b & 1); \
- \
- if ((GMP_NUMB_BITS % 2) != 0 \
- || ABOVE_THRESHOLD (__a_size, BMOD_1_TO_MOD_1_THRESHOLD)) \
- { \
- (a_rem) = mpn_mod_1 (__a_ptr, __a_size, __b); \
- } \
- else \
- { \
- (result_bit1) ^= JACOBI_N1B_BIT1 (__b); \
- (a_rem) = mpn_modexact_1_odd (__a_ptr, __a_size, __b); \
- } \
+ do { \
+ mp_srcptr __a_ptr = (a_ptr); \
+ mp_size_t __a_size = (a_size); \
+ mp_limb_t __b = (b); \
+ \
+ ASSERT (__a_size >= 1); \
+ ASSERT (__b & 1); \
+ \
+ if ((GMP_NUMB_BITS % 2) != 0 \
+ || ABOVE_THRESHOLD (__a_size, BMOD_1_TO_MOD_1_THRESHOLD)) \
+ { \
+ (a_rem) = mpn_mod_1 (__a_ptr, __a_size, __b); \
+ } \
+ else \
+ { \
+ (result_bit1) ^= JACOBI_N1B_BIT1 (__b); \
+ (a_rem) = mpn_modexact_1_odd (__a_ptr, __a_size, __b); \
+ } \
} while (0)
+/* State for the Jacobi computation using Lehmer. */
+#define jacobi_table __gmp_jacobi_table
+__GMP_DECLSPEC extern const unsigned char jacobi_table[208];
+
+/* Bit layout for the initial state. b must be odd.
+
+      3  2  1  0
+    +--+--+--+--+
+    |a1|a0|b1| s|
+    +--+--+--+--+
+
+ mpn_jacobi_init builds that state: the two low bits of a go to bits 3..2,
+ bit 1 of b goes to bit 1, and s (which must be 0 or 1) goes to bit 0.
+ Bit 0 of b is not stored; it is only asserted to be set.
+ */
+static inline unsigned
+mpn_jacobi_init (unsigned a, unsigned b, unsigned s)
+{
+ ASSERT (b & 1);
+ ASSERT (s <= 1);
+ return ((a & 3) << 2) + (b & 2) + s;
+}
+
+static inline int
+mpn_jacobi_finish (unsigned bits)
+{
+ /* Map a final state to +1 or -1: the result is 1 when bit 0 of bits is
+ clear and -1 when it is set. Bits 1..3 must all be zero on entry,
+ which corresponds to (a, b) = (1,0) or (0,1) */
+ ASSERT ( (bits & 14) == 0);
+
+ return 1-2*(bits & 1);
+}
+
+static inline unsigned
+mpn_jacobi_update (unsigned bits, unsigned denominator, unsigned q)
+{
+ /* Return the next state, looked up in jacobi_table from the current
+ * state (bits), the one-bit denominator flag and the two-bit value q.
+ *
+ * FIXME: Could halve table size by not including the e bit in the
+ * index, and instead xor when updating. Then the lookup would be
+ * like
+ *
+ * bits ^= table[((bits & 30) << 2) + (denominator << 2) + q];
+ */
+
+ /* These ranges bound the index below by 25*8 + 4 + 3 = 207, the last
+ entry of jacobi_table[208]. */
+ ASSERT (bits < 26);
+ ASSERT (denominator < 2);
+ ASSERT (q < 4);
+
+ /* For almost all calls, denominator is constant and quite often q
+ is constant too. So use addition rather than or, so the compiler
+ can put the constant part into the offset of an indexed
+ addressing instruction.
+
+ With constant denominator, the below table lookup is compiled to
+
+ C Constant q = 1, constant denominator = 1
+ movzbl table+5(%eax,8), %eax
+
+ or
+
+ C q in %edx, constant denominator = 1
+ movzbl table+4(%edx,%eax,8), %eax
+
+ One could maintain the state preshifted 3 bits, to save a shift
+ here, but at least on x86, that's no real saving.
+ */
+ /* The parameter is passed by value, so the looked-up state is returned
+ directly instead of first being stored back into bits. */
+ return jacobi_table[(bits << 3) + (denominator << 2) + q];
+}
+
/* Matrix multiplication */
#define mpn_matrix22_mul __MPN(matrix22_mul)
-__GMP_DECLSPEC void mpn_matrix22_mul __GMP_PROTO ((mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_srcptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr));
+__GMP_DECLSPEC void mpn_matrix22_mul (mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_srcptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr);
#define mpn_matrix22_mul_strassen __MPN(matrix22_mul_strassen)
-__GMP_DECLSPEC void mpn_matrix22_mul_strassen __GMP_PROTO ((mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_srcptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr));
+__GMP_DECLSPEC void mpn_matrix22_mul_strassen (mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_srcptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr);
#define mpn_matrix22_mul_itch __MPN(matrix22_mul_itch)
-__GMP_DECLSPEC mp_size_t mpn_matrix22_mul_itch __GMP_PROTO ((mp_size_t, mp_size_t));
+__GMP_DECLSPEC mp_size_t mpn_matrix22_mul_itch (mp_size_t, mp_size_t);
#ifndef MATRIX22_STRASSEN_THRESHOLD
#define MATRIX22_STRASSEN_THRESHOLD 30
};
#define mpn_hgcd2 __MPN (hgcd2)
-__GMP_DECLSPEC int mpn_hgcd2 __GMP_PROTO ((mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t, struct hgcd_matrix1 *));
+__GMP_DECLSPEC int mpn_hgcd2 (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t, struct hgcd_matrix1 *);
#define mpn_hgcd_mul_matrix1_vector __MPN (hgcd_mul_matrix1_vector)
-__GMP_DECLSPEC mp_size_t mpn_hgcd_mul_matrix1_vector __GMP_PROTO ((const struct hgcd_matrix1 *, mp_ptr, mp_srcptr, mp_ptr, mp_size_t));
+__GMP_DECLSPEC mp_size_t mpn_hgcd_mul_matrix1_vector (const struct hgcd_matrix1 *, mp_ptr, mp_srcptr, mp_ptr, mp_size_t);
+
+#define mpn_matrix22_mul1_inverse_vector __MPN (matrix22_mul1_inverse_vector)
+__GMP_DECLSPEC mp_size_t mpn_matrix22_mul1_inverse_vector (const struct hgcd_matrix1 *, mp_ptr, mp_srcptr, mp_ptr, mp_size_t);
-#define mpn_hgcd_mul_matrix1_inverse_vector __MPN (hgcd_mul_matrix1_inverse_vector)
-__GMP_DECLSPEC mp_size_t mpn_hgcd_mul_matrix1_inverse_vector __GMP_PROTO ((const struct hgcd_matrix1 *, mp_ptr, mp_srcptr, mp_ptr, mp_size_t));
+#define mpn_hgcd2_jacobi __MPN (hgcd2_jacobi)
+__GMP_DECLSPEC int mpn_hgcd2_jacobi (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t, struct hgcd_matrix1 *, unsigned *);
struct hgcd_matrix
{
#define MPN_HGCD_MATRIX_INIT_ITCH(n) (4 * ((n+1)/2 + 1))
#define mpn_hgcd_matrix_init __MPN (hgcd_matrix_init)
-__GMP_DECLSPEC void mpn_hgcd_matrix_init __GMP_PROTO ((struct hgcd_matrix *, mp_size_t, mp_ptr));
+__GMP_DECLSPEC void mpn_hgcd_matrix_init (struct hgcd_matrix *, mp_size_t, mp_ptr);
+
+#define mpn_hgcd_matrix_update_q __MPN (hgcd_matrix_update_q)
+__GMP_DECLSPEC void mpn_hgcd_matrix_update_q (struct hgcd_matrix *, mp_srcptr, mp_size_t, unsigned, mp_ptr);
+
+#define mpn_hgcd_matrix_mul_1 __MPN (hgcd_matrix_mul_1)
+__GMP_DECLSPEC void mpn_hgcd_matrix_mul_1 (struct hgcd_matrix *, const struct hgcd_matrix1 *, mp_ptr);
#define mpn_hgcd_matrix_mul __MPN (hgcd_matrix_mul)
-__GMP_DECLSPEC void mpn_hgcd_matrix_mul __GMP_PROTO ((struct hgcd_matrix *, const struct hgcd_matrix *, mp_ptr));
+__GMP_DECLSPEC void mpn_hgcd_matrix_mul (struct hgcd_matrix *, const struct hgcd_matrix *, mp_ptr);
#define mpn_hgcd_matrix_adjust __MPN (hgcd_matrix_adjust)
-__GMP_DECLSPEC mp_size_t mpn_hgcd_matrix_adjust __GMP_PROTO ((struct hgcd_matrix *, mp_size_t, mp_ptr, mp_ptr, mp_size_t, mp_ptr));
+__GMP_DECLSPEC mp_size_t mpn_hgcd_matrix_adjust (const struct hgcd_matrix *, mp_size_t, mp_ptr, mp_ptr, mp_size_t, mp_ptr);
+
+#define mpn_hgcd_step __MPN(hgcd_step)
+__GMP_DECLSPEC mp_size_t mpn_hgcd_step (mp_size_t, mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr);
+
+#define mpn_hgcd_reduce __MPN(hgcd_reduce)
+__GMP_DECLSPEC mp_size_t mpn_hgcd_reduce (struct hgcd_matrix *, mp_ptr, mp_ptr, mp_size_t, mp_size_t, mp_ptr);
+
+#define mpn_hgcd_reduce_itch __MPN(hgcd_reduce_itch)
+__GMP_DECLSPEC mp_size_t mpn_hgcd_reduce_itch (mp_size_t, mp_size_t);
#define mpn_hgcd_itch __MPN (hgcd_itch)
-__GMP_DECLSPEC mp_size_t mpn_hgcd_itch __GMP_PROTO ((mp_size_t));
+__GMP_DECLSPEC mp_size_t mpn_hgcd_itch (mp_size_t);
#define mpn_hgcd __MPN (hgcd)
-__GMP_DECLSPEC mp_size_t mpn_hgcd __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr));
+__GMP_DECLSPEC mp_size_t mpn_hgcd (mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr);
+
+#define mpn_hgcd_appr_itch __MPN (hgcd_appr_itch)
+__GMP_DECLSPEC mp_size_t mpn_hgcd_appr_itch (mp_size_t);
+
+#define mpn_hgcd_appr __MPN (hgcd_appr)
+__GMP_DECLSPEC int mpn_hgcd_appr (mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr);
-#define MPN_HGCD_LEHMER_ITCH(n) (n)
+#define mpn_hgcd_jacobi __MPN (hgcd_jacobi)
+__GMP_DECLSPEC mp_size_t mpn_hgcd_jacobi (mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, unsigned *, mp_ptr);
-#define mpn_hgcd_lehmer __MPN (hgcd_lehmer)
-__GMP_DECLSPEC mp_size_t mpn_hgcd_lehmer __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr));
+typedef void gcd_subdiv_step_hook(void *, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t, int);
/* Needs storage for the quotient */
#define MPN_GCD_SUBDIV_STEP_ITCH(n) (n)
#define mpn_gcd_subdiv_step __MPN(gcd_subdiv_step)
-__GMP_DECLSPEC mp_size_t mpn_gcd_subdiv_step __GMP_PROTO ((mp_ptr, mp_size_t *, mp_ptr, mp_ptr, mp_size_t, mp_ptr));
+__GMP_DECLSPEC mp_size_t mpn_gcd_subdiv_step (mp_ptr, mp_ptr, mp_size_t, mp_size_t, gcd_subdiv_step_hook *, void *, mp_ptr);
-#define MPN_GCD_LEHMER_N_ITCH(n) (n)
-
-#define mpn_gcd_lehmer_n __MPN(gcd_lehmer_n)
-__GMP_DECLSPEC mp_size_t mpn_gcd_lehmer_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_ptr, mp_size_t, mp_ptr));
+struct gcdext_ctx
+{
+ /* Result parameters. */
+ mp_ptr gp;
+ mp_size_t gn;
+ mp_ptr up;
+ mp_size_t *usize;
+
+ /* Cofactors updated in each step. */
+ mp_size_t un;
+ mp_ptr u0, u1, tp;
+};
-#define mpn_gcdext_subdiv_step __MPN(gcdext_subdiv_step)
-__GMP_DECLSPEC mp_size_t mpn_gcdext_subdiv_step __GMP_PROTO ((mp_ptr, mp_size_t *, mp_ptr, mp_size_t *, mp_ptr, mp_ptr, mp_size_t, mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_ptr));
+#define mpn_gcdext_hook __MPN (gcdext_hook)
+gcd_subdiv_step_hook mpn_gcdext_hook;
#define MPN_GCDEXT_LEHMER_N_ITCH(n) (4*(n) + 3)
#define mpn_gcdext_lehmer_n __MPN(gcdext_lehmer_n)
-__GMP_DECLSPEC mp_size_t mpn_gcdext_lehmer_n __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_ptr, mp_size_t, mp_ptr));
+__GMP_DECLSPEC mp_size_t mpn_gcdext_lehmer_n (mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_ptr, mp_size_t, mp_ptr);
/* 4*(an + 1) + 4*(bn + 1) + an */
#define MPN_GCDEXT_LEHMER_ITCH(an, bn) (5*(an) + 4*(bn) + 8)
#define HGCD_THRESHOLD 400
#endif
+#ifndef HGCD_APPR_THRESHOLD
+#define HGCD_APPR_THRESHOLD 400
+#endif
+
+#ifndef HGCD_REDUCE_THRESHOLD
+#define HGCD_REDUCE_THRESHOLD 1000
+#endif
+
#ifndef GCD_DC_THRESHOLD
#define GCD_DC_THRESHOLD 1000
#endif
#define mpn_dc_get_str_itch(n) ((n) + GMP_LIMB_BITS)
#define mpn_dc_set_str __MPN(dc_set_str)
-__GMP_DECLSPEC mp_size_t mpn_dc_set_str __GMP_PROTO ((mp_ptr, const unsigned char *, size_t, const powers_t *, mp_ptr));
+__GMP_DECLSPEC mp_size_t mpn_dc_set_str (mp_ptr, const unsigned char *, size_t, const powers_t *, mp_ptr);
#define mpn_bc_set_str __MPN(bc_set_str)
-__GMP_DECLSPEC mp_size_t mpn_bc_set_str __GMP_PROTO ((mp_ptr, const unsigned char *, size_t, int));
+__GMP_DECLSPEC mp_size_t mpn_bc_set_str (mp_ptr, const unsigned char *, size_t, int);
#define mpn_set_str_compute_powtab __MPN(set_str_compute_powtab)
-__GMP_DECLSPEC void mpn_set_str_compute_powtab __GMP_PROTO ((powers_t *, mp_ptr, mp_size_t, int));
+__GMP_DECLSPEC void mpn_set_str_compute_powtab (powers_t *, mp_ptr, mp_size_t, int);
/* __GMPF_BITS_TO_PREC applies a minimum 53 bits, rounds upwards to a whole
__GMP_DECLSPEC extern mp_size_t __gmp_default_fp_limb_precision;
+/* Compute the number of base-b digits corresponding to nlimbs limbs, rounding
+ down.
+
+ The value stored in res is the first output (_ph) of umul_ppmm applied to
+ mp_bases[b].logb2 and GMP_NUMB_BITS * nlimbs; the second output (_pl) is
+ discarded. Expands to a single statement: res must be an lvalue and b must
+ be a valid index into mp_bases.
+
+ NOTE(review): _ph is presumably the high limb of the double-limb product --
+ confirm against the umul_ppmm definition. */
+#define DIGITS_IN_BASE_PER_LIMB(res, nlimbs, b) \
+ do { \
+ mp_limb_t _ph, _pl; \
+ umul_ppmm (_ph, _pl, \
+ mp_bases[b].logb2, GMP_NUMB_BITS * (mp_limb_t) (nlimbs));\
+ res = _ph; \
+ } while (0)
+
+/* Compute the number of limbs corresponding to ndigits base-b digits, rounding
+ up.
+
+ The value stored in res is 8 * _ph / GMP_NUMB_BITS + 2, where _ph is the
+ first output of umul_ppmm applied to mp_bases[b].log2b and ndigits; the
+ second output (_dummy) is discarded. Expands to a single statement: res
+ must be an lvalue and b must be a valid index into mp_bases.
+
+ NOTE(review): the factor 8 presumably undoes a scaling applied to the
+ log2b table entries -- confirm against the code that generates mp_bases. */
+#define LIMBS_PER_DIGIT_IN_BASE(res, ndigits, b) \
+ do { \
+ mp_limb_t _ph, _dummy; \
+ umul_ppmm (_ph, _dummy, mp_bases[b].log2b, (mp_limb_t) (ndigits)); \
+ res = 8 * _ph / GMP_NUMB_BITS + 2; \
+ } while (0)
+
/* Set n to the number of significant digits an mpf of the given _mp_prec
field, in the given base. This is a rounded up value, designed to ensure
GMP_LIMB_BITS then the +2 is unnecessary. This happens always for
base==2, and in base==16 with the current 32 or 64 bit limb sizes. */
-#define MPF_SIGNIFICANT_DIGITS(n, base, prec) \
- do { \
- ASSERT (base >= 2 && base < numberof (mp_bases)); \
- (n) = 2 + (size_t) ((((size_t) (prec) - 1) * GMP_NUMB_BITS) \
- * mp_bases[(base)].chars_per_bit_exactly); \
+#define MPF_SIGNIFICANT_DIGITS(n, base, prec) \
+ do { \
+ size_t rawn; \
+ /* Arguments are parenthesized so that expression arguments such as \
+ `a & b' keep their meaning inside the comparison and assignment. */ \
+ ASSERT ((base) >= 2 && (base) < numberof (mp_bases)); \
+ /* Digits held by (prec) - 1 limbs, rounded down ... */ \
+ DIGITS_IN_BASE_PER_LIMB (rawn, (prec) - 1, base); \
+ /* ... plus the 2 digit margin explained in the comment above. */ \
+ (n) = rawn + 2; \
} while (0)
#if _GMP_H_HAVE_VA_LIST
-__GMP_DECLSPEC typedef int (*doprnt_format_t) __GMP_PROTO ((void *, const char *, va_list));
-__GMP_DECLSPEC typedef int (*doprnt_memory_t) __GMP_PROTO ((void *, const char *, size_t));
-__GMP_DECLSPEC typedef int (*doprnt_reps_t) __GMP_PROTO ((void *, int, int));
-__GMP_DECLSPEC typedef int (*doprnt_final_t) __GMP_PROTO ((void *));
+typedef int (*doprnt_format_t) (void *, const char *, va_list);
+typedef int (*doprnt_memory_t) (void *, const char *, size_t);
+typedef int (*doprnt_reps_t) (void *, int, int);
+typedef int (*doprnt_final_t) (void *);
struct doprnt_funs_t {
doprnt_format_t format;
size_t alloc;
};
-#define GMP_ASPRINTF_T_INIT(d, output) \
- do { \
- (d).result = (output); \
- (d).alloc = 256; \
- (d).buf = (char *) (*__gmp_allocate_func) ((d).alloc); \
- (d).size = 0; \
+#define GMP_ASPRINTF_T_INIT(d, output) \
+ do { \
+ (d).result = (output); \
+ (d).alloc = 256; \
+ (d).buf = (char *) (*__gmp_allocate_func) ((d).alloc); \
+ (d).size = 0; \
} while (0)
/* If a realloc is necessary, use twice the size actually required, so as to
avoid repeated small reallocs. */
-#define GMP_ASPRINTF_T_NEED(d, n) \
- do { \
- size_t alloc, newsize, newalloc; \
- ASSERT ((d)->alloc >= (d)->size + 1); \
- \
- alloc = (d)->alloc; \
- newsize = (d)->size + (n); \
- if (alloc <= newsize) \
- { \
- newalloc = 2*newsize; \
- (d)->alloc = newalloc; \
- (d)->buf = __GMP_REALLOCATE_FUNC_TYPE ((d)->buf, \
- alloc, newalloc, char); \
- } \
+#define GMP_ASPRINTF_T_NEED(d, n) \
+ do { \
+ size_t alloc, newsize, newalloc; \
+ ASSERT ((d)->alloc >= (d)->size + 1); \
+ \
+ alloc = (d)->alloc; \
+ newsize = (d)->size + (n); \
+ if (alloc <= newsize) \
+ { \
+ newalloc = 2*newsize; \
+ (d)->alloc = newalloc; \
+ (d)->buf = __GMP_REALLOCATE_FUNC_TYPE ((d)->buf, \
+ alloc, newalloc, char); \
+ } \
} while (0)
-__GMP_DECLSPEC int __gmp_asprintf_memory __GMP_PROTO ((struct gmp_asprintf_t *, const char *, size_t));
-__GMP_DECLSPEC int __gmp_asprintf_reps __GMP_PROTO ((struct gmp_asprintf_t *, int, int));
-__GMP_DECLSPEC int __gmp_asprintf_final __GMP_PROTO ((struct gmp_asprintf_t *));
+__GMP_DECLSPEC int __gmp_asprintf_memory (struct gmp_asprintf_t *, const char *, size_t);
+__GMP_DECLSPEC int __gmp_asprintf_reps (struct gmp_asprintf_t *, int, int);
+__GMP_DECLSPEC int __gmp_asprintf_final (struct gmp_asprintf_t *);
/* buf is where to write the next output, and size is how much space is left
there. If the application passed size==0 then that's what we'll have
/* Add the bytes printed by the call to the total retval, or bail out on an
error. */
-#define DOPRNT_ACCUMULATE(call) \
- do { \
- int __ret; \
- __ret = call; \
- if (__ret == -1) \
- goto error; \
- retval += __ret; \
+#define DOPRNT_ACCUMULATE(call) \
+ do { \
+ int __ret; \
+ __ret = call; \
+ if (__ret == -1) \
+ goto error; \
+ retval += __ret; \
} while (0)
-#define DOPRNT_ACCUMULATE_FUN(fun, params) \
- do { \
- ASSERT ((fun) != NULL); \
- DOPRNT_ACCUMULATE ((*(fun)) params); \
+#define DOPRNT_ACCUMULATE_FUN(fun, params) \
+ do { \
+ ASSERT ((fun) != NULL); \
+ DOPRNT_ACCUMULATE ((*(fun)) params); \
} while (0)
-#define DOPRNT_FORMAT(fmt, ap) \
+#define DOPRNT_FORMAT(fmt, ap) \
DOPRNT_ACCUMULATE_FUN (funs->format, (data, fmt, ap))
-#define DOPRNT_MEMORY(ptr, len) \
+#define DOPRNT_MEMORY(ptr, len) \
DOPRNT_ACCUMULATE_FUN (funs->memory, (data, ptr, len))
-#define DOPRNT_REPS(c, n) \
+#define DOPRNT_REPS(c, n) \
DOPRNT_ACCUMULATE_FUN (funs->reps, (data, c, n))
#define DOPRNT_STRING(str) DOPRNT_MEMORY (str, strlen (str))
-#define DOPRNT_REPS_MAYBE(c, n) \
- do { \
- if ((n) != 0) \
- DOPRNT_REPS (c, n); \
+#define DOPRNT_REPS_MAYBE(c, n) \
+ do { \
+ if ((n) != 0) \
+ DOPRNT_REPS (c, n); \
} while (0)
-#define DOPRNT_MEMORY_MAYBE(ptr, len) \
- do { \
- if ((len) != 0) \
- DOPRNT_MEMORY (ptr, len); \
+#define DOPRNT_MEMORY_MAYBE(ptr, len) \
+ do { \
+ if ((len) != 0) \
+ DOPRNT_MEMORY (ptr, len); \
} while (0)
-__GMP_DECLSPEC int __gmp_doprnt __GMP_PROTO ((const struct doprnt_funs_t *, void *, const char *, va_list));
-__GMP_DECLSPEC int __gmp_doprnt_integer __GMP_PROTO ((const struct doprnt_funs_t *, void *, const struct doprnt_params_t *, const char *));
+__GMP_DECLSPEC int __gmp_doprnt (const struct doprnt_funs_t *, void *, const char *, va_list);
+__GMP_DECLSPEC int __gmp_doprnt_integer (const struct doprnt_funs_t *, void *, const struct doprnt_params_t *, const char *);
#define __gmp_doprnt_mpf __gmp_doprnt_mpf2
-__GMP_DECLSPEC int __gmp_doprnt_mpf __GMP_PROTO ((const struct doprnt_funs_t *, void *, const struct doprnt_params_t *, const char *, mpf_srcptr));
+__GMP_DECLSPEC int __gmp_doprnt_mpf (const struct doprnt_funs_t *, void *, const struct doprnt_params_t *, const char *, mpf_srcptr);
-__GMP_DECLSPEC int __gmp_replacement_vsnprintf __GMP_PROTO ((char *, size_t, const char *, va_list));
+__GMP_DECLSPEC int __gmp_replacement_vsnprintf (char *, size_t, const char *, va_list);
#endif /* _GMP_H_HAVE_VA_LIST */
-typedef int (*gmp_doscan_scan_t) __GMP_PROTO ((void *, const char *, ...));
-typedef void *(*gmp_doscan_step_t) __GMP_PROTO ((void *, int));
-typedef int (*gmp_doscan_get_t) __GMP_PROTO ((void *));
-typedef int (*gmp_doscan_unget_t) __GMP_PROTO ((int, void *));
+typedef int (*gmp_doscan_scan_t) (void *, const char *, ...);
+typedef void *(*gmp_doscan_step_t) (void *, int);
+typedef int (*gmp_doscan_get_t) (void *);
+typedef int (*gmp_doscan_unget_t) (int, void *);
struct gmp_doscan_funs_t {
gmp_doscan_scan_t scan;
extern const struct gmp_doscan_funs_t __gmp_sscanf_funs;
#if _GMP_H_HAVE_VA_LIST
-__GMP_DECLSPEC int __gmp_doscan __GMP_PROTO ((const struct gmp_doscan_funs_t *, void *, const char *, va_list));
+__GMP_DECLSPEC int __gmp_doscan (const struct gmp_doscan_funs_t *, void *, const char *, va_list);
#endif
/* For testing and debugging. */
-#define MPZ_CHECK_FORMAT(z) \
- do { \
- ASSERT_ALWAYS (SIZ(z) == 0 || PTR(z)[ABSIZ(z) - 1] != 0); \
- ASSERT_ALWAYS (ALLOC(z) >= ABSIZ(z)); \
- ASSERT_ALWAYS_MPN (PTR(z), ABSIZ(z)); \
+#define MPZ_CHECK_FORMAT(z) \
+ do { \
+ ASSERT_ALWAYS (SIZ(z) == 0 || PTR(z)[ABSIZ(z) - 1] != 0); \
+ ASSERT_ALWAYS (ALLOC(z) >= ABSIZ(z)); \
+ ASSERT_ALWAYS_MPN (PTR(z), ABSIZ(z)); \
} while (0)
-#define MPQ_CHECK_FORMAT(q) \
- do { \
- MPZ_CHECK_FORMAT (mpq_numref (q)); \
- MPZ_CHECK_FORMAT (mpq_denref (q)); \
- ASSERT_ALWAYS (SIZ(mpq_denref(q)) >= 1); \
- \
- if (SIZ(mpq_numref(q)) == 0) \
- { \
- /* should have zero as 0/1 */ \
- ASSERT_ALWAYS (SIZ(mpq_denref(q)) == 1 \
- && PTR(mpq_denref(q))[0] == 1); \
- } \
- else \
- { \
- /* should have no common factors */ \
- mpz_t g; \
- mpz_init (g); \
- mpz_gcd (g, mpq_numref(q), mpq_denref(q)); \
- ASSERT_ALWAYS (mpz_cmp_ui (g, 1) == 0); \
- mpz_clear (g); \
- } \
+#define MPQ_CHECK_FORMAT(q) \
+ do { \
+ MPZ_CHECK_FORMAT (mpq_numref (q)); \
+ MPZ_CHECK_FORMAT (mpq_denref (q)); \
+ ASSERT_ALWAYS (SIZ(mpq_denref(q)) >= 1); \
+ \
+ if (SIZ(mpq_numref(q)) == 0) \
+ { \
+ /* should have zero as 0/1 */ \
+ ASSERT_ALWAYS (SIZ(mpq_denref(q)) == 1 \
+ && PTR(mpq_denref(q))[0] == 1); \
+ } \
+ else \
+ { \
+ /* should have no common factors */ \
+ mpz_t g; \
+ mpz_init (g); \
+ mpz_gcd (g, mpq_numref(q), mpq_denref(q)); \
+ ASSERT_ALWAYS (mpz_cmp_ui (g, 1) == 0); \
+ mpz_clear (g); \
+ } \
} while (0)
-#define MPF_CHECK_FORMAT(f) \
- do { \
- ASSERT_ALWAYS (PREC(f) >= __GMPF_BITS_TO_PREC(53)); \
- ASSERT_ALWAYS (ABSIZ(f) <= PREC(f)+1); \
- if (SIZ(f) == 0) \
- ASSERT_ALWAYS (EXP(f) == 0); \
- if (SIZ(f) != 0) \
- ASSERT_ALWAYS (PTR(f)[ABSIZ(f) - 1] != 0); \
+#define MPF_CHECK_FORMAT(f) \
+ do { \
+ ASSERT_ALWAYS (PREC(f) >= __GMPF_BITS_TO_PREC(53)); \
+ ASSERT_ALWAYS (ABSIZ(f) <= PREC(f)+1); \
+ if (SIZ(f) == 0) \
+ ASSERT_ALWAYS (EXP(f) == 0); \
+ if (SIZ(f) != 0) \
+ ASSERT_ALWAYS (PTR(f)[ABSIZ(f) - 1] != 0); \
} while (0)
-#define MPZ_PROVOKE_REALLOC(z) \
+#define MPZ_PROVOKE_REALLOC(z) \
do { ALLOC(z) = ABSIZ(z); } while (0)
in mpn/x86/x86-defs.m4. Be sure to update that when changing here. */
struct cpuvec_t {
DECL_add_n ((*add_n));
+ DECL_addlsh1_n ((*addlsh1_n));
+ DECL_addlsh2_n ((*addlsh2_n));
DECL_addmul_1 ((*addmul_1));
+ DECL_addmul_2 ((*addmul_2));
+ DECL_bdiv_dbm1c ((*bdiv_dbm1c));
+ DECL_com ((*com));
DECL_copyd ((*copyd));
DECL_copyi ((*copyi));
DECL_divexact_1 ((*divexact_1));
- DECL_divexact_by3c ((*divexact_by3c));
DECL_divrem_1 ((*divrem_1));
DECL_gcd_1 ((*gcd_1));
DECL_lshift ((*lshift));
+ DECL_lshiftc ((*lshiftc));
DECL_mod_1 ((*mod_1));
+ DECL_mod_1_1p ((*mod_1_1p));
+ DECL_mod_1_1p_cps ((*mod_1_1p_cps));
+ DECL_mod_1s_2p ((*mod_1s_2p));
+ DECL_mod_1s_2p_cps ((*mod_1s_2p_cps));
+ DECL_mod_1s_4p ((*mod_1s_4p));
+ DECL_mod_1s_4p_cps ((*mod_1s_4p_cps));
DECL_mod_34lsub1 ((*mod_34lsub1));
DECL_modexact_1c_odd ((*modexact_1c_odd));
DECL_mul_1 ((*mul_1));
DECL_mul_basecase ((*mul_basecase));
+ DECL_mullo_basecase ((*mullo_basecase));
DECL_preinv_divrem_1 ((*preinv_divrem_1));
DECL_preinv_mod_1 ((*preinv_mod_1));
+ DECL_redc_1 ((*redc_1));
+ DECL_redc_2 ((*redc_2));
DECL_rshift ((*rshift));
DECL_sqr_basecase ((*sqr_basecase));
DECL_sub_n ((*sub_n));
+ DECL_sublsh1_n ((*sublsh1_n));
DECL_submul_1 ((*submul_1));
- int initialized;
mp_size_t mul_toom22_threshold;
mp_size_t mul_toom33_threshold;
mp_size_t sqr_toom2_threshold;
mp_size_t sqr_toom3_threshold;
+ mp_size_t bmod_1_to_mod_1_threshold;
};
__GMP_DECLSPEC extern struct cpuvec_t __gmpn_cpuvec;
+__GMP_DECLSPEC extern int __gmpn_cpuvec_initialized;
#endif /* x86 fat binary */
-__GMP_DECLSPEC void __gmpn_cpuvec_init __GMP_PROTO ((void));
+__GMP_DECLSPEC void __gmpn_cpuvec_init (void);
/* Get a threshold "field" from __gmpn_cpuvec, running __gmpn_cpuvec_init()
if that hasn't yet been done (to establish the right values). */
-#define CPUVEC_THRESHOLD(field) \
- ((LIKELY (__gmpn_cpuvec.initialized) ? 0 : (__gmpn_cpuvec_init (), 0)), \
+#define CPUVEC_THRESHOLD(field) \
+ ((LIKELY (__gmpn_cpuvec_initialized) ? 0 : (__gmpn_cpuvec_init (), 0)), \
__gmpn_cpuvec.field)
#if HAVE_NATIVE_mpn_add_nc
#define mpn_add_nc __MPN(add_nc)
-__GMP_DECLSPEC mp_limb_t mpn_add_nc __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));
+__GMP_DECLSPEC mp_limb_t mpn_add_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
#else
static inline
mp_limb_t
#if HAVE_NATIVE_mpn_sub_nc
#define mpn_sub_nc __MPN(sub_nc)
-__GMP_DECLSPEC mp_limb_t mpn_sub_nc __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t));
+__GMP_DECLSPEC mp_limb_t mpn_sub_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
#else
static inline mp_limb_t
mpn_sub_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t ci)
#define MUL_TOOM42_TO_TOOM63_THRESHOLD mul_toom42_to_toom63_threshold
extern mp_size_t mul_toom42_to_toom63_threshold;
+/* Redirect the threshold to an external variable, like the neighbouring
+ *_THRESHOLD macros. The replacement text must not end in a semicolon,
+ otherwise the macro cannot be used inside an expression. */
+#undef MUL_TOOM43_TO_TOOM54_THRESHOLD
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD mul_toom43_to_toom54_threshold
+extern mp_size_t mul_toom43_to_toom54_threshold;
+
#undef MUL_FFT_THRESHOLD
#define MUL_FFT_THRESHOLD mul_fft_threshold
extern mp_size_t mul_fft_threshold;
#define MULLO_MUL_N_THRESHOLD mullo_mul_n_threshold
extern mp_size_t mullo_mul_n_threshold;
+#undef MULMID_TOOM42_THRESHOLD
+#define MULMID_TOOM42_THRESHOLD mulmid_toom42_threshold
+extern mp_size_t mulmid_toom42_threshold;
+
+#undef DIV_QR_2_PI2_THRESHOLD
+#define DIV_QR_2_PI2_THRESHOLD div_qr_2_pi2_threshold
+extern mp_size_t div_qr_2_pi2_threshold;
+
#undef DC_DIV_QR_THRESHOLD
#define DC_DIV_QR_THRESHOLD dc_div_qr_threshold
extern mp_size_t dc_div_qr_threshold;
#define HGCD_THRESHOLD hgcd_threshold
extern mp_size_t hgcd_threshold;
+#undef HGCD_APPR_THRESHOLD
+#define HGCD_APPR_THRESHOLD hgcd_appr_threshold
+extern mp_size_t hgcd_appr_threshold;
+
+#undef HGCD_REDUCE_THRESHOLD
+#define HGCD_REDUCE_THRESHOLD hgcd_reduce_threshold
+extern mp_size_t hgcd_reduce_threshold;
+
#undef GCD_DC_THRESHOLD
#define GCD_DC_THRESHOLD gcd_dc_threshold
extern mp_size_t gcd_dc_threshold;
#define MOD_1_UNNORM_THRESHOLD mod_1_unnorm_threshold
extern mp_size_t mod_1_unnorm_threshold;
+#undef MOD_1_1P_METHOD
+#define MOD_1_1P_METHOD mod_1_1p_method
+extern int mod_1_1p_method;
+
#undef MOD_1N_TO_MOD_1_1_THRESHOLD
#define MOD_1N_TO_MOD_1_1_THRESHOLD mod_1n_to_mod_1_1_threshold
extern mp_size_t mod_1n_to_mod_1_1_threshold;
#define SET_STR_PRECOMPUTE_THRESHOLD set_str_precompute_threshold
extern mp_size_t set_str_precompute_threshold;
+#undef FAC_ODD_THRESHOLD
+#define FAC_ODD_THRESHOLD fac_odd_threshold
+extern mp_size_t fac_odd_threshold;
+
+#undef FAC_DSC_THRESHOLD
+#define FAC_DSC_THRESHOLD fac_dsc_threshold
+extern mp_size_t fac_dsc_threshold;
+
#undef FFT_TABLE_ATTRS
#define FFT_TABLE_ATTRS
extern mp_size_t mpn_fft_table[2][MPN_FFT_TABLE_SIZE];
#define SQR_TOOM8_THRESHOLD_LIMIT 1200
#define MULLO_BASECASE_THRESHOLD_LIMIT 200
#define GET_STR_THRESHOLD_LIMIT 150
+#define FAC_DSC_THRESHOLD_LIMIT 2048
#endif /* TUNE_PROGRAM_BUILD */
(3 * (an) + GMP_NUMB_BITS)
#define mpn_toom6_sqr_itch(n) \
-( ((n) - SQR_TOOM6_THRESHOLD)*2 + \
- MAX(SQR_TOOM6_THRESHOLD*2 + GMP_NUMB_BITS*6, \
- mpn_toom4_sqr_itch(SQR_TOOM6_THRESHOLD)) )
+ (((n) - SQR_TOOM6_THRESHOLD)*2 + \
+ MAX(SQR_TOOM6_THRESHOLD*2 + GMP_NUMB_BITS*6, \
+ mpn_toom4_sqr_itch(SQR_TOOM6_THRESHOLD)))
+#define MUL_TOOM6H_MIN \
+ ((MUL_TOOM6H_THRESHOLD > MUL_TOOM44_THRESHOLD) ? \
+ MUL_TOOM6H_THRESHOLD : MUL_TOOM44_THRESHOLD)
#define mpn_toom6_mul_n_itch(n) \
-( ((n) - MUL_TOOM6H_THRESHOLD)*2 + \
- MAX(MUL_TOOM6H_THRESHOLD*2 + GMP_NUMB_BITS*6, \
- mpn_toom44_mul_itch(MUL_TOOM6H_THRESHOLD,MUL_TOOM6H_THRESHOLD)) )
+ (((n) - MUL_TOOM6H_MIN)*2 + \
+ MAX(MUL_TOOM6H_MIN*2 + GMP_NUMB_BITS*6, \
+ mpn_toom44_mul_itch(MUL_TOOM6H_MIN,MUL_TOOM6H_MIN)))
static inline mp_size_t
mpn_toom6h_mul_itch (mp_size_t an, mp_size_t bn) {
}
#define mpn_toom8_sqr_itch(n) \
-( (((n)*15)>>3) - ((SQR_TOOM8_THRESHOLD*15)>>3) + \
- MAX(((SQR_TOOM8_THRESHOLD*15)>>3) + GMP_NUMB_BITS*6, \
- mpn_toom6_sqr_itch(SQR_TOOM8_THRESHOLD)) )
+ ((((n)*15)>>3) - ((SQR_TOOM8_THRESHOLD*15)>>3) + \
+ MAX(((SQR_TOOM8_THRESHOLD*15)>>3) + GMP_NUMB_BITS*6, \
+ mpn_toom6_sqr_itch(SQR_TOOM8_THRESHOLD)))
+#define MUL_TOOM8H_MIN \
+ ((MUL_TOOM8H_THRESHOLD > MUL_TOOM6H_MIN) ? \
+ MUL_TOOM8H_THRESHOLD : MUL_TOOM6H_MIN)
#define mpn_toom8_mul_n_itch(n) \
-( (((n)*15)>>3) - ((MUL_TOOM8H_THRESHOLD*15)>>3) + \
- MAX(((MUL_TOOM8H_THRESHOLD*15)>>3) + GMP_NUMB_BITS*6, \
- mpn_toom6_mul_n_itch(MUL_TOOM8H_THRESHOLD)) )
+ ((((n)*15)>>3) - ((MUL_TOOM8H_MIN*15)>>3) + \
+ MAX(((MUL_TOOM8H_MIN*15)>>3) + GMP_NUMB_BITS*6, \
+ mpn_toom6_mul_n_itch(MUL_TOOM8H_MIN)))
static inline mp_size_t
mpn_toom8h_mul_itch (mp_size_t an, mp_size_t bn) {
return 9 * n + 3;
}
+/* Scratch size, in limbs, reported for mpn_toom54_mul with operand sizes an
+ and bn: 9 * n + 3, where n is one more than (an - 1) / 5 when
+ 4 * an >= 5 * bn, and one more than (bn - 1) / 4 otherwise. */
+static inline mp_size_t
+mpn_toom54_mul_itch (mp_size_t an, mp_size_t bn)
+{
+  mp_size_t n;
+
+  /* The divisions are carried out in size_t, as the casts request. */
+  if (4 * an >= 5 * bn)
+    n = 1 + (an - 1) / (size_t) 5;
+  else
+    n = 1 + (bn - 1) / (size_t) 4;
+
+  return 9 * n + 3;
+}
+
+/* let S(n) = space required for input size n,
+ then S(n) = 3 floor(n/2) + 1 + S(floor(n/2)). */
+#define mpn_toom42_mulmid_itch(n) \
+ (3 * (n) + GMP_NUMB_BITS)
+
#if 0
#define mpn_fft_mul mpn_mul_fft_full
#else
std::istream &__gmpz_operator_in_nowhite (std::istream &, mpz_ptr, char);
int __gmp_istream_set_base (std::istream &, char &, bool &, bool &);
void __gmp_istream_set_digits (std::string &, std::istream &, char &, bool &, int);
-void __gmp_doprnt_params_from_ios (struct doprnt_params_t *p, std::ios &o);
-std::ostream& __gmp_doprnt_integer_ostream (std::ostream &o, struct doprnt_params_t *p, char *s);
+void __gmp_doprnt_params_from_ios (struct doprnt_params_t *, std::ios &);
+std::ostream& __gmp_doprnt_integer_ostream (std::ostream &, struct doprnt_params_t *, char *);
extern const struct doprnt_funs_t __gmp_asprintf_funs_noformat;
#endif /* __cplusplus */
/* gmpxx.h -- C++ class wrapper for GMP types. -*- C++ -*-
-Copyright 2001, 2002, 2003, 2006, 2008 Free Software Foundation, Inc.
+Copyright 2001, 2002, 2003, 2006, 2008, 2011, 2012 Free Software Foundation,
+Inc.
This file is part of the GNU MP Library.
You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
-/* the C++ compiler must implement the following features:
- - member templates
- - partial specialization of templates
- - namespace support
- for g++, this means version 2.91 or higher
- for other compilers, I don't know */
-#ifdef __GNUC__
-#if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 91)
-#error gmpxx.h requires g++ version 2.91 (egcs 1.1.2) or higher
-#endif
-#endif
-
#ifndef __GMP_PLUSPLUS__
#define __GMP_PLUSPLUS__
#include <iosfwd>
#include <cstring> /* for strlen */
+#include <limits> /* numeric_limits */
+#include <utility>
+#include <algorithm> /* swap */
#include <string>
#include <stdexcept>
#include <cfloat>
#include <gmp.h>
+// wrapper for gcc's __builtin_constant_p
+// __builtin_constant_p has been in gcc since forever,
+// but g++-3.4 miscompiles it.
+#if __GMP_GNUC_PREREQ(4, 2)
+#define __GMPXX_CONSTANT(X) __builtin_constant_p(X)
+#else
+#define __GMPXX_CONSTANT(X) false
+#endif
+
+// Use C++11 features
+#ifndef __GMPXX_USE_CXX11
+#if __cplusplus >= 201103L
+#define __GMPXX_USE_CXX11 1
+#else
+#define __GMPXX_USE_CXX11 0
+#endif
+#endif
+
+#if __GMPXX_USE_CXX11
+#define __GMPXX_NOEXCEPT noexcept
+#include <type_traits> // for common_type
+#else
+#define __GMPXX_NOEXCEPT
+#endif
+
+// Max allocations for plain types when converted to mpz_t
+#define __GMPZ_DBL_LIMBS (2 + DBL_MAX_EXP / GMP_NUMB_BITS)
+
+#if GMP_NAIL_BITS != 0 && ! defined _LONG_LONG_LIMB
+#define __GMPZ_ULI_LIMBS 2
+#else
+#define __GMPZ_ULI_LIMBS 1
+#endif
+
+// Store l in p by writing the limb array and the size field directly.
+// Nothing here checks or grows the allocation, so p->_mp_d must already
+// point to at least __GMPZ_ULI_LIMBS writable limbs.
+inline void __mpz_set_ui_safe(mpz_ptr p, unsigned long l)
+{
+ // Size is 0 when l == 0 and 1 otherwise (possibly raised to 2 below).
+ p->_mp_size = (l != 0);
+ // Low limb: only the GMP_NUMB_MASK bits of l are kept.
+ p->_mp_d[0] = l & GMP_NUMB_MASK;
+#if __GMPZ_ULI_LIMBS > 1
+ // Two-limb configuration: the bits above GMP_NUMB_BITS go into limb 1,
+ // which counts towards the size only when it is nonzero.
+ l >>= GMP_NUMB_BITS;
+ p->_mp_d[1] = l;
+ p->_mp_size += (l != 0);
+#endif
+}
+
+// Signed counterpart of __mpz_set_ui_safe: store the magnitude of l in p,
+// then flip the sign of p when l is negative.
+inline void __mpz_set_si_safe(mpz_ptr p, long l)
+{
+  if (l >= 0)
+    {
+      // Note: we know the high bit of l is 0 so we could do slightly better
+      __mpz_set_ui_safe(p, l);
+      return;
+    }
+
+  // Negate after converting to unsigned long, not before.
+  const unsigned long magnitude = -static_cast<unsigned long>(l);
+  __mpz_set_ui_safe(p, magnitude);
+  mpz_neg(p, p);
+}
+
+// Fake temporary variables
+//
+// Each macro below expands to a sequence of declarations and statements
+// (not wrapped in a block) that introduces two names into the enclosing
+// scope: `temp' (an mpz_t or mpq_t) and `limbs' (a local mp_limb_t array
+// that temp's limb pointer(s) are set to). Consequently at most one of
+// them can be used per scope.
+//
+// The value is taken from a variable that must already exist in the
+// enclosing scope under a fixed name: `l' for the _UI and _SI variants,
+// `d' for the _D variant. It is not a macro parameter.
+//
+// Only __GMPXX_TMPZ_D sets _mp_alloc; the other variants leave it unset.
+// None of the macros calls an init or clear function on temp.
+#define __GMPXX_TMPZ_UI \
+ mpz_t temp; \
+ mp_limb_t limbs[__GMPZ_ULI_LIMBS]; \
+ temp->_mp_d = limbs; \
+ __mpz_set_ui_safe (temp, l)
+#define __GMPXX_TMPZ_SI \
+ mpz_t temp; \
+ mp_limb_t limbs[__GMPZ_ULI_LIMBS]; \
+ temp->_mp_d = limbs; \
+ __mpz_set_si_safe (temp, l)
+#define __GMPXX_TMPZ_D \
+ mpz_t temp; \
+ mp_limb_t limbs[__GMPZ_DBL_LIMBS]; \
+ temp->_mp_d = limbs; \
+ temp->_mp_alloc = __GMPZ_DBL_LIMBS; \
+ mpz_set_d (temp, d)
+
+// Rational variants: the numerator uses the first __GMPZ_ULI_LIMBS entries
+// of `limbs'; the denominator uses the one remaining entry and is set to 1.
+#define __GMPXX_TMPQ_UI \
+ mpq_t temp; \
+ mp_limb_t limbs[__GMPZ_ULI_LIMBS+1]; \
+ mpq_numref(temp)->_mp_d = limbs; \
+ __mpz_set_ui_safe (mpq_numref(temp), l); \
+ mpq_denref(temp)->_mp_d = limbs + __GMPZ_ULI_LIMBS; \
+ mpq_denref(temp)->_mp_size = 1; \
+ mpq_denref(temp)->_mp_d[0] = 1
+#define __GMPXX_TMPQ_SI \
+ mpq_t temp; \
+ mp_limb_t limbs[__GMPZ_ULI_LIMBS+1]; \
+ mpq_numref(temp)->_mp_d = limbs; \
+ __mpz_set_si_safe (mpq_numref(temp), l); \
+ mpq_denref(temp)->_mp_d = limbs + __GMPZ_ULI_LIMBS; \
+ mpq_denref(temp)->_mp_size = 1; \
+ mpq_denref(temp)->_mp_d[0] = 1
+
+// Magnitude of l as an unsigned long. The negation is performed on the
+// unsigned value, never on the signed one.
+inline unsigned long __gmpxx_abs_ui (signed long l)
+{
+  const unsigned long bits = static_cast<unsigned long>(l);
+  if (l < 0)
+    return -bits;
+  return bits;
+}
/**************** Function objects ****************/
/* Any evaluation of a __gmp_expr ends up calling one of these functions
{ mpz_add(z, w, v); }
static void eval(mpz_ptr z, mpz_srcptr w, unsigned long int l)
- { mpz_add_ui(z, w, l); }
- static void eval(mpz_ptr z, unsigned long int l, mpz_srcptr w)
- { mpz_add_ui(z, w, l); }
- static void eval(mpz_ptr z, mpz_srcptr w, signed long int l)
{
- if (l >= 0)
- mpz_add_ui(z, w, l);
+ // Ideally, those checks should happen earlier so that the tree
+ // generated for a+0+b would just be sum(a,b).
+ if (__GMPXX_CONSTANT(l) && l == 0)
+ {
+ if (z != w) mpz_set(z, w);
+ }
else
- mpz_sub_ui(z, w, -l);
+ mpz_add_ui(z, w, l);
}
- static void eval(mpz_ptr z, signed long int l, mpz_srcptr w)
+ static void eval(mpz_ptr z, unsigned long int l, mpz_srcptr w)
+ { eval(z, w, l); }
+ static void eval(mpz_ptr z, mpz_srcptr w, signed long int l)
{
if (l >= 0)
- mpz_add_ui(z, w, l);
+ eval(z, w, static_cast<unsigned long>(l));
else
- mpz_sub_ui(z, w, -l);
+ mpz_sub_ui(z, w, -static_cast<unsigned long>(l));
}
+ static void eval(mpz_ptr z, signed long int l, mpz_srcptr w)
+ { eval(z, w, l); }
static void eval(mpz_ptr z, mpz_srcptr w, double d)
- {
- mpz_t temp;
- mpz_init_set_d(temp, d);
- mpz_add(z, w, temp);
- mpz_clear(temp);
- }
+ { __GMPXX_TMPZ_D; mpz_add (z, w, temp); }
static void eval(mpz_ptr z, double d, mpz_srcptr w)
- {
- mpz_t temp;
- mpz_init_set_d(temp, d);
- mpz_add(z, temp, w);
- mpz_clear(temp);
- }
+ { eval(z, w, d); }
static void eval(mpq_ptr q, mpq_srcptr r, mpq_srcptr s)
{ mpq_add(q, r, s); }
static void eval(mpq_ptr q, mpq_srcptr r, unsigned long int l)
- { mpq_set(q, r); mpz_addmul_ui(mpq_numref(q), mpq_denref(q), l); }
- static void eval(mpq_ptr q, unsigned long int l, mpq_srcptr r)
- { mpq_set(q, r); mpz_addmul_ui(mpq_numref(q), mpq_denref(q), l); }
- static void eval(mpq_ptr q, mpq_srcptr r, signed long int l)
{
- mpq_set(q, r);
- if (l >= 0)
- mpz_addmul_ui(mpq_numref(q), mpq_denref(q), l);
+ if (__GMPXX_CONSTANT(l) && l == 0)
+ {
+ if (q != r) mpq_set(q, r);
+ }
else
- mpz_submul_ui(mpq_numref(q), mpq_denref(q), -l);
+ {
+ if (q == r)
+ mpz_addmul_ui(mpq_numref(q), mpq_denref(q), l);
+ else
+ {
+ mpz_mul_ui(mpq_numref(q), mpq_denref(r), l);
+ mpz_add(mpq_numref(q), mpq_numref(q), mpq_numref(r));
+ mpz_set(mpq_denref(q), mpq_denref(r));
+ }
+ }
}
+ static void eval(mpq_ptr q, unsigned long int l, mpq_srcptr r)
+ { eval(q, r, l); }
+ static inline void eval(mpq_ptr q, mpq_srcptr r, signed long int l);
+ // defined after __gmp_binary_minus
static void eval(mpq_ptr q, signed long int l, mpq_srcptr r)
- {
- mpq_set(q, r);
- if (l >= 0)
- mpz_addmul_ui(mpq_numref(q), mpq_denref(q), l);
- else
- mpz_submul_ui(mpq_numref(q), mpq_denref(q), -l);
- }
+ { eval(q, r, l); }
static void eval(mpq_ptr q, mpq_srcptr r, double d)
{
mpq_t temp;
mpq_clear(temp);
}
static void eval(mpq_ptr q, double d, mpq_srcptr r)
- {
- mpq_t temp;
- mpq_init(temp);
- mpq_set_d(temp, d);
- mpq_add(q, temp, r);
- mpq_clear(temp);
- }
+ { eval(q, r, d); }
static void eval(mpq_ptr q, mpq_srcptr r, mpz_srcptr z)
- { mpq_set(q, r); mpz_addmul(mpq_numref(q), mpq_denref(q), z); }
+ {
+ if (q == r)
+ mpz_addmul(mpq_numref(q), mpq_denref(q), z);
+ else
+ {
+ mpz_mul(mpq_numref(q), mpq_denref(r), z);
+ mpz_add(mpq_numref(q), mpq_numref(q), mpq_numref(r));
+ mpz_set(mpq_denref(q), mpq_denref(r));
+ }
+ }
static void eval(mpq_ptr q, mpz_srcptr z, mpq_srcptr r)
- { mpq_set(q, r); mpz_addmul(mpq_numref(q), mpq_denref(q), z); }
+ { eval(q, r, z); }
static void eval(mpf_ptr f, mpf_srcptr g, mpf_srcptr h)
{ mpf_add(f, g, h); }
if (l >= 0)
mpf_add_ui(f, g, l);
else
- mpf_sub_ui(f, g, -l);
+ mpf_sub_ui(f, g, -static_cast<unsigned long>(l));
}
static void eval(mpf_ptr f, signed long int l, mpf_srcptr g)
- {
- if (l >= 0)
- mpf_add_ui(f, g, l);
- else
- mpf_sub_ui(f, g, -l);
- }
+ { eval(f, g, l); }
static void eval(mpf_ptr f, mpf_srcptr g, double d)
{
mpf_t temp;
mpf_clear(temp);
}
static void eval(mpf_ptr f, double d, mpf_srcptr g)
- {
- mpf_t temp;
- mpf_init2(temp, 8*sizeof(double));
- mpf_set_d(temp, d);
- mpf_add(f, temp, g);
- mpf_clear(temp);
- }
+ { eval(f, g, d); }
};
struct __gmp_binary_minus
{ mpz_sub(z, w, v); }
static void eval(mpz_ptr z, mpz_srcptr w, unsigned long int l)
- { mpz_sub_ui(z, w, l); }
+ {
+ if (__GMPXX_CONSTANT(l) && l == 0)
+ {
+ if (z != w) mpz_set(z, w);
+ }
+ else
+ mpz_sub_ui(z, w, l);
+ }
static void eval(mpz_ptr z, unsigned long int l, mpz_srcptr w)
- { mpz_ui_sub(z, l, w); }
+ {
+ if (__GMPXX_CONSTANT(l) && l == 0)
+ {
+ mpz_neg(z, w);
+ }
+ else
+ mpz_ui_sub(z, l, w);
+ }
static void eval(mpz_ptr z, mpz_srcptr w, signed long int l)
{
if (l >= 0)
- mpz_sub_ui(z, w, l);
+ eval(z, w, static_cast<unsigned long>(l));
else
- mpz_add_ui(z, w, -l);
+ mpz_add_ui(z, w, -static_cast<unsigned long>(l));
}
static void eval(mpz_ptr z, signed long int l, mpz_srcptr w)
{
if (l >= 0)
- mpz_ui_sub(z, l, w);
+ eval(z, static_cast<unsigned long>(l), w);
else
{
- mpz_add_ui(z, w, -l);
+ mpz_add_ui(z, w, -static_cast<unsigned long>(l));
mpz_neg(z, z);
}
}
static void eval(mpz_ptr z, mpz_srcptr w, double d)
- {
- mpz_t temp;
- mpz_init_set_d(temp, d);
- mpz_sub(z, w, temp);
- mpz_clear(temp);
- }
+ { __GMPXX_TMPZ_D; mpz_sub (z, w, temp); }
static void eval(mpz_ptr z, double d, mpz_srcptr w)
- {
- mpz_t temp;
- mpz_init_set_d(temp, d);
- mpz_sub(z, temp, w);
- mpz_clear(temp);
- }
+ { __GMPXX_TMPZ_D; mpz_sub (z, temp, w); }
static void eval(mpq_ptr q, mpq_srcptr r, mpq_srcptr s)
{ mpq_sub(q, r, s); }
static void eval(mpq_ptr q, mpq_srcptr r, unsigned long int l)
- { mpq_set(q, r); mpz_submul_ui(mpq_numref(q), mpq_denref(q), l); }
- static void eval(mpq_ptr q, unsigned long int l, mpq_srcptr r)
- { mpq_neg(q, r); mpz_addmul_ui(mpq_numref(q), mpq_denref(q), l); }
- static void eval(mpq_ptr q, mpq_srcptr r, signed long int l)
{
- mpq_set(q, r);
- if (l >= 0)
- mpz_submul_ui(mpq_numref(q), mpq_denref(q), l);
+ if (__GMPXX_CONSTANT(l) && l == 0)
+ {
+ if (q != r) mpq_set(q, r);
+ }
else
- mpz_addmul_ui(mpq_numref(q), mpq_denref(q), -l);
+ {
+ if (q == r)
+ mpz_submul_ui(mpq_numref(q), mpq_denref(q), l);
+ else
+ {
+ mpz_mul_ui(mpq_numref(q), mpq_denref(r), l);
+ mpz_sub(mpq_numref(q), mpq_numref(r), mpq_numref(q));
+ mpz_set(mpq_denref(q), mpq_denref(r));
+ }
+ }
}
- static void eval(mpq_ptr q, signed long int l, mpq_srcptr r)
+ static void eval(mpq_ptr q, unsigned long int l, mpq_srcptr r)
+ { eval(q, r, l); mpq_neg(q, q); }
+ static void eval(mpq_ptr q, mpq_srcptr r, signed long int l)
{
- mpq_neg(q, r);
if (l >= 0)
- mpz_addmul_ui(mpq_numref(q), mpq_denref(q), l);
+ eval(q, r, static_cast<unsigned long>(l));
else
- mpz_submul_ui(mpq_numref(q), mpq_denref(q), -l);
+ __gmp_binary_plus::eval(q, r, -static_cast<unsigned long>(l));
}
+ static void eval(mpq_ptr q, signed long int l, mpq_srcptr r)
+ { eval(q, r, l); mpq_neg(q, q); }
static void eval(mpq_ptr q, mpq_srcptr r, double d)
{
mpq_t temp;
}
static void eval(mpq_ptr q, mpq_srcptr r, mpz_srcptr z)
- { mpq_set(q, r); mpz_submul(mpq_numref(q), mpq_denref(q), z); }
+ {
+ if (q == r)
+ mpz_submul(mpq_numref(q), mpq_denref(q), z);
+ else
+ {
+ mpz_mul(mpq_numref(q), mpq_denref(r), z);
+ mpz_sub(mpq_numref(q), mpq_numref(r), mpq_numref(q));
+ mpz_set(mpq_denref(q), mpq_denref(r));
+ }
+ }
static void eval(mpq_ptr q, mpz_srcptr z, mpq_srcptr r)
- { mpq_neg(q, r); mpz_addmul(mpq_numref(q), mpq_denref(q), z); }
+ { eval(q, r, z); mpq_neg(q, q); }
static void eval(mpf_ptr f, mpf_srcptr g, mpf_srcptr h)
{ mpf_sub(f, g, h); }
if (l >= 0)
mpf_sub_ui(f, g, l);
else
- mpf_add_ui(f, g, -l);
+ mpf_add_ui(f, g, -static_cast<unsigned long>(l));
}
static void eval(mpf_ptr f, signed long int l, mpf_srcptr g)
{
if (l >= 0)
mpf_sub_ui(f, g, l);
else
- mpf_add_ui(f, g, -l);
+ mpf_add_ui(f, g, -static_cast<unsigned long>(l));
mpf_neg(f, f);
}
static void eval(mpf_ptr f, mpf_srcptr g, double d)
}
};
+// Out-of-class definition of the (mpq, signed long) overload; defined here so it can reference __gmp_binary_minus
+inline void
+__gmp_binary_plus::eval(mpq_ptr q, mpq_srcptr r, signed long int l)
+{
+ if (l >= 0) // non-negative: forward to the unsigned long overload of this struct
+ eval(q, r, static_cast<unsigned long>(l));
+ else // negative: hand the magnitude of l to the subtraction evaluator instead
+ __gmp_binary_minus::eval(q, r, -static_cast<unsigned long>(l)); // cast before negating so the negation is done in unsigned arithmetic
+}
+
+struct __gmp_binary_lshift // evaluators for a left shift by l bits on mpz, mpq and mpf operands
+{
+ static void eval(mpz_ptr z, mpz_srcptr w, mp_bitcnt_t l)
+ {
+ if (__GMPXX_CONSTANT(l) && (l == 0)) // shift by zero; __GMPXX_CONSTANT is defined elsewhere, presumably a compile-time-constant test
+ {
+ if (z != w) mpz_set(z, w); // a shift by 0 is a plain copy, skipped when z and w are the same object
+ }
+ else
+ mpz_mul_2exp(z, w, l); // general case: z = w * 2^l
+ }
+ static void eval(mpq_ptr q, mpq_srcptr r, mp_bitcnt_t l)
+ {
+ if (__GMPXX_CONSTANT(l) && (l == 0)) // same shift-by-zero shortcut as the mpz overload
+ {
+ if (q != r) mpq_set(q, r); // copy only when source and destination differ
+ }
+ else
+ mpq_mul_2exp(q, r, l); // general case: q = r * 2^l
+ }
+ static void eval(mpf_ptr f, mpf_srcptr g, mp_bitcnt_t l)
+ { mpf_mul_2exp(f, g, l); } // no zero shortcut for mpf: always calls mpf_mul_2exp
+};
+
+struct __gmp_binary_rshift // evaluators for a right shift by l bits on mpz, mpq and mpf operands
+{
+ static void eval(mpz_ptr z, mpz_srcptr w, mp_bitcnt_t l)
+ {
+ if (__GMPXX_CONSTANT(l) && (l == 0)) // shift by zero; __GMPXX_CONSTANT is defined elsewhere, presumably a compile-time-constant test
+ {
+ if (z != w) mpz_set(z, w); // a shift by 0 is a plain copy, skipped when z and w are the same object
+ }
+ else
+ mpz_fdiv_q_2exp(z, w, l); // general case: floor division of w by 2^l (rounds toward minus infinity)
+ }
+ static void eval(mpq_ptr q, mpq_srcptr r, mp_bitcnt_t l)
+ {
+ if (__GMPXX_CONSTANT(l) && (l == 0)) // same shift-by-zero shortcut as the mpz overload
+ {
+ if (q != r) mpq_set(q, r); // copy only when source and destination differ
+ }
+ else
+ mpq_div_2exp(q, r, l); // general case: q = r / 2^l
+ }
+ static void eval(mpf_ptr f, mpf_srcptr g, mp_bitcnt_t l)
+ { mpf_div_2exp(f, g, l); } // no zero shortcut for mpf: always calls mpf_div_2exp
+};
+
struct __gmp_binary_multiplies
{
static void eval(mpz_ptr z, mpz_srcptr w, mpz_srcptr v)
{ mpz_mul(z, w, v); }
static void eval(mpz_ptr z, mpz_srcptr w, unsigned long int l)
- { mpz_mul_ui(z, w, l); }
+ {
+// gcc-3.3 doesn't have __builtin_ctzl. Don't bother optimizing for old gcc.
+#if __GMP_GNUC_PREREQ(3, 4)
+ if (__GMPXX_CONSTANT(l) && (l & (l-1)) == 0) // (l & (l-1)) == 0 holds exactly when l is zero or a power of two
+ {
+ if (l == 0)
+ {
+ z->_mp_size = 0; // product with 0: make z zero by clearing its size field, without touching w
+ }
+ else
+ {
+ __gmp_binary_lshift::eval(z, w, __builtin_ctzl(l)); // power of two: multiply by shifting left by the number of trailing zero bits of l
+ }
+ }
+ else
+#endif
+ mpz_mul_ui(z, w, l); // general case, and the only path when the #if block is compiled out
+ }
static void eval(mpz_ptr z, unsigned long int l, mpz_srcptr w)
- { mpz_mul_ui(z, w, l); }
+ { eval(z, w, l); }
static void eval(mpz_ptr z, mpz_srcptr w, signed long int l)
- { mpz_mul_si (z, w, l); }
- static void eval(mpz_ptr z, signed long int l, mpz_srcptr w)
- { mpz_mul_si (z, w, l); }
- static void eval(mpz_ptr z, mpz_srcptr w, double d)
{
- mpz_t temp;
- mpz_init_set_d(temp, d);
- mpz_mul(z, w, temp);
- mpz_clear(temp);
+ if (__GMPXX_CONSTANT(l))
+ {
+ if (l >= 0)
+ eval(z, w, static_cast<unsigned long>(l));
+ else
+ {
+ eval(z, w, -static_cast<unsigned long>(l));
+ mpz_neg(z, z);
+ }
+ }
+ else
+ mpz_mul_si (z, w, l);
}
+ static void eval(mpz_ptr z, signed long int l, mpz_srcptr w)
+ { eval(z, w, l); }
+ static void eval(mpz_ptr z, mpz_srcptr w, double d)
+ { __GMPXX_TMPZ_D; mpz_mul (z, w, temp); }
static void eval(mpz_ptr z, double d, mpz_srcptr w)
- {
- mpz_t temp;
- mpz_init_set_d(temp, d);
- mpz_mul(z, temp, w);
- mpz_clear(temp);
- }
+ { eval(z, w, d); }
static void eval(mpq_ptr q, mpq_srcptr r, mpq_srcptr s)
{ mpq_mul(q, r, s); }
static void eval(mpq_ptr q, mpq_srcptr r, unsigned long int l)
{
- mpq_t temp;
- mpq_init(temp);
- mpq_set_ui(temp, l, 1);
- mpq_mul(q, r, temp);
- mpq_clear(temp);
+#if __GMP_GNUC_PREREQ(3, 4)
+ if (__GMPXX_CONSTANT(l) && (l & (l-1)) == 0) // (l & (l-1)) == 0 holds exactly when l is zero or a power of two
+ {
+ if (l == 0)
+ {
+ mpq_set_ui(q, 0, 1); // product with 0: store the rational 0/1
+ }
+ else
+ {
+ __gmp_binary_lshift::eval(q, r, __builtin_ctzl(l)); // power of two: multiply by shifting left by the number of trailing zero bits of l
+ }
+ }
+ else
+#endif
+ {
+ __GMPXX_TMPQ_UI; // macro defined elsewhere; presumably declares a temporary mpq named temp holding l -- confirm against its definition
+ mpq_mul (q, r, temp);
+ }
}
static void eval(mpq_ptr q, unsigned long int l, mpq_srcptr r)
- {
- mpq_t temp;
- mpq_init(temp);
- mpq_set_ui(temp, l, 1);
- mpq_mul(q, temp, r);
- mpq_clear(temp);
- }
+ { eval(q, r, l); }
static void eval(mpq_ptr q, mpq_srcptr r, signed long int l)
{
- mpq_t temp;
- mpq_init(temp);
- mpq_set_si(temp, l, 1);
- mpq_mul(q, r, temp);
- mpq_clear(temp);
+ if (__GMPXX_CONSTANT(l))
+ {
+ if (l >= 0)
+ eval(q, r, static_cast<unsigned long>(l));
+ else
+ {
+ eval(q, r, -static_cast<unsigned long>(l));
+ mpq_neg(q, q);
+ }
+ }
+ else
+ {
+ __GMPXX_TMPQ_SI;
+ mpq_mul (q, r, temp);
+ }
}
static void eval(mpq_ptr q, signed long int l, mpq_srcptr r)
- {
- mpq_t temp;
- mpq_init(temp);
- mpq_set_si(temp, l, 1);
- mpq_mul(q, temp, r);
- mpq_clear(temp);
- }
+ { eval(q, r, l); }
static void eval(mpq_ptr q, mpq_srcptr r, double d)
{
mpq_t temp;
mpq_clear(temp);
}
static void eval(mpq_ptr q, double d, mpq_srcptr r)
- {
- mpq_t temp;
- mpq_init(temp);
- mpq_set_d(temp, d);
- mpq_mul(q, temp, r);
- mpq_clear(temp);
- }
+ { eval(q, r, d); }
static void eval(mpf_ptr f, mpf_srcptr g, mpf_srcptr h)
{ mpf_mul(f, g, h); }
mpf_mul_ui(f, g, l);
else
{
- mpf_mul_ui(f, g, -l);
+ mpf_mul_ui(f, g, -static_cast<unsigned long>(l));
mpf_neg(f, f);
}
}
static void eval(mpf_ptr f, signed long int l, mpf_srcptr g)
- {
- if (l >= 0)
- mpf_mul_ui(f, g, l);
- else
- {
- mpf_mul_ui(f, g, -l);
- mpf_neg(f, f);
- }
- }
+ { eval(f, g, l); }
static void eval(mpf_ptr f, mpf_srcptr g, double d)
{
mpf_t temp;
mpf_clear(temp);
}
static void eval(mpf_ptr f, double d, mpf_srcptr g)
- {
- mpf_t temp;
- mpf_init2(temp, 8*sizeof(double));
- mpf_set_d(temp, d);
- mpf_mul(f, temp, g);
- mpf_clear(temp);
- }
+ { eval(f, g, d); }
};
struct __gmp_binary_divides
{ mpz_tdiv_q(z, w, v); }
static void eval(mpz_ptr z, mpz_srcptr w, unsigned long int l)
- { mpz_tdiv_q_ui(z, w, l); }
+ {
+#if __GMP_GNUC_PREREQ(3, 4)
+ // Don't optimize division by 0: l == 0 is excluded below so that case still reaches mpz_tdiv_q_ui
+ if (__GMPXX_CONSTANT(l) && (l & (l-1)) == 0 && l != 0) // l is a nonzero power of two
+ {
+ if (l == 1)
+ {
+ if (z != w) mpz_set(z, w); // dividing by 1 is a plain copy, skipped when z and w are the same object
+ }
+ else
+ mpz_tdiv_q_2exp(z, w, __builtin_ctzl(l)); // truncating division by 2^k, where k is the number of trailing zero bits of l
+ // warning: do not use rshift (fdiv): __gmp_binary_rshift rounds toward minus infinity, this quotient must truncate toward zero
+ }
+ else
+#endif
+ mpz_tdiv_q_ui(z, w, l); // general case, and the only path when the #if block is compiled out
+ }
static void eval(mpz_ptr z, unsigned long int l, mpz_srcptr w)
{
if (mpz_sgn(w) >= 0)
static void eval(mpz_ptr z, mpz_srcptr w, signed long int l)
{
if (l >= 0)
- mpz_tdiv_q_ui(z, w, l);
+ eval(z, w, static_cast<unsigned long>(l));
else
{
- mpz_tdiv_q_ui(z, w, -l);
+ eval(z, w, -static_cast<unsigned long>(l));
mpz_neg(z, z);
}
}
{
/* if w is bigger than a long then the quotient must be zero, unless
l==LONG_MIN and w==-LONG_MIN in which case the quotient is -1 */
- mpz_set_si (z, (mpz_cmpabs_ui (w, (l >= 0 ? l : -l)) == 0 ? -1 : 0));
+ mpz_set_si (z, (mpz_cmpabs_ui (w, __gmpxx_abs_ui(l)) == 0 ? -1 : 0));
}
}
static void eval(mpz_ptr z, mpz_srcptr w, double d)
- {
- mpz_t temp;
- mpz_init_set_d(temp, d);
- mpz_tdiv_q(z, w, temp);
- mpz_clear(temp);
- }
+ { __GMPXX_TMPZ_D; mpz_tdiv_q (z, w, temp); }
static void eval(mpz_ptr z, double d, mpz_srcptr w)
- {
- mpz_t temp;
- mpz_init_set_d(temp, d);
- mpz_tdiv_q(z, temp, w);
- mpz_clear(temp);
- }
+ { __GMPXX_TMPZ_D; mpz_tdiv_q (z, temp, w); }
static void eval(mpq_ptr q, mpq_srcptr r, mpq_srcptr s)
{ mpq_div(q, r, s); }
static void eval(mpq_ptr q, mpq_srcptr r, unsigned long int l)
{
- mpq_t temp;
- mpq_init(temp);
- mpq_set_ui(temp, l, 1);
- mpq_div(q, r, temp);
- mpq_clear(temp);
+#if __GMP_GNUC_PREREQ(3, 4)
+ if (__GMPXX_CONSTANT(l) && (l & (l-1)) == 0 && l != 0) // l is a nonzero power of two; division by 0 is left to mpq_div below
+ __gmp_binary_rshift::eval(q, r, __builtin_ctzl(l)); // rational division by 2^k is exact, so the right-shift evaluator can be used here
+ else
+#endif
+ {
+ __GMPXX_TMPQ_UI; // macro defined elsewhere; presumably declares a temporary mpq named temp holding l -- confirm against its definition
+ mpq_div (q, r, temp);
+ }
}
static void eval(mpq_ptr q, unsigned long int l, mpq_srcptr r)
- {
- mpq_t temp;
- mpq_init(temp);
- mpq_set_ui(temp, l, 1);
- mpq_div(q, temp, r);
- mpq_clear(temp);
- }
+ { __GMPXX_TMPQ_UI; mpq_div (q, temp, r); }
static void eval(mpq_ptr q, mpq_srcptr r, signed long int l)
{
- mpq_t temp;
- mpq_init(temp);
- mpq_set_si(temp, l, 1);
- mpq_div(q, r, temp);
- mpq_clear(temp);
+ if (__GMPXX_CONSTANT(l))
+ {
+ if (l >= 0)
+ eval(q, r, static_cast<unsigned long>(l));
+ else
+ {
+ eval(q, r, -static_cast<unsigned long>(l));
+ mpq_neg(q, q);
+ }
+ }
+ else
+ {
+ __GMPXX_TMPQ_SI;
+ mpq_div (q, r, temp);
+ }
}
static void eval(mpq_ptr q, signed long int l, mpq_srcptr r)
- {
- mpq_t temp;
- mpq_init(temp);
- mpq_set_si(temp, l, 1);
- mpq_div(q, temp, r);
- mpq_clear(temp);
- }
+ { __GMPXX_TMPQ_SI; mpq_div (q, temp, r); }
static void eval(mpq_ptr q, mpq_srcptr r, double d)
{
mpq_t temp;
mpf_div_ui(f, g, l);
else
{
- mpf_div_ui(f, g, -l);
+ mpf_div_ui(f, g, -static_cast<unsigned long>(l));
mpf_neg(f, f);
}
}
mpf_ui_div(f, l, g);
else
{
- mpf_ui_div(f, -l, g);
+ mpf_ui_div(f, -static_cast<unsigned long>(l), g);
mpf_neg(f, f);
}
}
}
static void eval(mpz_ptr z, mpz_srcptr w, signed long int l)
{
- mpz_tdiv_r_ui (z, w, (l >= 0 ? l : -l));
+ mpz_tdiv_r_ui (z, w, __gmpxx_abs_ui(l));
}
static void eval(mpz_ptr z, signed long int l, mpz_srcptr w)
{
{
/* if w is bigger than a long then the remainder is l unchanged,
unless l==LONG_MIN and w==-LONG_MIN in which case it's 0 */
- mpz_set_si (z, mpz_cmpabs_ui (w, (l >= 0 ? l : -l)) == 0 ? 0 : l);
+ mpz_set_si (z, mpz_cmpabs_ui (w, __gmpxx_abs_ui(l)) == 0 ? 0 : l);
}
}
static void eval(mpz_ptr z, mpz_srcptr w, double d)
- {
- mpz_t temp;
- mpz_init_set_d(temp, d);
- mpz_tdiv_r(z, w, temp);
- mpz_clear(temp);
- }
+ { __GMPXX_TMPZ_D; mpz_tdiv_r (z, w, temp); }
static void eval(mpz_ptr z, double d, mpz_srcptr w)
- {
- mpz_t temp;
- mpz_init_set_d(temp, d);
- mpz_tdiv_r(z, temp, w);
- mpz_clear(temp);
- }
+ { __GMPXX_TMPZ_D; mpz_tdiv_r (z, temp, w); }
};
-// Max allocations for plain types when converted to mpz_t
-#define __GMP_DBL_LIMBS (2 + DBL_MAX_EXP / GMP_NUMB_BITS)
-#define __GMP_ULI_LIMBS (1 + (8 * sizeof (long) - 1) / GMP_NUMB_BITS)
-
-#define __GMPXX_TMP_UI \
- mpz_t temp; \
- mp_limb_t limbs[__GMP_ULI_LIMBS]; \
- temp->_mp_d = limbs; \
- temp->_mp_alloc = __GMP_ULI_LIMBS; \
- mpz_set_ui (temp, l)
-#define __GMPXX_TMP_SI \
- mpz_t temp; \
- mp_limb_t limbs[__GMP_ULI_LIMBS]; \
- temp->_mp_d = limbs; \
- temp->_mp_alloc = __GMP_ULI_LIMBS; \
- mpz_set_si (temp, l)
-#define __GMPXX_TMP_D \
- mpz_t temp; \
- mp_limb_t limbs[__GMP_DBL_LIMBS]; \
- temp->_mp_d = limbs; \
- temp->_mp_alloc = __GMP_DBL_LIMBS; \
- mpz_set_d (temp, d)
-
struct __gmp_binary_and
{
static void eval(mpz_ptr z, mpz_srcptr w, mpz_srcptr v)
{ mpz_and(z, w, v); }
static void eval(mpz_ptr z, mpz_srcptr w, unsigned long int l)
- { __GMPXX_TMP_UI; mpz_and (z, w, temp); }
+ { __GMPXX_TMPZ_UI; mpz_and (z, w, temp); } // __GMPXX_TMPZ_UI is defined elsewhere; presumably declares a temporary mpz named temp holding l
static void eval(mpz_ptr z, unsigned long int l, mpz_srcptr w)
- { __GMPXX_TMP_UI; mpz_and (z, w, temp); }
+ { eval(z, w, l); } // operands swapped: forward to the (mpz, unsigned long) overload
static void eval(mpz_ptr z, mpz_srcptr w, signed long int l)
- { __GMPXX_TMP_SI; mpz_and (z, w, temp); }
+ { __GMPXX_TMPZ_SI; mpz_and (z, w, temp); } // __GMPXX_TMPZ_SI is defined elsewhere; presumably declares a temporary mpz named temp holding l
static void eval(mpz_ptr z, signed long int l, mpz_srcptr w)
- { __GMPXX_TMP_SI; mpz_and (z, w, temp); }
+ { eval(z, w, l); } // operands swapped: forward to the (mpz, signed long) overload
static void eval(mpz_ptr z, mpz_srcptr w, double d)
- { __GMPXX_TMP_D; mpz_and (z, w, temp); }
+ { __GMPXX_TMPZ_D; mpz_and (z, w, temp); } // __GMPXX_TMPZ_D is defined elsewhere; presumably declares a temporary mpz named temp set from d
static void eval(mpz_ptr z, double d, mpz_srcptr w)
- { __GMPXX_TMP_D; mpz_and (z, w, temp); }
+ { eval(z, w, d); } // operands swapped: forward to the (mpz, double) overload
};
struct __gmp_binary_ior
static void eval(mpz_ptr z, mpz_srcptr w, mpz_srcptr v)
{ mpz_ior(z, w, v); }
static void eval(mpz_ptr z, mpz_srcptr w, unsigned long int l)
- { __GMPXX_TMP_UI; mpz_ior (z, w, temp); }
+ { __GMPXX_TMPZ_UI; mpz_ior (z, w, temp); }
static void eval(mpz_ptr z, unsigned long int l, mpz_srcptr w)
- { __GMPXX_TMP_UI; mpz_ior (z, w, temp); }
+ { eval(z, w, l); }
static void eval(mpz_ptr z, mpz_srcptr w, signed long int l)
- { __GMPXX_TMP_SI; mpz_ior (z, w, temp); }
+ { __GMPXX_TMPZ_SI; mpz_ior (z, w, temp); }
static void eval(mpz_ptr z, signed long int l, mpz_srcptr w)
- { __GMPXX_TMP_SI; mpz_ior (z, w, temp); }
+ { eval(z, w, l); }
static void eval(mpz_ptr z, mpz_srcptr w, double d)
- { __GMPXX_TMP_D; mpz_ior (z, w, temp); }
+ { __GMPXX_TMPZ_D; mpz_ior (z, w, temp); }
static void eval(mpz_ptr z, double d, mpz_srcptr w)
- { __GMPXX_TMP_D; mpz_ior (z, w, temp); }
+ { eval(z, w, d); }
};
struct __gmp_binary_xor
static void eval(mpz_ptr z, mpz_srcptr w, mpz_srcptr v)
{ mpz_xor(z, w, v); }
static void eval(mpz_ptr z, mpz_srcptr w, unsigned long int l)
- { __GMPXX_TMP_UI; mpz_xor (z, w, temp); }
+ { __GMPXX_TMPZ_UI; mpz_xor (z, w, temp); }
static void eval(mpz_ptr z, unsigned long int l, mpz_srcptr w)
- { __GMPXX_TMP_UI; mpz_xor (z, w, temp); }
+ { eval(z, w, l); }
static void eval(mpz_ptr z, mpz_srcptr w, signed long int l)
- { __GMPXX_TMP_SI; mpz_xor (z, w, temp); }
+ { __GMPXX_TMPZ_SI; mpz_xor (z, w, temp); }
static void eval(mpz_ptr z, signed long int l, mpz_srcptr w)
- { __GMPXX_TMP_SI; mpz_xor (z, w, temp); }
+ { eval(z, w, l); }
static void eval(mpz_ptr z, mpz_srcptr w, double d)
- { __GMPXX_TMP_D; mpz_xor (z, w, temp); }
+ { __GMPXX_TMPZ_D; mpz_xor (z, w, temp); }
static void eval(mpz_ptr z, double d, mpz_srcptr w)
- { __GMPXX_TMP_D; mpz_xor (z, w, temp); }
-};
-
-struct __gmp_binary_lshift
-{
- static void eval(mpz_ptr z, mpz_srcptr w, unsigned long int l)
- { mpz_mul_2exp(z, w, l); }
- static void eval(mpq_ptr q, mpq_srcptr r, unsigned long int l)
- { mpq_mul_2exp(q, r, l); }
- static void eval(mpf_ptr f, mpf_srcptr g, unsigned long int l)
- { mpf_mul_2exp(f, g, l); }
-};
-
-struct __gmp_binary_rshift
-{
- static void eval(mpz_ptr z, mpz_srcptr w, unsigned long int l)
- { mpz_fdiv_q_2exp(z, w, l); }
- static void eval(mpq_ptr q, mpq_srcptr r, unsigned long int l)
- { mpq_div_2exp(q, r, l); }
- static void eval(mpf_ptr f, mpf_srcptr g, unsigned long int l)
- { mpf_div_2exp(f, g, l); }
+ { eval(z, w, d); }
};
struct __gmp_binary_equal
}
static bool eval(double d, mpq_srcptr q)
{
- bool b;
- mpq_t temp;
- mpq_init(temp);
- mpq_set_d(temp, d);
- b = (mpq_equal(temp, q) != 0);
- mpq_clear(temp);
- return b;
+ return eval(q, d);
}
static bool eval(mpf_srcptr f, mpf_srcptr g) { return mpf_cmp(f, g) == 0; }
static bool eval(unsigned long int l, mpf_srcptr f)
{ return mpf_cmp_ui(f, l) == 0; }
static bool eval(mpf_srcptr f, signed long int l)
- { return mpf_cmp_si(f, l) == 0; }
- static bool eval(signed long int l, mpf_srcptr f)
- { return mpf_cmp_si(f, l) == 0; }
- static bool eval(mpf_srcptr f, double d)
- { return mpf_cmp_d(f, d) == 0; }
- static bool eval(double d, mpf_srcptr f)
- { return mpf_cmp_d(f, d) == 0; }
-};
-
-struct __gmp_binary_not_equal
-{
- static bool eval(mpz_srcptr z, mpz_srcptr w) { return mpz_cmp(z, w) != 0; }
-
- static bool eval(mpz_srcptr z, unsigned long int l)
- { return mpz_cmp_ui(z, l) != 0; }
- static bool eval(unsigned long int l, mpz_srcptr z)
- { return mpz_cmp_ui(z, l) != 0; }
- static bool eval(mpz_srcptr z, signed long int l)
- { return mpz_cmp_si(z, l) != 0; }
- static bool eval(signed long int l, mpz_srcptr z)
- { return mpz_cmp_si(z, l) != 0; }
- static bool eval(mpz_srcptr z, double d)
- { return mpz_cmp_d(z, d) != 0; }
- static bool eval(double d, mpz_srcptr z)
- { return mpz_cmp_d(z, d) != 0; }
-
- static bool eval(mpq_srcptr q, mpq_srcptr r)
- { return mpq_equal(q, r) == 0; }
-
- static bool eval(mpq_srcptr q, unsigned long int l)
- { return mpq_cmp_ui(q, l, 1) != 0; }
- static bool eval(unsigned long int l, mpq_srcptr q)
- { return mpq_cmp_ui(q, l, 1) != 0; }
- static bool eval(mpq_srcptr q, signed long int l)
- { return mpq_cmp_si(q, l, 1) != 0; }
- static bool eval(signed long int l, mpq_srcptr q)
- { return mpq_cmp_si(q, l, 1) != 0; }
- static bool eval(mpq_srcptr q, double d)
- {
- bool b;
- mpq_t temp;
- mpq_init(temp);
- mpq_set_d(temp, d);
- b = (mpq_equal(q, temp) == 0);
- mpq_clear(temp);
- return b;
- }
- static bool eval(double d, mpq_srcptr q)
- {
- bool b;
- mpq_t temp;
- mpq_init(temp);
- mpq_set_d(temp, d);
- b = (mpq_equal(temp, q) == 0);
- mpq_clear(temp);
- return b;
- }
-
- static bool eval(mpf_srcptr f, mpf_srcptr g) { return mpf_cmp(f, g) != 0; }
-
- static bool eval(mpf_srcptr f, unsigned long int l)
- { return mpf_cmp_ui(f, l) != 0; }
- static bool eval(unsigned long int l, mpf_srcptr f)
- { return mpf_cmp_ui(f, l) != 0; }
- static bool eval(mpf_srcptr f, signed long int l)
- { return mpf_cmp_si(f, l) != 0; }
- static bool eval(signed long int l, mpf_srcptr f)
- { return mpf_cmp_si(f, l) != 0; }
- static bool eval(mpf_srcptr f, double d)
- { return mpf_cmp_d(f, d) != 0; }
- static bool eval(double d, mpf_srcptr f)
- { return mpf_cmp_d(f, d) != 0; }
-};
-
-struct __gmp_binary_less
-{
- static bool eval(mpz_srcptr z, mpz_srcptr w) { return mpz_cmp(z, w) < 0; }
-
- static bool eval(mpz_srcptr z, unsigned long int l)
- { return mpz_cmp_ui(z, l) < 0; }
- static bool eval(unsigned long int l, mpz_srcptr z)
- { return mpz_cmp_ui(z, l) > 0; }
- static bool eval(mpz_srcptr z, signed long int l)
- { return mpz_cmp_si(z, l) < 0; }
- static bool eval(signed long int l, mpz_srcptr z)
- { return mpz_cmp_si(z, l) > 0; }
- static bool eval(mpz_srcptr z, double d)
- { return mpz_cmp_d(z, d) < 0; }
- static bool eval(double d, mpz_srcptr z)
- { return mpz_cmp_d(z, d) > 0; }
-
- static bool eval(mpq_srcptr q, mpq_srcptr r) { return mpq_cmp(q, r) < 0; }
-
- static bool eval(mpq_srcptr q, unsigned long int l)
- { return mpq_cmp_ui(q, l, 1) < 0; }
- static bool eval(unsigned long int l, mpq_srcptr q)
- { return mpq_cmp_ui(q, l, 1) > 0; }
- static bool eval(mpq_srcptr q, signed long int l)
- { return mpq_cmp_si(q, l, 1) < 0; }
- static bool eval(signed long int l, mpq_srcptr q)
- { return mpq_cmp_si(q, l, 1) > 0; }
- static bool eval(mpq_srcptr q, double d)
- {
- bool b;
- mpq_t temp;
- mpq_init(temp);
- mpq_set_d(temp, d);
- b = (mpq_cmp(q, temp) < 0);
- mpq_clear(temp);
- return b;
- }
- static bool eval(double d, mpq_srcptr q)
- {
- bool b;
- mpq_t temp;
- mpq_init(temp);
- mpq_set_d(temp, d);
- b = (mpq_cmp(temp, q) < 0);
- mpq_clear(temp);
- return b;
- }
-
- static bool eval(mpf_srcptr f, mpf_srcptr g) { return mpf_cmp(f, g) < 0; }
-
- static bool eval(mpf_srcptr f, unsigned long int l)
- { return mpf_cmp_ui(f, l) < 0; }
- static bool eval(unsigned long int l, mpf_srcptr f)
- { return mpf_cmp_ui(f, l) > 0; }
- static bool eval(mpf_srcptr f, signed long int l)
- { return mpf_cmp_si(f, l) < 0; }
+ { return mpf_cmp_si(f, l) == 0; }
static bool eval(signed long int l, mpf_srcptr f)
- { return mpf_cmp_si(f, l) > 0; }
+ { return mpf_cmp_si(f, l) == 0; }
static bool eval(mpf_srcptr f, double d)
- { return mpf_cmp_d(f, d) < 0; }
+ { return mpf_cmp_d(f, d) == 0; }
static bool eval(double d, mpf_srcptr f)
- { return mpf_cmp_d(f, d) > 0; }
+ { return mpf_cmp_d(f, d) == 0; }
};
-struct __gmp_binary_less_equal
+struct __gmp_binary_less
{
- static bool eval(mpz_srcptr z, mpz_srcptr w) { return mpz_cmp(z, w) <= 0; }
+ static bool eval(mpz_srcptr z, mpz_srcptr w) { return mpz_cmp(z, w) < 0; }
static bool eval(mpz_srcptr z, unsigned long int l)
- { return mpz_cmp_ui(z, l) <= 0; }
+ { return mpz_cmp_ui(z, l) < 0; }
static bool eval(unsigned long int l, mpz_srcptr z)
- { return mpz_cmp_ui(z, l) >= 0; }
+ { return mpz_cmp_ui(z, l) > 0; }
static bool eval(mpz_srcptr z, signed long int l)
- { return mpz_cmp_si(z, l) <= 0; }
+ { return mpz_cmp_si(z, l) < 0; }
static bool eval(signed long int l, mpz_srcptr z)
- { return mpz_cmp_si(z, l) >= 0; }
+ { return mpz_cmp_si(z, l) > 0; }
static bool eval(mpz_srcptr z, double d)
- { return mpz_cmp_d(z, d) <= 0; }
+ { return mpz_cmp_d(z, d) < 0; }
static bool eval(double d, mpz_srcptr z)
- { return mpz_cmp_d(z, d) >= 0; }
+ { return mpz_cmp_d(z, d) > 0; }
- static bool eval(mpq_srcptr q, mpq_srcptr r) { return mpq_cmp(q, r) <= 0; }
+ static bool eval(mpq_srcptr q, mpq_srcptr r) { return mpq_cmp(q, r) < 0; }
static bool eval(mpq_srcptr q, unsigned long int l)
- { return mpq_cmp_ui(q, l, 1) <= 0; }
+ { return mpq_cmp_ui(q, l, 1) < 0; }
static bool eval(unsigned long int l, mpq_srcptr q)
- { return mpq_cmp_ui(q, l, 1) >= 0; }
+ { return mpq_cmp_ui(q, l, 1) > 0; }
static bool eval(mpq_srcptr q, signed long int l)
- { return mpq_cmp_si(q, l, 1) <= 0; }
+ { return mpq_cmp_si(q, l, 1) < 0; }
static bool eval(signed long int l, mpq_srcptr q)
- { return mpq_cmp_si(q, l, 1) >= 0; }
+ { return mpq_cmp_si(q, l, 1) > 0; }
static bool eval(mpq_srcptr q, double d)
{
bool b;
mpq_t temp;
mpq_init(temp);
mpq_set_d(temp, d);
- b = (mpq_cmp(q, temp) <= 0);
+ b = (mpq_cmp(q, temp) < 0);
mpq_clear(temp);
return b;
}
mpq_t temp;
mpq_init(temp);
mpq_set_d(temp, d);
- b = (mpq_cmp(temp, q) <= 0);
+ b = (mpq_cmp(temp, q) < 0);
mpq_clear(temp);
return b;
}
- static bool eval(mpf_srcptr f, mpf_srcptr g) { return mpf_cmp(f, g) <= 0; }
+ static bool eval(mpf_srcptr f, mpf_srcptr g) { return mpf_cmp(f, g) < 0; }
static bool eval(mpf_srcptr f, unsigned long int l)
- { return mpf_cmp_ui(f, l) <= 0; }
+ { return mpf_cmp_ui(f, l) < 0; }
static bool eval(unsigned long int l, mpf_srcptr f)
- { return mpf_cmp_ui(f, l) >= 0; }
+ { return mpf_cmp_ui(f, l) > 0; }
static bool eval(mpf_srcptr f, signed long int l)
- { return mpf_cmp_si(f, l) <= 0; }
+ { return mpf_cmp_si(f, l) < 0; }
static bool eval(signed long int l, mpf_srcptr f)
- { return mpf_cmp_si(f, l) >= 0; }
+ { return mpf_cmp_si(f, l) > 0; }
static bool eval(mpf_srcptr f, double d)
- { return mpf_cmp_d(f, d) <= 0; }
+ { return mpf_cmp_d(f, d) < 0; }
static bool eval(double d, mpf_srcptr f)
- { return mpf_cmp_d(f, d) >= 0; }
+ { return mpf_cmp_d(f, d) > 0; }
};
struct __gmp_binary_greater
{ return mpf_cmp_d(f, d) < 0; }
};
-struct __gmp_binary_greater_equal
-{
- static bool eval(mpz_srcptr z, mpz_srcptr w) { return mpz_cmp(z, w) >= 0; }
-
- static bool eval(mpz_srcptr z, unsigned long int l)
- { return mpz_cmp_ui(z, l) >= 0; }
- static bool eval(unsigned long int l, mpz_srcptr z)
- { return mpz_cmp_ui(z, l) <= 0; }
- static bool eval(mpz_srcptr z, signed long int l)
- { return mpz_cmp_si(z, l) >= 0; }
- static bool eval(signed long int l, mpz_srcptr z)
- { return mpz_cmp_si(z, l) <= 0; }
- static bool eval(mpz_srcptr z, double d)
- { return mpz_cmp_d(z, d) >= 0; }
- static bool eval(double d, mpz_srcptr z)
- { return mpz_cmp_d(z, d) <= 0; }
-
- static bool eval(mpq_srcptr q, mpq_srcptr r) { return mpq_cmp(q, r) >= 0; }
-
- static bool eval(mpq_srcptr q, unsigned long int l)
- { return mpq_cmp_ui(q, l, 1) >= 0; }
- static bool eval(unsigned long int l, mpq_srcptr q)
- { return mpq_cmp_ui(q, l, 1) <= 0; }
- static bool eval(mpq_srcptr q, signed long int l)
- { return mpq_cmp_si(q, l, 1) >= 0; }
- static bool eval(signed long int l, mpq_srcptr q)
- { return mpq_cmp_si(q, l, 1) <= 0; }
- static bool eval(mpq_srcptr q, double d)
- {
- bool b;
- mpq_t temp;
- mpq_init(temp);
- mpq_set_d(temp, d);
- b = (mpq_cmp(q, temp) >= 0);
- mpq_clear(temp);
- return b;
- }
- static bool eval(double d, mpq_srcptr q)
- {
- bool b;
- mpq_t temp;
- mpq_init(temp);
- mpq_set_d(temp, d);
- b = (mpq_cmp(temp, q) >= 0);
- mpq_clear(temp);
- return b;
- }
-
- static bool eval(mpf_srcptr f, mpf_srcptr g) { return mpf_cmp(f, g) >= 0; }
-
- static bool eval(mpf_srcptr f, unsigned long int l)
- { return mpf_cmp_ui(f, l) >= 0; }
- static bool eval(unsigned long int l, mpf_srcptr f)
- { return mpf_cmp_ui(f, l) <= 0; }
- static bool eval(mpf_srcptr f, signed long int l)
- { return mpf_cmp_si(f, l) >= 0; }
- static bool eval(signed long int l, mpf_srcptr f)
- { return mpf_cmp_si(f, l) <= 0; }
- static bool eval(mpf_srcptr f, double d)
- { return mpf_cmp_d(f, d) >= 0; }
- static bool eval(double d, mpf_srcptr f)
- { return mpf_cmp_d(f, d) <= 0; }
-};
-
struct __gmp_unary_increment
{
static void eval(mpz_ptr z) { mpz_add_ui(z, z, 1); }
mpf_clear(temp);
}
static void eval(mpf_ptr f, unsigned long int l, mpf_srcptr g)
- {
- mpf_t temp;
- mpf_init2(temp, mpf_get_prec(f));
- mpf_mul(temp, g, g);
- mpf_set_ui(f, l);
- mpf_mul(f, f, f);
- mpf_add(f, f, temp);
- mpf_sqrt(f, f);
- mpf_clear(temp);
- }
+ { eval(f, g, l); }
static void eval(mpf_ptr f, mpf_srcptr g, signed long int l)
{
mpf_t temp;
mpf_clear(temp);
}
static void eval(mpf_ptr f, signed long int l, mpf_srcptr g)
- {
- mpf_t temp;
- mpf_init2(temp, mpf_get_prec(f));
- mpf_mul(temp, g, g);
- mpf_set_si(f, l);
- mpf_mul(f, f, f);
- mpf_add(f, f, temp);
- mpf_sqrt(f, f);
- mpf_clear(temp);
- }
+ { eval(f, g, l); }
static void eval(mpf_ptr f, mpf_srcptr g, double d)
{
mpf_t temp;
mpf_clear(temp);
}
static void eval(mpf_ptr f, double d, mpf_srcptr g)
- {
- mpf_t temp;
- mpf_init2(temp, mpf_get_prec(f));
- mpf_mul(temp, g, g);
- mpf_set_d(f, d);
- mpf_mul(f, f, f);
- mpf_add(f, f, temp);
- mpf_sqrt(f, f);
- mpf_clear(temp);
- }
+ { eval(f, g, d); }
};
struct __gmp_sgn_function
struct __gmp_rand_function
{
- static void eval(mpz_ptr z, gmp_randstate_t s, unsigned long int l)
+ static void eval(mpz_ptr z, gmp_randstate_t s, mp_bitcnt_t l)
{ mpz_urandomb(z, s, l); }
static void eval(mpz_ptr z, gmp_randstate_t s, mpz_srcptr w)
{ mpz_urandomm(z, s, w); }
{
typedef mpz_t value_type;
typedef mpz_ptr ptr_type;
+ typedef mpz_srcptr srcptr_type;
};
template <>
{
typedef mpq_t value_type;
typedef mpq_ptr ptr_type;
+ typedef mpq_srcptr srcptr_type;
};
template <>
{
typedef mpf_t value_type;
typedef mpf_ptr ptr_type;
+ typedef mpf_srcptr srcptr_type;
};
template <>
typedef mpf_t value_type;
};
+#if __GMPXX_USE_CXX11 // the std::common_type specializations below are compiled only when this flag is set
+namespace std {
+ template <class T, class U, class V, class W>
+ struct common_type <__gmp_expr<T, U>, __gmp_expr<V, W> > // general case: two gmp expressions of possibly different kinds
+ {
+ private:
+ typedef typename __gmp_resolve_expr<T, V>::value_type X; // value type chosen by __gmp_resolve_expr for the pair (trait defined elsewhere)
+ public:
+ typedef __gmp_expr<X, X> type; // common type is __gmp_expr<X, X> for that resolved type
+ };
-
-template <class T, class U, class V>
-struct __gmp_resolve_temp
-{
- typedef __gmp_expr<T, T> temp_type;
-};
-
-template <class T>
-struct __gmp_resolve_temp<T, T, T>
-{
- typedef const __gmp_expr<T, T> & temp_type;
-};
-
+ template <class T, class U>
+ struct common_type <__gmp_expr<T, U>, __gmp_expr<T, U> > // both arguments are the same expression type
+ {
+ typedef __gmp_expr<T, U> type; // identical types: the common type is that type itself
+ };
+
+#define __GMPXX_DECLARE_COMMON_TYPE(typ) \
+ template <class T, class U> \
+ struct common_type <__gmp_expr<T, U>, typ > \
+ { \
+ typedef __gmp_expr<T, T> type; \
+ }; \
+ \
+ template <class T, class U> \
+ struct common_type <typ, __gmp_expr<T, U> > \
+ { \
+ typedef __gmp_expr<T, T> type; \
+ }
+
+ __GMPXX_DECLARE_COMMON_TYPE(signed char); // each use specializes both argument orders; the common type is always __gmp_expr<T, T>
+ __GMPXX_DECLARE_COMMON_TYPE(unsigned char);
+ __GMPXX_DECLARE_COMMON_TYPE(signed int);
+ __GMPXX_DECLARE_COMMON_TYPE(unsigned int);
+ __GMPXX_DECLARE_COMMON_TYPE(signed short int);
+ __GMPXX_DECLARE_COMMON_TYPE(unsigned short int);
+ __GMPXX_DECLARE_COMMON_TYPE(signed long int);
+ __GMPXX_DECLARE_COMMON_TYPE(unsigned long int);
+ __GMPXX_DECLARE_COMMON_TYPE(float);
+ __GMPXX_DECLARE_COMMON_TYPE(double);
+#undef __GMPXX_DECLARE_COMMON_TYPE // helper macro is only needed for the declarations above
+} // namespace std
+#endif // __GMPXX_USE_CXX11
// classes for evaluating unary and binary expressions
template <class T, class Op>
};
-// functions for evaluating expressions
-template <class T, class U>
-void __gmp_set_expr(mpz_ptr, const __gmp_expr<T, U> &);
-template <class T, class U>
-void __gmp_set_expr(mpq_ptr, const __gmp_expr<T, U> &);
-template <class T, class U>
-void __gmp_set_expr(mpf_ptr, const __gmp_expr<T, U> &);
-
/**************** Macros for in-class declarations ****************/
/* This is just repetitive code that is easier to maintain if it's written
__gmp_expr & fun(unsigned long int); \
__gmp_expr & fun(float); \
__gmp_expr & fun(double); \
- __gmp_expr & fun(long double);
+ /* __gmp_expr & fun(long double); */
#define __GMP_DECLARE_COMPOUND_OPERATOR(fun) \
__GMPP_DECLARE_COMPOUND_OPERATOR(fun) \
__GMPN_DECLARE_COMPOUND_OPERATOR(fun)
#define __GMP_DECLARE_COMPOUND_OPERATOR_UI(fun) \
- __gmp_expr & fun(unsigned long int);
+ __gmp_expr & fun(mp_bitcnt_t);
#define __GMP_DECLARE_INCREMENT_OPERATOR(fun) \
inline __gmp_expr & fun(); \
__gmp_expr() { mpz_init(mp); }
__gmp_expr(const __gmp_expr &z) { mpz_init_set(mp, z.mp); }
+#if __GMPXX_USE_CXX11 // move constructor, compiled only when this flag is set
+ __gmp_expr(__gmp_expr &&z)
+ { *mp = *z.mp; mpz_init(z.mp); } // take over z's mpz struct by plain copy, then re-initialize z so it remains a valid object
+#endif
+ template <class T>
+ __gmp_expr(const __gmp_expr<mpz_t, T> &expr)
+ { mpz_init(mp); __gmp_set_expr(mp, expr); }
template <class T, class U>
- __gmp_expr(const __gmp_expr<T, U> &expr)
+ explicit __gmp_expr(const __gmp_expr<T, U> &expr)
{ mpz_init(mp); __gmp_set_expr(mp, expr); }
__gmp_expr(signed char c) { mpz_init_set_si(mp, c); }
__gmp_expr(double d) { mpz_init_set_d(mp, d); }
// __gmp_expr(long double ld) { mpz_init_set_d(mp, ld); }
- explicit __gmp_expr(const char *s)
- {
- if (mpz_init_set_str (mp, s, 0) != 0)
- {
- mpz_clear (mp);
- throw std::invalid_argument ("mpz_set_str");
- }
- }
- __gmp_expr(const char *s, int base)
+ explicit __gmp_expr(const char *s, int base = 0)
{
if (mpz_init_set_str (mp, s, base) != 0)
{
throw std::invalid_argument ("mpz_set_str");
}
}
- explicit __gmp_expr(const std::string &s)
- {
- if (mpz_init_set_str (mp, s.c_str(), 0) != 0)
- {
- mpz_clear (mp);
- throw std::invalid_argument ("mpz_set_str");
- }
- }
- __gmp_expr(const std::string &s, int base)
+ explicit __gmp_expr(const std::string &s, int base = 0)
{
if (mpz_init_set_str(mp, s.c_str(), base) != 0)
{
~__gmp_expr() { mpz_clear(mp); }
+ void swap(__gmp_expr& z) __GMPXX_NOEXCEPT { std::swap(*mp, *z.mp); } // exchanges the two underlying mpz structs in place
+
// assignment operators
__gmp_expr & operator=(const __gmp_expr &z)
{ mpz_set(mp, z.mp); return *this; }
+#if __GMPXX_USE_CXX11 // move assignment, compiled only when this flag is set
+ __gmp_expr & operator=(__gmp_expr &&z) noexcept
+ { swap(z); return *this; } // implemented as a swap: the previous value of *this ends up in z
+#endif
template <class T, class U>
__gmp_expr<value_type, value_type> & operator=(const __gmp_expr<T, U> &expr)
{ __gmp_set_expr(mp, expr); return *this; }
// bool fits_double_p() const { return mpz_fits_double_p(mp); }
// bool fits_ldouble_p() const { return mpz_fits_ldouble_p(mp); }
+#if __GMPXX_USE_CXX11 // explicit conversion operators need C++11
+ explicit operator bool() const { return mp->_mp_size != 0; } // true when the size field is nonzero, i.e. the integer is not zero
+#endif
+
// member operators
__GMP_DECLARE_COMPOUND_OPERATOR(operator+=)
__GMP_DECLARE_COMPOUND_OPERATOR(operator-=)
// constructors and destructor
__gmp_expr() { mpq_init(mp); }
- __gmp_expr(const __gmp_expr &q) { mpq_init(mp); mpq_set(mp, q.mp); }
+ __gmp_expr(const __gmp_expr &q) // copy constructor
+ {
+ mpz_init_set(mpq_numref(mp), mpq_numref(q.mp)); // initialize the numerator directly as a copy of q's numerator
+ mpz_init_set(mpq_denref(mp), mpq_denref(q.mp)); // initialize the denominator directly as a copy of q's denominator
+ }
+#if __GMPXX_USE_CXX11 // move constructor, compiled only when this flag is set
+ __gmp_expr(__gmp_expr &&q)
+ { *mp = *q.mp; mpq_init(q.mp); } // take over q's mpq struct by plain copy, then re-initialize q so it remains a valid object
+#endif
+ template <class T>
+ __gmp_expr(const __gmp_expr<mpz_t, T> &expr)
+ { mpq_init(mp); __gmp_set_expr(mp, expr); }
+ template <class T>
+ __gmp_expr(const __gmp_expr<mpq_t, T> &expr)
+ { mpq_init(mp); __gmp_set_expr(mp, expr); }
template <class T, class U>
- __gmp_expr(const __gmp_expr<T, U> &expr)
+ explicit __gmp_expr(const __gmp_expr<T, U> &expr)
{ mpq_init(mp); __gmp_set_expr(mp, expr); }
__gmp_expr(signed char c) { mpq_init(mp); mpq_set_si(mp, c, 1); }
__gmp_expr(double d) { mpq_init(mp); mpq_set_d(mp, d); }
// __gmp_expr(long double ld) { mpq_init(mp); mpq_set_ld(mp, ld); }
- explicit __gmp_expr(const char *s)
- {
- mpq_init (mp);
- if (mpq_set_str (mp, s, 0) != 0)
- {
- mpq_clear (mp);
- throw std::invalid_argument ("mpq_set_str");
- }
- }
- __gmp_expr(const char *s, int base)
+ explicit __gmp_expr(const char *s, int base = 0)
{
mpq_init (mp);
- if (mpq_set_str(mp, s, base) != 0)
+ // If s is the literal 0, we meant to call another constructor.
+ // If s just happens to evaluate to 0, we would crash, so whatever.
+ if (s == 0)
{
- mpq_clear (mp);
- throw std::invalid_argument ("mpq_set_str");
+ // Don't turn mpq_class(0,0) into 0
+ mpz_set_si(mpq_denref(mp), base);
}
- }
- explicit __gmp_expr(const std::string &s)
- {
- mpq_init (mp);
- if (mpq_set_str (mp, s.c_str(), 0) != 0)
+ else if (mpq_set_str(mp, s, base) != 0)
{
mpq_clear (mp);
throw std::invalid_argument ("mpq_set_str");
}
}
- __gmp_expr(const std::string &s, int base)
+ explicit __gmp_expr(const std::string &s, int base = 0)
{
mpq_init(mp);
if (mpq_set_str (mp, s.c_str(), base) != 0)
throw std::invalid_argument ("mpq_set_str");
}
}
- explicit __gmp_expr(mpq_srcptr q) { mpq_init(mp); mpq_set(mp, q); }
+ explicit __gmp_expr(mpq_srcptr q) // construct from a raw mpq value
+ {
+ mpz_init_set(mpq_numref(mp), mpq_numref(q)); // initialize the numerator directly as a copy of q's numerator
+ mpz_init_set(mpq_denref(mp), mpq_denref(q)); // initialize the denominator directly as a copy of q's denominator
+ }
__gmp_expr(const mpz_class &num, const mpz_class &den)
{
- mpq_init(mp);
- mpz_set(mpq_numref(mp), num.get_mpz_t());
- mpz_set(mpq_denref(mp), den.get_mpz_t());
+ mpz_init_set(mpq_numref(mp), num.get_mpz_t());
+ mpz_init_set(mpq_denref(mp), den.get_mpz_t());
}
~__gmp_expr() { mpq_clear(mp); }
+ void swap(__gmp_expr& q) __GMPXX_NOEXCEPT { std::swap(*mp, *q.mp); } // exchanges the two underlying mpq structs in place
+
// assignment operators
__gmp_expr & operator=(const __gmp_expr &q)
{ mpq_set(mp, q.mp); return *this; }
+#if __GMPXX_USE_CXX11
+ __gmp_expr & operator=(__gmp_expr &&q) noexcept
+ { swap(q); return *this; }
+ __gmp_expr & operator=(mpz_class &&z) noexcept
+ { get_num() = std::move(z); get_den() = 1u; return *this; }
+#endif
template <class T, class U>
__gmp_expr<value_type, value_type> & operator=(const __gmp_expr<T, U> &expr)
{ __gmp_set_expr(mp, expr); return *this; }
double get_d() const { return mpq_get_d(mp); }
+#if __GMPXX_USE_CXX11
+ explicit operator bool() const { return mpq_numref(mp)->_mp_size != 0; }
+#endif
+
// compound assignments
__GMP_DECLARE_COMPOUND_OPERATOR(operator+=)
__GMP_DECLARE_COMPOUND_OPERATOR(operator-=)
__gmp_expr(const __gmp_expr &f)
{ mpf_init2(mp, f.get_prec()); mpf_set(mp, f.mp); }
+#if __GMPXX_USE_CXX11
+ __gmp_expr(__gmp_expr &&f)
+ { *mp = *f.mp; mpf_init2(f.mp, get_prec()); }
+#endif
__gmp_expr(const __gmp_expr &f, mp_bitcnt_t prec)
{ mpf_init2(mp, prec); mpf_set(mp, f.mp); }
template <class T, class U>
~__gmp_expr() { mpf_clear(mp); }
+ void swap(__gmp_expr& f) __GMPXX_NOEXCEPT { std::swap(*mp, *f.mp); }
+
// assignment operators
__gmp_expr & operator=(const __gmp_expr &f)
{ mpf_set(mp, f.mp); return *this; }
+#if __GMPXX_USE_CXX11
+ __gmp_expr & operator=(__gmp_expr &&f) noexcept
+ { swap(f); return *this; }
+#endif
template <class T, class U>
__gmp_expr<value_type, value_type> & operator=(const __gmp_expr<T, U> &expr)
{ __gmp_set_expr(mp, expr); return *this; }
// bool fits_double_p() const { return mpf_fits_double_p(mp); }
// bool fits_ldouble_p() const { return mpf_fits_ldouble_p(mp); }
+#if __GMPXX_USE_CXX11
+ explicit operator bool() const { return mp->_mp_size != 0; }
+#endif
+
// compound assignments
__GMP_DECLARE_COMPOUND_OPERATOR(operator+=)
__GMP_DECLARE_COMPOUND_OPERATOR(operator-=)
-/**************** I/O operators ****************/
+/**************** User-defined literals ****************/
-// these should (and will) be provided separately
+#if __GMPXX_USE_CXX11
+inline mpz_class operator"" _mpz(const char* s)
+{
+ return mpz_class(s);
+}
-template <class T>
-inline std::ostream & operator<<
-(std::ostream &o, const __gmp_expr<T, T> &expr)
+inline mpq_class operator"" _mpq(const char* s)
+{
+ mpq_class q;
+ q.get_num() = s;
+ return q;
+}
+
+inline mpf_class operator"" _mpf(const char* s)
{
- return o << expr.__get_mp();
+ return mpf_class(s);
}
+#endif
+
+/**************** I/O operators ****************/
+
+// these should (and will) be provided separately
template <class T, class U>
inline std::ostream & operator<<
(std::ostream &o, const __gmp_expr<T, U> &expr)
{
- __gmp_expr<T, T> temp(expr);
+ __gmp_expr<T, T> const& temp(expr);
return o << temp.__get_mp();
}
-
template <class T>
inline std::istream & operator>>(std::istream &i, __gmp_expr<T, T> &expr)
{
return i >> expr.__get_mp();
}
+/*
+// you might want to uncomment this
inline std::istream & operator>>(std::istream &i, mpq_class &q)
{
i >> q.get_mpq_t();
- // q.canonicalize(); // you might want to uncomment this
+ q.canonicalize();
return i;
}
+*/
/**************** Functions for type conversion ****************/
-template <>
inline void __gmp_set_expr(mpz_ptr z, const mpz_class &w)
{
mpz_set(z, w.get_mpz_t());
expr.eval(z);
}
-template <>
-inline void __gmp_set_expr(mpz_ptr z, const mpq_class &q)
-{
- mpz_set_q(z, q.get_mpq_t());
-}
-
template <class T>
inline void __gmp_set_expr(mpz_ptr z, const __gmp_expr<mpq_t, T> &expr)
{
- mpq_class temp(expr);
+ mpq_class const& temp(expr);
mpz_set_q(z, temp.get_mpq_t());
}
-template <class T>
-inline void __gmp_set_expr(mpz_ptr z, const mpf_class &f)
-{
- mpz_set_f(z, f.get_mpf_t());
-}
-
template <class T>
inline void __gmp_set_expr(mpz_ptr z, const __gmp_expr<mpf_t, T> &expr)
{
- mpf_class temp(expr);
+ mpf_class const& temp(expr);
mpz_set_f(z, temp.get_mpf_t());
}
-template <>
inline void __gmp_set_expr(mpq_ptr q, const mpz_class &z)
{
mpq_set_z(q, z.get_mpz_t());
template <class T>
inline void __gmp_set_expr(mpq_ptr q, const __gmp_expr<mpz_t, T> &expr)
{
- mpz_class temp(expr);
- mpq_set_z(q, temp.get_mpz_t());
+ __gmp_set_expr(mpq_numref(q), expr);
+ mpz_set_ui(mpq_denref(q), 1);
}
-template <>
inline void __gmp_set_expr(mpq_ptr q, const mpq_class &r)
{
mpq_set(q, r.get_mpq_t());
expr.eval(q);
}
-template <class T>
-inline void __gmp_set_expr(mpq_ptr q, const mpf_class &f)
-{
- mpq_set_f(q, f.get_mpf_t());
-}
-
template <class T>
inline void __gmp_set_expr(mpq_ptr q, const __gmp_expr<mpf_t, T> &expr)
{
- mpf_class temp(expr);
+ mpf_class const& temp(expr);
mpq_set_f(q, temp.get_mpf_t());
}
-template <class T>
-inline void __gmp_set_expr(mpf_ptr f, const mpz_class &z)
-{
- mpf_set_z(f, z.get_mpz_t());
-}
-
template <class T>
inline void __gmp_set_expr(mpf_ptr f, const __gmp_expr<mpz_t, T> &expr)
{
- mpz_class temp(expr);
+ mpz_class const& temp(expr);
mpf_set_z(f, temp.get_mpz_t());
}
-template <class T>
-inline void __gmp_set_expr(mpf_ptr f, const mpq_class &q)
-{
- mpf_set_q(f, q.get_mpq_t());
-}
-
template <class T>
inline void __gmp_set_expr(mpf_ptr f, const __gmp_expr<mpq_t, T> &expr)
{
- mpq_class temp(expr);
+ mpq_class const& temp(expr);
mpf_set_q(f, temp.get_mpq_t());
}
-template <>
inline void __gmp_set_expr(mpf_ptr f, const mpf_class &g)
{
mpf_set(f, g.get_mpf_t());
template <class T>
inline void __gmp_set_expr(mpf_ptr f, const __gmp_expr<mpf_t, T> &expr)
{
- expr.eval(f, mpf_get_prec(f));
+ expr.eval(f);
}
+/* Temporary objects */
+
+template <class T>
+class __gmp_temp
+{
+ __gmp_expr<T, T> val;
+ public:
+ template<class U, class V>
+ __gmp_temp(U const& u, V) : val (u) {}
+ typename __gmp_resolve_expr<T>::srcptr_type
+ __get_mp() const { return val.__get_mp(); }
+};
+
+template <>
+class __gmp_temp <mpf_t>
+{
+ mpf_class val;
+ public:
+ template<class U>
+ __gmp_temp(U const& u, mpf_ptr res) : val (u, mpf_get_prec(res)) {}
+ mpf_srcptr __get_mp() const { return val.__get_mp(); }
+};
+
/**************** Specializations of __gmp_expr ****************/
/* The eval() method of __gmp_expr<T, U> evaluates the corresponding
expression and assigns the result to its argument, which is either an
__gmp_unary_expr<val_type, Op> expr;
public:
- __gmp_expr(const val_type &val) : expr(val) { }
- void eval(typename __gmp_resolve_expr<T>::ptr_type p,
- unsigned long int = 0) const
+ explicit __gmp_expr(const val_type &val) : expr(val) { }
+ void eval(typename __gmp_resolve_expr<T>::ptr_type p) const
{ Op::eval(p, expr.val.__get_mp()); }
const val_type & get_val() const { return expr.val; }
- unsigned long int get_prec() const { return expr.val.get_prec(); }
+ mp_bitcnt_t get_prec() const { return expr.val.get_prec(); }
};
__gmp_unary_expr<val_type, Op> expr;
public:
- __gmp_expr(const val_type &val) : expr(val) { }
+ explicit __gmp_expr(const val_type &val) : expr(val) { }
void eval(typename __gmp_resolve_expr<T>::ptr_type p) const
- { __gmp_expr<T, T> temp(expr.val); Op::eval(p, temp.__get_mp()); }
- void eval(typename __gmp_resolve_expr<T>::ptr_type p,
- mp_bitcnt_t prec) const
- { __gmp_expr<T, T> temp(expr.val, prec); Op::eval(p, temp.__get_mp()); }
+ { expr.val.eval(p); Op::eval(p, p); }
const val_type & get_val() const { return expr.val; }
- unsigned long int get_prec() const { return expr.val.get_prec(); }
+ mp_bitcnt_t get_prec() const { return expr.val.get_prec(); }
};
public:
__gmp_expr(const val1_type &val1, const val2_type &val2)
: expr(val1, val2) { }
- void eval(typename __gmp_resolve_expr<T>::ptr_type p,
- unsigned long int = 0) const
+ void eval(typename __gmp_resolve_expr<T>::ptr_type p) const
{ Op::eval(p, expr.val1.__get_mp(), expr.val2.__get_mp()); }
const val1_type & get_val1() const { return expr.val1; }
const val2_type & get_val2() const { return expr.val2; }
- unsigned long int get_prec() const
+ mp_bitcnt_t get_prec() const
{
mp_bitcnt_t prec1 = expr.val1.get_prec(),
prec2 = expr.val2.get_prec();
public:
__gmp_expr(const val1_type &val1, const val2_type &val2)
: expr(val1, val2) { }
- void eval(typename __gmp_resolve_expr<T>::ptr_type p,
- unsigned long int = 0) const
+ void eval(typename __gmp_resolve_expr<T>::ptr_type p) const
{ Op::eval(p, expr.val1.__get_mp(), expr.val2); }
const val1_type & get_val1() const { return expr.val1; }
const val2_type & get_val2() const { return expr.val2; }
- unsigned long int get_prec() const { return expr.val1.get_prec(); }
+ mp_bitcnt_t get_prec() const { return expr.val1.get_prec(); }
};
template <class T, class U, class Op>
public:
__gmp_expr(const val1_type &val1, const val2_type &val2)
: expr(val1, val2) { }
- void eval(typename __gmp_resolve_expr<T>::ptr_type p,
- unsigned long int = 0) const
+ void eval(typename __gmp_resolve_expr<T>::ptr_type p) const
{ Op::eval(p, expr.val1, expr.val2.__get_mp()); }
const val1_type & get_val1() const { return expr.val1; }
const val2_type & get_val2() const { return expr.val2; }
- unsigned long int get_prec() const { return expr.val2.get_prec(); }
+ mp_bitcnt_t get_prec() const { return expr.val2.get_prec(); }
};
: expr(val1, val2) { }
void eval(typename __gmp_resolve_expr<T>::ptr_type p) const
{
- __gmp_expr<T, T> temp(expr.val2);
- Op::eval(p, expr.val1.__get_mp(), temp.__get_mp());
- }
- void eval(typename __gmp_resolve_expr<T>::ptr_type p,
- mp_bitcnt_t prec) const
- {
- __gmp_expr<T, T> temp(expr.val2, prec);
- Op::eval(p, expr.val1.__get_mp(), temp.__get_mp());
+ if(p != expr.val1.__get_mp())
+ {
+ __gmp_set_expr(p, expr.val2);
+ Op::eval(p, expr.val1.__get_mp(), p);
+ }
+ else
+ {
+ __gmp_temp<T> temp(expr.val2, p);
+ Op::eval(p, expr.val1.__get_mp(), temp.__get_mp());
+ }
}
const val1_type & get_val1() const { return expr.val1; }
const val2_type & get_val2() const { return expr.val2; }
- unsigned long int get_prec() const
+ mp_bitcnt_t get_prec() const
{
mp_bitcnt_t prec1 = expr.val1.get_prec(),
prec2 = expr.val2.get_prec();
: expr(val1, val2) { }
void eval(typename __gmp_resolve_expr<T>::ptr_type p) const
{
- __gmp_expr<T, T> temp(expr.val1);
- Op::eval(p, temp.__get_mp(), expr.val2.__get_mp());
- }
- void eval(typename __gmp_resolve_expr<T>::ptr_type p,
- mp_bitcnt_t prec) const
- {
- __gmp_expr<T, T> temp(expr.val1, prec);
- Op::eval(p, temp.__get_mp(), expr.val2.__get_mp());
+ if(p != expr.val2.__get_mp())
+ {
+ __gmp_set_expr(p, expr.val1);
+ Op::eval(p, p, expr.val2.__get_mp());
+ }
+ else
+ {
+ __gmp_temp<T> temp(expr.val1, p);
+ Op::eval(p, temp.__get_mp(), expr.val2.__get_mp());
+ }
}
const val1_type & get_val1() const { return expr.val1; }
const val2_type & get_val2() const { return expr.val2; }
- unsigned long int get_prec() const
+ mp_bitcnt_t get_prec() const
{
mp_bitcnt_t prec1 = expr.val1.get_prec(),
prec2 = expr.val2.get_prec();
: expr(val1, val2) { }
void eval(typename __gmp_resolve_expr<T>::ptr_type p) const
{
- __gmp_expr<T, T> temp(expr.val2);
- Op::eval(p, expr.val1.__get_mp(), temp.__get_mp());
- }
- void eval(typename __gmp_resolve_expr<T>::ptr_type p,
- mp_bitcnt_t prec) const
- {
- __gmp_expr<T, T> temp(expr.val2, prec);
- Op::eval(p, expr.val1.__get_mp(), temp.__get_mp());
+ if(p != expr.val1.__get_mp())
+ {
+ __gmp_set_expr(p, expr.val2);
+ Op::eval(p, expr.val1.__get_mp(), p);
+ }
+ else
+ {
+ __gmp_temp<T> temp(expr.val2, p);
+ Op::eval(p, expr.val1.__get_mp(), temp.__get_mp());
+ }
}
const val1_type & get_val1() const { return expr.val1; }
const val2_type & get_val2() const { return expr.val2; }
- unsigned long int get_prec() const
+ mp_bitcnt_t get_prec() const
{
mp_bitcnt_t prec1 = expr.val1.get_prec(),
prec2 = expr.val2.get_prec();
: expr(val1, val2) { }
void eval(typename __gmp_resolve_expr<T>::ptr_type p) const
{
- __gmp_expr<T, T> temp(expr.val1);
- Op::eval(p, temp.__get_mp(), expr.val2.__get_mp());
- }
- void eval(typename __gmp_resolve_expr<T>::ptr_type p,
- mp_bitcnt_t prec) const
- {
- __gmp_expr<T, T> temp(expr.val1, prec);
- Op::eval(p, temp.__get_mp(), expr.val2.__get_mp());
+ if(p != expr.val2.__get_mp())
+ {
+ __gmp_set_expr(p, expr.val1);
+ Op::eval(p, p, expr.val2.__get_mp());
+ }
+ else
+ {
+ __gmp_temp<T> temp(expr.val1, p);
+ Op::eval(p, temp.__get_mp(), expr.val2.__get_mp());
+ }
}
const val1_type & get_val1() const { return expr.val1; }
const val2_type & get_val2() const { return expr.val2; }
- unsigned long int get_prec() const
+ mp_bitcnt_t get_prec() const
{
mp_bitcnt_t prec1 = expr.val1.get_prec(),
prec2 = expr.val2.get_prec();
: expr(val1, val2) { }
void eval(typename __gmp_resolve_expr<T>::ptr_type p) const
{
- __gmp_expr<T, T> temp(expr.val1);
- Op::eval(p, temp.__get_mp(), expr.val2);
- }
- void eval(typename __gmp_resolve_expr<T>::ptr_type p,
- mp_bitcnt_t prec) const
- {
- __gmp_expr<T, T> temp(expr.val1, prec);
- Op::eval(p, temp.__get_mp(), expr.val2);
+ expr.val1.eval(p);
+ Op::eval(p, p, expr.val2);
}
const val1_type & get_val1() const { return expr.val1; }
const val2_type & get_val2() const { return expr.val2; }
- unsigned long int get_prec() const { return expr.val1.get_prec(); }
+ mp_bitcnt_t get_prec() const { return expr.val1.get_prec(); }
};
template <class T, class U, class V, class Op>
: expr(val1, val2) { }
void eval(typename __gmp_resolve_expr<T>::ptr_type p) const
{
- __gmp_expr<T, T> temp(expr.val2);
- Op::eval(p, expr.val1, temp.__get_mp());
- }
- void eval(typename __gmp_resolve_expr<T>::ptr_type p,
- mp_bitcnt_t prec) const
- {
- __gmp_expr<T, T> temp(expr.val2, prec);
- Op::eval(p, expr.val1, temp.__get_mp());
+ expr.val2.eval(p);
+ Op::eval(p, expr.val1, p);
}
const val1_type & get_val1() const { return expr.val1; }
const val2_type & get_val2() const { return expr.val2; }
- unsigned long int get_prec() const { return expr.val2.get_prec(); }
+ mp_bitcnt_t get_prec() const { return expr.val2.get_prec(); }
};
: expr(val1, val2) { }
void eval(typename __gmp_resolve_expr<T>::ptr_type p) const
{
- __gmp_expr<T, T> temp1(expr.val1), temp2(expr.val2);
- Op::eval(p, temp1.__get_mp(), temp2.__get_mp());
- }
- void eval(typename __gmp_resolve_expr<T>::ptr_type p,
- mp_bitcnt_t prec) const
- {
- __gmp_expr<T, T> temp1(expr.val1, prec), temp2(expr.val2, prec);
- Op::eval(p, temp1.__get_mp(), temp2.__get_mp());
+ __gmp_temp<T> temp2(expr.val2, p);
+ expr.val1.eval(p);
+ Op::eval(p, p, temp2.__get_mp());
}
const val1_type & get_val1() const { return expr.val1; }
const val2_type & get_val2() const { return expr.val2; }
- unsigned long int get_prec() const
+ mp_bitcnt_t get_prec() const
{
mp_bitcnt_t prec1 = expr.val1.get_prec(),
prec2 = expr.val2.get_prec();
: expr(val1, val2) { }
void eval(typename __gmp_resolve_expr<T>::ptr_type p) const
{
- __gmp_expr<T, T> temp1(expr.val1), temp2(expr.val2);
- Op::eval(p, temp1.__get_mp(), temp2.__get_mp());
- }
- void eval(typename __gmp_resolve_expr<T>::ptr_type p,
- mp_bitcnt_t prec) const
- {
- __gmp_expr<T, T> temp1(expr.val1, prec), temp2(expr.val2, prec);
- Op::eval(p, temp1.__get_mp(), temp2.__get_mp());
+ __gmp_temp<T> temp1(expr.val1, p);
+ expr.val2.eval(p);
+ Op::eval(p, temp1.__get_mp(), p);
}
const val1_type & get_val1() const { return expr.val1; }
const val2_type & get_val2() const { return expr.val2; }
- unsigned long int get_prec() const
+ mp_bitcnt_t get_prec() const
{
mp_bitcnt_t prec1 = expr.val1.get_prec(),
prec2 = expr.val2.get_prec();
: expr(val1, val2) { }
void eval(typename __gmp_resolve_expr<T>::ptr_type p) const
{
- __gmp_expr<T, T> temp1(expr.val1), temp2(expr.val2);
- Op::eval(p, temp1.__get_mp(), temp2.__get_mp());
- }
- void eval(typename __gmp_resolve_expr<T>::ptr_type p,
- mp_bitcnt_t prec) const
- {
- __gmp_expr<T, T> temp1(expr.val1, prec), temp2(expr.val2, prec);
- Op::eval(p, temp1.__get_mp(), temp2.__get_mp());
+ __gmp_temp<T> temp2(expr.val2, p);
+ expr.val1.eval(p);
+ Op::eval(p, p, temp2.__get_mp());
}
const val1_type & get_val1() const { return expr.val1; }
const val2_type & get_val2() const { return expr.val2; }
- unsigned long int get_prec() const
+ mp_bitcnt_t get_prec() const
{
mp_bitcnt_t prec1 = expr.val1.get_prec(),
prec2 = expr.val2.get_prec();
{ eval_fun::eval(q, expr.val1.get_mpz_t(), expr.val2.get_mpq_t()); } \
const val1_type & get_val1() const { return expr.val1; } \
const val2_type & get_val2() const { return expr.val2; } \
- unsigned long int get_prec() const { return mpf_get_default_prec(); } \
+ mp_bitcnt_t get_prec() const { return mpf_get_default_prec(); } \
}; \
\
template <> \
{ eval_fun::eval(q, expr.val1.get_mpq_t(), expr.val2.get_mpz_t()); } \
const val1_type & get_val1() const { return expr.val1; } \
const val2_type & get_val2() const { return expr.val2; } \
- unsigned long int get_prec() const { return mpf_get_default_prec(); } \
+ mp_bitcnt_t get_prec() const { return mpf_get_default_prec(); } \
}; \
\
template <class T> \
} \
const val1_type & get_val1() const { return expr.val1; } \
const val2_type & get_val2() const { return expr.val2; } \
- unsigned long int get_prec() const { return mpf_get_default_prec(); } \
+ mp_bitcnt_t get_prec() const { return mpf_get_default_prec(); } \
}; \
\
template <class T> \
} \
const val1_type & get_val1() const { return expr.val1; } \
const val2_type & get_val2() const { return expr.val2; } \
- unsigned long int get_prec() const { return mpf_get_default_prec(); } \
+ mp_bitcnt_t get_prec() const { return mpf_get_default_prec(); } \
}; \
\
template <class T> \
} \
const val1_type & get_val1() const { return expr.val1; } \
const val2_type & get_val2() const { return expr.val2; } \
- unsigned long int get_prec() const { return mpf_get_default_prec(); } \
+ mp_bitcnt_t get_prec() const { return mpf_get_default_prec(); } \
}; \
\
template <class T> \
} \
const val1_type & get_val1() const { return expr.val1; } \
const val2_type & get_val2() const { return expr.val2; } \
- unsigned long int get_prec() const { return mpf_get_default_prec(); } \
+ mp_bitcnt_t get_prec() const { return mpf_get_default_prec(); } \
}; \
\
template <class T, class U> \
void eval(mpq_ptr q) const \
{ \
mpz_class temp1(expr.val1); \
- mpq_class temp2(expr.val2); \
- eval_fun::eval(q, temp1.get_mpz_t(), temp2.get_mpq_t()); \
+ expr.val2.eval(q); \
+ eval_fun::eval(q, temp1.get_mpz_t(), q); \
} \
const val1_type & get_val1() const { return expr.val1; } \
const val2_type & get_val2() const { return expr.val2; } \
- unsigned long int get_prec() const { return mpf_get_default_prec(); } \
+ mp_bitcnt_t get_prec() const { return mpf_get_default_prec(); } \
}; \
\
template <class T, class U> \
: expr(val1, val2) { } \
void eval(mpq_ptr q) const \
{ \
- mpq_class temp1(expr.val1); \
mpz_class temp2(expr.val2); \
- eval_fun::eval(q, temp1.get_mpq_t(), temp2.get_mpz_t()); \
+ expr.val1.eval(q); \
+ eval_fun::eval(q, q, temp2.get_mpz_t()); \
} \
const val1_type & get_val1() const { return expr.val1; } \
const val2_type & get_val2() const { return expr.val2; } \
- unsigned long int get_prec() const { return mpf_get_default_prec(); } \
+ mp_bitcnt_t get_prec() const { return mpf_get_default_prec(); } \
};
template <class T, class U> \
inline type fun(const __gmp_expr<T, U> &expr) \
{ \
- typename __gmp_resolve_temp<T, T, U>::temp_type temp(expr); \
+ __gmp_expr<T, T> const& temp(expr); \
return eval_fun::eval(temp.__get_mp()); \
}
__GMPNU_DEFINE_BINARY_FUNCTION(fun, eval_fun, unsigned long int) \
__GMPND_DEFINE_BINARY_FUNCTION(fun, eval_fun, float) \
__GMPND_DEFINE_BINARY_FUNCTION(fun, eval_fun, double) \
-__GMPNLD_DEFINE_BINARY_FUNCTION(fun, eval_fun, long double)
+/* __GMPNLD_DEFINE_BINARY_FUNCTION(fun, eval_fun, long double) */
#define __GMP_DEFINE_BINARY_FUNCTION(fun, eval_fun) \
__GMPP_DEFINE_BINARY_FUNCTION(fun, eval_fun) \
\
template <class T, class U> \
inline __gmp_expr \
-<T, __gmp_binary_expr<__gmp_expr<T, U>, unsigned long int, eval_fun> > \
-fun(const __gmp_expr<T, U> &expr, unsigned long int l) \
+<T, __gmp_binary_expr<__gmp_expr<T, U>, mp_bitcnt_t, eval_fun> > \
+fun(const __gmp_expr<T, U> &expr, mp_bitcnt_t l) \
{ \
return __gmp_expr<T, __gmp_binary_expr \
- <__gmp_expr<T, U>, unsigned long int, eval_fun> >(expr, l); \
+ <__gmp_expr<T, U>, mp_bitcnt_t, eval_fun> >(expr, l); \
}
const __gmp_expr<V, W> &expr2) \
{ \
typedef typename __gmp_resolve_expr<T, V>::value_type eval_type; \
- typename __gmp_resolve_temp<eval_type, T, U>::temp_type temp1(expr1); \
- typename __gmp_resolve_temp<eval_type, V, W>::temp_type temp2(expr2); \
+ __gmp_expr<eval_type, eval_type> const& temp1(expr1); \
+ __gmp_expr<eval_type, eval_type> const& temp2(expr2); \
return eval_fun::eval(temp1.__get_mp(), temp2.__get_mp()); \
}
template <class T, class U> \
inline type fun(const __gmp_expr<T, U> &expr, type2 t) \
{ \
- typename __gmp_resolve_temp<T, T, U>::temp_type temp(expr); \
+ __gmp_expr<T, T> const& temp(expr); \
return eval_fun::eval(temp.__get_mp(), static_cast<bigtype>(t)); \
} \
\
template <class T, class U> \
inline type fun(type2 t, const __gmp_expr<T, U> &expr) \
{ \
- typename __gmp_resolve_temp<T, T, U>::temp_type temp(expr); \
+ __gmp_expr<T, T> const& temp(expr); \
return eval_fun::eval(static_cast<bigtype>(t), temp.__get_mp()); \
}
__GMPNU_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, unsigned long int) \
__GMPND_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, float) \
__GMPND_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, double) \
-__GMPNLD_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, long double)
+/* __GMPNLD_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun, long double) */
#define __GMP_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun) \
__GMPP_DEFINE_BINARY_TYPE_FUNCTION(type, fun, eval_fun) \
#define __GMP_DEFINE_COMPOUND_OPERATOR_UI(type, fun, eval_fun) \
\
-inline type##_class & type##_class::fun(unsigned long int l) \
+inline type##_class & type##_class::fun(mp_bitcnt_t l) \
{ \
__gmp_set_expr(mp, __gmp_expr<type##_t, __gmp_binary_expr \
- <type##_class, unsigned long int, eval_fun> >(*this, l)); \
+ <type##_class, mp_bitcnt_t, eval_fun> >(*this, l)); \
return *this; \
}
__GMP_DEFINE_BINARY_FUNCTION_UI(operator>>, __gmp_binary_rshift)
__GMP_DEFINE_BINARY_TYPE_FUNCTION(bool, operator==, __gmp_binary_equal)
-__GMP_DEFINE_BINARY_TYPE_FUNCTION(bool, operator!=, __gmp_binary_not_equal)
+__GMP_DEFINE_BINARY_TYPE_FUNCTION(bool, operator!=, ! __gmp_binary_equal)
__GMP_DEFINE_BINARY_TYPE_FUNCTION(bool, operator<, __gmp_binary_less)
-__GMP_DEFINE_BINARY_TYPE_FUNCTION(bool, operator<=, __gmp_binary_less_equal)
+__GMP_DEFINE_BINARY_TYPE_FUNCTION(bool, operator<=, ! __gmp_binary_greater)
__GMP_DEFINE_BINARY_TYPE_FUNCTION(bool, operator>, __gmp_binary_greater)
-__GMP_DEFINE_BINARY_TYPE_FUNCTION(bool, operator>=, \
- __gmp_binary_greater_equal)
+__GMP_DEFINE_BINARY_TYPE_FUNCTION(bool, operator>=, ! __gmp_binary_less)
__GMP_DEFINE_UNARY_FUNCTION(abs, __gmp_abs_function)
__GMP_DEFINE_UNARY_FUNCTION(trunc, __gmp_trunc_function)
__GMP_DEFINE_UNARY_TYPE_FUNCTION(int, sgn, __gmp_sgn_function)
__GMP_DEFINE_BINARY_TYPE_FUNCTION(int, cmp, __gmp_cmp_function)
+template <class T>
+void swap(__gmp_expr<T, T>& x, __gmp_expr<T, T>& y) __GMPXX_NOEXCEPT
+{ x.swap(y); }
+
// member operators for mpz_class
__GMPZ_DEFINE_COMPOUND_OPERATOR(operator+=, __gmp_binary_plus)
{
private:
__gmp_randstate_struct *state;
- unsigned long int bits;
+ mp_bitcnt_t bits;
public:
- __gmp_expr(gmp_randstate_t s, unsigned long int l) : state(s), bits(l) { }
+ __gmp_expr(gmp_randstate_t s, mp_bitcnt_t l) : state(s), bits(l) { }
void eval(mpz_ptr z) const { __gmp_rand_function::eval(z, state, bits); }
- unsigned long int get_prec() const { return mpf_get_default_prec(); }
+ mp_bitcnt_t get_prec() const { return mpf_get_default_prec(); }
};
template <>
__gmp_expr(gmp_randstate_t s, const mpz_class &z) : state(s), range(z) { }
void eval(mpz_ptr z) const
{ __gmp_rand_function::eval(z, state, range.get_mpz_t()); }
- unsigned long int get_prec() const { return mpf_get_default_prec(); }
+ mp_bitcnt_t get_prec() const { return mpf_get_default_prec(); }
};
template <>
{
private:
__gmp_randstate_struct *state;
- unsigned long int bits;
+ mp_bitcnt_t bits;
public:
- __gmp_expr(gmp_randstate_t s, unsigned long int l) : state(s), bits(l) { }
- void eval(mpf_ptr f, mp_bitcnt_t prec) const
- { __gmp_rand_function::eval(f, state, (bits>0) ? get_prec() : prec); }
- unsigned long int get_prec() const
+ __gmp_expr(gmp_randstate_t s, mp_bitcnt_t l) : state(s), bits(l) { }
+ void eval(mpf_ptr f) const
+ {
+ __gmp_rand_function::eval(f, state,
+ (bits>0) ? bits : mpf_get_prec(f));
+ }
+ mp_bitcnt_t get_prec() const
{
if (bits == 0)
return mpf_get_default_prec();
extern "C" {
typedef void __gmp_randinit_default_t (gmp_randstate_t);
- typedef void __gmp_randinit_lc_2exp_t (gmp_randstate_t, mpz_srcptr, unsigned long int, unsigned long int);
- typedef int __gmp_randinit_lc_2exp_size_t (gmp_randstate_t, unsigned long int);
+ typedef void __gmp_randinit_lc_2exp_t (gmp_randstate_t, mpz_srcptr, unsigned long int, mp_bitcnt_t);
+ typedef int __gmp_randinit_lc_2exp_size_t (gmp_randstate_t, mp_bitcnt_t);
}
class gmp_randclass
// gmp_randinit_lc_2exp
gmp_randclass(__gmp_randinit_lc_2exp_t* f,
- mpz_class z, unsigned long int l1, unsigned long int l2)
+ mpz_class z, unsigned long int l1, mp_bitcnt_t l2)
{ f(state, z.get_mpz_t(), l1, l2); }
// gmp_randinit_lc_2exp_size
gmp_randclass(__gmp_randinit_lc_2exp_size_t* f,
- unsigned long int size)
+ mp_bitcnt_t size)
{
if (f (state, size) == 0)
throw std::length_error ("gmp_randinit_lc_2exp_size");
void seed(const mpz_class &z) { gmp_randseed(state, z.get_mpz_t()); }
// get random number
- __gmp_expr<mpz_t, __gmp_urandomb_value> get_z_bits(unsigned long int l)
+ __gmp_expr<mpz_t, __gmp_urandomb_value> get_z_bits(mp_bitcnt_t l)
{ return __gmp_expr<mpz_t, __gmp_urandomb_value>(state, l); }
__gmp_expr<mpz_t, __gmp_urandomb_value> get_z_bits(const mpz_class &z)
{ return get_z_bits(z.get_ui()); }
+ // FIXME: z.get_bitcnt_t() ?
__gmp_expr<mpz_t, __gmp_urandomm_value> get_z_range(const mpz_class &z)
{ return __gmp_expr<mpz_t, __gmp_urandomm_value>(state, z); }
};
+/**************** Specialize std::numeric_limits ****************/
+
+namespace std {
+ template <> class numeric_limits<mpz_class>
+ {
+ public:
+ static const bool is_specialized = true;
+ static mpz_class min() { return mpz_class(); }
+ static mpz_class max() { return mpz_class(); }
+ static mpz_class lowest() { return mpz_class(); }
+ static const int digits = 0;
+ static const int digits10 = 0;
+ static const int max_digits10 = 0;
+ static const bool is_signed = true;
+ static const bool is_integer = true;
+ static const bool is_exact = true;
+ static const int radix = 2;
+ static mpz_class epsilon() { return mpz_class(); }
+ static mpz_class round_error() { return mpz_class(); }
+ static const int min_exponent = 0;
+ static const int min_exponent10 = 0;
+ static const int max_exponent = 0;
+ static const int max_exponent10 = 0;
+ static const bool has_infinity = false;
+ static const bool has_quiet_NaN = false;
+ static const bool has_signaling_NaN = false;
+ static const float_denorm_style has_denorm = denorm_absent;
+ static const bool has_denorm_loss = false;
+ static mpz_class infinity() { return mpz_class(); }
+ static mpz_class quiet_NaN() { return mpz_class(); }
+ static mpz_class signaling_NaN() { return mpz_class(); }
+ static mpz_class denorm_min() { return mpz_class(); }
+ static const bool is_iec559 = false;
+ static const bool is_bounded = false;
+ static const bool is_modulo = false;
+ static const bool traps = false;
+ static const bool tinyness_before = false;
+ static const float_round_style round_style = round_toward_zero;
+ };
+
+ template <> class numeric_limits<mpq_class>
+ {
+ public:
+ static const bool is_specialized = true;
+ static mpq_class min() { return mpq_class(); }
+ static mpq_class max() { return mpq_class(); }
+ static mpq_class lowest() { return mpq_class(); }
+ static const int digits = 0;
+ static const int digits10 = 0;
+ static const int max_digits10 = 0;
+ static const bool is_signed = true;
+ static const bool is_integer = false;
+ static const bool is_exact = true;
+ static const int radix = 2;
+ static mpq_class epsilon() { return mpq_class(); }
+ static mpq_class round_error() { return mpq_class(); }
+ static const int min_exponent = 0;
+ static const int min_exponent10 = 0;
+ static const int max_exponent = 0;
+ static const int max_exponent10 = 0;
+ static const bool has_infinity = false;
+ static const bool has_quiet_NaN = false;
+ static const bool has_signaling_NaN = false;
+ static const float_denorm_style has_denorm = denorm_absent;
+ static const bool has_denorm_loss = false;
+ static mpq_class infinity() { return mpq_class(); }
+ static mpq_class quiet_NaN() { return mpq_class(); }
+ static mpq_class signaling_NaN() { return mpq_class(); }
+ static mpq_class denorm_min() { return mpq_class(); }
+ static const bool is_iec559 = false;
+ static const bool is_bounded = false;
+ static const bool is_modulo = false;
+ static const bool traps = false;
+ static const bool tinyness_before = false;
+ static const float_round_style round_style = round_toward_zero;
+ };
+
+ template <> class numeric_limits<mpf_class>
+ {
+ public:
+ static const bool is_specialized = true;
+ static mpf_class min() { return mpf_class(); }
+ static mpf_class max() { return mpf_class(); }
+ static mpf_class lowest() { return mpf_class(); }
+ static const int digits = 0;
+ static const int digits10 = 0;
+ static const int max_digits10 = 0;
+ static const bool is_signed = true;
+ static const bool is_integer = false;
+ static const bool is_exact = false;
+ static const int radix = 2;
+ static mpf_class epsilon() { return mpf_class(); }
+ static mpf_class round_error() { return mpf_class(); }
+ static const int min_exponent = 0;
+ static const int min_exponent10 = 0;
+ static const int max_exponent = 0;
+ static const int max_exponent10 = 0;
+ static const bool has_infinity = false;
+ static const bool has_quiet_NaN = false;
+ static const bool has_signaling_NaN = false;
+ static const float_denorm_style has_denorm = denorm_absent;
+ static const bool has_denorm_loss = false;
+ static mpf_class infinity() { return mpf_class(); }
+ static mpf_class quiet_NaN() { return mpf_class(); }
+ static mpf_class signaling_NaN() { return mpf_class(); }
+ static mpf_class denorm_min() { return mpf_class(); }
+ static const bool is_iec559 = false;
+ static const bool is_bounded = false;
+ static const bool is_modulo = false;
+ static const bool traps = false;
+ static const bool tinyness_before = false;
+ static const float_round_style round_style = round_indeterminate;
+ };
+}
+
+
/**************** #undef all private macros ****************/
#undef __GMPP_DECLARE_COMPOUND_OPERATOR
#undef __GMP_DEFINE_BINARY_TYPE_FUNCTION
#undef __GMPZ_DEFINE_COMPOUND_OPERATOR
-#undef __GMPZN_DEFINE_COMPOUND_OPERATOR
-#undef __GMPZNN_DEFINE_COMPOUND_OPERATOR
-#undef __GMPZNS_DEFINE_COMPOUND_OPERATOR
-#undef __GMPZNU_DEFINE_COMPOUND_OPERATOR
-#undef __GMPZND_DEFINE_COMPOUND_OPERATOR
-#undef __GMPZNLD_DEFINE_COMPOUND_OPERATOR
#undef __GMPP_DEFINE_COMPOUND_OPERATOR
#undef __GMPNN_DEFINE_COMPOUND_OPERATOR
#undef __GMPQ_DEFINE_INCREMENT_OPERATOR
#undef __GMPF_DEFINE_INCREMENT_OPERATOR
+#undef __GMPXX_CONSTANT
+
#endif /* __GMP_PLUSPLUS__ */
+++ /dev/null
-itom
-xtom
-move
-madd
-msub
-mult
-mdiv
-sdiv
-msqrt
-pow
-rpow
-gcd
-mcmp
-min
-mout
-mtox
-mfree
-__gmp_set_memory_functions
/* longlong.h -- definitions for mixed size 32/64 bit arithmetic.
Copyright 1991, 1992, 1993, 1994, 1996, 1997, 1999, 2000, 2001, 2002, 2003,
-2004, 2005, 2007, 2008, 2009, 2011 Free Software Foundation, Inc.
+2004, 2005, 2007, 2008, 2009, 2011, 2012 Free Software Foundation, Inc.
This file is free software; you can redistribute it and/or modify it under the
terms of the GNU Lesser General Public License as published by the Free
#define __MPN(x) __##x
#endif
-#ifndef _PROTO
-#if (__STDC__-0) || defined (__cplusplus)
-#define _PROTO(x) x
-#else
-#define _PROTO(x) ()
-#endif
-#endif
-
/* Define auxiliary asm macros.
1) umul_ppmm(high_prod, low_prod, multiplier, multiplicand) multiplies two
#if ! defined (count_leading_zeros) && ! defined (LONGLONG_STANDALONE)
#if HAVE_ATTRIBUTE_CONST
-long __MPN(count_leading_zeros) _PROTO ((UDItype)) __attribute__ ((const));
+long __MPN(count_leading_zeros) (UDItype) __attribute__ ((const));
#else
-long __MPN(count_leading_zeros) _PROTO ((UDItype));
+long __MPN(count_leading_zeros) (UDItype);
#endif
#define count_leading_zeros(count, x) \
((count) = __MPN(count_leading_zeros) (x))
#endif /* clz using mpn */
#endif /* __alpha */
+#if defined (__AVR) && W_TYPE_SIZE == 8
+#define umul_ppmm(ph, pl, m0, m1) \
+ do { \
+ unsigned short __p = (unsigned short) (m0) * (m1); \
+ (ph) = __p >> 8; \
+ (pl) = __p; \
+ } while (0)
+#endif /* AVR */
+
#if defined (_CRAY) && W_TYPE_SIZE == 64
#include <intrinsics.h>
#define UDIV_PREINV_ALWAYS 1
#define UDIV_NEEDS_NORMALIZATION 1
#define UDIV_TIME 220
-long __MPN(count_leading_zeros) _PROTO ((UDItype));
+long __MPN(count_leading_zeros) (UDItype);
#define count_leading_zeros(count, x) \
((count) = _leadz ((UWtype) (x)))
#if defined (_CRAYIEEE) /* I.e., Cray T90/ieee, T3D, and T3E */
"rIJ" ((USItype) (bl)))
#endif
-#if defined (__arm__) && W_TYPE_SIZE == 32
+#if defined (__arm__) && !defined (__thumb__) && W_TYPE_SIZE == 32
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
__asm__ ("adds\t%1, %4, %5\n\tadc\t%0, %2, %3" \
: "=r" (sh), "=&r" (sl) \
(q) = __MPN(udiv_qrnnd) (&__r, (n1), (n0), (d)); \
(r) = __r; \
} while (0)
-extern UWtype __MPN(udiv_qrnnd) _PROTO ((UWtype *, UWtype, UWtype, UWtype));
+extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype);
#define UDIV_TIME 200
#endif /* LONGLONG_STANDALONE */
#endif
-#if defined (__ARM_ARCH_5__)
-/* This actually requires arm 5 */
+/* This is a bizarre test, but GCC doesn't define a useful common symbol. */
+#if defined (__ARM_ARCH_5__) || defined (__ARM_ARCH_5T__) || \
+ defined (__ARM_ARCH_5E__) || defined (__ARM_ARCH_5TE__)|| \
+ defined (__ARM_ARCH_6__) || defined (__ARM_ARCH_6J__) || \
+ defined (__ARM_ARCH_6K__) || defined (__ARM_ARCH_6Z__) || \
+ defined (__ARM_ARCH_6ZK__)|| defined (__ARM_ARCH_6T2__)|| \
+ defined (__ARM_ARCH_6M__) || defined (__ARM_ARCH_7__) || \
+ defined (__ARM_ARCH_7A__) || defined (__ARM_ARCH_7R__) || \
+ defined (__ARM_ARCH_7M__) || defined (__ARM_ARCH_7EM__)
#define count_leading_zeros(count, x) \
__asm__ ("clz\t%0, %1" : "=r" (count) : "r" (x))
#define COUNT_LEADING_ZEROS_0 32
#endif
#endif /* __arm__ */
+#if defined (__aarch64__) && W_TYPE_SIZE == 64
+/* FIXME: Extend the immediate range for the low word by using both
+ ADDS and SUBS, since they set carry in the same way. */
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+ __asm__ ("adds\t%1, %x4, %5\n\tadc\t%0, %x2, %x3" \
+ : "=r" (sh), "=&r" (sl) \
+ : "rZ" (ah), "rZ" (bh), "%r" (al), "rI" (bl) __CLOBBER_CC)
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+ __asm__ ("subs\t%1, %x4, %5\n\tsbc\t%0, %x2, %x3" \
+ : "=r,r" (sh), "=&r,&r" (sl) \
+ : "rZ,rZ" (ah), "rZ,rZ" (bh), "r,Z" (al), "rI,r" (bl) __CLOBBER_CC)
+/* umul_ppmm (ph, pl, m0, m1): multiply the 64-bit words M0 and M1, storing
+   the high word of the product in PH (via UMULH) and the low word in PL.
+   Both operands are copied into locals first and only those copies are
+   used afterwards, so each macro argument is evaluated exactly once. */
+#define umul_ppmm(ph, pl, m0, m1) \
+  do { \
+    UDItype __m0 = (m0), __m1 = (m1); \
+    __asm__ ("umulh\t%0, %1, %2" : "=r" (ph) : "r" (__m0), "r" (__m1)); \
+    (pl) = __m0 * __m1; \
+  } while (0)
+#define count_leading_zeros(count, x) \
+ __asm__ ("clz\t%0, %1" : "=r" (count) : "r" (x))
+#define COUNT_LEADING_ZEROS_0 64
+#endif /* __aarch64__ */
+
#if defined (__clipper__) && W_TYPE_SIZE == 32
#define umul_ppmm(w1, w0, u, v) \
({union {UDItype __ll; \
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
do { \
if (__builtin_constant_p (bh) && (bh) == 0) \
- __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2" \
+ __asm__ ("add%I4c %1,%3,%4\n\taddze %0,%2" \
: "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0) \
- __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2" \
+ __asm__ ("add%I4c %1,%3,%4\n\taddme %0,%2" \
: "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
else \
- __asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3" \
+ __asm__ ("add%I5c %1,%4,%5\n\tadde %0,%2,%3" \
: "=r" (sh), "=&r" (sl) \
: "r" (ah), "r" (bh), "%r" (al), "rI" (bl)); \
} while (0)
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
do { \
if (__builtin_constant_p (ah) && (ah) == 0) \
- __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2" \
+ __asm__ ("subf%I3c %1,%4,%3\n\tsubfze %0,%2" \
: "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
else if (__builtin_constant_p (ah) && (ah) == ~(USItype) 0) \
- __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2" \
+ __asm__ ("subf%I3c %1,%4,%3\n\tsubfme %0,%2" \
: "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
else if (__builtin_constant_p (bh) && (bh) == 0) \
- __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2" \
+ __asm__ ("subf%I3c %1,%4,%3\n\taddme %0,%2" \
: "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0) \
- __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2" \
+ __asm__ ("subf%I3c %1,%4,%3\n\taddze %0,%2" \
: "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
else \
- __asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2" \
+ __asm__ ("subf%I4c %1,%5,%4\n\tsubfe %0,%3,%2" \
: "=r" (sh), "=&r" (sl) \
: "r" (ah), "r" (bh), "rI" (al), "r" (bl)); \
} while (0)
#define count_leading_zeros(count, x) \
- __asm__ ("{cntlz|cntlzw} %0,%1" : "=r" (count) : "r" (x))
+ __asm__ ("cntlzw %0,%1" : "=r" (count) : "r" (x))
#define COUNT_LEADING_ZEROS_0 32
#if HAVE_HOST_CPU_FAMILY_powerpc
#if __GMP_GNUC_PREREQ (4,4)
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
do { \
if (__builtin_constant_p (bh) && (bh) == 0) \
- __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2" \
+ __asm__ ("add%I4c %1,%3,%4\n\taddze %0,%2" \
: "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0) \
- __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2" \
+ __asm__ ("add%I4c %1,%3,%4\n\taddme %0,%2" \
: "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
else \
- __asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3" \
+ __asm__ ("add%I5c %1,%4,%5\n\tadde %0,%2,%3" \
: "=r" (sh), "=&r" (sl) \
: "r" (ah), "r" (bh), "%r" (al), "rI" (bl)); \
} while (0)
do { \
if (__builtin_constant_p (bl) && bl > -0x8000 && bl <= 0x8000) { \
if (__builtin_constant_p (ah) && (ah) == 0) \
- __asm__ ("{ai|addic} %1,%3,%4\n\t{sfze|subfze} %0,%2" \
+ __asm__ ("addic %1,%3,%4\n\tsubfze %0,%2" \
: "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "*rI" (-bl)); \
else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0) \
- __asm__ ("{ai|addic} %1,%3,%4\n\t{sfme|subfme} %0,%2" \
+ __asm__ ("addic %1,%3,%4\n\tsubfme %0,%2" \
: "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "*rI" (-bl)); \
else if (__builtin_constant_p (bh) && (bh) == 0) \
- __asm__ ("{ai|addic} %1,%3,%4\n\t{ame|addme} %0,%2" \
+ __asm__ ("addic %1,%3,%4\n\taddme %0,%2" \
: "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "*rI" (-bl)); \
else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0) \
- __asm__ ("{ai|addic} %1,%3,%4\n\t{aze|addze} %0,%2" \
+ __asm__ ("addic %1,%3,%4\n\taddze %0,%2" \
: "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "*rI" (-bl)); \
else \
- __asm__ ("{ai|addic} %1,%4,%5\n\t{sfe|subfe} %0,%3,%2" \
+ __asm__ ("addic %1,%4,%5\n\tsubfe %0,%3,%2" \
: "=r" (sh), "=&r" (sl) \
: "r" (ah), "r" (bh), "rI" (al), "*rI" (-bl)); \
} else { \
if (__builtin_constant_p (ah) && (ah) == 0) \
- __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2" \
+ __asm__ ("subf%I3c %1,%4,%3\n\tsubfze %0,%2" \
: "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl)); \
else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0) \
- __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2" \
+ __asm__ ("subf%I3c %1,%4,%3\n\tsubfme %0,%2" \
: "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl)); \
else if (__builtin_constant_p (bh) && (bh) == 0) \
- __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2" \
+ __asm__ ("subf%I3c %1,%4,%3\n\taddme %0,%2" \
: "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl)); \
else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0) \
- __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2" \
+ __asm__ ("subf%I3c %1,%4,%3\n\taddze %0,%2" \
: "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl)); \
else \
- __asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2" \
+ __asm__ ("subf%I4c %1,%5,%4\n\tsubfe %0,%3,%2" \
: "=r" (sh), "=&r" (sl) \
: "r" (ah), "r" (bh), "rI" (al), "r" (bl)); \
} \
} while (0)
#endif /* RT/ROMP */
-#if defined (__sh2__) && W_TYPE_SIZE == 32
+#if (defined (__SH2__) || defined (__SH3__) || defined (__SH4__)) && W_TYPE_SIZE == 32
#define umul_ppmm(w1, w0, u, v) \
__asm__ ("dmulu.l %2,%3\n\tsts macl,%1\n\tsts mach,%0" \
: "=r" (w1), "=r" (w0) : "r" (u), "r" (v) : "macl", "mach")
(q) = __MPN(udiv_qrnnd) (&__r, (n1), (n0), (d)); \
(r) = __r; \
} while (0)
-extern UWtype __MPN(udiv_qrnnd) _PROTO ((UWtype *, UWtype, UWtype, UWtype));
+extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype);
#ifndef UDIV_TIME
#define UDIV_TIME 140
#endif
__CLOBBER_CC)
#endif
-#if defined (__vax__) && W_TYPE_SIZE == 32
+#if (defined (__vax) || defined (__vax__)) && W_TYPE_SIZE == 32
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
__asm__ ("addl2 %5,%1\n\tadwc %3,%0" \
: "=g" (sh), "=&g" (sl) \
: "g" ((USItype) (x))); \
} while (0)
#endif
-#endif /* __vax__ */
+#endif /* vax */
#if defined (__z8000__) && W_TYPE_SIZE == 16
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
#endif /* NO_ASM */
+/* FIXME: "sidi" here is highly doubtful, should sometimes be "diti". */
#if !defined (umul_ppmm) && defined (__umulsidi3)
#define umul_ppmm(ph, pl, m0, m1) \
{ \
hppa. */
#define mpn_umul_ppmm __MPN(umul_ppmm)
-extern UWtype mpn_umul_ppmm _PROTO ((UWtype *, UWtype, UWtype));
+extern UWtype mpn_umul_ppmm (UWtype *, UWtype, UWtype);
#if ! defined (umul_ppmm) && HAVE_NATIVE_mpn_umul_ppmm \
&& ! defined (LONGLONG_STANDALONE)
#endif
#define mpn_umul_ppmm_r __MPN(umul_ppmm_r)
-extern UWtype mpn_umul_ppmm_r _PROTO ((UWtype, UWtype, UWtype *));
+extern UWtype mpn_umul_ppmm_r (UWtype, UWtype, UWtype *);
#if ! defined (umul_ppmm) && HAVE_NATIVE_mpn_umul_ppmm_r \
&& ! defined (LONGLONG_STANDALONE)
#endif
#define mpn_udiv_qrnnd __MPN(udiv_qrnnd)
-extern UWtype mpn_udiv_qrnnd _PROTO ((UWtype *, UWtype, UWtype, UWtype));
+extern UWtype mpn_udiv_qrnnd (UWtype *, UWtype, UWtype, UWtype);
#if ! defined (udiv_qrnnd) && HAVE_NATIVE_mpn_udiv_qrnnd \
&& ! defined (LONGLONG_STANDALONE)
#endif
#define mpn_udiv_qrnnd_r __MPN(udiv_qrnnd_r)
-extern UWtype mpn_udiv_qrnnd_r _PROTO ((UWtype, UWtype, UWtype, UWtype *));
+extern UWtype mpn_udiv_qrnnd_r (UWtype, UWtype, UWtype, UWtype *);
#if ! defined (udiv_qrnnd) && HAVE_NATIVE_mpn_udiv_qrnnd_r \
&& ! defined (LONGLONG_STANDALONE)
/* This version gives a well-defined value for zero. */
#define COUNT_LEADING_ZEROS_0 (W_TYPE_SIZE - 1)
#define COUNT_LEADING_ZEROS_NEED_CLZ_TAB
+#define COUNT_LEADING_ZEROS_SLOW
#endif
/* clz_tab needed by mpn/x86/pentium/mod_1.asm in a fat binary */
#endif
#ifdef COUNT_LEADING_ZEROS_NEED_CLZ_TAB
-extern const unsigned char __GMP_DECLSPEC __clz_tab[128];
+extern const unsigned char __GMP_DECLSPEC __clz_tab[129];
#endif
#if !defined (count_trailing_zeros)
-/* Define count_trailing_zeros using count_leading_zeros. The latter might be
- defined in asm, but if it is not, the C version above is good enough. */
-#define count_trailing_zeros(count, x) \
+#if !defined (COUNT_LEADING_ZEROS_SLOW)
+/* Define count_trailing_zeros using an asm count_leading_zeros. */
+#define count_trailing_zeros(count, x) \
do { \
UWtype __ctz_x = (x); \
UWtype __ctz_c; \
count_leading_zeros (__ctz_c, __ctz_x & -__ctz_x); \
(count) = W_TYPE_SIZE - 1 - __ctz_c; \
} while (0)
+#else
+/* Define count_trailing_zeros in plain C, assuming small counts are common.
+ We use clz_tab without ado, since the C count_leading_zeros above will have
+ pulled it in. */
+#define count_trailing_zeros(count, x) \
+ do { \
+ UWtype __ctz_x = (x); \
+ int __ctz_c; \
+ \
+ if (LIKELY ((__ctz_x & 0xff) != 0)) \
+ (count) = __clz_tab[__ctz_x & -__ctz_x] - 2; \
+ else \
+ { \
+ for (__ctz_c = 8 - 2; __ctz_c < W_TYPE_SIZE - 2; __ctz_c += 8) \
+ { \
+ __ctz_x >>= 8; \
+ if (LIKELY ((__ctz_x & 0xff) != 0)) \
+ break; \
+ } \
+ \
+ (count) = __ctz_c + __clz_tab[__ctz_x & -__ctz_x]; \
+ } \
+ } while (0)
+#endif
#endif
#ifndef UDIV_NEEDS_NORMALIZATION
/* Memory allocation routines.
-Copyright 1991, 1993, 1994, 2000, 2001, 2002 Free Software Foundation, Inc.
+Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2012 Free Software Foundation,
+Inc.
This file is part of the GNU MP Library.
#include "gmp-impl.h"
-void * (*__gmp_allocate_func) __GMP_PROTO ((size_t)) = __gmp_default_allocate;
-void * (*__gmp_reallocate_func) __GMP_PROTO ((void *, size_t, size_t))
- = __gmp_default_reallocate;
-void (*__gmp_free_func) __GMP_PROTO ((void *, size_t)) = __gmp_default_free;
+void * (*__gmp_allocate_func) (size_t) = __gmp_default_allocate;
+void * (*__gmp_reallocate_func) (void *, size_t, size_t) = __gmp_default_reallocate;
+void (*__gmp_free_func) (void *, size_t) = __gmp_default_free;
/* Default allocation functions. In case of failure to allocate/reallocate
--- /dev/null
+Copyright 2011, 2012, 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+
+This is "mini-gmp", a small implementation of a subset of GMP's mpn
+and mpz interfaces.
+
+It is intended for applications which need arithmetic on numbers
+larger than a machine word, but which don't need to handle very large
+numbers very efficiently. Those applications can include a copy of
+mini-gmp to get a GMP-compatible interface with small footprint. One
+can also arrange for optional linking with the real GMP library, using
+mini-gmp as a fallback when for some reason GMP is not available, or
+not desired as a dependency.
+
+The supported GMP subset is declared in mini-gmp.h. The implemented
+functions are fully compatible with the corresponding GMP functions,
+as specified in the GMP manual, with a few exceptions:
+
+ mpz_set_str, mpz_init_set_str, mpz_get_str, mpz_out_str and
+ mpz_sizeinbase support only |base| <= 36;
+ mpz_export and mpz_import support only NAILS = 0.
+
+ The REALLOC_FUNC and FREE_FUNC registered with
+ mp_set_memory_functions do not get the correct size of the
+ allocated block in the corresponding argument. mini-gmp always
+ passes zero for these rarely used arguments.
+
+The implementation is a single file, mini-gmp.c.
+
+The performance target for mini-gmp is to be at most 10 times slower
+than the real GMP library, for numbers of size up to a few hundred
+bits. No asymptotically fast algorithms are included in mini-gmp, so
+it will be many orders of magnitude slower than GMP for very large
+numbers.
+
+You should never "install" mini-gmp. Applications can either just
+#include mini-gmp.c (but then, beware that it defines several macros
+and functions outside of the advertised interface), or compile
+mini-gmp.c as a separate compilation unit and use the declarations in
+mini-gmp.h.
+
+The tests subdirectory contains a testsuite. To use it, you need GMP
+and GNU make. Just run make check in the tests directory. If the
+hard-coded compiler settings are not right, you have to either edit the
+Makefile or pass overriding values on the make command line (e.g.,
+make CC=cc check). Testing is not (yet) as thorough as for the real
+GMP.
+
+The current version was put together by Niels Möller
+<nisse@lysator.liu.se>, with a fair amount of copy-and-paste from the
+GMP sources.
--- /dev/null
+/* mini-gmp, a minimalistic implementation of a GNU GMP subset.
+
+ Contributed to the GNU project by Niels Möller
+
+Copyright 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1999, 2000, 2001,
+2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013
+Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+/* NOTE: All functions in this file which are not declared in
+ mini-gmp.h are internal, and are not intended to be compatible
+ with either GMP or future versions of mini-gmp. */
+
+/* Much of the material is copied from GMP files, including: gmp-impl.h,
+ longlong.h, mpn/generic/add_n.c, mpn/generic/addmul_1.c,
+ mpn/generic/lshift.c, mpn/generic/mul_1.c,
+ mpn/generic/mul_basecase.c, mpn/generic/rshift.c,
+ mpn/generic/sbpi1_div_qr.c, mpn/generic/sub_n.c,
+ mpn/generic/submul_1.c. */
+
+#include <assert.h>
+#include <ctype.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "mini-gmp.h"
+
+\f
+/* Macros: limb-size constants and small arithmetic helpers. Note that
+ GMP_ABS, GMP_MIN and GMP_MAX expand their arguments more than once, so
+ arguments with side effects must be avoided. */
+#define GMP_LIMB_BITS (sizeof(mp_limb_t) * CHAR_BIT) /* width of mp_limb_t in bits */
+
+#define GMP_LIMB_MAX (~ (mp_limb_t) 0) /* limb with every bit set */
+#define GMP_LIMB_HIGHBIT ((mp_limb_t) 1 << (GMP_LIMB_BITS - 1)) /* only the top bit set */
+
+#define GMP_HLIMB_BIT ((mp_limb_t) 1 << (GMP_LIMB_BITS / 2)) /* lowest bit of the upper half limb */
+#define GMP_LLIMB_MASK (GMP_HLIMB_BIT - 1) /* mask selecting the lower half limb */
+
+#define GMP_ULONG_BITS (sizeof(unsigned long) * CHAR_BIT) /* width of unsigned long in bits */
+#define GMP_ULONG_HIGHBIT ((unsigned long) 1 << (GMP_ULONG_BITS - 1)) /* only the top bit set */
+
+#define GMP_ABS(x) ((x) >= 0 ? (x) : -(x)) /* absolute value in the type of x */
+#define GMP_NEG_CAST(T,x) (-((T)((x) + 1) - 1)) /* adds 1 before the cast to T, subtracts it after, then negates in T; presumably so a negative x can be negated without signed overflow -- confirm */
+
+#define GMP_MIN(a, b) ((a) < (b) ? (a) : (b)) /* smaller of a and b */
+#define GMP_MAX(a, b) ((a) > (b) ? (a) : (b)) /* larger of a and b */
+
+/* gmp_assert_nocarry (x): evaluate X exactly once and assert that the
+   resulting carry/borrow limb is zero. The evaluation happens in the
+   initializer, outside assert, so X is still evaluated when assertions are
+   compiled out with NDEBUG. The argument is parenthesized so that any
+   expression (including one containing a comma operator) is accepted. */
+#define gmp_assert_nocarry(x) do { \
+    mp_limb_t __cy = (x); \
+    assert (__cy == 0); \
+  } while (0)
+
+/* gmp_clz (count, x): store in COUNT the number of leading zero bits of
+   the limb X. X must be non-zero, otherwise the scan never terminates.
+   Whole zero bytes at the top are skipped eight bits at a time before the
+   remaining bits are scanned one by one. */
+#define gmp_clz(count, x) do { \
+    mp_limb_t __clz_v = (x); \
+    unsigned __clz_n = 0; \
+    while ((__clz_v & ((mp_limb_t) 0xff << (GMP_LIMB_BITS - 8))) == 0) \
+      { \
+        __clz_v <<= 8; \
+        __clz_n += 8; \
+      } \
+    while ((__clz_v & GMP_LIMB_HIGHBIT) == 0) \
+      { \
+        __clz_v <<= 1; \
+        __clz_n++; \
+      } \
+    (count) = __clz_n; \
+  } while (0)
+
+/* gmp_ctz (count, x): store in COUNT the number of trailing zero bits of
+   the limb X, derived from gmp_clz applied to the isolated lowest set bit.
+   X must be non-zero (gmp_clz does not terminate on zero). */
+#define gmp_ctz(count, x) do { \
+    mp_limb_t __ctz_v = (x); \
+    unsigned __ctz_lz = 0; \
+    /* v & -v keeps only the lowest set bit of v. */ \
+    gmp_clz (__ctz_lz, __ctz_v & - __ctz_v); \
+    (count) = GMP_LIMB_BITS - 1 - __ctz_lz; \
+  } while (0)
+
+/* gmp_add_ssaaaa (sh, sl, ah, al, bh, bl): two-limb addition,
+   <sh, sl> = <ah, al> + <bh, bl>; a carry out of the high limb is dropped.
+   The low sum is kept in a temporary and SL is written last, so SL may be
+   the same lvalue as AL. AL is evaluated twice. */
+#define gmp_add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  do { \
+    mp_limb_t __add_lo, __add_cy; \
+    __add_lo = (al) + (bl); \
+    __add_cy = (__add_lo < (al)); \
+    (sh) = (ah) + (bh) + __add_cy; \
+    (sl) = __add_lo; \
+  } while (0)
+
+/* gmp_sub_ddmmss (sh, sl, ah, al, bh, bl): two-limb subtraction,
+   <sh, sl> = <ah, al> - <bh, bl>; a borrow out of the high limb is
+   dropped. The low difference is kept in a temporary and SL is written
+   last. AL and BL are each evaluated twice. */
+#define gmp_sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  do { \
+    mp_limb_t __sub_lo, __sub_bw; \
+    __sub_lo = (al) - (bl); \
+    __sub_bw = ((al) < (bl)); \
+    (sh) = (ah) - (bh) - __sub_bw; \
+    (sl) = __sub_lo; \
+  } while (0)
+/* gmp_umul_ppmm (w1, w0, u, v): store the double-limb product of the limbs
+ U and V in W1 (high limb) and W0 (low limb). U and V are each evaluated
+ once, split into half limbs, and the four partial products are combined.
+ NOTE(review): the half limbs are held in plain `unsigned', which assumes
+ unsigned int is at least GMP_LIMB_BITS / 2 bits wide -- confirm for the
+ platforms of interest. */
+#define gmp_umul_ppmm(w1, w0, u, v) \
+ do { \
+ mp_limb_t __x0, __x1, __x2, __x3; \
+ unsigned __ul, __vl, __uh, __vh; \
+ mp_limb_t __u = (u), __v = (v); \
+ \
+ __ul = __u & GMP_LLIMB_MASK; /* low half of u */ \
+ __uh = __u >> (GMP_LIMB_BITS / 2); /* high half of u */ \
+ __vl = __v & GMP_LLIMB_MASK; /* low half of v */ \
+ __vh = __v >> (GMP_LIMB_BITS / 2); /* high half of v */ \
+ \
+ __x0 = (mp_limb_t) __ul * __vl; /* low * low */ \
+ __x1 = (mp_limb_t) __ul * __vh; /* low * high */ \
+ __x2 = (mp_limb_t) __uh * __vl; /* high * low */ \
+ __x3 = (mp_limb_t) __uh * __vh; /* high * high */ \
+ \
+ __x1 += __x0 >> (GMP_LIMB_BITS / 2);/* this can't give carry */ \
+ __x1 += __x2; /* but this indeed can */ \
+ if (__x1 < __x2) /* did we get it? */ \
+ __x3 += GMP_HLIMB_BIT; /* yes, add it in the proper pos. */ \
+ \
+ (w1) = __x3 + (__x1 >> (GMP_LIMB_BITS / 2)); \
+ (w0) = (__x1 << (GMP_LIMB_BITS / 2)) + (__x0 & GMP_LLIMB_MASK); \
+ } while (0)
+/* gmp_udiv_qrnnd_preinv (q, r, nh, nl, d, di): divide the two-limb value
+ <nh, nl> by the limb D using the precomputed inverse DI, storing the
+ quotient limb in Q and the remainder in R. All intermediate values live
+ in temporaries and R/Q are written last, so R may be the same lvalue as
+ NH. NH, NL and D are evaluated more than once.
+ NOTE(review): presumably requires D normalized (high bit set), nh < d,
+ and DI as returned by mpn_invert_limb (d) -- confirm against callers. */
+#define gmp_udiv_qrnnd_preinv(q, r, nh, nl, d, di) \
+ do { \
+ mp_limb_t _qh, _ql, _r, _mask; \
+ gmp_umul_ppmm (_qh, _ql, (nh), (di)); \
+ gmp_add_ssaaaa (_qh, _ql, _qh, _ql, (nh) + 1, (nl)); /* candidate quotient in _qh */ \
+ _r = (nl) - _qh * (d); /* candidate remainder, modulo the limb size */ \
+ _mask = -(mp_limb_t) (_r > _ql); /* both > and >= are OK */ \
+ _qh += _mask; /* _mask is all ones (i.e. -1) or zero */ \
+ _r += _mask & (d); \
+ if (_r >= (d)) \
+ { \
+ _r -= (d); \
+ _qh++; \
+ } \
+ \
+ (r) = _r; \
+ (q) = _qh; \
+ } while (0)
+/* gmp_udiv_qr_3by2 (q, r1, r0, n2, n1, n0, d1, d0, dinv): divide the
+ three-limb value <n2, n1, n0> by the two-limb divisor <d1, d0> using the
+ precomputed inverse DINV, leaving the quotient limb in Q and the
+ two-limb remainder in <r1, r0>. Q, R1 and R0 are used as working
+ storage and the other arguments are evaluated several times, so all
+ arguments should be plain variables.
+ NOTE(review): presumably requires <d1, d0> normalized, <n2, n1> less than
+ <d1, d0>, and DINV from mpn_invert_3by2 (d1, d0) -- confirm. */
+#define gmp_udiv_qr_3by2(q, r1, r0, n2, n1, n0, d1, d0, dinv) \
+ do { \
+ mp_limb_t _q0, _t1, _t0, _mask; \
+ gmp_umul_ppmm ((q), _q0, (n2), (dinv)); \
+ gmp_add_ssaaaa ((q), _q0, (q), _q0, (n2), (n1)); \
+ \
+ /* Compute the two most significant limbs of n - q'd */ \
+ (r1) = (n1) - (d1) * (q); \
+ gmp_sub_ddmmss ((r1), (r0), (r1), (n0), (d1), (d0)); \
+ gmp_umul_ppmm (_t1, _t0, (d0), (q)); \
+ gmp_sub_ddmmss ((r1), (r0), (r1), (r0), _t1, _t0); \
+ (q)++; \
+ \
+ /* Conditionally adjust q and the remainders */ \
+ _mask = - (mp_limb_t) ((r1) >= _q0); /* all ones or zero */ \
+ (q) += _mask; \
+ gmp_add_ssaaaa ((r1), (r0), (r1), (r0), _mask & (d1), _mask & (d0)); \
+ if ((r1) >= (d1)) \
+ { \
+ if ((r1) > (d1) || (r0) >= (d0)) /* remainder still >= divisor */ \
+ { \
+ (q)++; \
+ gmp_sub_ddmmss ((r1), (r0), (r1), (r0), (d1), (d0)); \
+ } \
+ } \
+ } while (0)
+
+/* Swap macros. Each one exchanges its two lvalue arguments through a
+   temporary of the type named in the macro; the MPN_* variants swap a
+   pointer and its size together. */
+#define MP_LIMB_T_SWAP(x, y) \
+  do { \
+    mp_limb_t __swap_limb_tmp = (x); \
+    (x) = (y); \
+    (y) = __swap_limb_tmp; \
+  } while (0)
+#define MP_SIZE_T_SWAP(x, y) \
+  do { \
+    mp_size_t __swap_size_tmp = (x); \
+    (x) = (y); \
+    (y) = __swap_size_tmp; \
+  } while (0)
+#define MP_BITCNT_T_SWAP(x,y) \
+  do { \
+    mp_bitcnt_t __swap_bitcnt_tmp = (x); \
+    (x) = (y); \
+    (y) = __swap_bitcnt_tmp; \
+  } while (0)
+#define MP_PTR_SWAP(x, y) \
+  do { \
+    mp_ptr __swap_ptr_tmp = (x); \
+    (x) = (y); \
+    (y) = __swap_ptr_tmp; \
+  } while (0)
+#define MP_SRCPTR_SWAP(x, y) \
+  do { \
+    mp_srcptr __swap_srcptr_tmp = (x); \
+    (x) = (y); \
+    (y) = __swap_srcptr_tmp; \
+  } while (0)
+
+#define MPN_PTR_SWAP(xp,xs, yp,ys) \
+  do { \
+    MP_PTR_SWAP (xp, yp); \
+    MP_SIZE_T_SWAP (xs, ys); \
+  } while(0)
+#define MPN_SRCPTR_SWAP(xp,xs, yp,ys) \
+  do { \
+    MP_SRCPTR_SWAP (xp, yp); \
+    MP_SIZE_T_SWAP (xs, ys); \
+  } while(0)
+
+#define MPZ_PTR_SWAP(x, y) \
+  do { \
+    mpz_ptr __swap_mpz_ptr_tmp = (x); \
+    (x) = (y); \
+    (y) = __swap_mpz_ptr_tmp; \
+  } while (0)
+#define MPZ_SRCPTR_SWAP(x, y) \
+  do { \
+    mpz_srcptr __swap_mpz_srcptr_tmp = (x); \
+    (x) = (y); \
+    (y) = __swap_mpz_srcptr_tmp; \
+  } while (0)
+
+\f
+/* Memory allocation and other helper functions. */
+/* Print MSG followed by a newline on stderr, then abort the program. */
+static void
+gmp_die (const char *msg)
+{
+  fputs (msg, stderr);
+  fputc ('\n', stderr);
+  abort ();
+}
+
+/* Default allocation function: a malloc wrapper that never returns NULL.
+   SIZE must be non-zero (asserted); on allocation failure the program is
+   terminated through gmp_die. */
+static void *
+gmp_default_alloc (size_t size)
+{
+  void *block;
+
+  assert (size > 0);
+
+  block = malloc (size);
+  if (block == NULL)
+    gmp_die("gmp_default_alloc: Virtual memory exhausted.");
+
+  return block;
+}
+
+/* Default reallocation function: a realloc wrapper that never returns
+   NULL. OLD is the block to resize and NEW_SIZE its new size in bytes;
+   OLD_SIZE is accepted for interface compatibility and ignored. If
+   realloc returns NULL the program is terminated through gmp_die. */
+static void *
+gmp_default_realloc (void *old, size_t old_size, size_t new_size)
+{
+  void *p;
+
+  (void) old_size;
+
+  p = realloc (old, new_size);
+
+  if (!p)
+    gmp_die("gmp_default_realloc: Virtual memory exhausted.");
+
+  return p;
+}
+
+/* Default deallocation function: release P with free. SIZE is accepted
+   for interface compatibility and ignored. */
+static void
+gmp_default_free (void *p, size_t size)
+{
+  (void) size;
+  free (p);
+}
+
+static void * (*gmp_allocate_func) (size_t) = gmp_default_alloc;
+static void * (*gmp_reallocate_func) (void *, size_t, size_t) = gmp_default_realloc;
+static void (*gmp_free_func) (void *, size_t) = gmp_default_free;
+
+/* Report the currently installed allocation, reallocation and free
+   functions through the given pointers. Any of the three pointers may be
+   NULL, in which case that function is not reported. */
+void
+mp_get_memory_functions (void *(**alloc_func) (size_t),
+                         void *(**realloc_func) (void *, size_t, size_t),
+                         void (**free_func) (void *, size_t))
+{
+  if (alloc_func != NULL)
+    {
+      *alloc_func = gmp_allocate_func;
+    }
+  if (realloc_func != NULL)
+    {
+      *realloc_func = gmp_reallocate_func;
+    }
+  if (free_func != NULL)
+    {
+      *free_func = gmp_free_func;
+    }
+}
+
+/* Install new allocation, reallocation and free functions. A NULL
+   argument selects the corresponding default implementation. */
+void
+mp_set_memory_functions (void *(*alloc_func) (size_t),
+                         void *(*realloc_func) (void *, size_t, size_t),
+                         void (*free_func) (void *, size_t))
+{
+  gmp_allocate_func = alloc_func ? alloc_func : gmp_default_alloc;
+  gmp_reallocate_func = realloc_func ? realloc_func : gmp_default_realloc;
+  gmp_free_func = free_func ? free_func : gmp_default_free;
+}
+
+#define gmp_xalloc(size) ((*gmp_allocate_func)((size))) /* allocate SIZE bytes through the installed allocation function */
+#define gmp_free(p) ((*gmp_free_func) ((p), 0)) /* release P through the installed free function; the size argument is always passed as 0 */
+
+/* Allocate room for SIZE limbs through the installed allocation
+   function. */
+static mp_ptr
+gmp_xalloc_limbs (mp_size_t size)
+{
+  size_t nbytes = size * sizeof (mp_limb_t);
+
+  return gmp_xalloc (nbytes);
+}
+
+/* Resize the limb block OLD to hold SIZE limbs through the installed
+   reallocation function. SIZE must be positive (asserted); the old-size
+   argument of the reallocation function is always passed as 0. */
+static mp_ptr
+gmp_xrealloc_limbs (mp_ptr old, mp_size_t size)
+{
+  size_t nbytes;
+
+  assert (size > 0);
+  nbytes = size * sizeof (mp_limb_t);
+  return (*gmp_reallocate_func) (old, 0, nbytes);
+}
+
+\f
+/* MPN interface */
+
+/* Copy N limbs from S to D, proceeding from the lowest index upwards. */
+void
+mpn_copyi (mp_ptr d, mp_srcptr s, mp_size_t n)
+{
+  while (n-- > 0)
+    *d++ = *s++;
+}
+
+/* Copy N limbs from S to D, proceeding from the highest index downwards. */
+void
+mpn_copyd (mp_ptr d, mp_srcptr s, mp_size_t n)
+{
+  mp_size_t i;
+
+  for (i = n - 1; i >= 0; i--)
+    d[i] = s[i];
+}
+
+/* Compare the N-limb numbers at AP and BP, most significant limb first.
+   Returns 1 if {ap, n} is larger, -1 if it is smaller, and 0 if the two
+   are equal (including n <= 0). */
+int
+mpn_cmp (mp_srcptr ap, mp_srcptr bp, mp_size_t n)
+{
+  mp_size_t i;
+
+  for (i = n - 1; i >= 0; i--)
+    {
+      if (ap[i] != bp[i])
+        return ap[i] > bp[i] ? 1 : -1;
+    }
+  return 0;
+}
+
+/* Compare {ap, an} with {bp, bn}: the operand with more limbs is taken to
+   be the larger one, and equal-length operands are compared with mpn_cmp.
+   Returns 1, -1 or 0. */
+static int
+mpn_cmp4 (mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn)
+{
+  if (an != bn)
+    return an < bn ? -1 : 1;
+
+  return mpn_cmp (ap, bp, an);
+}
+
+/* Return N reduced by the number of zero limbs at the high end of
+   {xp, n}; the result is 0 when every limb is zero. */
+static mp_size_t
+mpn_normalized_size (mp_srcptr xp, mp_size_t n)
+{
+  while (n > 0 && xp[n-1] == 0)
+    n--;
+
+  return n;
+}
+
+#define mpn_zero_p(xp, n) (mpn_normalized_size ((xp), (n)) == 0) /* non-zero when all N limbs at XP are zero */
+
+/* Add the single limb B to the N-limb number at AP, store the N low limbs
+   of the sum at RP, and return the carry out (0 or 1). N must be
+   positive (asserted). */
+mp_limb_t
+mpn_add_1 (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t b)
+{
+  mp_size_t i;
+  mp_limb_t carry = b;
+
+  assert (n > 0);
+
+  for (i = 0; i < n; i++)
+    {
+      mp_limb_t sum = ap[i] + carry;
+
+      /* The addition wrapped exactly when the sum is below the addend. */
+      carry = (sum < carry);
+      rp[i] = sum;
+    }
+  return carry;
+}
+
+/* Add the N-limb numbers at AP and BP, store the N low limbs of the sum
+   at RP, and return the carry out (0 or 1). */
+mp_limb_t
+mpn_add_n (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n)
+{
+  mp_limb_t carry = 0;
+  mp_size_t i;
+
+  for (i = 0; i < n; i++)
+    {
+      mp_limb_t a = ap[i];
+      mp_limb_t b = bp[i];
+      mp_limb_t sum = a + carry;
+
+      carry = (sum < carry);
+      sum += b;
+      carry += (sum < b);
+      rp[i] = sum;
+    }
+  return carry;
+}
+
+/* Add {bp, bn} to {ap, an}, where an >= bn (asserted), store the AN low
+   limbs of the sum at RP, and return the carry out. */
+mp_limb_t
+mpn_add (mp_ptr rp, mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn)
+{
+  mp_limb_t cy;
+  mp_size_t tail;
+
+  assert (an >= bn);
+
+  /* Add the overlapping low part, then ripple the carry through the
+     remaining high limbs of A. */
+  cy = mpn_add_n (rp, ap, bp, bn);
+  tail = an - bn;
+  if (tail > 0)
+    cy = mpn_add_1 (rp + bn, ap + bn, tail, cy);
+
+  return cy;
+}
+
+/* Subtract the single limb B from the N-limb number at AP, store the N
+   low limbs of the difference at RP, and return the borrow out (0 or 1).
+   N must be positive (asserted). */
+mp_limb_t
+mpn_sub_1 (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t b)
+{
+  mp_size_t i;
+  mp_limb_t borrow = b;
+
+  assert (n > 0);
+
+  for (i = 0; i < n; i++)
+    {
+      mp_limb_t a = ap[i];
+      mp_limb_t diff = a - borrow;
+
+      /* A borrow propagates when the limb is smaller than what is
+         subtracted from it. */
+      borrow = (a < borrow);
+      rp[i] = diff;
+    }
+  return borrow;
+}
+
+/* Subtract the N-limb number at BP from the one at AP, store the N low
+   limbs of the difference at RP, and return the borrow out (0 or 1). */
+mp_limb_t
+mpn_sub_n (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n)
+{
+  mp_limb_t borrow = 0;
+  mp_size_t i;
+
+  for (i = 0; i < n; i++)
+    {
+      mp_limb_t a = ap[i];
+      mp_limb_t sub = bp[i] + borrow;
+
+      /* First term: b + borrow wrapped around; second: a < b + borrow. */
+      borrow = (sub < borrow);
+      borrow += (a < sub);
+      rp[i] = a - sub;
+    }
+  return borrow;
+}
+
+/* Subtract {bp, bn} from {ap, an}, where an >= bn (asserted), store the AN
+   low limbs of the difference at RP, and return the borrow out. */
+mp_limb_t
+mpn_sub (mp_ptr rp, mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn)
+{
+  mp_limb_t cy;
+  mp_size_t tail;
+
+  assert (an >= bn);
+
+  /* Subtract the overlapping low part, then ripple the borrow through
+     the remaining high limbs of A. */
+  cy = mpn_sub_n (rp, ap, bp, bn);
+  tail = an - bn;
+  if (tail > 0)
+    cy = mpn_sub_1 (rp + bn, ap + bn, tail, cy);
+
+  return cy;
+}
+
+/* Multiply the N-limb number at UP by the limb VL, store the N low limbs
+   of the product at RP, and return the most significant limb. N must be
+   at least 1 (asserted). */
+mp_limb_t
+mpn_mul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl)
+{
+  mp_size_t i = 0;
+  mp_limb_t carry = 0;
+
+  assert (n >= 1);
+
+  do
+    {
+      mp_limb_t hi, lo;
+
+      gmp_umul_ppmm (hi, lo, up[i], vl);
+
+      lo += carry;
+      carry = (lo < carry) + hi;
+
+      rp[i] = lo;
+    }
+  while (++i != n);
+
+  return carry;
+}
+
+/* Multiply the N-limb number at UP by the limb VL, add the N low limbs of
+   the product to {rp, n} in place, and return the carry limb. N must be
+   at least 1 (asserted). */
+mp_limb_t
+mpn_addmul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl)
+{
+  mp_size_t i = 0;
+  mp_limb_t carry = 0;
+
+  assert (n >= 1);
+
+  do
+    {
+      mp_limb_t hi, lo, old;
+
+      gmp_umul_ppmm (hi, lo, up[i], vl);
+
+      lo += carry;
+      carry = (lo < carry) + hi;
+
+      old = rp[i];
+      lo += old;
+      carry += (lo < old);
+      rp[i] = lo;
+    }
+  while (++i != n);
+
+  return carry;
+}
+
+/* Multiply the N-limb number at UP by the limb VL, subtract the N low
+   limbs of the product from {rp, n} in place, and return the borrow limb.
+   N must be at least 1 (asserted). */
+mp_limb_t
+mpn_submul_1 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t vl)
+{
+  mp_size_t i = 0;
+  mp_limb_t carry = 0;
+
+  assert (n >= 1);
+
+  do
+    {
+      mp_limb_t hi, lo, old, diff;
+
+      gmp_umul_ppmm (hi, lo, up[i], vl);
+
+      lo += carry;
+      carry = (lo < carry) + hi;
+
+      old = rp[i];
+      diff = old - lo;
+      /* The subtraction wrapped exactly when the result grew. */
+      carry += (diff > old);
+      rp[i] = diff;
+    }
+  while (++i != n);
+
+  return carry;
+}
+
+/* Multiply {up, un} by {vp, vn}, where un >= vn >= 1 (asserted), writing
+   the un + vn limb product to RP. Returns the most significant limb of
+   the product. */
+mp_limb_t
+mpn_mul (mp_ptr rp, mp_srcptr up, mp_size_t un, mp_srcptr vp, mp_size_t vn)
+{
+  mp_size_t i;
+
+  assert (un >= vn);
+  assert (vn >= 1);
+
+  /* The row for the lowest limb of V is stored rather than accumulated,
+     which also saves zeroing the result area first. */
+  rp[un] = mpn_mul_1 (rp, up, un, vp[0]);
+
+  /* Each higher limb of V contributes a row shifted up by one limb. */
+  for (i = 1; i < vn; i++)
+    rp[un + i] = mpn_addmul_1 (rp + i, up, un, vp[i]);
+
+  return rp[un + vn - 1];
+}
+/* Multiply the two N-limb numbers at AP and BP by calling mpn_mul with
+ equal operand sizes; the product is written to RP and mpn_mul's return
+ value is discarded. */
+void
+mpn_mul_n (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n)
+{
+ mpn_mul (rp, ap, n, bp, n);
+}
+/* Square the N-limb number at AP by calling mpn_mul with AP as both
+ operands; the result is written to RP and mpn_mul's return value is
+ discarded. */
+void
+mpn_sqr (mp_ptr rp, mp_srcptr ap, mp_size_t n)
+{
+ mpn_mul (rp, ap, n, ap, n);
+}
+
+/* Shift the N-limb number at UP left by CNT bits, with
+   1 <= cnt < GMP_LIMB_BITS and n >= 1 (all asserted), and store the N low
+   limbs of the result at RP. Returns the bits shifted out of the top
+   limb, in the low CNT bits of the return value. Limbs are processed
+   from the most significant end downwards. */
+mp_limb_t
+mpn_lshift (mp_ptr rp, mp_srcptr up, mp_size_t n, unsigned int cnt)
+{
+  mp_limb_t cur, pending, out;
+  unsigned int rcnt;
+  mp_size_t i;
+
+  assert (n >= 1);
+  assert (cnt >= 1);
+  assert (cnt < GMP_LIMB_BITS);
+
+  rcnt = GMP_LIMB_BITS - cnt;
+  cur = up[n - 1];
+  out = cur >> rcnt;
+  pending = (cur << cnt);
+
+  for (i = n - 1; i != 0; i--)
+    {
+      /* Read the next lower source limb before storing limb i. */
+      cur = up[i - 1];
+      rp[i] = pending | (cur >> rcnt);
+      pending = (cur << cnt);
+    }
+  rp[0] = pending;
+
+  return out;
+}
+
+/* Shift the N-limb number at UP right by CNT bits, with
+   1 <= cnt < GMP_LIMB_BITS and n >= 1 (all asserted), and store the N
+   limbs of the result at RP. Returns the bits shifted out of the lowest
+   limb, in the high CNT bits of the return value. Limbs are processed
+   from the least significant end upwards. */
+mp_limb_t
+mpn_rshift (mp_ptr rp, mp_srcptr up, mp_size_t n, unsigned int cnt)
+{
+  mp_limb_t cur, pending, out;
+  unsigned int lcnt;
+  mp_size_t i;
+
+  assert (n >= 1);
+  assert (cnt >= 1);
+  assert (cnt < GMP_LIMB_BITS);
+
+  lcnt = GMP_LIMB_BITS - cnt;
+  cur = up[0];
+  out = (cur << lcnt);
+  pending = cur >> cnt;
+
+  for (i = 1; i != n; i++)
+    {
+      /* Read the next higher source limb before storing limb i - 1. */
+      cur = up[i];
+      rp[i - 1] = pending | (cur << lcnt);
+      pending = cur >> cnt;
+    }
+  rp[n - 1] = pending;
+
+  return out;
+}
+
+\f
+/* MPN division interface.
+
+ mpn_invert_3by2 (u1, u0) returns the inverse limb m of the divisor
+ <u1, u0>. U1 must have its most significant bit set (asserted). A 2/1
+ inverse of u1 is computed first using half-limb arithmetic; when u0 is
+ non-zero it is then adjusted to take u0 into account, so passing
+ u0 == 0 yields the plain 2/1 inverse of u1.
+ NOTE(review): the half limbs are held in plain `unsigned', which assumes
+ unsigned int is at least GMP_LIMB_BITS / 2 bits wide -- confirm. */
+mp_limb_t
+mpn_invert_3by2 (mp_limb_t u1, mp_limb_t u0)
+{
+ mp_limb_t r, p, m;
+ unsigned ul, uh; /* low and high half of u1 */
+ unsigned ql, qh; /* low and high half of the inverse being built */
+
+ /* First, do a 2/1 inverse. */
+ /* The inverse m is defined as floor( (B^2 - 1 - u1)/u1 ), so that 0 <
+ * B^2 - (B + m) u1 <= u1 */
+ assert (u1 >= GMP_LIMB_HIGHBIT);
+
+ ul = u1 & GMP_LLIMB_MASK;
+ uh = u1 >> (GMP_LIMB_BITS / 2);
+
+ qh = ~u1 / uh; /* estimate of the high half, from the high half of u1 only */
+ r = ((~u1 - (mp_limb_t) qh * uh) << (GMP_LIMB_BITS / 2)) | GMP_LLIMB_MASK;
+
+ p = (mp_limb_t) qh * ul; /* contribution of the low half of u1 */
+ /* Adjustment steps taken from udiv_qrnnd_c */
+ if (r < p)
+ {
+ qh--;
+ r += u1;
+ if (r >= u1) /* i.e. we didn't get carry when adding to r */
+ if (r < p)
+ {
+ qh--;
+ r += u1;
+ }
+ }
+ r -= p;
+
+ /* Do a 3/2 division (with half limb size) */
+ p = (r >> (GMP_LIMB_BITS / 2)) * qh + r;
+ ql = (p >> (GMP_LIMB_BITS / 2)) + 1;
+
+ /* By the 3/2 method, we don't need the high half limb. */
+ r = (r << (GMP_LIMB_BITS / 2)) + GMP_LLIMB_MASK - ql * u1;
+
+ if (r >= (p << (GMP_LIMB_BITS / 2)))
+ {
+ ql--;
+ r += u1;
+ }
+ m = ((mp_limb_t) qh << (GMP_LIMB_BITS / 2)) + ql; /* combine the two halves */
+ if (r >= u1)
+ {
+ m++;
+ r -= u1;
+ }
+
+ if (u0 > 0) /* adjust the 2/1 inverse for the low divisor limb */
+ {
+ mp_limb_t th, tl;
+ r = ~r;
+ r += u0;
+ if (r < u0) /* the addition wrapped around */
+ {
+ m--;
+ if (r >= u1)
+ {
+ m--;
+ r -= u1;
+ }
+ r -= u1;
+ }
+ gmp_umul_ppmm (th, tl, u0, m);
+ r += th;
+ if (r < th) /* the addition wrapped around */
+ {
+ m--;
+ if (r > u1 || (r == u1 && tl > u0))
+ m--;
+ }
+ }
+
+ return m;
+}
+/* Precomputed data for dividing by a fixed divisor, filled in by the
+ mpn_div_qr_*_invert functions and read by the *_preinv functions. */
+struct gmp_div_inverse
+{
+ /* Normalization shift count. */
+ unsigned shift;
+ /* Normalized divisor (d0 unused for mpn_div_qr_1) */
+ mp_limb_t d1, d0;
+ /* Inverse, for 2/1 or 3/2. */
+ mp_limb_t di;
+};
+
+/* Fill in INV for division by the single limb D, which must be non-zero
+   (asserted): the normalization shift, the divisor shifted so that its
+   high bit is set, and the inverse of that shifted divisor. inv->d0 is
+   left untouched. */
+static void
+mpn_div_qr_1_invert (struct gmp_div_inverse *inv, mp_limb_t d)
+{
+  unsigned lz;
+  mp_limb_t dnorm;
+
+  assert (d > 0);
+  gmp_clz (lz, d);
+  dnorm = d << lz;
+
+  inv->shift = lz;
+  inv->d1 = dnorm;
+  inv->di = mpn_invert_limb (dnorm);
+}
+
+/* Fill in INV for division by the two-limb divisor <d1, d0>, where D1
+   must be non-zero (asserted): the normalization shift, the divisor
+   shifted so that the high bit of its top limb is set, and the 3/2
+   inverse of the shifted divisor. */
+static void
+mpn_div_qr_2_invert (struct gmp_div_inverse *inv,
+                     mp_limb_t d1, mp_limb_t d0)
+{
+  unsigned lz;
+
+  assert (d1 > 0);
+  gmp_clz (lz, d1);
+
+  /* A zero shift is skipped: shifting by a full limb width would be
+     undefined. */
+  if (lz != 0)
+    {
+      d1 = (d1 << lz) | (d0 >> (GMP_LIMB_BITS - lz));
+      d0 = d0 << lz;
+    }
+
+  inv->shift = lz;
+  inv->d1 = d1;
+  inv->d0 = d0;
+  inv->di = mpn_invert_3by2 (d1, d0);
+}
+
+/* Fill in INV for the DN-limb divisor at DP (DN > 0).  One- and
+   two-limb divisors are handed to the dedicated helpers.  For longer
+   divisors the inverse is built from the two most significant limbs
+   after shifting, with bits of the third limb moved into d0. */
+static void
+mpn_div_qr_invert (struct gmp_div_inverse *inv,
+                   mp_srcptr dp, mp_size_t dn)
+{
+  unsigned cnt;
+  mp_limb_t hi, lo;
+
+  assert (dn > 0);
+
+  if (dn == 1)
+    {
+      mpn_div_qr_1_invert (inv, dp[0]);
+      return;
+    }
+  if (dn == 2)
+    {
+      mpn_div_qr_2_invert (inv, dp[1], dp[0]);
+      return;
+    }
+
+  hi = dp[dn-1];
+  lo = dp[dn-2];
+  assert (hi > 0);
+  gmp_clz (cnt, hi);
+  inv->shift = cnt;
+  if (cnt != 0)
+    {
+      hi = (hi << cnt) | (lo >> (GMP_LIMB_BITS - cnt));
+      lo = (lo << cnt) | (dp[dn-3] >> (GMP_LIMB_BITS - cnt));
+    }
+  inv->d1 = hi;
+  inv->d0 = lo;
+  inv->di = mpn_invert_3by2 (hi, lo);
+}
+
+/* Divide {np, nn} by the single-limb divisor described by INV.  The
+   quotient limbs are stored at QP unless QP is NULL, and the
+   remainder is returned.  This does not match the current public gmp
+   interface; it rather corresponds to the sbpi1_div_* functions. */
+static mp_limb_t
+mpn_div_qr_1_preinv (mp_ptr qp, mp_srcptr np, mp_size_t nn,
+                     const struct gmp_div_inverse *inv)
+{
+  mp_ptr scratch = NULL;
+  mp_limb_t rem = 0;
+  mp_limb_t dnorm, dinv;
+  mp_size_t i;
+
+  if (inv->shift > 0)
+    {
+      /* Work on a shifted copy of the numerator; the value returned
+         by mpn_lshift is the initial remainder. */
+      scratch = gmp_xalloc_limbs (nn);
+      rem = mpn_lshift (scratch, np, nn, inv->shift);
+      np = scratch;
+    }
+
+  dnorm = inv->d1;
+  dinv = inv->di;
+
+  /* One 2/1 division step per limb, most significant limb first. */
+  for (i = nn; i-- > 0; )
+    {
+      mp_limb_t q;
+
+      gmp_udiv_qrnnd_preinv (q, rem, rem, np[i], dnorm, dinv);
+      if (qp)
+        qp[i] = q;
+    }
+
+  if (inv->shift > 0)
+    gmp_free (scratch);
+
+  /* Undo the normalization of the remainder. */
+  return rem >> inv->shift;
+}
+
+/* Divide {np, nn} by the non-zero limb D.  Quotient limbs are stored
+   at QP unless QP is NULL; the remainder is returned. */
+static mp_limb_t
+mpn_div_qr_1 (mp_ptr qp, mp_srcptr np, mp_size_t nn, mp_limb_t d)
+{
+  struct gmp_div_inverse inv;
+  unsigned cnt;
+  mp_limb_t low_bits;
+
+  assert (d > 0);
+
+  if (d <= 1 || (d & (d-1)) != 0)
+    {
+      /* General case: invert the divisor, then divide. */
+      mpn_div_qr_1_invert (&inv, d);
+      return mpn_div_qr_1_preinv (qp, np, nn, &inv);
+    }
+
+  /* Special case for powers of two: the remainder is the low bits of
+     the least significant limb, the quotient a right shift.  The
+     remainder is taken first, since QP may be the same area as NP. */
+  low_bits = np[0] & (d-1);
+  gmp_ctz (cnt, d);
+  if (qp)
+    mpn_rshift (qp, np, nn, cnt);
+
+  return low_bits;
+}
+
+/* Divide {np, nn}, nn >= 2, by the two-limb divisor described by INV.
+   Quotient limbs (nn - 1 of them) are stored at QP unless QP is NULL;
+   the two-limb remainder is stored at RP.  mpn_div_qr_preinv calls
+   this with RP equal to NP, so RP is written only after the last read
+   of NP. */
+static void
+mpn_div_qr_2_preinv (mp_ptr qp, mp_ptr rp, mp_srcptr np, mp_size_t nn,
+ const struct gmp_div_inverse *inv)
+{
+ unsigned shift;
+ mp_size_t i;
+ mp_limb_t d1, d0, di, r1, r0;
+ mp_ptr tp;
+
+ assert (nn >= 2);
+ shift = inv->shift;
+ d1 = inv->d1;
+ d0 = inv->d0;
+ di = inv->di;
+
+ if (shift > 0)
+ {
+ /* Divide a shifted copy; the value returned by mpn_lshift becomes
+ the high remainder limb. */
+ tp = gmp_xalloc_limbs (nn);
+ r1 = mpn_lshift (tp, np, nn, shift);
+ np = tp;
+ }
+ else
+ r1 = 0;
+
+ r0 = np[nn - 1];
+
+ /* One 3/2 division step per remaining limb, most significant first. */
+ for (i = nn - 2; i >= 0; i--)
+ {
+ mp_limb_t n0, q;
+ n0 = np[i];
+ gmp_udiv_qr_3by2 (q, r1, r0, r1, r0, n0, d1, d0, di);
+
+ if (qp)
+ qp[i] = q;
+ }
+
+ if (shift > 0)
+ {
+ /* Shift the remainder back; its low SHIFT bits must be zero. */
+ assert ((r0 << (GMP_LIMB_BITS - shift)) == 0);
+ r0 = (r0 >> shift) | (r1 << (GMP_LIMB_BITS - shift));
+ r1 >>= shift;
+
+ gmp_free (tp);
+ }
+
+ rp[1] = r1;
+ rp[0] = r0;
+}
+
+#if 0
+/* Currently compiled out.  Divide {np, nn}, nn >= 2, by <d1, d0>:
+   build the inverse, then call mpn_div_qr_2_preinv. */
+static void
+mpn_div_qr_2 (mp_ptr qp, mp_ptr rp, mp_srcptr np, mp_size_t nn,
+ mp_limb_t d1, mp_limb_t d0)
+{
+ struct gmp_div_inverse inv;
+ assert (nn >= 2);
+
+ mpn_div_qr_2_invert (&inv, d1, d0);
+ mpn_div_qr_2_preinv (qp, rp, np, nn, &inv);
+}
+#endif
+
+/* Divide <n1, {np, nn}> by {dp, dn}, limb by limb.  Requires dn > 2,
+   nn >= dn and a divisor whose top limb has its high bit set.  DINV
+   is the 3/2 inverse of the two top divisor limbs.  The nn - dn + 1
+   quotient limbs are stored at QP unless QP is NULL.  NP is
+   overwritten; the callers in this file read the remainder from its
+   low dn limbs. */
+static void
+mpn_div_qr_pi1 (mp_ptr qp,
+ mp_ptr np, mp_size_t nn, mp_limb_t n1,
+ mp_srcptr dp, mp_size_t dn,
+ mp_limb_t dinv)
+{
+ mp_size_t i;
+
+ mp_limb_t d1, d0;
+ mp_limb_t cy, cy1;
+ mp_limb_t q;
+
+ assert (dn > 2);
+ assert (nn >= dn);
+
+ d1 = dp[dn - 1];
+ d0 = dp[dn - 2];
+
+ assert ((d1 & GMP_LIMB_HIGHBIT) != 0);
+ /* Iteration variable is the index of the q limb.
+ *
+ * We divide <n1, np[dn-1+i], np[dn-2+i], np[dn-3+i],..., np[i]>
+ * by <d1, d0, dp[dn-3], ..., dp[0] >
+ */
+
+ for (i = nn - dn; i >= 0; i--)
+ {
+ mp_limb_t n0 = np[dn-1+i];
+
+ if (n1 == d1 && n0 == d0)
+ {
+ /* The top two numerator limbs equal the top two divisor limbs;
+ use the largest limb value as quotient limb. */
+ q = GMP_LIMB_MAX;
+ mpn_submul_1 (np+i, dp, dn, q);
+ n1 = np[dn-1+i]; /* update n1, last loop's value will now be invalid */
+ }
+ else
+ {
+ /* Quotient limb candidate from the top three numerator limbs. */
+ gmp_udiv_qr_3by2 (q, n1, n0, n1, n0, np[dn-2+i], d1, d0, dinv);
+
+ /* Subtract q times the low dn-2 divisor limbs, and propagate
+ the borrow through n0 and n1. */
+ cy = mpn_submul_1 (np + i, dp, dn-2, q);
+
+ cy1 = n0 < cy;
+ n0 = n0 - cy;
+ cy = n1 < cy1;
+ n1 = n1 - cy1;
+ np[dn-2+i] = n0;
+
+ if (cy != 0)
+ {
+ /* Borrow out: q was one too large; add the divisor back. */
+ n1 += d1 + mpn_add_n (np + i, np + i, dp, dn - 1);
+ q--;
+ }
+ }
+
+ if (qp)
+ qp[i] = q;
+ }
+
+ np[dn - 1] = n1;
+}
+
+/* Divide {np, nn} by {dp, dn}, nn >= dn > 0, using the precomputed
+   INV.  Quotient limbs are stored at QP unless QP is NULL, and the
+   remainder is left in the low limbs of NP.  For dn > 2, DP must
+   already be the shifted divisor matching inv->d1 and inv->d0. */
+static void
+mpn_div_qr_preinv (mp_ptr qp, mp_ptr np, mp_size_t nn,
+                   mp_srcptr dp, mp_size_t dn,
+                   const struct gmp_div_inverse *inv)
+{
+  mp_limb_t top;
+  unsigned cnt;
+
+  assert (dn > 0);
+  assert (nn >= dn);
+
+  if (dn == 1)
+    {
+      np[0] = mpn_div_qr_1_preinv (qp, np, nn, inv);
+      return;
+    }
+  if (dn == 2)
+    {
+      mpn_div_qr_2_preinv (qp, np, np, nn, inv);
+      return;
+    }
+
+  assert (inv->d1 == dp[dn-1]);
+  assert (inv->d0 == dp[dn-2]);
+  assert ((inv->d1 & GMP_LIMB_HIGHBIT) != 0);
+
+  /* Shift the numerator in place to match the divisor. */
+  cnt = inv->shift;
+  top = 0;
+  if (cnt > 0)
+    top = mpn_lshift (np, np, nn, cnt);
+
+  mpn_div_qr_pi1 (qp, np, nn, top, dp, dn, inv->di);
+
+  /* Shift the remainder back. */
+  if (cnt > 0)
+    gmp_assert_nocarry (mpn_rshift (np, np, dn, cnt));
+}
+
+/* Divide {np, nn} by {dp, dn}, nn >= dn > 0.  Quotient limbs are
+   stored at QP unless QP is NULL; NP is overwritten and holds the
+   remainder in its low limbs.  For divisors of more than two limbs
+   that need shifting, a temporary shifted copy of the divisor is
+   used. */
+static void
+mpn_div_qr (mp_ptr qp, mp_ptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn)
+{
+  struct gmp_div_inverse inv;
+  mp_ptr dnorm = NULL;
+
+  assert (dn > 0);
+  assert (nn >= dn);
+
+  mpn_div_qr_invert (&inv, dp, dn);
+
+  if (dn > 2)
+    if (inv.shift > 0)
+      {
+        dnorm = gmp_xalloc_limbs (dn);
+        gmp_assert_nocarry (mpn_lshift (dnorm, dp, dn, inv.shift));
+        dp = dnorm;
+      }
+
+  mpn_div_qr_preinv (qp, np, nn, dp, dn, &inv);
+
+  if (dnorm != NULL)
+    gmp_free (dnorm);
+}
+
+\f
+/* MPN base conversion. */
+/* If B is one of the powers of two 2, 4, ..., 256, return its
+   exponent (1 to 8); otherwise return 0. */
+static unsigned
+mpn_base_power_of_two_p (unsigned b)
+{
+  unsigned k;
+
+  for (k = 1; k <= 8; k++)
+    if (b == (1U << k))
+      return k;
+
+  return 0;
+}
+
+/* Per-base constants for conversion to and from bases that are not a
+   power of two; filled in by mpn_get_base_info. */
+struct mpn_base_info
+{
+ /* bb is the largest power of the base which fits in one limb, and
+ exp is the corresponding exponent. */
+ unsigned exp;
+ mp_limb_t bb;
+};
+
+/* Store in INFO the largest power of B that fits in a limb, together
+   with its exponent. */
+static void
+mpn_get_base_info (struct mpn_base_info *info, mp_limb_t b)
+{
+  mp_limb_t limit = GMP_LIMB_MAX / b;
+  mp_limb_t power = b;
+  unsigned count = 1;
+
+  /* While power <= limit, power * b still fits in a limb. */
+  while (power <= limit)
+    {
+      power *= b;
+      count++;
+    }
+
+  info->exp = count;
+  info->bb = power;
+}
+
+/* Return GMP_LIMB_BITS minus the count gmp_clz reports for the
+   non-zero limb U. */
+static mp_bitcnt_t
+mpn_limb_size_in_base_2 (mp_limb_t u)
+{
+  unsigned lz;
+
+  assert (u > 0);
+
+  gmp_clz (lz, u);
+  return GMP_LIMB_BITS - lz;
+}
+
+/* Convert {up, un} to digits in base 2^BITS.  The digits are stored
+   at SP with the most significant digit first, and the number of
+   digits is returned.  up[un-1] must be non-zero. */
+static size_t
+mpn_get_str_bits (unsigned char *sp, unsigned bits, mp_srcptr up, mp_size_t un)
+{
+ unsigned char mask;
+ size_t sn, j;
+ mp_size_t i;
+ int shift;
+
+ /* Number of digits: the bit size of the number divided by BITS,
+ rounded up. */
+ sn = ((un - 1) * GMP_LIMB_BITS + mpn_limb_size_in_base_2 (up[un-1])
+ + bits - 1) / bits;
+
+ mask = (1U << bits) - 1;
+
+ /* Digits are extracted from the least significant end, so SP is
+ filled from its last position backwards. */
+ for (i = 0, j = sn, shift = 0; j-- > 0;)
+ {
+ unsigned char digit = up[i] >> shift;
+
+ shift += bits;
+
+ if (shift >= GMP_LIMB_BITS && ++i < un)
+ {
+ /* The digit extends into the next limb; fetch its high bits. */
+ shift -= GMP_LIMB_BITS;
+ digit |= up[i] << (bits - shift);
+ }
+ sp[j] = digit & mask;
+ }
+ return sn;
+}
+
+/* Convert the single limb W to digits by repeated division by the
+   base described by BINV.  We generate digits from the least
+   significant end, and reverse at the end (the reversal is done by
+   the caller, mpn_get_str_other).  Returns the number of digits
+   stored at SP, which is zero when W is zero. */
+static size_t
+mpn_limb_get_str (unsigned char *sp, mp_limb_t w,
+ const struct gmp_div_inverse *binv)
+{
+ mp_size_t i;
+ for (i = 0; w > 0; i++)
+ {
+ mp_limb_t h, l, r;
+
+ /* Shift w left by binv->shift into the limb pair <h, l>. */
+ h = w >> (GMP_LIMB_BITS - binv->shift);
+ l = w << binv->shift;
+
+ gmp_udiv_qrnnd_preinv (w, r, h, l, binv->d1, binv->di);
+ assert ( (r << (GMP_LIMB_BITS - binv->shift)) == 0);
+ r >>= binv->shift;
+
+ sp[i] = r;
+ }
+ return i;
+}
+
+/* Convert {up, un} to digits in BASE (not a power of two), stored at
+   SP with the most significant digit first.  INFO must describe BASE.
+   Returns the number of digits.  The limbs at UP are overwritten. */
+static size_t
+mpn_get_str_other (unsigned char *sp,
+ int base, const struct mpn_base_info *info,
+ mp_ptr up, mp_size_t un)
+{
+ struct gmp_div_inverse binv;
+ size_t sn;
+ size_t i;
+
+ mpn_div_qr_1_invert (&binv, base);
+
+ sn = 0;
+
+ if (un > 1)
+ {
+ struct gmp_div_inverse bbinv;
+ mpn_div_qr_1_invert (&bbinv, info->bb);
+
+ do
+ {
+ mp_limb_t w;
+ size_t done;
+ /* Split off the low info->exp digits as the remainder w. */
+ w = mpn_div_qr_1_preinv (up, up, un, &bbinv);
+ un -= (up[un-1] == 0);
+ done = mpn_limb_get_str (sp + sn, w, &binv);
+
+ /* Pad the chunk with zero digits up to info->exp digits. */
+ for (sn += done; done < info->exp; done++)
+ sp[sn++] = 0;
+ }
+ while (un > 1);
+ }
+ /* The most significant chunk is not padded. */
+ sn += mpn_limb_get_str (sp + sn, up[0], &binv);
+
+ /* Reverse order */
+ for (i = 0; 2*i + 1 < sn; i++)
+ {
+ unsigned char t = sp[i];
+ sp[i] = sp[sn - i - 1];
+ sp[sn - i - 1] = t;
+ }
+
+ return sn;
+}
+
+/* Convert {up, un} to digits in BASE, stored at SP, and return the
+   number of digits.  Requires un > 0 and up[un-1] != 0.  For bases
+   that are not a power of two the conversion goes through
+   mpn_get_str_other, which overwrites the limbs at UP. */
+size_t
+mpn_get_str (unsigned char *sp, int base, mp_ptr up, mp_size_t un)
+{
+  struct mpn_base_info info;
+  unsigned bits;
+
+  assert (un > 0);
+  assert (up[un-1] > 0);
+
+  bits = mpn_base_power_of_two_p (base);
+  if (bits != 0)
+    return mpn_get_str_bits (sp, bits, up, un);
+
+  mpn_get_base_info (&info, base);
+  return mpn_get_str_other (sp, base, &info, up, un);
+}
+
+/* Convert the SN digits at SP (most significant first, base 2^BITS)
+   to limbs at RP.  Returns the normalized limb count. */
+static mp_size_t
+mpn_set_str_bits (mp_ptr rp, const unsigned char *sp, size_t sn,
+ unsigned bits)
+{
+ mp_size_t rn;
+ size_t j;
+ unsigned shift;
+
+ /* Process digits from the least significant one; SHIFT is the bit
+ position in the current limb where the next digit goes. */
+ for (j = sn, rn = 0, shift = 0; j-- > 0; )
+ {
+ if (shift == 0)
+ {
+ /* Start a new limb. */
+ rp[rn++] = sp[j];
+ shift += bits;
+ }
+ else
+ {
+ rp[rn-1] |= (mp_limb_t) sp[j] << shift;
+ shift += bits;
+ if (shift >= GMP_LIMB_BITS)
+ {
+ /* The limb is full; any digit bits that did not fit start
+ the next limb. */
+ shift -= GMP_LIMB_BITS;
+ if (shift > 0)
+ rp[rn++] = (mp_limb_t) sp[j] >> (bits - shift);
+ }
+ }
+ }
+ rn = mpn_normalized_size (rp, rn);
+ return rn;
+}
+
+/* Convert the SN digit values at SP (most significant first) to limbs
+   at RP, for a base B that is not a power of two.  INFO must describe
+   B.  Returns the number of limbs of the result, zero if the value is
+   zero; rp[0] is always stored.  SN must be at least one.
+
+   The digits are consumed in chunks of info->exp digits, each chunk
+   fitting in one limb, and folded in as r = r * info->bb + chunk.
+   While the accumulated value is still zero (leading zero digits)
+   that step reduces to r = chunk, and it is done directly so that
+   mpn_mul_1 and mpn_add_1 are never called with a zero limb count. */
+static mp_size_t
+mpn_set_str_other (mp_ptr rp, const unsigned char *sp, size_t sn,
+                   mp_limb_t b, const struct mpn_base_info *info)
+{
+  mp_size_t rn;
+  mp_limb_t w;
+  unsigned first;
+  unsigned k;
+  size_t j;
+
+  assert (sn > 0);
+
+  /* The leading chunk is the short one, so that every later chunk has
+     exactly info->exp digits. */
+  first = 1 + (sn - 1) % info->exp;
+
+  j = 0;
+  w = sp[j++];
+  for (k = 1; k < first; k++)
+    w = w * b + sp[j++];
+
+  rp[0] = w;
+
+  for (rn = (w > 0); j < sn;)
+    {
+      mp_limb_t cy;
+
+      w = sp[j++];
+      for (k = 1; k < info->exp; k++)
+        w = w * b + sp[j++];
+
+      if (rn == 0)
+        {
+          /* Nothing accumulated yet: 0 * bb + w == w. */
+          rp[0] = w;
+          rn = (w > 0);
+          continue;
+        }
+
+      cy = mpn_mul_1 (rp, rp, rn, info->bb);
+      cy += mpn_add_1 (rp, rp, rn, w);
+      if (cy > 0)
+        rp[rn++] = cy;
+    }
+  assert (j == sn);
+
+  return rn;
+}
+
+/* Convert the SN digits at SP, given in BASE, to limbs at RP and
+   return the limb count.  An empty digit string gives 0 without
+   touching RP. */
+mp_size_t
+mpn_set_str (mp_ptr rp, const unsigned char *sp, size_t sn, int base)
+{
+  struct mpn_base_info info;
+  unsigned bits;
+
+  if (sn == 0)
+    return 0;
+
+  bits = mpn_base_power_of_two_p (base);
+  if (bits != 0)
+    return mpn_set_str_bits (rp, sp, sn, bits);
+
+  mpn_get_base_info (&info, base);
+  return mpn_set_str_other (rp, sp, sn, base, &info);
+}
+
+\f
+/* MPZ interface */
+/* Initialize R to zero, with a single limb allocated. */
+void
+mpz_init (mpz_t r)
+{
+  r->_mp_d = gmp_xalloc_limbs (1);
+  r->_mp_alloc = 1;
+  r->_mp_size = 0;
+}
+
+/* Initialize R to zero with room for BITS bits, and at least one
+   limb.  The utility of this function is a bit limited, since many
+   functions assign the result variable using mpz_swap. */
+void
+mpz_init2 (mpz_t r, mp_bitcnt_t bits)
+{
+  mp_size_t limbs;
+
+  /* Round down, except if 0 */
+  if (bits != 0)
+    bits--;
+  limbs = 1 + bits / GMP_LIMB_BITS;
+
+  r->_mp_d = gmp_xalloc_limbs (limbs);
+  r->_mp_alloc = limbs;
+  r->_mp_size = 0;
+}
+
+/* Release the limb array of R.  The struct fields themselves are left
+   unchanged. */
+void
+mpz_clear (mpz_t r)
+{
+ gmp_free (r->_mp_d);
+}
+
+/* Resize the limb array of R to SIZE limbs (at least one) and return
+   the new limb pointer.  If the current value no longer fits, R is
+   set to zero. */
+static void *
+mpz_realloc (mpz_t r, mp_size_t size)
+{
+  mp_size_t limbs = GMP_MAX (size, 1);
+
+  r->_mp_d = gmp_xrealloc_limbs (r->_mp_d, limbs);
+  r->_mp_alloc = limbs;
+
+  if (GMP_ABS (r->_mp_size) > limbs)
+    r->_mp_size = 0;
+
+  return r->_mp_d;
+}
+
+/* Make sure the mpz_t Z has room for at least N limbs, reallocating
+   only if fewer are allocated, and evaluate to its limb pointer.
+   Both arguments are evaluated more than once. */
+#define MPZ_REALLOC(z,n) ((n) > (z)->_mp_alloc \
+ ? mpz_realloc(z,n) \
+ : (z)->_mp_d)
+\f
+/* MPZ assignment and basic conversions. */
+/* Set R to the signed value X. */
+void
+mpz_set_si (mpz_t r, signed long int x)
+{
+  if (x < 0)
+    {
+      /* One limb holding the magnitude, with a negative size. */
+      r->_mp_size = -1;
+      r->_mp_d[0] = GMP_NEG_CAST (unsigned long int, x);
+      return;
+    }
+
+  mpz_set_ui (r, x);
+}
+
+/* Set R to the unsigned value X.  Zero is represented by size 0, and
+   the limb is then not written. */
+void
+mpz_set_ui (mpz_t r, unsigned long int x)
+{
+  if (x == 0)
+    {
+      r->_mp_size = 0;
+      return;
+    }
+
+  r->_mp_size = 1;
+  r->_mp_d[0] = x;
+}
+
+/* Copy X to R.  R and X may be the same variable, in which case
+   nothing is done. */
+void
+mpz_set (mpz_t r, const mpz_t x)
+{
+  mp_size_t limbs;
+  mp_ptr dst;
+
+  if (r == x)
+    return;
+
+  limbs = GMP_ABS (x->_mp_size);
+  dst = MPZ_REALLOC (r, limbs);
+
+  mpn_copyi (dst, x->_mp_d, limbs);
+  r->_mp_size = x->_mp_size;
+}
+
+/* Initialize R and set it to the signed value X. */
+void
+mpz_init_set_si (mpz_t r, signed long int x)
+{
+ mpz_init (r);
+ mpz_set_si (r, x);
+}
+
+/* Initialize R and set it to the unsigned value X. */
+void
+mpz_init_set_ui (mpz_t r, unsigned long int x)
+{
+ mpz_init (r);
+ mpz_set_ui (r, x);
+}
+
+/* Initialize R and set it to a copy of X. */
+void
+mpz_init_set (mpz_t r, const mpz_t x)
+{
+ mpz_init (r);
+ mpz_set (r, x);
+}
+
+/* Return non-zero iff U is zero, a single limb below GMP_LIMB_HIGHBIT,
+   or a negative single limb of magnitude at most GMP_LIMB_HIGHBIT. */
+int
+mpz_fits_slong_p (const mpz_t u)
+{
+  switch (u->_mp_size)
+    {
+    case 0:
+      return 1;
+    case 1:
+      return u->_mp_d[0] < GMP_LIMB_HIGHBIT;
+    case -1:
+      return u->_mp_d[0] <= GMP_LIMB_HIGHBIT;
+    default:
+      return 0;
+    }
+}
+
+/* Return non-zero iff U is zero or a positive single-limb value. */
+int
+mpz_fits_ulong_p (const mpz_t u)
+{
+  switch (u->_mp_size)
+    {
+    case 0:
+    case 1:
+      return 1;
+    default:
+      return 0;
+    }
+}
+
+/* Convert U to long using only its least significant limb.  For
+   positive U the top bit of the limb is cleared; for negative U the
+   limb is negated and the top bit set. */
+long int
+mpz_get_si (const mpz_t u)
+{
+  mp_size_t us = u->_mp_size;
+
+  if (us == 0)
+    return 0;
+
+  if (us > 0)
+    return (long) (u->_mp_d[0] & ~GMP_LIMB_HIGHBIT);
+
+  return (long) (- u->_mp_d[0] | GMP_LIMB_HIGHBIT);
+}
+
+/* Return the least significant limb of |U|, or 0 if U is zero. */
+unsigned long int
+mpz_get_ui (const mpz_t u)
+{
+  if (u->_mp_size == 0)
+    return 0;
+
+  return u->_mp_d[0];
+}
+
+/* Return the number of limbs used by U (zero when U is zero). */
+size_t
+mpz_size (const mpz_t u)
+{
+ return GMP_ABS (u->_mp_size);
+}
+
+/* Return limb number N of |U|, counting from the least significant
+   limb, or 0 when N is outside the used limbs. */
+mp_limb_t
+mpz_getlimbn (const mpz_t u, mp_size_t n)
+{
+  if (n < 0 || n >= GMP_ABS (u->_mp_size))
+    return 0;
+
+  return u->_mp_d[n];
+}
+
+\f
+/* Conversions and comparison to double. */
+/* Set R to X truncated towards zero.  NaN, infinities and values of
+   magnitude below 1 give zero. */
+void
+mpz_set_d (mpz_t r, double x)
+{
+ int sign;
+ mp_ptr rp;
+ mp_size_t rn, i;
+ double B;
+ double Bi;
+ mp_limb_t f;
+
+ /* x != x is true when x is a NaN, and x == x * 0.5 is true when x is
+ zero or infinity. */
+ if (x == 0.0 || x != x || x == x * 0.5)
+ {
+ r->_mp_size = 0;
+ return;
+ }
+
+ if (x < 0.0)
+ {
+ x = - x;
+ sign = 1;
+ }
+ else
+ sign = 0;
+
+ if (x < 1.0)
+ {
+ r->_mp_size = 0;
+ return;
+ }
+ /* B is the limb base, computed without overflowing a limb. */
+ B = 2.0 * (double) GMP_LIMB_HIGHBIT;
+ Bi = 1.0 / B;
+ /* Scale x below B, counting the limbs needed. */
+ for (rn = 1; x >= B; rn++)
+ x *= Bi;
+
+ rp = MPZ_REALLOC (r, rn);
+
+ /* Peel off limbs from the most significant end: the integer part of
+ x is the next limb, the fraction is scaled up for the one after. */
+ f = (mp_limb_t) x;
+ x -= f;
+ assert (x < 1.0);
+ rp[rn-1] = f;
+ for (i = rn-1; i-- > 0; )
+ {
+ x = B * x;
+ f = (mp_limb_t) x;
+ x -= f;
+ assert (x < 1.0);
+ rp[i] = f;
+ }
+
+ r->_mp_size = sign ? - rn : rn;
+}
+
+/* Initialize R and set it from the double X, as mpz_set_d does. */
+void
+mpz_init_set_d (mpz_t r, double x)
+{
+ mpz_init (r);
+ mpz_set_d (r, x);
+}
+
+/* Convert U to double by accumulating its limbs from the most
+   significant end, x = B*x + limb, in double arithmetic. */
+double
+mpz_get_d (const mpz_t u)
+{
+  double B = 2.0 * (double) GMP_LIMB_HIGHBIT;
+  mp_size_t i = GMP_ABS (u->_mp_size);
+  double x;
+
+  if (i == 0)
+    return 0.0;
+
+  x = u->_mp_d[--i];
+  while (i > 0)
+    x = B*x + u->_mp_d[--i];
+
+  return (u->_mp_size < 0) ? -x : x;
+}
+
+/* Compare |X| with |D|.  Returns 1, 0 or -1 as |X| is larger than,
+   equal to, or smaller than |D|. */
+int
+mpz_cmpabs_d (const mpz_t x, double d)
+{
+ mp_size_t xn;
+ double B, Bi;
+ mp_size_t i;
+
+ xn = x->_mp_size;
+ d = GMP_ABS (d);
+
+ if (xn != 0)
+ {
+ xn = GMP_ABS (xn);
+
+ B = 2.0 * (double) GMP_LIMB_HIGHBIT;
+ Bi = 1.0 / B;
+
+ /* Scale d so it can be compared with the top limb. */
+ for (i = 1; i < xn; i++)
+ d *= Bi;
+
+ /* d needs more limbs than x has. */
+ if (d >= B)
+ return -1;
+
+ /* Compare floor(d) to top limb, subtract and cancel when equal. */
+ for (i = xn; i-- > 0;)
+ {
+ mp_limb_t f, xl;
+
+ f = (mp_limb_t) d;
+ xl = x->_mp_d[i];
+ if (xl > f)
+ return 1;
+ else if (xl < f)
+ return -1;
+ d = B * (d - f);
+ }
+ }
+ /* All limbs matched (or x is zero): x is smaller iff something is
+ left of d. */
+ return - (d > 0.0);
+}
+
+/* Compare X with D.  The signs decide when they differ; otherwise the
+   magnitudes are compared, with the result negated when both are
+   negative. */
+int
+mpz_cmp_d (const mpz_t x, double d)
+{
+  if (x->_mp_size < 0)
+    return (d >= 0.0) ? -1 : -mpz_cmpabs_d (x, d);
+
+  return (d < 0.0) ? 1 : mpz_cmpabs_d (x, d);
+}
+
+\f
+/* MPZ comparisons and the like. */
+/* Return 1, 0 or -1 according to the sign of U. */
+int
+mpz_sgn (const mpz_t u)
+{
+  mp_size_t usize = u->_mp_size;
+
+  return (usize > 0) - (usize < 0);
+}
+
+/* Compare U with the signed value V.  Returns 1, 0 or -1, except
+   that for V >= 0 the result is whatever mpz_cmp_ui returns. */
+int
+mpz_cmp_si (const mpz_t u, long v)
+{
+  mp_size_t usize = u->_mp_size;
+  mp_limb_t ul, vabs;
+
+  if (usize < -1)
+    return -1;
+  if (v >= 0)
+    return mpz_cmp_ui (u, v);
+  if (usize >= 0)
+    return 1;
+
+  /* Both are negative and U is a single limb: the one with the
+     larger magnitude is the smaller number. */
+  ul = u->_mp_d[0];
+  vabs = (mp_limb_t) GMP_NEG_CAST (unsigned long int, v);
+
+  if (vabs < ul)
+    return -1;
+  return vabs > ul;
+}
+
+/* Compare U with the unsigned value V; returns 1, 0 or -1. */
+int
+mpz_cmp_ui (const mpz_t u, unsigned long v)
+{
+  mp_size_t usize = u->_mp_size;
+  mp_limb_t ul;
+
+  if (usize > 1)
+    return 1;
+  if (usize < 0)
+    return -1;
+
+  ul = (usize > 0) ? u->_mp_d[0] : 0;
+  return (ul > v) - (ul < v);
+}
+
+/* Compare A and B.  Different signed sizes decide immediately;
+   otherwise the limbs are compared with mpn_cmp, and the result is
+   negated when both numbers are negative. */
+int
+mpz_cmp (const mpz_t a, const mpz_t b)
+{
+  mp_size_t asize = a->_mp_size;
+  mp_size_t bsize = b->_mp_size;
+
+  if (asize != bsize)
+    return (asize > bsize) ? 1 : -1;
+
+  if (asize > 0)
+    return mpn_cmp (a->_mp_d, b->_mp_d, asize);
+  if (asize < 0)
+    return -mpn_cmp (a->_mp_d, b->_mp_d, -asize);
+
+  return 0;
+}
+
+/* Compare |U| with the unsigned value V; returns 1, 0 or -1. */
+int
+mpz_cmpabs_ui (const mpz_t u, unsigned long v)
+{
+  mp_size_t un = GMP_ABS (u->_mp_size);
+  mp_limb_t ul;
+
+  if (un > 1)
+    return 1;
+
+  ul = (un == 1) ? u->_mp_d[0] : 0;
+
+  return (ul > v) - (ul < v);
+}
+
+/* Compare |U| with |V|, passing both limb arrays and their lengths to
+   mpn_cmp4. */
+int
+mpz_cmpabs (const mpz_t u, const mpz_t v)
+{
+ return mpn_cmp4 (u->_mp_d, GMP_ABS (u->_mp_size),
+ v->_mp_d, GMP_ABS (v->_mp_size));
+}
+
+/* Set R to the absolute value of U.  R and U may be the same
+   variable. */
+void
+mpz_abs (mpz_t r, const mpz_t u)
+{
+ if (r != u)
+ mpz_set (r, u);
+
+ r->_mp_size = GMP_ABS (r->_mp_size);
+}
+
+/* Set R to -U.  R and U may be the same variable. */
+void
+mpz_neg (mpz_t r, const mpz_t u)
+{
+ if (r != u)
+ mpz_set (r, u);
+
+ r->_mp_size = -r->_mp_size;
+}
+
+/* Exchange the values of U and V by swapping their size, allocation
+   count and limb pointer; no limbs are copied. */
+void
+mpz_swap (mpz_t u, mpz_t v)
+{
+ MP_SIZE_T_SWAP (u->_mp_size, v->_mp_size);
+ MP_SIZE_T_SWAP (u->_mp_alloc, v->_mp_alloc);
+ MP_PTR_SWAP (u->_mp_d, v->_mp_d);
+}
+
+\f
+/* MPZ addition and subtraction */
+
+/* Adds to the absolute value. Returns new size, but doesn't store it.
+   That is, the limbs of R are set to |A| + B, and the caller decides
+   the sign and stores the size. */
+static mp_size_t
+mpz_abs_add_ui (mpz_t r, const mpz_t a, unsigned long b)
+{
+ mp_size_t an;
+ mp_ptr rp;
+ mp_limb_t cy;
+
+ an = GMP_ABS (a->_mp_size);
+ if (an == 0)
+ {
+ /* 0 + b: a single limb, or size zero if b is zero too. */
+ r->_mp_d[0] = b;
+ return b > 0;
+ }
+
+ /* One extra limb for a possible carry out. */
+ rp = MPZ_REALLOC (r, an + 1);
+
+ cy = mpn_add_1 (rp, a->_mp_d, an, b);
+ rp[an] = cy;
+ an += (cy > 0);
+
+ return an;
+}
+
+/* Subtract from the absolute value. Returns new size, (or -1 on underflow),
+ but doesn't store it.  On underflow (|A| < B) the limbs of R hold
+ B - |A|. */
+static mp_size_t
+mpz_abs_sub_ui (mpz_t r, const mpz_t a, unsigned long b)
+{
+ mp_size_t an = GMP_ABS (a->_mp_size);
+ mp_ptr rp = MPZ_REALLOC (r, an);
+
+ if (an == 0)
+ {
+ /* 0 - b: magnitude b, negative unless b is zero. */
+ rp[0] = b;
+ return -(b > 0);
+ }
+ else if (an == 1 && a->_mp_d[0] < b)
+ {
+ rp[0] = b - a->_mp_d[0];
+ return -1;
+ }
+ else
+ {
+ /* |a| >= b here, so the subtraction cannot borrow. */
+ gmp_assert_nocarry (mpn_sub_1 (rp, a->_mp_d, an, b));
+ return mpn_normalized_size (rp, an);
+ }
+}
+
+/* Set R to A + B.  For negative A this is -(|A| - B). */
+void
+mpz_add_ui (mpz_t r, const mpz_t a, unsigned long b)
+{
+ if (a->_mp_size >= 0)
+ r->_mp_size = mpz_abs_add_ui (r, a, b);
+ else
+ r->_mp_size = -mpz_abs_sub_ui (r, a, b);
+}
+
+/* Set R to A - B.  For negative A this is -(|A| + B). */
+void
+mpz_sub_ui (mpz_t r, const mpz_t a, unsigned long b)
+{
+ if (a->_mp_size < 0)
+ r->_mp_size = -mpz_abs_add_ui (r, a, b);
+ else
+ r->_mp_size = mpz_abs_sub_ui (r, a, b);
+}
+
+/* Set R to A - B.  For negative B this is A + |B|, otherwise
+   -(B - A). */
+void
+mpz_ui_sub (mpz_t r, unsigned long a, const mpz_t b)
+{
+ if (b->_mp_size < 0)
+ r->_mp_size = mpz_abs_add_ui (r, b, a);
+ else
+ r->_mp_size = -mpz_abs_sub_ui (r, b, a);
+}
+
+/* Set the limbs of R to |A| + |B| and return the resulting limb count
+   without storing it. */
+static mp_size_t
+mpz_abs_add (mpz_t r, const mpz_t a, const mpz_t b)
+{
+ mp_size_t an = GMP_ABS (a->_mp_size);
+ mp_size_t bn = GMP_ABS (b->_mp_size);
+ mp_size_t rn;
+ mp_ptr rp;
+ mp_limb_t cy;
+
+ rn = GMP_MAX (an, bn);
+ /* One extra limb for the carry out. */
+ rp = MPZ_REALLOC (r, rn + 1);
+ /* mpn_add is called with the longer operand first. */
+ if (an >= bn)
+ cy = mpn_add (rp, a->_mp_d, an, b->_mp_d, bn);
+ else
+ cy = mpn_add (rp, b->_mp_d, bn, a->_mp_d, an);
+
+ rp[rn] = cy;
+
+ return rn + (cy > 0);
+}
+
+/* Set the limbs of R to the magnitude of |A| - |B|.  Returns the limb
+   count, negated when |A| < |B|, without storing it.  When the
+   magnitudes are equal, 0 is returned and R is not touched. */
+static mp_size_t
+mpz_abs_sub (mpz_t r, const mpz_t a, const mpz_t b)
+{
+ mp_size_t an = GMP_ABS (a->_mp_size);
+ mp_size_t bn = GMP_ABS (b->_mp_size);
+ int cmp;
+ mp_ptr rp;
+
+ cmp = mpn_cmp4 (a->_mp_d, an, b->_mp_d, bn);
+ if (cmp > 0)
+ {
+ rp = MPZ_REALLOC (r, an);
+ gmp_assert_nocarry (mpn_sub (rp, a->_mp_d, an, b->_mp_d, bn));
+ return mpn_normalized_size (rp, an);
+ }
+ else if (cmp < 0)
+ {
+ /* Subtract the smaller magnitude from the larger one. */
+ rp = MPZ_REALLOC (r, bn);
+ gmp_assert_nocarry (mpn_sub (rp, b->_mp_d, bn, a->_mp_d, an));
+ return -mpn_normalized_size (rp, bn);
+ }
+ else
+ return 0;
+}
+
+/* Set R to A + B.  Operands whose sizes do not differ in sign have
+   their magnitudes added, others subtracted; the result then takes
+   the sign of A, flipped by a negative size from mpz_abs_sub. */
+void
+mpz_add (mpz_t r, const mpz_t a, const mpz_t b)
+{
+  int same_sign = (a->_mp_size ^ b->_mp_size) >= 0;
+  mp_size_t rn;
+
+  rn = same_sign ? mpz_abs_add (r, a, b) : mpz_abs_sub (r, a, b);
+
+  /* The size of A is read only after the limbs are computed, and
+     before the size of R (which may be A) is stored. */
+  r->_mp_size = a->_mp_size >= 0 ? rn : - rn;
+}
+
+/* Set R to A - B.  Operands whose sizes do not differ in sign have
+   their magnitudes subtracted, others added; the result then takes
+   the sign of A, flipped by a negative size from mpz_abs_sub. */
+void
+mpz_sub (mpz_t r, const mpz_t a, const mpz_t b)
+{
+  int same_sign = (a->_mp_size ^ b->_mp_size) >= 0;
+  mp_size_t rn;
+
+  rn = same_sign ? mpz_abs_sub (r, a, b) : mpz_abs_add (r, a, b);
+
+  r->_mp_size = a->_mp_size >= 0 ? rn : - rn;
+}
+
+\f
+/* MPZ multiplication */
+/* Set R to U * V.  A negative V is handled by multiplying with its
+   magnitude and negating the product. */
+void
+mpz_mul_si (mpz_t r, const mpz_t u, long int v)
+{
+  if (v >= 0)
+    {
+      mpz_mul_ui (r, u, (unsigned long int) v);
+      return;
+    }
+
+  mpz_mul_ui (r, u, GMP_NEG_CAST (unsigned long int, v));
+  mpz_neg (r, r);
+}
+
+/* Set R to U * V.  The product is built in a temporary which is then
+   swapped into R. */
+void
+mpz_mul_ui (mpz_t r, const mpz_t u, unsigned long int v)
+{
+ mp_size_t un;
+ mpz_t t;
+ mp_ptr tp;
+ mp_limb_t cy;
+
+ un = GMP_ABS (u->_mp_size);
+
+ if (un == 0 || v == 0)
+ {
+ r->_mp_size = 0;
+ return;
+ }
+
+ /* Room for un limbs plus the carry limb. */
+ mpz_init2 (t, (un + 1) * GMP_LIMB_BITS);
+
+ tp = t->_mp_d;
+ cy = mpn_mul_1 (tp, u->_mp_d, un, v);
+ tp[un] = cy;
+
+ t->_mp_size = un + (cy > 0);
+ if (u->_mp_size < 0)
+ t->_mp_size = - t->_mp_size;
+
+ /* After the swap T owns the old limbs of R, which are freed. */
+ mpz_swap (r, t);
+ mpz_clear (t);
+}
+
+/* Set R to U * V.  The product is built in a temporary which is then
+   swapped into R. */
+void
+mpz_mul (mpz_t r, const mpz_t u, const mpz_t v)
+{
+ int sign;
+ mp_size_t un, vn, rn;
+ mpz_t t;
+ mp_ptr tp;
+
+ un = GMP_ABS (u->_mp_size);
+ vn = GMP_ABS (v->_mp_size);
+
+ if (un == 0 || vn == 0)
+ {
+ r->_mp_size = 0;
+ return;
+ }
+
+ /* The product is negative iff the operand signs differ. */
+ sign = (u->_mp_size ^ v->_mp_size) < 0;
+
+ mpz_init2 (t, (un + vn) * GMP_LIMB_BITS);
+
+ tp = t->_mp_d;
+ /* mpn_mul is called with the longer operand first. */
+ if (un >= vn)
+ mpn_mul (tp, u->_mp_d, un, v->_mp_d, vn);
+ else
+ mpn_mul (tp, v->_mp_d, vn, u->_mp_d, un);
+
+ /* The product has un + vn limbs, of which the top one may be zero. */
+ rn = un + vn;
+ rn -= tp[rn-1] == 0;
+
+ t->_mp_size = sign ? - rn : rn;
+ mpz_swap (r, t);
+ mpz_clear (t);
+}
+
+/* Set R to U * 2^BITS.  R and U may be the same variable. */
+void
+mpz_mul_2exp (mpz_t r, const mpz_t u, mp_bitcnt_t bits)
+{
+ mp_size_t un, rn;
+ mp_size_t limbs;
+ unsigned shift;
+ mp_ptr rp;
+
+ un = GMP_ABS (u->_mp_size);
+ if (un == 0)
+ {
+ r->_mp_size = 0;
+ return;
+ }
+
+ /* Split the shift into whole limbs and a remaining bit count. */
+ limbs = bits / GMP_LIMB_BITS;
+ shift = bits % GMP_LIMB_BITS;
+
+ rn = un + limbs + (shift > 0);
+ rp = MPZ_REALLOC (r, rn);
+ if (shift > 0)
+ {
+ mp_limb_t cy = mpn_lshift (rp + limbs, u->_mp_d, un, shift);
+ rp[rn-1] = cy;
+ rn -= (cy == 0);
+ }
+ else
+ mpn_copyd (rp + limbs, u->_mp_d, un);
+
+ /* Clear the low limbs only after the source has been moved. */
+ while (limbs > 0)
+ rp[--limbs] = 0;
+
+ r->_mp_size = (u->_mp_size < 0) ? - rn : rn;
+}
+
+\f
+/* MPZ division */
+/* Rounding of the quotient in the division functions below. */
+enum mpz_div_round_mode { GMP_DIV_FLOOR, GMP_DIV_CEIL, GMP_DIV_TRUNC };
+
+/* Divide N by D, rounding the quotient according to MODE, so that
+   N = Q * D + R.  Allows q or r to be NULL, in which case that result
+   is not stored.  Returns 1 iff remainder is non-zero.  Division by
+   zero calls gmp_die. */
+static int
+mpz_div_qr (mpz_t q, mpz_t r,
+ const mpz_t n, const mpz_t d, enum mpz_div_round_mode mode)
+{
+ mp_size_t ns, ds, nn, dn, qs;
+ ns = n->_mp_size;
+ ds = d->_mp_size;
+
+ if (ds == 0)
+ gmp_die("mpz_div_qr: Divide by zero.");
+
+ if (ns == 0)
+ {
+ if (q)
+ q->_mp_size = 0;
+ if (r)
+ r->_mp_size = 0;
+ return 0;
+ }
+
+ nn = GMP_ABS (ns);
+ dn = GMP_ABS (ds);
+
+ /* Only the sign of qs is used: negative iff n and d differ in sign. */
+ qs = ds ^ ns;
+
+ if (nn < dn)
+ {
+ /* |n| < |d|: the truncated quotient is zero and the remainder is
+ n, which is non-zero here. */
+ if (mode == GMP_DIV_CEIL && qs >= 0)
+ {
+ /* q = 1, r = n - d */
+ if (r)
+ mpz_sub (r, n, d);
+ if (q)
+ mpz_set_ui (q, 1);
+ }
+ else if (mode == GMP_DIV_FLOOR && qs < 0)
+ {
+ /* q = -1, r = n + d */
+ if (r)
+ mpz_add (r, n, d);
+ if (q)
+ mpz_set_si (q, -1);
+ }
+ else
+ {
+ /* q = 0, r = n */
+ if (r)
+ mpz_set (r, n);
+ if (q)
+ q->_mp_size = 0;
+ }
+ return 1;
+ }
+ else
+ {
+ mp_ptr np, qp;
+ mp_size_t qn, rn;
+ mpz_t tq, tr;
+
+ /* Work on temporaries, so that q and r may be the same variables
+ as n or d.  tr starts out as a copy of n and ends up holding
+ the remainder. */
+ mpz_init (tr);
+ mpz_set (tr, n);
+ np = tr->_mp_d;
+
+ qn = nn - dn + 1;
+
+ if (q)
+ {
+ mpz_init2 (tq, qn * GMP_LIMB_BITS);
+ qp = tq->_mp_d;
+ }
+ else
+ qp = NULL;
+
+ /* Truncating division of the magnitudes. */
+ mpn_div_qr (qp, np, nn, d->_mp_d, dn);
+
+ if (qp)
+ {
+ qn -= (qp[qn-1] == 0);
+
+ tq->_mp_size = qs < 0 ? -qn : qn;
+ }
+ /* The truncated remainder takes the sign of n. */
+ rn = mpn_normalized_size (np, dn);
+ tr->_mp_size = ns < 0 ? - rn : rn;
+
+ /* Adjust from truncation to the requested rounding, when the
+ division is inexact and the rounding direction differs. */
+ if (mode == GMP_DIV_FLOOR && qs < 0 && rn != 0)
+ {
+ if (q)
+ mpz_sub_ui (tq, tq, 1);
+ if (r)
+ mpz_add (tr, tr, d);
+ }
+ else if (mode == GMP_DIV_CEIL && qs >= 0 && rn != 0)
+ {
+ if (q)
+ mpz_add_ui (tq, tq, 1);
+ if (r)
+ mpz_sub (tr, tr, d);
+ }
+
+ if (q)
+ {
+ mpz_swap (tq, q);
+ mpz_clear (tq);
+ }
+ if (r)
+ mpz_swap (tr, r);
+
+ mpz_clear (tr);
+
+ return rn != 0;
+ }
+}
+
+/* Set Q and R to quotient and remainder of N / D, with Q rounded
+   towards plus infinity. */
+void
+mpz_cdiv_qr (mpz_t q, mpz_t r, const mpz_t n, const mpz_t d)
+{
+ mpz_div_qr (q, r, n, d, GMP_DIV_CEIL);
+}
+
+/* Set Q and R to quotient and remainder of N / D, with Q rounded
+   towards minus infinity. */
+void
+mpz_fdiv_qr (mpz_t q, mpz_t r, const mpz_t n, const mpz_t d)
+{
+ mpz_div_qr (q, r, n, d, GMP_DIV_FLOOR);
+}
+
+/* Set Q and R to quotient and remainder of N / D, with Q rounded
+   towards zero. */
+void
+mpz_tdiv_qr (mpz_t q, mpz_t r, const mpz_t n, const mpz_t d)
+{
+ mpz_div_qr (q, r, n, d, GMP_DIV_TRUNC);
+}
+
+/* Set Q to N / D rounded towards plus infinity. */
+void
+mpz_cdiv_q (mpz_t q, const mpz_t n, const mpz_t d)
+{
+ mpz_div_qr (q, NULL, n, d, GMP_DIV_CEIL);
+}
+
+/* Set Q to N / D rounded towards minus infinity. */
+void
+mpz_fdiv_q (mpz_t q, const mpz_t n, const mpz_t d)
+{
+ mpz_div_qr (q, NULL, n, d, GMP_DIV_FLOOR);
+}
+
+/* Set Q to N / D rounded towards zero. */
+void
+mpz_tdiv_q (mpz_t q, const mpz_t n, const mpz_t d)
+{
+ mpz_div_qr (q, NULL, n, d, GMP_DIV_TRUNC);
+}
+
+/* Set R to the remainder of N / D for a quotient rounded towards plus
+   infinity. */
+void
+mpz_cdiv_r (mpz_t r, const mpz_t n, const mpz_t d)
+{
+ mpz_div_qr (NULL, r, n, d, GMP_DIV_CEIL);
+}
+
+/* Set R to the remainder of N / D for a quotient rounded towards
+   minus infinity. */
+void
+mpz_fdiv_r (mpz_t r, const mpz_t n, const mpz_t d)
+{
+ mpz_div_qr (NULL, r, n, d, GMP_DIV_FLOOR);
+}
+
+/* Set R to the remainder of N / D for a quotient rounded towards
+   zero. */
+void
+mpz_tdiv_r (mpz_t r, const mpz_t n, const mpz_t d)
+{
+ mpz_div_qr (NULL, r, n, d, GMP_DIV_TRUNC);
+}
+
+/* Set R to N mod D.  Floor rounding is used for D >= 0 and ceiling
+   rounding for negative D, which makes the remainder non-negative
+   either way. */
+void
+mpz_mod (mpz_t r, const mpz_t n, const mpz_t d)
+{
+  enum mpz_div_round_mode mode;
+
+  mode = (d->_mp_size >= 0) ? GMP_DIV_FLOOR : GMP_DIV_CEIL;
+  mpz_div_qr (NULL, r, n, d, mode);
+}
+
+/* Set Q to U / 2^BIT_INDEX, rounded according to MODE.  The magnitude
+   is shifted right, and one is added to it when the rounding is away
+   from zero and non-zero bits were shifted out. */
+static void
+mpz_div_q_2exp (mpz_t q, const mpz_t u, mp_bitcnt_t bit_index,
+ enum mpz_div_round_mode mode)
+{
+ mp_size_t un, qn;
+ mp_size_t limb_cnt;
+ mp_ptr qp;
+ mp_limb_t adjust;
+
+ un = u->_mp_size;
+ if (un == 0)
+ {
+ q->_mp_size = 0;
+ return;
+ }
+ /* Whole limbs dropped, and the remaining bit shift. */
+ limb_cnt = bit_index / GMP_LIMB_BITS;
+ qn = GMP_ABS (un) - limb_cnt;
+ bit_index %= GMP_LIMB_BITS;
+
+ if (mode == ((un > 0) ? GMP_DIV_CEIL : GMP_DIV_FLOOR)) /* un != 0 here. */
+ /* Note: Below, the final indexing at limb_cnt is valid because at
+ that point we have qn > 0. */
+ adjust = (qn <= 0
+ || !mpn_zero_p (u->_mp_d, limb_cnt)
+ || (u->_mp_d[limb_cnt]
+ & (((mp_limb_t) 1 << bit_index) - 1)));
+ else
+ adjust = 0;
+
+ if (qn <= 0)
+ qn = 0;
+
+ else
+ {
+ qp = MPZ_REALLOC (q, qn);
+
+ if (bit_index != 0)
+ {
+ mpn_rshift (qp, u->_mp_d + limb_cnt, qn, bit_index);
+ qn -= qp[qn - 1] == 0;
+ }
+ else
+ {
+ mpn_copyi (qp, u->_mp_d + limb_cnt, qn);
+ }
+ }
+
+ q->_mp_size = qn;
+
+ /* Round the magnitude, then apply the sign.  un was saved above, as
+ q may be the same variable as u. */
+ mpz_add_ui (q, q, adjust);
+ if (un < 0)
+ mpz_neg (q, q);
+}
+
+/* Set R to the remainder of U / 2^BIT_INDEX, for a quotient rounded
+   according to MODE.  R and U may be the same variable. */
+static void
+mpz_div_r_2exp (mpz_t r, const mpz_t u, mp_bitcnt_t bit_index,
+ enum mpz_div_round_mode mode)
+{
+ mp_size_t us, un, rn;
+ mp_ptr rp;
+ mp_limb_t mask;
+
+ us = u->_mp_size;
+ if (us == 0 || bit_index == 0)
+ {
+ r->_mp_size = 0;
+ return;
+ }
+ /* Number of limbs needed to hold bit_index bits. */
+ rn = (bit_index + GMP_LIMB_BITS - 1) / GMP_LIMB_BITS;
+ assert (rn > 0);
+
+ rp = MPZ_REALLOC (r, rn);
+ un = GMP_ABS (us);
+
+ /* Mask of the bits below bit_index within the top result limb. */
+ mask = GMP_LIMB_MAX >> (rn * GMP_LIMB_BITS - bit_index);
+
+ if (rn > un)
+ {
+ /* Quotient (with truncation) is zero, and remainder is
+ non-zero */
+ if (mode == ((us > 0) ? GMP_DIV_CEIL : GMP_DIV_FLOOR)) /* us != 0 here. */
+ {
+ /* Have to negate and sign extend. */
+ mp_size_t i;
+ mp_limb_t cy;
+
+ /* Two's complement of the un limbs of u. */
+ for (cy = 1, i = 0; i < un; i++)
+ {
+ mp_limb_t s = ~u->_mp_d[i] + cy;
+ cy = s < cy;
+ rp[i] = s;
+ }
+ assert (cy == 0);
+ for (; i < rn - 1; i++)
+ rp[i] = GMP_LIMB_MAX;
+
+ rp[rn-1] = mask;
+ us = -us;
+ }
+ else
+ {
+ /* Just copy */
+ if (r != u)
+ mpn_copyi (rp, u->_mp_d, un);
+
+ rn = un;
+ }
+ }
+ else
+ {
+ /* Keep the low bit_index bits of |u|. */
+ if (r != u)
+ mpn_copyi (rp, u->_mp_d, rn - 1);
+
+ rp[rn-1] = u->_mp_d[rn-1] & mask;
+
+ if (mode == ((us > 0) ? GMP_DIV_CEIL : GMP_DIV_FLOOR)) /* us != 0 here. */
+ {
+ /* If r != 0, compute 2^{bit_count} - r. */
+ mp_size_t i;
+
+ for (i = 0; i < rn && rp[i] == 0; i++)
+ ;
+ if (i < rn)
+ {
+ /* r > 0, need to flip sign. */
+ rp[i] = ~rp[i] + 1;
+ for (i++; i < rn; i++)
+ rp[i] = ~rp[i];
+
+ rp[rn-1] &= mask;
+
+ /* us is not used for anything else, so we can modify it
+ here to indicate flipped sign. */
+ us = -us;
+ }
+ }
+ }
+ rn = mpn_normalized_size (rp, rn);
+ r->_mp_size = us < 0 ? -rn : rn;
+}
+
+/* Set R to U / 2^CNT rounded towards plus infinity. */
+void
+mpz_cdiv_q_2exp (mpz_t r, const mpz_t u, mp_bitcnt_t cnt)
+{
+ mpz_div_q_2exp (r, u, cnt, GMP_DIV_CEIL);
+}
+
+/* Set R to U / 2^CNT rounded towards minus infinity. */
+void
+mpz_fdiv_q_2exp (mpz_t r, const mpz_t u, mp_bitcnt_t cnt)
+{
+ mpz_div_q_2exp (r, u, cnt, GMP_DIV_FLOOR);
+}
+
+/* Set R to U / 2^CNT rounded towards zero. */
+void
+mpz_tdiv_q_2exp (mpz_t r, const mpz_t u, mp_bitcnt_t cnt)
+{
+ mpz_div_q_2exp (r, u, cnt, GMP_DIV_TRUNC);
+}
+
+/* Set R to the remainder of U / 2^CNT for a quotient rounded towards
+   plus infinity. */
+void
+mpz_cdiv_r_2exp (mpz_t r, const mpz_t u, mp_bitcnt_t cnt)
+{
+ mpz_div_r_2exp (r, u, cnt, GMP_DIV_CEIL);
+}
+
+/* Set R to the remainder of U / 2^CNT for a quotient rounded towards
+   minus infinity. */
+void
+mpz_fdiv_r_2exp (mpz_t r, const mpz_t u, mp_bitcnt_t cnt)
+{
+ mpz_div_r_2exp (r, u, cnt, GMP_DIV_FLOOR);
+}
+
+/* Set R to the remainder of U / 2^CNT for a quotient rounded towards
+   zero. */
+void
+mpz_tdiv_r_2exp (mpz_t r, const mpz_t u, mp_bitcnt_t cnt)
+{
+ mpz_div_r_2exp (r, u, cnt, GMP_DIV_TRUNC);
+}
+
+/* Set Q to N / D.  The "remainder is non-zero" result of mpz_div_qr
+   is passed to gmp_assert_nocarry. */
+void
+mpz_divexact (mpz_t q, const mpz_t n, const mpz_t d)
+{
+ gmp_assert_nocarry (mpz_div_qr (q, NULL, n, d, GMP_DIV_TRUNC));
+}
+
+/* Return non-zero iff D divides N, i.e. the remainder is zero.  D
+   must be non-zero; mpz_div_qr calls gmp_die otherwise. */
+int
+mpz_divisible_p (const mpz_t n, const mpz_t d)
+{
+ return mpz_div_qr (NULL, NULL, n, d, GMP_DIV_TRUNC) == 0;
+}
+
+/* Divide N by the single-limb value D, rounding the quotient
+   according to MODE.  Q and R may each be NULL, in which case that
+   result is not stored.  Returns the absolute value of the
+   remainder. */
+static unsigned long
+mpz_div_qr_ui (mpz_t q, mpz_t r,
+ const mpz_t n, unsigned long d, enum mpz_div_round_mode mode)
+{
+ mp_size_t ns, qn;
+ mp_ptr qp;
+ mp_limb_t rl;
+ mp_size_t rs;
+
+ ns = n->_mp_size;
+ if (ns == 0)
+ {
+ if (q)
+ q->_mp_size = 0;
+ if (r)
+ r->_mp_size = 0;
+ return 0;
+ }
+
+ qn = GMP_ABS (ns);
+ if (q)
+ qp = MPZ_REALLOC (q, qn);
+ else
+ qp = NULL;
+
+ /* Truncating division of the magnitude. */
+ rl = mpn_div_qr_1 (qp, n->_mp_d, qn, d);
+ assert (rl < d);
+
+ /* Size of the truncated remainder; it takes the sign of n. */
+ rs = rl > 0;
+ rs = (ns < 0) ? -rs : rs;
+
+ /* Rounding away from zero: the quotient magnitude grows by one and
+ the remainder becomes d - rl with the opposite sign. */
+ if (rl > 0 && ( (mode == GMP_DIV_FLOOR && ns < 0)
+ || (mode == GMP_DIV_CEIL && ns >= 0)))
+ {
+ if (q)
+ gmp_assert_nocarry (mpn_add_1 (qp, qp, qn, 1));
+ rl = d - rl;
+ rs = -rs;
+ }
+
+ if (r)
+ {
+ r->_mp_d[0] = rl;
+ r->_mp_size = rs;
+ }
+ if (q)
+ {
+ qn -= (qp[qn-1] == 0);
+ assert (qn == 0 || qp[qn-1] > 0);
+
+ q->_mp_size = (ns < 0) ? - qn : qn;
+ }
+
+ return rl;
+}
+
+/* Set Q and R to quotient and remainder of N / D, with Q rounded
+   towards plus infinity.  Returns the absolute value of the
+   remainder. */
+unsigned long
+mpz_cdiv_qr_ui (mpz_t q, mpz_t r, const mpz_t n, unsigned long d)
+{
+ return mpz_div_qr_ui (q, r, n, d, GMP_DIV_CEIL);
+}
+
+/* Set Q and R to quotient and remainder of N / D, with Q rounded
+   towards minus infinity.  Returns the absolute value of the
+   remainder. */
+unsigned long
+mpz_fdiv_qr_ui (mpz_t q, mpz_t r, const mpz_t n, unsigned long d)
+{
+ return mpz_div_qr_ui (q, r, n, d, GMP_DIV_FLOOR);
+}
+
+/* Set Q and R to quotient and remainder of N / D, with Q rounded
+   towards zero.  Returns the absolute value of the remainder. */
+unsigned long
+mpz_tdiv_qr_ui (mpz_t q, mpz_t r, const mpz_t n, unsigned long d)
+{
+ return mpz_div_qr_ui (q, r, n, d, GMP_DIV_TRUNC);
+}
+
+/* Set Q to N / D rounded towards plus infinity.  Returns the absolute
+   value of the remainder. */
+unsigned long
+mpz_cdiv_q_ui (mpz_t q, const mpz_t n, unsigned long d)
+{
+ return mpz_div_qr_ui (q, NULL, n, d, GMP_DIV_CEIL);
+}
+
+/* Set Q to N / D rounded towards minus infinity.  Returns the
+   absolute value of the remainder. */
+unsigned long
+mpz_fdiv_q_ui (mpz_t q, const mpz_t n, unsigned long d)
+{
+ return mpz_div_qr_ui (q, NULL, n, d, GMP_DIV_FLOOR);
+}
+
+/* Set Q to N / D rounded towards zero.  Returns the absolute value of
+   the remainder. */
+unsigned long
+mpz_tdiv_q_ui (mpz_t q, const mpz_t n, unsigned long d)
+{
+ return mpz_div_qr_ui (q, NULL, n, d, GMP_DIV_TRUNC);
+}
+
+/* Set R to the remainder of N / D for a quotient rounded towards plus
+   infinity.  Returns the absolute value of the remainder. */
+unsigned long
+mpz_cdiv_r_ui (mpz_t r, const mpz_t n, unsigned long d)
+{
+ return mpz_div_qr_ui (NULL, r, n, d, GMP_DIV_CEIL);
+}
+/* Set R to the remainder of N / D for a quotient rounded towards
+   minus infinity.  Returns the absolute value of the remainder. */
+unsigned long
+mpz_fdiv_r_ui (mpz_t r, const mpz_t n, unsigned long d)
+{
+ return mpz_div_qr_ui (NULL, r, n, d, GMP_DIV_FLOOR);
+}
+/* Set R to the remainder of N / D for a quotient rounded towards
+   zero.  Returns the absolute value of the remainder. */
+unsigned long
+mpz_tdiv_r_ui (mpz_t r, const mpz_t n, unsigned long d)
+{
+ return mpz_div_qr_ui (NULL, r, n, d, GMP_DIV_TRUNC);
+}
+
+/* Return the absolute value of the remainder of N / D for a quotient
+   rounded towards plus infinity; nothing is stored. */
+unsigned long
+mpz_cdiv_ui (const mpz_t n, unsigned long d)
+{
+ return mpz_div_qr_ui (NULL, NULL, n, d, GMP_DIV_CEIL);
+}
+
+/* Return the absolute value of the remainder of N / D for a quotient
+   rounded towards minus infinity; nothing is stored. */
+unsigned long
+mpz_fdiv_ui (const mpz_t n, unsigned long d)
+{
+ return mpz_div_qr_ui (NULL, NULL, n, d, GMP_DIV_FLOOR);
+}
+
+/* Return the absolute value of the remainder of N / D for a quotient
+   rounded towards zero; nothing is stored. */
+unsigned long
+mpz_tdiv_ui (const mpz_t n, unsigned long d)
+{
+ return mpz_div_qr_ui (NULL, NULL, n, d, GMP_DIV_TRUNC);
+}
+
+/* Set R to N mod D, using floor rounding so that the remainder is
+   non-negative, and return it. */
+unsigned long
+mpz_mod_ui (mpz_t r, const mpz_t n, unsigned long d)
+{
+ return mpz_div_qr_ui (NULL, r, n, d, GMP_DIV_FLOOR);
+}
+
+/* Set Q to N / D.  The remainder returned by mpz_div_qr_ui is passed
+   to gmp_assert_nocarry. */
+void
+mpz_divexact_ui (mpz_t q, const mpz_t n, unsigned long d)
+{
+ gmp_assert_nocarry (mpz_div_qr_ui (q, NULL, n, d, GMP_DIV_TRUNC));
+}
+
+/* Return non-zero iff D divides N, i.e. the remainder is zero. */
+int
+mpz_divisible_ui_p (const mpz_t n, unsigned long d)
+{
+ return mpz_div_qr_ui (NULL, NULL, n, d, GMP_DIV_TRUNC) == 0;
+}
+
+\f
+/* GCD */
+/* Return the greatest common divisor of the limbs U and V, which must
+   not both be zero.  Uses only shifts, subtractions and
+   comparisons. */
+static mp_limb_t
+mpn_gcd_11 (mp_limb_t u, mp_limb_t v)
+{
+ unsigned shift;
+
+ assert ( (u | v) > 0);
+
+ if (u == 0)
+ return v;
+ else if (v == 0)
+ return u;
+
+ /* Remove the common power of two; it is put back at the end. */
+ gmp_ctz (shift, u | v);
+
+ u >>= shift;
+ v >>= shift;
+
+ /* At least one of them is odd now; make that one u. */
+ if ( (u & 1) == 0)
+ MP_LIMB_T_SWAP (u, v);
+
+ while ( (v & 1) == 0)
+ v >>= 1;
+
+ /* Both are odd here.  The difference of two odd numbers is even, so
+ at least one bit can always be shifted out after subtracting. */
+ while (u != v)
+ {
+ if (u > v)
+ {
+ u -= v;
+ do
+ u >>= 1;
+ while ( (u & 1) == 0);
+ }
+ else
+ {
+ v -= u;
+ do
+ v >>= 1;
+ while ( (v & 1) == 0);
+ }
+ }
+ return u << shift;
+}
+
+/* Compute the greatest common divisor of U and V.  It is stored in G
+   unless G is NULL.  For V != 0 the gcd is also returned; for V == 0
+   the gcd is |U| and the return value is 0. */
+unsigned long
+mpz_gcd_ui (mpz_t g, const mpz_t u, unsigned long v)
+{
+  mp_size_t un;
+
+  if (v == 0)
+    {
+      if (g)
+        mpz_abs (g, u);
+      return v;
+    }
+
+  /* gcd (u, v) = gcd (u mod v, v); for u == 0 the gcd is v itself. */
+  un = GMP_ABS (u->_mp_size);
+  if (un != 0)
+    v = mpn_gcd_11 (mpn_div_qr_1 (NULL, u->_mp_d, un, v), v);
+
+  if (g)
+    mpz_set_ui (g, v);
+
+  return v;
+}
+
+/* Set R to |U| with all trailing zero bits shifted out, and return the
+   number of bits removed.  U must be non-zero.  R and U may be the
+   same variable. */
+static mp_bitcnt_t
+mpz_make_odd (mpz_t r, const mpz_t u)
+{
+ mp_size_t un, rn, i;
+ mp_ptr rp;
+ unsigned shift;
+
+ un = GMP_ABS (u->_mp_size);
+ assert (un > 0);
+
+ /* Skip the zero limbs at the low end. */
+ for (i = 0; u->_mp_d[i] == 0; i++)
+ ;
+
+ gmp_ctz (shift, u->_mp_d[i]);
+
+ rn = un - i;
+ rp = MPZ_REALLOC (r, rn);
+ if (shift > 0)
+ {
+ mpn_rshift (rp, u->_mp_d + i, rn, shift);
+ rn -= (rp[rn-1] == 0);
+ }
+ else
+ mpn_copyi (rp, u->_mp_d + i, rn);
+
+ /* The size is stored as positive, whatever the sign of u. */
+ r->_mp_size = rn;
+ return i * GMP_LIMB_BITS + shift;
+}
+
+/* Set g to the greatest common divisor of u and v.  If either input is
+   zero the result is the absolute value of the other one.  Otherwise
+   the power of two common to both inputs is set aside, the odd parts
+   are reduced by one division followed by a subtract-and-make-odd
+   loop, and the common power of two is multiplied back in at the
+   end.  */
+void
+mpz_gcd (mpz_t g, const mpz_t u, const mpz_t v)
+{
+ mpz_t tu, tv;
+ mp_bitcnt_t uz, vz, gz;
+
+ if (u->_mp_size == 0)
+ {
+ mpz_abs (g, v);
+ return;
+ }
+ if (v->_mp_size == 0)
+ {
+ mpz_abs (g, u);
+ return;
+ }
+
+ mpz_init (tu);
+ mpz_init (tv);
+
+ /* tu, tv: odd parts of |u|, |v|; gz: number of factors of two shared
+ by both inputs. */
+ uz = mpz_make_odd (tu, u);
+ vz = mpz_make_odd (tv, v);
+ gz = GMP_MIN (uz, vz);
+
+ /* Keep the operand with more limbs in tu before the initial division. */
+ if (tu->_mp_size < tv->_mp_size)
+ mpz_swap (tu, tv);
+
+ mpz_tdiv_r (tu, tu, tv);
+ if (tu->_mp_size == 0)
+ {
+ /* tv divides tu, so tv is the odd part of the gcd. */
+ mpz_swap (g, tv);
+ }
+ else
+ for (;;)
+ {
+ int c;
+
+ /* tv is odd, so powers of two in tu cannot be part of the gcd. */
+ mpz_make_odd (tu, tu);
+ c = mpz_cmp (tu, tv);
+ if (c == 0)
+ {
+ mpz_swap (g, tu);
+ break;
+ }
+ /* Arrange tu > tv so the subtraction below stays positive. */
+ if (c < 0)
+ mpz_swap (tu, tv);
+
+ if (tv->_mp_size == 1)
+ {
+ /* Single-limb tv: reduce tu modulo it and finish with the
+ limb gcd. */
+ mp_limb_t vl = tv->_mp_d[0];
+ mp_limb_t ul = mpz_tdiv_ui (tu, vl);
+ mpz_set_ui (g, mpn_gcd_11 (ul, vl));
+ break;
+ }
+ mpz_sub (tu, tu, tv);
+ }
+ mpz_clear (tu);
+ mpz_clear (tv);
+ /* Restore the common power of two. */
+ mpz_mul_2exp (g, g, gz);
+}
+
+/* Extended gcd: set g to gcd (u, v) and, when the corresponding
+   pointer is non-NULL, set s and t to cofactors satisfying
+   g = s u + t v.  g must be a valid mpz_t; s and t may each be NULL.
+   If one input is zero, the cofactor of the other input is its sign
+   and the cofactor of the zero input is 0.  */
+void
+mpz_gcdext (mpz_t g, mpz_t s, mpz_t t, const mpz_t u, const mpz_t v)
+{
+ mpz_t tu, tv, s0, s1, t0, t1;
+ mp_bitcnt_t uz, vz, gz;
+ mp_bitcnt_t power;
+
+ if (u->_mp_size == 0)
+ {
+ /* g = 0 u + sgn(v) v */
+ signed long sign = mpz_sgn (v);
+ mpz_abs (g, v);
+ if (s)
+ mpz_set_ui (s, 0);
+ if (t)
+ mpz_set_si (t, sign);
+ return;
+ }
+
+ if (v->_mp_size == 0)
+ {
+ /* g = sgn(u) u + 0 v */
+ signed long sign = mpz_sgn (u);
+ mpz_abs (g, u);
+ if (s)
+ mpz_set_si (s, sign);
+ if (t)
+ mpz_set_ui (t, 0);
+ return;
+ }
+
+ mpz_init (tu);
+ mpz_init (tv);
+ mpz_init (s0);
+ mpz_init (s1);
+ mpz_init (t0);
+ mpz_init (t1);
+
+ /* tu, tv: odd parts of |u|, |v|. gz: factors of two common to both
+ inputs. After the subtractions uz and vz hold the remaining
+ (non-common) factors of two of each input. */
+ uz = mpz_make_odd (tu, u);
+ vz = mpz_make_odd (tv, v);
+ gz = GMP_MIN (uz, vz);
+
+ uz -= gz;
+ vz -= gz;
+
+ /* Cofactors corresponding to odd gcd. gz handled later. */
+ if (tu->_mp_size < tv->_mp_size)
+ {
+ /* Swap the roles of the two inputs consistently, including the
+ output pointers, so the larger odd part is in tu. */
+ mpz_swap (tu, tv);
+ MPZ_SRCPTR_SWAP (u, v);
+ MPZ_PTR_SWAP (s, t);
+ MP_BITCNT_T_SWAP (uz, vz);
+ }
+
+ /* Maintain
+ *
+ * u = t0 tu + t1 tv
+ * v = s0 tu + s1 tv
+ *
+ * where u and v denote the inputs with common factors of two
+ * eliminated, and det (s0, t0; s1, t1) = 2^p. Then
+ *
+ * 2^p tu = s1 u - t1 v
+ * 2^p tv = -s0 u + t0 v
+ */
+
+ /* After initial division, tu = q tv + tu', we have
+ *
+ * u = 2^uz (tu' + q tv)
+ * v = 2^vz tv
+ *
+ * or
+ *
+ * t0 = 2^uz, t1 = 2^uz q
+ * s0 = 0, s1 = 2^vz
+ */
+
+ mpz_setbit (t0, uz);
+ mpz_tdiv_qr (t1, tu, tu, tv);
+ mpz_mul_2exp (t1, t1, uz);
+
+ mpz_setbit (s1, vz);
+ /* power tracks p, the exponent of the determinant above. */
+ power = uz + vz;
+
+ if (tu->_mp_size > 0)
+ {
+ mp_bitcnt_t shift;
+ /* Removing 2^shift from tu is compensated by scaling the tu
+ coefficients t0 and s0 by the same power. */
+ shift = mpz_make_odd (tu, tu);
+ mpz_mul_2exp (t0, t0, shift);
+ mpz_mul_2exp (s0, s0, shift);
+ power += shift;
+
+ for (;;)
+ {
+ int c;
+ c = mpz_cmp (tu, tv);
+ if (c == 0)
+ break;
+
+ if (c < 0)
+ {
+ /* tv = tv' + tu
+ *
+ * u = t0 tu + t1 (tv' + tu) = (t0 + t1) tu + t1 tv'
+ * v = s0 tu + s1 (tv' + tu) = (s0 + s1) tu + s1 tv' */
+
+ mpz_sub (tv, tv, tu);
+ mpz_add (t0, t0, t1);
+ mpz_add (s0, s0, s1);
+
+ shift = mpz_make_odd (tv, tv);
+ mpz_mul_2exp (t1, t1, shift);
+ mpz_mul_2exp (s1, s1, shift);
+ }
+ else
+ {
+ /* Mirror image of the branch above: tu = tu' + tv, so the
+ tv coefficients absorb the tu coefficients. */
+ mpz_sub (tu, tu, tv);
+ mpz_add (t1, t0, t1);
+ mpz_add (s1, s0, s1);
+
+ shift = mpz_make_odd (tu, tu);
+ mpz_mul_2exp (t0, t0, shift);
+ mpz_mul_2exp (s0, s0, shift);
+ }
+ power += shift;
+ }
+ }
+
+ /* Now tv = odd part of gcd, and -s0 and t0 are corresponding
+ cofactors. */
+
+ mpz_mul_2exp (tv, tv, gz);
+ mpz_neg (s0, s0);
+
+ /* 2^p g = s0 u + t0 v. Eliminate one factor of two at a time. To
+ adjust cofactors, we need u / g and v / g */
+
+ mpz_divexact (s1, v, tv);
+ mpz_abs (s1, s1);
+ mpz_divexact (t1, u, tv);
+ mpz_abs (t1, t1);
+
+ while (power-- > 0)
+ {
+ /* s0 u + t0 v = (s0 - v/g) u + (t0 + u/g) v */
+ if (mpz_odd_p (s0) || mpz_odd_p (t0))
+ {
+ mpz_sub (s0, s0, s1);
+ mpz_add (t0, t0, t1);
+ }
+ mpz_divexact_ui (s0, s0, 2);
+ mpz_divexact_ui (t0, t0, 2);
+ }
+
+ /* Arrange so that |s| < |u| / 2g */
+ mpz_add (s1, s0, s1);
+ if (mpz_cmpabs (s0, s1) > 0)
+ {
+ mpz_swap (s0, s1);
+ mpz_sub (t0, t0, t1);
+ }
+ /* The cofactors so far belong to |u| and |v|; flip signs for negative
+ inputs. */
+ if (u->_mp_size < 0)
+ mpz_neg (s0, s0);
+ if (v->_mp_size < 0)
+ mpz_neg (t0, t0);
+
+ mpz_swap (g, tv);
+ if (s)
+ mpz_swap (s, s0);
+ if (t)
+ mpz_swap (t, t0);
+
+ mpz_clear (tu);
+ mpz_clear (tv);
+ mpz_clear (s0);
+ mpz_clear (s1);
+ mpz_clear (t0);
+ mpz_clear (t1);
+}
+
+/* Set r to the least common multiple of u and v, as a non-negative
+   value.  The result is 0 when either input is 0.  */
+void
+mpz_lcm (mpz_t r, const mpz_t u, const mpz_t v)
+{
+  mpz_t quot;
+
+  if (u->_mp_size != 0 && v->_mp_size != 0)
+    {
+      mpz_init (quot);
+
+      /* lcm = |(u / gcd (u, v)) * v|.  */
+      mpz_gcd (quot, u, v);
+      mpz_divexact (quot, u, quot);
+      mpz_mul (r, quot, v);
+
+      mpz_clear (quot);
+      mpz_abs (r, r);
+    }
+  else
+    r->_mp_size = 0;
+}
+
+/* Set r to the least common multiple of u and the single-limb value
+   v, as a non-negative value.  The result is 0 when either input is
+   0.  */
+void
+mpz_lcm_ui (mpz_t r, const mpz_t u, unsigned long v)
+{
+  if (v != 0 && u->_mp_size != 0)
+    {
+      /* lcm = |u * (v / gcd (u, v))|.  */
+      v /= mpz_gcd_ui (NULL, u, v);
+      mpz_mul_ui (r, u, v);
+
+      mpz_abs (r, r);
+    }
+  else
+    r->_mp_size = 0;
+}
+
+/* Try to compute the inverse of u modulo m.  Returns non-zero and
+   stores a non-negative inverse in r when gcd (u, m) == 1; returns 0
+   and leaves r untouched otherwise, including when u == 0 or
+   |m| <= 1.  */
+int
+mpz_invert (mpz_t r, const mpz_t u, const mpz_t m)
+{
+  mpz_t gcd, cofactor;
+  int ok;
+
+  if (u->_mp_size == 0)
+    return 0;
+  if (mpz_cmpabs_ui (m, 1) <= 0)
+    return 0;
+
+  mpz_init (gcd);
+  mpz_init (cofactor);
+
+  /* Only the cofactor of u is needed.  */
+  mpz_gcdext (gcd, cofactor, NULL, u, m);
+  ok = (mpz_cmp_ui (gcd, 1) == 0);
+
+  if (ok)
+    {
+      if (cofactor->_mp_size < 0)
+	{
+	  /* Move a negative cofactor up by |m|.  */
+	  if (m->_mp_size < 0)
+	    mpz_sub (cofactor, cofactor, m);
+	  else
+	    mpz_add (cofactor, cofactor, m);
+	}
+      mpz_swap (r, cofactor);
+    }
+
+  mpz_clear (gcd);
+  mpz_clear (cofactor);
+  return ok;
+}
+
+\f
+/* Higher level operations (sqrt, pow and root) */
+
+/* Set r to b raised to the power e, by left-to-right binary
+   exponentiation over every bit position of an unsigned long.  The
+   result is built in a temporary, so r may be the same object as
+   b.  */
+void
+mpz_pow_ui (mpz_t r, const mpz_t b, unsigned long e)
+{
+  unsigned long mask;
+  mpz_t acc;
+
+  mpz_init_set_ui (acc, 1);
+
+  mask = GMP_ULONG_HIGHBIT;
+  while (mask > 0)
+    {
+      /* Square for every bit, multiply when the bit is set.  */
+      mpz_mul (acc, acc, acc);
+      if ((e & mask) != 0)
+	mpz_mul (acc, acc, b);
+      mask >>= 1;
+    }
+
+  mpz_swap (r, acc);
+  mpz_clear (acc);
+}
+
+/* Set r to blimb raised to the power e.  The base is wrapped in a
+   temporary mpz_t and the work is delegated to mpz_pow_ui.  */
+void
+mpz_ui_pow_ui (mpz_t r, unsigned long blimb, unsigned long e)
+{
+  mpz_t big_base;
+
+  mpz_init_set_ui (big_base, blimb);
+  mpz_pow_ui (r, big_base, e);
+  mpz_clear (big_base);
+}
+
+/* Set r to b^e mod m.
+
+   Dies (gmp_die) when m is zero, or when e is negative and b has no
+   inverse modulo m.  A zero exponent gives 1 mod m, that is 0 when
+   |m| == 1 and 1 otherwise.  For a negative exponent the base is
+   first replaced by its inverse modulo m.
+
+   All intermediate reductions use a normalized copy of m (shifted
+   left until its top bit is set), which avoids shifting in every
+   step; only the final reduction applies the real shift count.  */
+void
+mpz_powm (mpz_t r, const mpz_t b, const mpz_t e, const mpz_t m)
+{
+  mpz_t tr;
+  mpz_t base;
+  mp_size_t en, mn;
+  mp_srcptr mp;
+  struct gmp_div_inverse minv;
+  unsigned shift;
+  mp_ptr tp = NULL;
+
+  en = GMP_ABS (e->_mp_size);
+  mn = GMP_ABS (m->_mp_size);
+  if (mn == 0)
+    gmp_die ("mpz_powm: Zero modulo.");
+
+  if (en == 0)
+    {
+      /* b^0 = 1, which still has to be reduced: 1 mod m is 0 when
+	 |m| == 1.  m is known to be non-zero here.  */
+      mpz_set_ui (r, mpz_cmpabs_ui (m, 1) != 0);
+      return;
+    }
+
+  mp = m->_mp_d;
+  mpn_div_qr_invert (&minv, mp, mn);
+  shift = minv.shift;
+
+  if (shift > 0)
+    {
+      /* To avoid shifts, we do all our reductions, except the final
+	 one, using a *normalized* m. */
+      minv.shift = 0;
+
+      tp = gmp_xalloc_limbs (mn);
+      gmp_assert_nocarry (mpn_lshift (tp, mp, mn, shift));
+      mp = tp;
+    }
+
+  mpz_init (base);
+
+  if (e->_mp_size < 0)
+    {
+      if (!mpz_invert (base, b, m))
+	gmp_die ("mpz_powm: Negative exponent and non-invertible base.");
+    }
+  else
+    {
+      mp_size_t bn;
+      mpz_abs (base, b);
+
+      bn = base->_mp_size;
+      if (bn >= mn)
+	{
+	  mpn_div_qr_preinv (NULL, base->_mp_d, base->_mp_size, mp, mn, &minv);
+	  bn = mn;
+	}
+
+      /* We have reduced the absolute value. Now take care of the
+	 sign. Note that we get zero represented non-canonically as
+	 m. */
+      if (b->_mp_size < 0)
+	{
+	  mp_ptr bp = MPZ_REALLOC (base, mn);
+	  gmp_assert_nocarry (mpn_sub (bp, mp, mn, bp, bn));
+	  bn = mn;
+	}
+      base->_mp_size = mpn_normalized_size (base->_mp_d, bn);
+    }
+  mpz_init_set_ui (tr, 1);
+
+  /* Left-to-right binary exponentiation over the limbs of |e|, most
+     significant limb first.  */
+  while (en-- > 0)
+    {
+      mp_limb_t w = e->_mp_d[en];
+      mp_limb_t bit;
+
+      for (bit = GMP_LIMB_HIGHBIT; bit > 0; bit >>= 1)
+	{
+	  mpz_mul (tr, tr, tr);
+	  if (w & bit)
+	    mpz_mul (tr, tr, base);
+	  /* Reduce only once the product has outgrown the modulus.  */
+	  if (tr->_mp_size > mn)
+	    {
+	      mpn_div_qr_preinv (NULL, tr->_mp_d, tr->_mp_size, mp, mn, &minv);
+	      tr->_mp_size = mpn_normalized_size (tr->_mp_d, mn);
+	    }
+	}
+    }
+
+  /* Final reduction */
+  if (tr->_mp_size >= mn)
+    {
+      minv.shift = shift;
+      mpn_div_qr_preinv (NULL, tr->_mp_d, tr->_mp_size, mp, mn, &minv);
+      tr->_mp_size = mpn_normalized_size (tr->_mp_d, mn);
+    }
+  if (tp)
+    gmp_free (tp);
+
+  mpz_swap (r, tr);
+  mpz_clear (tr);
+  mpz_clear (base);
+}
+
+/* Set r to b^elimb mod m.  The exponent is wrapped in a temporary
+   mpz_t and the work is delegated to mpz_powm.  */
+void
+mpz_powm_ui (mpz_t r, const mpz_t b, unsigned long elimb, const mpz_t m)
+{
+  mpz_t big_exp;
+
+  mpz_init_set_ui (big_exp, elimb);
+  mpz_powm (r, b, big_exp, m);
+  mpz_clear (big_exp);
+}
+
+/* x=trunc(y^(1/z)), r=y-x^z
+
+   r may be NULL when the remainder is not wanted. Dies (gmp_die) for
+   a negative y combined with an even z, and for z == 0. Inputs with
+   |y| <= 1 are their own root with remainder 0. Otherwise the root is
+   found by an integer Newton iteration that starts from a power of
+   two above the root and stops as soon as the estimate no longer
+   decreases in absolute value. */
+void
+mpz_rootrem (mpz_t x, mpz_t r, const mpz_t y, unsigned long z)
+{
+ int sgn;
+ mpz_t t, u;
+
+ sgn = y->_mp_size < 0;
+ if (sgn && (z & 1) == 0)
+ gmp_die ("mpz_rootrem: Negative argument, with even root.");
+ if (z == 0)
+ gmp_die ("mpz_rootrem: Zeroth root.");
+
+ if (mpz_cmpabs_ui (y, 1) <= 0) {
+ mpz_set (x, y);
+ if (r)
+ r->_mp_size = 0;
+ return;
+ }
+
+ mpz_init (t);
+ mpz_init (u);
+ /* Initial estimate: 2^(bits(y)/z + 1). */
+ mpz_setbit (t, mpz_sizeinbase (y, 2) / z + 1);
+
+ if (z == 2) /* simplify sqrt loop: z-1 == 1 */
+ do {
+ mpz_swap (u, t); /* u = x */
+ mpz_tdiv_q (t, y, u); /* t = y/x */
+ mpz_add (t, t, u); /* t = y/x + x */
+ mpz_tdiv_q_2exp (t, t, 1); /* x'= (y/x + x)/2 */
+ } while (mpz_cmpabs (t, u) < 0); /* |x'| < |x| */
+ else /* z != 2 */ {
+ mpz_t v;
+
+ mpz_init (v);
+ /* Negative y (z is odd here): iterate on a negative estimate. */
+ if (sgn)
+ mpz_neg (t, t);
+
+ do {
+ mpz_swap (u, t); /* u = x */
+ mpz_pow_ui (t, u, z - 1); /* t = x^(z-1) */
+ mpz_tdiv_q (t, y, t); /* t = y/x^(z-1) */
+ mpz_mul_ui (v, u, z - 1); /* v = x*(z-1) */
+ mpz_add (t, t, v); /* t = y/x^(z-1) + x*(z-1) */
+ mpz_tdiv_q_ui (t, t, z); /* x'=(y/x^(z-1) + x*(z-1))/z */
+ } while (mpz_cmpabs (t, u) < 0); /* |x'| < |x| */
+
+ mpz_clear (v);
+ }
+
+ /* u holds the last estimate before the iteration stopped decreasing;
+ that is the root. */
+ if (r) {
+ mpz_pow_ui (t, u, z);
+ mpz_sub (r, y, t);
+ }
+ mpz_swap (x, u);
+ mpz_clear (u);
+ mpz_clear (t);
+}
+
+/* Set x to the truncated z-th root of y and return non-zero exactly
+   when the root is exact, i.e. when the remainder computed by
+   mpz_rootrem is zero.  */
+int
+mpz_root (mpz_t x, const mpz_t y, unsigned long z)
+{
+  mpz_t rem;
+  int exact;
+
+  mpz_init (rem);
+  mpz_rootrem (x, rem, y, z);
+  exact = (rem->_mp_size == 0);
+  mpz_clear (rem);
+
+  return exact;
+}
+
+/* Square root with remainder: s = floor (sqrt (u)), r = u - s^2.  r
+   may be NULL.  Implemented as the degree-2 case of mpz_rootrem.  */
+void
+mpz_sqrtrem (mpz_t s, mpz_t r, const mpz_t u)
+{
+  const unsigned long degree = 2;
+
+  mpz_rootrem (s, r, u, degree);
+}
+
+/* Set s to the truncated square root of u; the remainder is not
+   computed.  Implemented as the degree-2 case of mpz_rootrem.  */
+void
+mpz_sqrt (mpz_t s, const mpz_t u)
+{
+  const unsigned long degree = 2;
+
+  mpz_rootrem (s, NULL, u, degree);
+}
+
+\f
+/* Combinatorics */
+
+/* Set x to n!, by multiplying n, n-1, ..., 2 together.  0! and 1!
+   are both 1.  */
+void
+mpz_fac_ui (mpz_t x, unsigned long n)
+{
+  unsigned long factor;
+
+  if (n < 2)
+    {
+      mpz_set_ui (x, 1);
+      return;
+    }
+
+  mpz_set_ui (x, n);
+  for (factor = n - 1; factor > 1; factor--)
+    mpz_mul_ui (x, x, factor);
+}
+
+/* Set r to the binomial coefficient n over k, computed directly as
+   n! / k! / (n-k)!.  The result is 0 when k > n.  */
+void
+mpz_bin_uiui (mpz_t r, unsigned long n, unsigned long k)
+{
+  mpz_t den;
+
+  if (k > n)
+    {
+      r->_mp_size = 0;
+      return;
+    }
+
+  mpz_init (den);
+
+  mpz_fac_ui (r, n);
+
+  mpz_fac_ui (den, k);
+  mpz_divexact (r, r, den);
+
+  mpz_fac_ui (den, n - k);
+  mpz_divexact (r, r, den);
+
+  mpz_clear (den);
+}
+
+\f
+/* Logical operations and bit manipulation. */
+
+/* Numbers are treated as if represented in two's complement (and
+ infinitely sign extended). For negative values we get the two's
+ complement from -x = ~x + 1, where ~ is bitwise complement.
+ Negation transforms
+
+ xxxx10...0
+
+ into
+
+ yyyy10...0
+
+ where yyyy is the bitwise complement of xxxx. So least significant
+ bits, up to and including the first one bit, are unchanged, and
+ the more significant bits are all complemented.
+
+ To change a bit from zero to one in a negative number, subtract the
+ corresponding power of two from the absolute value. This can never
+ underflow. To change a bit from one to zero, add the corresponding
+ power of two, and this might overflow. E.g., if x = -001111, the
+ two's complement is 110001. Clearing the least significant bit, we
+ get two's complement 110000, and -010000. */
+
+/* Return bit number bit_index of d, where negative values are read as
+   infinitely sign-extended two's complement.  Bits beyond the stored
+   limbs are 0 for non-negative d and 1 for negative d.  */
+int
+mpz_tstbit (const mpz_t d, mp_bitcnt_t bit_index)
+{
+  mp_size_t size, limbs, pos;
+  unsigned offset;
+  mp_limb_t word;
+  int value;
+
+  size = d->_mp_size;
+  limbs = GMP_ABS (size);
+  pos = bit_index / GMP_LIMB_BITS;
+  if (pos >= limbs)
+    return size < 0;
+
+  offset = bit_index % GMP_LIMB_BITS;
+  word = d->_mp_d[pos];
+  value = (word >> offset) & 1;
+
+  if (size >= 0)
+    return value;
+
+  /* d < 0: the bit of the absolute value is complemented whenever any
+     lower bit is set, first within this limb ...  */
+  if (offset > 0 && (word << (GMP_LIMB_BITS - offset)) > 0)
+    return value ^ 1;
+
+  /* ... then in any lower limb.  */
+  while (pos-- > 0)
+    {
+      if (d->_mp_d[pos] > 0)
+	return value ^ 1;
+    }
+
+  return value;
+}
+
+/* Add 2^bit_index to the absolute value of d, keeping the sign of d.
+   The limb array is grown when the bit lies beyond the current top
+   limb or when the addition carries out of it.  */
+static void
+mpz_abs_add_bit (mpz_t d, mp_bitcnt_t bit_index)
+{
+  mp_size_t used, target;
+  mp_limb_t mask;
+  mp_ptr limbs;
+
+  used = GMP_ABS (d->_mp_size);
+
+  target = bit_index / GMP_LIMB_BITS;
+  mask = (mp_limb_t) 1 << (bit_index % GMP_LIMB_BITS);
+
+  if (target < used)
+    {
+      mp_limb_t carry;
+
+      limbs = d->_mp_d;
+
+      carry = mpn_add_1 (limbs + target, limbs + target, used - target, mask);
+      if (carry > 0)
+	{
+	  limbs = MPZ_REALLOC (d, used + 1);
+	  limbs[used++] = carry;
+	}
+    }
+  else
+    {
+      mp_size_t k;
+
+      /* The bit lies above the current top limb: extend the number,
+	 zero-filling the gap below the new top limb.  */
+      limbs = MPZ_REALLOC (d, target + 1);
+
+      for (k = used; k < target; k++)
+	limbs[k] = 0;
+      limbs[target] = mask;
+      used = target + 1;
+    }
+
+  d->_mp_size = (d->_mp_size < 0) ? - used : used;
+}
+
+/* Subtract 2^bit_index from the absolute value of d, keeping the sign
+   of d.  The caller must make sure that the bit lies within the
+   stored limbs and that the subtraction cannot borrow out of the top
+   (both are asserted).  */
+static void
+mpz_abs_sub_bit (mpz_t d, mp_bitcnt_t bit_index)
+{
+  mp_size_t dn, limb_index;
+  mp_ptr dp;
+  mp_limb_t bit;
+
+  dn = GMP_ABS (d->_mp_size);
+  dp = d->_mp_d;
+
+  limb_index = bit_index / GMP_LIMB_BITS;
+  bit = (mp_limb_t) 1 << (bit_index % GMP_LIMB_BITS);
+
+  assert (limb_index < dn);
+
+  gmp_assert_nocarry (mpn_sub_1 (dp + limb_index, dp + limb_index,
+				 dn - limb_index, bit));
+  /* The subtraction can clear more than one high limb (e.g. removing
+     the only set bit of 2^k for k >= GMP_LIMB_BITS leaves every limb
+     zero), so strip all of them rather than just the top one.  */
+  dn = mpn_normalized_size (dp, dn);
+  d->_mp_size = (d->_mp_size < 0) ? - dn : dn;
+}
+
+/* Set bit bit_index of d to 1 (two's complement view).  For
+   non-negative d this adds the bit to the magnitude; for negative d
+   it subtracts the bit from the magnitude.  */
+void
+mpz_setbit (mpz_t d, mp_bitcnt_t bit_index)
+{
+  if (mpz_tstbit (d, bit_index))
+    return;
+
+  if (d->_mp_size < 0)
+    mpz_abs_sub_bit (d, bit_index);
+  else
+    mpz_abs_add_bit (d, bit_index);
+}
+
+/* Clear bit bit_index of d to 0 (two's complement view).  For
+   non-negative d this subtracts the bit from the magnitude; for
+   negative d it adds the bit to the magnitude.  */
+void
+mpz_clrbit (mpz_t d, mp_bitcnt_t bit_index)
+{
+  if (!mpz_tstbit (d, bit_index))
+    return;
+
+  if (d->_mp_size < 0)
+    mpz_abs_add_bit (d, bit_index);
+  else
+    mpz_abs_sub_bit (d, bit_index);
+}
+
+/* Complement bit bit_index of d (two's complement view).  The
+   magnitude shrinks when the bit is set in a non-negative d or clear
+   in a negative d, and grows otherwise.  */
+void
+mpz_combit (mpz_t d, mp_bitcnt_t bit_index)
+{
+  int negative = d->_mp_size < 0;
+  int is_set = mpz_tstbit (d, bit_index);
+
+  if (is_set != negative)
+    mpz_abs_sub_bit (d, bit_index);
+  else
+    mpz_abs_add_bit (d, bit_index);
+}
+
+/* Set r to the one's complement of u, using ~u = -u - 1.  */
+void
+mpz_com (mpz_t r, const mpz_t u)
+{
+  /* r = -u ...  */
+  mpz_neg (r, u);
+  /* ... then r = r - 1.  */
+  mpz_sub_ui (r, r, 1UL);
+}
+
+/* Set r to the bitwise AND of u and v, with negative values treated as
+   infinitely sign-extended two's complement. Operands are stored as
+   sign and magnitude, so each negative operand is converted to two's
+   complement limb by limb while it is read (complement, then add a
+   propagating carry), and a negative result is converted back the same
+   way while it is written. */
+void
+mpz_and (mpz_t r, const mpz_t u, const mpz_t v)
+{
+ mp_size_t un, vn, rn, i;
+ mp_ptr up, vp, rp;
+
+ mp_limb_t ux, vx, rx;
+ mp_limb_t uc, vc, rc;
+ mp_limb_t ul, vl, rl;
+
+ un = GMP_ABS (u->_mp_size);
+ vn = GMP_ABS (v->_mp_size);
+ /* Make u the operand with at least as many limbs as v. */
+ if (un < vn)
+ {
+ MPZ_SRCPTR_SWAP (u, v);
+ MP_SIZE_T_SWAP (un, vn);
+ }
+ if (vn == 0)
+ {
+ r->_mp_size = 0;
+ return;
+ }
+
+ /* uc, vc, rc: 1 for a negative value; they double as the running
+ carry of the "+ 1" in -x = ~x + 1. The result is negative only if
+ both inputs are. */
+ uc = u->_mp_size < 0;
+ vc = v->_mp_size < 0;
+ rc = uc & vc;
+
+ /* ux, vx, rx: all-ones mask for a negative value, zero otherwise. */
+ ux = -uc;
+ vx = -vc;
+ rx = -rc;
+
+ /* If the smaller input is positive, higher limbs don't matter. */
+ rn = vx ? un : vn;
+
+ /* One extra limb is reserved for a possible carry out of a negative
+ result. */
+ rp = MPZ_REALLOC (r, rn + rc);
+
+ up = u->_mp_d;
+ vp = v->_mp_d;
+
+ for (i = 0; i < vn; i++)
+ {
+ ul = (up[i] ^ ux) + uc;
+ uc = ul < uc;
+
+ vl = (vp[i] ^ vx) + vc;
+ vc = vl < vc;
+
+ rl = ( (ul & vl) ^ rx) + rc;
+ rc = rl < rc;
+ rp[i] = rl;
+ }
+ assert (vc == 0);
+
+ /* Above the top of v, v contributes only its sign extension vx. */
+ for (; i < rn; i++)
+ {
+ ul = (up[i] ^ ux) + uc;
+ uc = ul < uc;
+
+ rl = ( (ul & vx) ^ rx) + rc;
+ rc = rl < rc;
+ rp[i] = rl;
+ }
+ if (rc)
+ rp[rn++] = rc;
+ else
+ rn = mpn_normalized_size (rp, rn);
+
+ r->_mp_size = rx ? -rn : rn;
+}
+
+/* Set r to the bitwise inclusive OR of u and v, with negative values
+   treated as infinitely sign-extended two's complement. Negative
+   operands are converted to two's complement limb by limb while they
+   are read (complement, then add a propagating carry), and a negative
+   result is converted back the same way while it is written. */
+void
+mpz_ior (mpz_t r, const mpz_t u, const mpz_t v)
+{
+ mp_size_t un, vn, rn, i;
+ mp_ptr up, vp, rp;
+
+ mp_limb_t ux, vx, rx;
+ mp_limb_t uc, vc, rc;
+ mp_limb_t ul, vl, rl;
+
+ un = GMP_ABS (u->_mp_size);
+ vn = GMP_ABS (v->_mp_size);
+ /* Make u the operand with at least as many limbs as v. */
+ if (un < vn)
+ {
+ MPZ_SRCPTR_SWAP (u, v);
+ MP_SIZE_T_SWAP (un, vn);
+ }
+ if (vn == 0)
+ {
+ mpz_set (r, u);
+ return;
+ }
+
+ /* uc, vc, rc: 1 for a negative value; they double as the running
+ carry of the "+ 1" in -x = ~x + 1. The result is negative if
+ either input is. */
+ uc = u->_mp_size < 0;
+ vc = v->_mp_size < 0;
+ rc = uc | vc;
+
+ /* ux, vx, rx: all-ones mask for a negative value, zero otherwise. */
+ ux = -uc;
+ vx = -vc;
+ rx = -rc;
+
+ /* If the smaller input is negative, by sign extension higher limbs
+ don't matter. */
+ rn = vx ? vn : un;
+
+ /* One extra limb is reserved for a possible carry out of a negative
+ result. */
+ rp = MPZ_REALLOC (r, rn + rc);
+
+ up = u->_mp_d;
+ vp = v->_mp_d;
+
+ for (i = 0; i < vn; i++)
+ {
+ ul = (up[i] ^ ux) + uc;
+ uc = ul < uc;
+
+ vl = (vp[i] ^ vx) + vc;
+ vc = vl < vc;
+
+ rl = ( (ul | vl) ^ rx) + rc;
+ rc = rl < rc;
+ rp[i] = rl;
+ }
+ assert (vc == 0);
+
+ /* Above the top of v, v contributes only its sign extension vx. */
+ for (; i < rn; i++)
+ {
+ ul = (up[i] ^ ux) + uc;
+ uc = ul < uc;
+
+ rl = ( (ul | vx) ^ rx) + rc;
+ rc = rl < rc;
+ rp[i] = rl;
+ }
+ if (rc)
+ rp[rn++] = rc;
+ else
+ rn = mpn_normalized_size (rp, rn);
+
+ r->_mp_size = rx ? -rn : rn;
+}
+
+/* Set r to the bitwise exclusive OR of u and v, with negative values
+   treated as infinitely sign-extended two's complement. Negative
+   operands are converted to two's complement limb by limb while they
+   are read (complement, then add a propagating carry), and a negative
+   result is converted back the same way while it is written. */
+void
+mpz_xor (mpz_t r, const mpz_t u, const mpz_t v)
+{
+ mp_size_t un, vn, i;
+ mp_ptr up, vp, rp;
+
+ mp_limb_t ux, vx, rx;
+ mp_limb_t uc, vc, rc;
+ mp_limb_t ul, vl, rl;
+
+ un = GMP_ABS (u->_mp_size);
+ vn = GMP_ABS (v->_mp_size);
+ /* Make u the operand with at least as many limbs as v. */
+ if (un < vn)
+ {
+ MPZ_SRCPTR_SWAP (u, v);
+ MP_SIZE_T_SWAP (un, vn);
+ }
+ if (vn == 0)
+ {
+ mpz_set (r, u);
+ return;
+ }
+
+ /* uc, vc, rc: 1 for a negative value; they double as the running
+ carry of the "+ 1" in -x = ~x + 1. The result is negative if
+ exactly one input is. */
+ uc = u->_mp_size < 0;
+ vc = v->_mp_size < 0;
+ rc = uc ^ vc;
+
+ /* ux, vx, rx: all-ones mask for a negative value, zero otherwise. */
+ ux = -uc;
+ vx = -vc;
+ rx = -rc;
+
+ /* One extra limb is reserved for a possible carry out of a negative
+ result. */
+ rp = MPZ_REALLOC (r, un + rc);
+
+ up = u->_mp_d;
+ vp = v->_mp_d;
+
+ for (i = 0; i < vn; i++)
+ {
+ ul = (up[i] ^ ux) + uc;
+ uc = ul < uc;
+
+ vl = (vp[i] ^ vx) + vc;
+ vc = vl < vc;
+
+ rl = (ul ^ vl ^ rx) + rc;
+ rc = rl < rc;
+ rp[i] = rl;
+ }
+ assert (vc == 0);
+
+ /* Above the top of v, v contributes its sign extension vx; since
+ rx = ux ^ vx, the combined mask vx ^ rx simplifies to ux. */
+ for (; i < un; i++)
+ {
+ ul = (up[i] ^ ux) + uc;
+ uc = ul < uc;
+
+ rl = (ul ^ ux) + rc;
+ rc = rl < rc;
+ rp[i] = rl;
+ }
+ if (rc)
+ rp[un++] = rc;
+ else
+ un = mpn_normalized_size (rp, un);
+
+ r->_mp_size = rx ? -un : un;
+}
+
+/* Return the number of one bits in the limb x.  */
+static unsigned
+gmp_popcount_limb (mp_limb_t x)
+{
+  unsigned total = 0;
+
+  /* Work on 16 bits per round so that no limb-sized constants are
+     needed: sum adjacent 1-, 2-, 4- and finally 8-bit fields.  */
+  while (x > 0)
+    {
+      unsigned chunk = ((x >> 1) & 0x5555) + (x & 0x5555);
+
+      chunk = ((chunk >> 2) & 0x3333) + (chunk & 0x3333);
+      chunk = ((chunk >> 4) & 0x0f0f) + (chunk & 0x0f0f);
+      chunk = (chunk >> 8) + (chunk & 0x00ff);
+      total += chunk;
+
+      x >>= 16;
+    }
+  return total;
+}
+
+/* Return the number of one bits in u.  A negative u has infinitely
+   many one bits in two's complement; the largest mp_bitcnt_t value is
+   returned for it.  */
+mp_bitcnt_t
+mpz_popcount (const mpz_t u)
+{
+  mp_size_t limbs, k;
+  mp_bitcnt_t ones;
+
+  limbs = u->_mp_size;
+  if (limbs < 0)
+    return ~(mp_bitcnt_t) 0;
+
+  ones = 0;
+  for (k = 0; k < limbs; k++)
+    ones += gmp_popcount_limb (u->_mp_d[k]);
+
+  return ones;
+}
+
+/* Return the Hamming distance between u and v, i.e. the number of bit
+   positions where their two's complement representations differ. When
+   the signs differ the distance is infinite and the largest
+   mp_bitcnt_t value is returned. */
+mp_bitcnt_t
+mpz_hamdist (const mpz_t u, const mpz_t v)
+{
+ mp_size_t un, vn, i;
+ mp_limb_t uc, vc, ul, vl, comp;
+ mp_srcptr up, vp;
+ mp_bitcnt_t c;
+
+ un = u->_mp_size;
+ vn = v->_mp_size;
+
+ /* The xor of the sizes is negative exactly when the signs differ. */
+ if ( (un ^ vn) < 0)
+ return ~(mp_bitcnt_t) 0;
+
+ if (un < 0)
+ {
+ /* Both negative: convert each limb to two's complement on the fly
+ (complement with comp, add a propagating carry uc / vc). */
+ assert (vn < 0);
+ un = -un;
+ vn = -vn;
+ uc = vc = 1;
+ comp = - (mp_limb_t) 1;
+ }
+ else
+ uc = vc = comp = 0;
+
+ up = u->_mp_d;
+ vp = v->_mp_d;
+
+ /* Make u the operand with at least as many limbs as v. */
+ if (un < vn)
+ MPN_SRCPTR_SWAP (up, un, vp, vn);
+
+ for (i = 0, c = 0; i < vn; i++)
+ {
+ ul = (up[i] ^ comp) + uc;
+ uc = ul < uc;
+
+ vl = (vp[i] ^ comp) + vc;
+ vc = vl < vc;
+
+ c += gmp_popcount_limb (ul ^ vl);
+ }
+ assert (vc == 0);
+
+ /* Above the top of v, compare u against v's sign extension comp. */
+ for (; i < un; i++)
+ {
+ ul = (up[i] ^ comp) + uc;
+ uc = ul < uc;
+
+ c += gmp_popcount_limb (ul ^ comp);
+ }
+
+ return c;
+}
+
+/* Return the index of the first 1 bit of u at or above starting_bit,
+   with negative values treated as infinitely sign-extended two's
+   complement. When no such bit exists (possible only for u >= 0) the
+   largest mp_bitcnt_t value is returned. */
+mp_bitcnt_t
+mpz_scan1 (const mpz_t u, mp_bitcnt_t starting_bit)
+{
+ mp_ptr up;
+ mp_size_t us, un, i;
+ mp_limb_t limb, ux, uc;
+ unsigned cnt;
+
+ up = u->_mp_d;
+ us = u->_mp_size;
+ un = GMP_ABS (us);
+ i = starting_bit / GMP_LIMB_BITS;
+
+ /* Past the end there's no 1 bits for u>=0, or an immediate 1 bit
+ for u<0. Notice this test picks up any u==0 too. */
+ if (i >= un)
+ return (us >= 0 ? ~(mp_bitcnt_t) 0 : starting_bit);
+
+ if (us < 0)
+ {
+ /* Two's complement on the fly: complement every limb (ux) and add
+ a carry that is still pending only if all lower limbs are zero. */
+ ux = GMP_LIMB_MAX;
+ uc = mpn_zero_p (up, i);
+ }
+ else
+ ux = uc = 0;
+
+ limb = (ux ^ up[i]) + uc;
+ uc = limb < uc;
+
+ /* Mask to 0 all bits before starting_bit, thus ignoring them. */
+ limb &= (GMP_LIMB_MAX << (starting_bit % GMP_LIMB_BITS));
+
+ while (limb == 0)
+ {
+ i++;
+ if (i == un)
+ {
+ assert (uc == 0);
+ /* For the u > 0 case, this can happen only for the first
+ masked limb. For the u < 0 case, it happens when the
+ highest limbs of the absolute value are all ones. */
+ return (us >= 0 ? ~(mp_bitcnt_t) 0 : un * GMP_LIMB_BITS);
+ }
+ limb = (ux ^ up[i]) + uc;
+ uc = limb < uc;
+ }
+ gmp_ctz (cnt, limb);
+ return (mp_bitcnt_t) i * GMP_LIMB_BITS + cnt;
+}
+
+/* Return the index of the first 0 bit of u at or above starting_bit,
+   with negative values treated as infinitely sign-extended two's
+   complement. When no such bit exists (possible only for u < 0) the
+   largest mp_bitcnt_t value is returned. */
+mp_bitcnt_t
+mpz_scan0 (const mpz_t u, mp_bitcnt_t starting_bit)
+{
+ mp_ptr up;
+ mp_size_t us, un, i;
+ mp_limb_t limb, ux, uc;
+ unsigned cnt;
+
+ up = u->_mp_d;
+ us = u->_mp_size;
+ un = GMP_ABS (us);
+ i = starting_bit / GMP_LIMB_BITS;
+
+ /* When past end, there's an immediate 0 bit for u>=0, or no 0 bits for
+ u<0. Notice this test picks up all cases of u==0 too. */
+ if (i >= un)
+ return (us >= 0 ? starting_bit : ~(mp_bitcnt_t) 0);
+
+ if (us < 0)
+ {
+ /* Two's complement on the fly: complement every limb (ux) and add
+ a carry that is still pending only if all lower limbs are zero. */
+ ux = GMP_LIMB_MAX;
+ uc = mpn_zero_p (up, i);
+ }
+ else
+ ux = uc = 0;
+
+ limb = (ux ^ up[i]) + uc;
+ uc = limb < uc;
+
+ /* Mask to 1 all bits before starting_bit, thus ignoring them. */
+ limb |= ((mp_limb_t) 1 << (starting_bit % GMP_LIMB_BITS)) - 1;
+
+ while (limb == GMP_LIMB_MAX)
+ {
+ i++;
+ if (i == un)
+ {
+ assert (uc == 0);
+ /* Ran off the top: the sign extension is all zeros for u >= 0
+ (first 0 bit right above the top limb) and all ones for u < 0. */
+ return (us >= 0 ? un * GMP_LIMB_BITS : ~(mp_bitcnt_t) 0);
+ }
+ limb = (ux ^ up[i]) + uc;
+ uc = limb < uc;
+ }
+ /* The lowest 0 bit of limb is the lowest 1 bit of its complement. */
+ gmp_ctz (cnt, ~limb);
+ return (mp_bitcnt_t) i * GMP_LIMB_BITS + cnt;
+}
+
+\f
+/* MPZ base conversion. */
+
+/* Return the number of digits needed to write |u| in the given base
+   (2 <= base <= 36, asserted).  Zero counts as one digit.  Bases 2,
+   4, 8, 16 and 32 are answered from the bit length; every other base
+   is answered by repeatedly dividing a scratch copy of |u| by the
+   base and counting the steps.  */
+size_t
+mpz_sizeinbase (const mpz_t u, int base)
+{
+  mp_size_t limbs;
+  mp_srcptr src;
+  mp_ptr scratch;
+  mp_bitcnt_t nbits;
+  struct gmp_div_inverse inv;
+  size_t count;
+
+  assert (base >= 2);
+  assert (base <= 36);
+
+  limbs = GMP_ABS (u->_mp_size);
+  if (limbs == 0)
+    return 1;
+
+  src = u->_mp_d;
+
+  /* Bit length of |u|.  */
+  nbits = (limbs - 1) * GMP_LIMB_BITS + mpn_limb_size_in_base_2 (src[limbs-1]);
+
+  if (base == 2)
+    return nbits;
+  if (base == 4)
+    return (nbits + 1) / 2;
+  if (base == 8)
+    return (nbits + 2) / 3;
+  if (base == 16)
+    return (nbits + 3) / 4;
+  if (base == 32)
+    return (nbits + 4) / 5;
+
+  /* FIXME: Do something more clever for the common case of base
+     10. */
+
+  scratch = gmp_xalloc_limbs (limbs);
+  mpn_copyi (scratch, src, limbs);
+  mpn_div_qr_1_invert (&inv, base);
+
+  /* Each division by the base strips one digit.  */
+  count = 0;
+  while (limbs > 0)
+    {
+      mpn_div_qr_1_preinv (scratch, scratch, limbs, &inv);
+      if (scratch[limbs-1] == 0)
+	limbs--;
+      count++;
+    }
+
+  gmp_free (scratch);
+  return count;
+}
+
+/* Convert u to a NUL-terminated string of digits in the given base.
+
+   A non-negative base selects lower-case digits, a negative base
+   selects upper-case digits and is used by absolute value.  An
+   absolute value of 0 or 1 means base 10; an absolute value above 36
+   makes the function return NULL.  When sp is NULL the buffer is
+   obtained from gmp_xalloc; otherwise the caller's buffer is written.
+   Returns the buffer.  */
+char *
+mpz_get_str (char *sp, int base, const mpz_t u)
+{
+  const char *alphabet;
+  unsigned log2_base;
+  mp_size_t limbs;
+  size_t pos, len;
+
+  alphabet = "0123456789abcdefghijklmnopqrstuvwxyz";
+  if (base < 0)
+    {
+      base = -base;
+      alphabet = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
+    }
+  if (base <= 1)
+    base = 10;
+  if (base > 36)
+    return NULL;
+
+  /* Digits plus room for a sign; the allocation adds the
+     terminator.  */
+  len = 1 + mpz_sizeinbase (u, base);
+  if (sp == NULL)
+    sp = gmp_xalloc (1 + len);
+
+  limbs = GMP_ABS (u->_mp_size);
+  if (limbs == 0)
+    {
+      sp[0] = '0';
+      sp[1] = '\0';
+      return sp;
+    }
+
+  pos = 0;
+  if (u->_mp_size < 0)
+    sp[pos++] = '-';
+
+  /* The helpers store raw digit values; len becomes the index just
+     past the last digit.  */
+  log2_base = mpn_base_power_of_two_p (base);
+  if (log2_base == 0)
+    {
+      struct mpn_base_info info;
+      mp_ptr scratch;
+
+      /* The general conversion works on a scratch copy of the
+	 limbs.  */
+      mpn_get_base_info (&info, base);
+      scratch = gmp_xalloc_limbs (limbs);
+      mpn_copyi (scratch, u->_mp_d, limbs);
+
+      len = pos + mpn_get_str_other ((unsigned char *) sp + pos, base, &info,
+				     scratch, limbs);
+      gmp_free (scratch);
+    }
+  else
+    /* The limbs are passed directly in the power-of-two case.  */
+    len = pos + mpn_get_str_bits ((unsigned char *) sp + pos, log2_base,
+				  u->_mp_d, limbs);
+
+  /* Translate digit values into characters.  */
+  while (pos < len)
+    {
+      sp[pos] = alphabet[(unsigned char) sp[pos]];
+      pos++;
+    }
+
+  sp[len] = '\0';
+  return sp;
+}
+
+/* Set r from the NUL-terminated string sp in the given base.
+
+   base must be 0 or in the range 2..36 (asserted).  With base 0 the
+   base is taken from the prefix: "0x"/"0X" is hexadecimal, "0b"/"0B"
+   is binary, a leading "0" is octal, anything else is decimal.
+   Leading white space and one optional '-' are accepted, and white
+   space between digits is ignored.
+
+   Returns 0 on success.  Returns -1 and sets r to zero when the
+   string contains a character that is not a digit of the base, or
+   when it contains no digit at all.  */
+int
+mpz_set_str (mpz_t r, const char *sp, int base)
+{
+  unsigned bits;
+  mp_size_t rn, alloc;
+  mp_ptr rp;
+  size_t sn;
+  size_t dn;
+  int sign;
+  int zero_consumed;
+  unsigned char *dp;
+
+  assert (base == 0 || (base >= 2 && base <= 36));
+
+  while (isspace( (unsigned char) *sp))
+    sp++;
+
+  if (*sp == '-')
+    {
+      sign = 1;
+      sp++;
+    }
+  else
+    sign = 0;
+
+  /* Set when base detection swallows a leading '0' that is itself a
+     digit of the number (the octal case).  */
+  zero_consumed = 0;
+
+  if (base == 0)
+    {
+      if (*sp == '0')
+	{
+	  sp++;
+	  if (*sp == 'x' || *sp == 'X')
+	    {
+	      base = 16;
+	      sp++;
+	    }
+	  else if (*sp == 'b' || *sp == 'B')
+	    {
+	      base = 2;
+	      sp++;
+	    }
+	  else
+	    {
+	      base = 8;
+	      zero_consumed = 1;
+	    }
+	}
+      else
+	base = 10;
+    }
+
+  sn = strlen (sp);
+  dp = gmp_xalloc (sn + (sn == 0));
+
+  /* Translate characters into digit values, dropping white space.  */
+  for (dn = 0; *sp; sp++)
+    {
+      unsigned digit;
+
+      if (isspace ((unsigned char) *sp))
+	continue;
+      if (*sp >= '0' && *sp <= '9')
+	digit = *sp - '0';
+      else if (*sp >= 'a' && *sp <= 'z')
+	digit = *sp - 'a' + 10;
+      else if (*sp >= 'A' && *sp <= 'Z')
+	digit = *sp - 'A' + 10;
+      else
+	digit = base; /* fail */
+
+      if (digit >= base)
+	{
+	  gmp_free (dp);
+	  r->_mp_size = 0;
+	  return -1;
+	}
+
+      dp[dn++] = digit;
+    }
+
+  if (dn == 0)
+    {
+      /* No digits left to convert.  Never hand an empty digit array
+	 to the conversion helpers.  The input is the number zero if a
+	 leading '0' was consumed as the octal marker; otherwise it is
+	 not a number at all.  */
+      gmp_free (dp);
+      r->_mp_size = 0;
+      return zero_consumed ? 0 : -1;
+    }
+
+  bits = mpn_base_power_of_two_p (base);
+
+  if (bits > 0)
+    {
+      alloc = (sn * bits + GMP_LIMB_BITS - 1) / GMP_LIMB_BITS;
+      rp = MPZ_REALLOC (r, alloc);
+      rn = mpn_set_str_bits (rp, dp, dn, bits);
+    }
+  else
+    {
+      struct mpn_base_info info;
+      mpn_get_base_info (&info, base);
+      alloc = (sn + info.exp - 1) / info.exp;
+      rp = MPZ_REALLOC (r, alloc);
+      rn = mpn_set_str_other (rp, dp, dn, base, &info);
+    }
+  assert (rn <= alloc);
+  gmp_free (dp);
+
+  r->_mp_size = sign ? - rn : rn;
+
+  return 0;
+}
+
+/* Initialize r and then assign it from the string sp in the given
+   base.  Returns the result of mpz_set_str; r is initialized either
+   way.  */
+int
+mpz_init_set_str (mpz_t r, const char *sp, int base)
+{
+  int status;
+
+  mpz_init (r);
+  status = mpz_set_str (r, sp, base);
+  return status;
+}
+
+/* Write x to stream as a string of digits in the given base (same
+   base convention as mpz_get_str).  Returns the number of bytes
+   written as reported by fwrite, or 0 when the base is not supported
+   and no string could be produced.  */
+size_t
+mpz_out_str (FILE *stream, int base, const mpz_t x)
+{
+  char *str;
+  size_t len;
+
+  str = mpz_get_str (NULL, base, x);
+  /* mpz_get_str returns NULL for an unsupported base; do not pass that
+     on to strlen.  */
+  if (str == NULL)
+    return 0;
+
+  len = strlen (str);
+  len = fwrite (str, 1, len, stream);
+  gmp_free (str);
+  return len;
+}
+
+\f
+/* Detect the byte order of the host by looking at the first byte of
+   an int holding the value 1.  Returns -1 for little endian and 1 for
+   big endian, the same encoding as the endian argument of mpz_import
+   and mpz_export.  */
+static int
+gmp_detect_endian (void)
+{
+  static const int probe = 1;
+  const unsigned char *first_byte = (const unsigned char *) &probe;
+
+  /* The low-order byte comes first on a little-endian host.  */
+  return (*first_byte == 1) ? -1 : 1;
+}
+
+/* Import and export. Does not support nails. */
+/* Set r to the non-negative integer stored in the array at src, which
+   consists of count words of size bytes each. order is 1 when the most
+   significant word comes first and -1 when the least significant word
+   comes first. endian gives the byte order inside each word: 1 for
+   most significant byte first, -1 for least significant byte first, 0
+   for the byte order of the host. nails must be 0; any other value is
+   fatal. */
+void
+mpz_import (mpz_t r, size_t count, int order, size_t size, int endian,
+ size_t nails, const void *src)
+{
+ const unsigned char *p;
+ ptrdiff_t word_step;
+ mp_ptr rp;
+ mp_size_t rn;
+
+ /* The current (partial) limb. */
+ mp_limb_t limb;
+ /* The number of bytes already copied to this limb (starting from
+ the low end). */
+ size_t bytes;
+ /* The index where the limb should be stored, when completed. */
+ mp_size_t i;
+
+ if (nails != 0)
+ gmp_die ("mpz_import: Nails not supported.");
+
+ assert (order == 1 || order == -1);
+ assert (endian >= -1 && endian <= 1);
+
+ if (endian == 0)
+ endian = gmp_detect_endian ();
+
+ p = (unsigned char *) src;
+
+ /* Correction applied to p after each word. The inner loop moves p by
+ size bytes in the direction given by endian; when that direction
+ differs from the word order, p must jump 2 * size bytes to reach
+ the start of the next more significant word. */
+ word_step = (order != endian) ? 2 * size : 0;
+
+ /* Process bytes from the least significant end, so point p at the
+ least significant word. */
+ if (order == 1)
+ {
+ p += size * (count - 1);
+ word_step = - word_step;
+ }
+
+ /* And at least significant byte of that word. */
+ if (endian == 1)
+ p += (size - 1);
+
+ /* Number of limbs needed to hold size * count bytes, rounded up. */
+ rn = (size * count + sizeof(mp_limb_t) - 1) / sizeof(mp_limb_t);
+ rp = MPZ_REALLOC (r, rn);
+
+ for (limb = 0, bytes = 0, i = 0; count > 0; count--, p += word_step)
+ {
+ size_t j;
+ for (j = 0; j < size; j++, p -= (ptrdiff_t) endian)
+ {
+ /* Append the next more significant byte to the current limb and
+ store the limb once it is full. */
+ limb |= (mp_limb_t) *p << (bytes++ * CHAR_BIT);
+ if (bytes == sizeof(mp_limb_t))
+ {
+ rp[i++] = limb;
+ bytes = 0;
+ limb = 0;
+ }
+ }
+ }
+ /* Store a final, partially filled limb. */
+ if (bytes > 0)
+ rp[i++] = limb;
+ assert (i == rn);
+
+ r->_mp_size = mpn_normalized_size (rp, i);
+}
+
+/* Write the absolute value of u into the array r as words of size
+   bytes each, and return r.
+
+   order is 1 to put the most significant word first and -1 to put
+   the least significant word first.  endian gives the byte order
+   inside each word: 1 for most significant byte first, -1 for least
+   significant byte first, 0 for the byte order of the host.  nails
+   must be 0; any other value is fatal.
+
+   When countp is non-NULL, *countp receives the number of words
+   produced.  When r is NULL and u is non-zero, a buffer of
+   count * size bytes is obtained from gmp_xalloc and returned.  For
+   u == 0 nothing is written, the count is 0 and r is returned
+   unchanged (possibly NULL).  */
+void *
+mpz_export (void *r, size_t *countp, int order, size_t size, int endian,
+	    size_t nails, const mpz_t u)
+{
+  unsigned char *p;
+  ptrdiff_t word_step;
+  size_t count, k;
+  mp_size_t un;
+
+  /* The current (partial) limb. */
+  mp_limb_t limb;
+  /* The number of bytes left to do in this limb. */
+  size_t bytes;
+  /* The index where the limb was read. */
+  mp_size_t i;
+
+  if (nails != 0)
+    gmp_die ("mpz_export: Nails not supported.");
+
+  assert (order == 1 || order == -1);
+  assert (endian >= -1 && endian <= 1);
+  assert (size > 0 || u->_mp_size == 0);
+
+  un = GMP_ABS (u->_mp_size);
+  if (un == 0)
+    {
+      if (countp)
+	*countp = 0;
+      return r;
+    }
+
+  /* Count bytes in top limb. */
+  for (limb = u->_mp_d[un-1], k = 0; limb > 0; k++, limb >>= CHAR_BIT)
+    ;
+
+  assert (k > 0);
+
+  /* Number of words needed for all significant bytes, rounded up.  */
+  count = (k + (un-1) * sizeof (mp_limb_t) + size - 1) / size;
+
+  if (!r)
+    r = gmp_xalloc (count * size);
+
+  if (endian == 0)
+    endian = gmp_detect_endian ();
+
+  p = (unsigned char *) r;
+
+  /* Correction applied to p after each word.  The inner loop moves p
+     by size bytes in the direction given by endian; when that
+     direction differs from the word order, p must jump 2 * size bytes
+     to reach the start of the next more significant word.  */
+  word_step = (order != endian) ? 2 * size : 0;
+
+  /* Process bytes from the least significant end, so point p at the
+     least significant word. */
+  if (order == 1)
+    {
+      p += size * (count - 1);
+      word_step = - word_step;
+    }
+
+  /* And at least significant byte of that word. */
+  if (endian == 1)
+    p += (size - 1);
+
+  for (bytes = 0, i = 0, k = 0; k < count; k++, p += word_step)
+    {
+      size_t j;
+      for (j = 0; j < size; j++, p -= (ptrdiff_t) endian)
+	{
+	  if (bytes == 0)
+	    {
+	      /* Fetch the next limb.  Once the limbs are used up, limb
+		 is already zero, which pads the last word.  */
+	      if (i < un)
+		limb = u->_mp_d[i++];
+	      bytes = sizeof (mp_limb_t);
+	    }
+	  *p = limb;
+	  limb >>= CHAR_BIT;
+	  bytes--;
+	}
+    }
+  assert (i == un);
+  assert (k == count);
+
+  if (countp)
+    *countp = count;
+
+  return r;
+}
--- /dev/null
+/* mini-gmp, a minimalistic implementation of a GNU GMP subset.
+
+Copyright 2011, 2012, 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+/* About mini-gmp: This is a minimal implementation of a subset of the
+ GMP interface. It is intended for inclusion into applications which
+ have modest bignum needs, as a fallback when the real GMP library
+ is not installed.
+
+ This file defines the public interface. */
+
+#ifndef __MINI_GMP_H__
+#define __MINI_GMP_H__
+
+/* For size_t */
+#include <stddef.h>
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+void mp_set_memory_functions (void *(*) (size_t),
+ void *(*) (void *, size_t, size_t),
+ void (*) (void *, size_t));
+
+void mp_get_memory_functions (void *(**) (size_t),
+ void *(**) (void *, size_t, size_t),
+ void (**) (void *, size_t));
+
+typedef unsigned long mp_limb_t;
+typedef long mp_size_t;
+typedef unsigned long mp_bitcnt_t;
+
+typedef mp_limb_t *mp_ptr;
+typedef const mp_limb_t *mp_srcptr;
+
+/* Representation of a multi-precision integer: a sign-and-magnitude
+   number whose magnitude is an array of limbs and whose sign is
+   carried by the sign of the _mp_size field. */
+typedef struct
+{
+ int _mp_alloc; /* Number of *limbs* allocated and pointed
+ to by the _mp_d field. */
+ int _mp_size; /* abs(_mp_size) is the number of limbs the
+ last field points to. If _mp_size is
+ negative this is a negative number. */
+ mp_limb_t *_mp_d; /* Pointer to the limbs. */
+} __mpz_struct;
+
+typedef __mpz_struct mpz_t[1];
+
+typedef __mpz_struct *mpz_ptr;
+typedef const __mpz_struct *mpz_srcptr;
+
+void mpn_copyi (mp_ptr, mp_srcptr, mp_size_t);
+void mpn_copyd (mp_ptr, mp_srcptr, mp_size_t);
+
+int mpn_cmp (mp_srcptr, mp_srcptr, mp_size_t);
+
+mp_limb_t mpn_add_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t mpn_add_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+mp_limb_t mpn_add (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
+
+mp_limb_t mpn_sub_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t mpn_sub_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+mp_limb_t mpn_sub (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
+
+mp_limb_t mpn_mul_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t mpn_addmul_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t mpn_submul_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+
+mp_limb_t mpn_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
+void mpn_mul_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+void mpn_sqr (mp_ptr, mp_srcptr, mp_size_t);
+
+mp_limb_t mpn_lshift (mp_ptr, mp_srcptr, mp_size_t, unsigned int);
+mp_limb_t mpn_rshift (mp_ptr, mp_srcptr, mp_size_t, unsigned int);
+
+mp_limb_t mpn_invert_3by2 (mp_limb_t, mp_limb_t);
+#define mpn_invert_limb(x) mpn_invert_3by2 ((x), 0)
+
+size_t mpn_get_str (unsigned char *, int, mp_ptr, mp_size_t);
+mp_size_t mpn_set_str (mp_ptr, const unsigned char *, size_t, int);
+
+/* Initialisation and release of an mpz_t. */
+void mpz_init (mpz_t);
+void mpz_init2 (mpz_t, mp_bitcnt_t);
+void mpz_clear (mpz_t);
+
+/* mpz_odd_p yields 1 when the size field is non-zero and bit 0 of the
+   lowest limb is set, otherwise 0.  Note that it uses the bitwise `&',
+   not `&&': _mp_d[0] is read even when _mp_size is zero, and the
+   argument z is evaluated twice. */
+#define mpz_odd_p(z) (((z)->_mp_size != 0) & (int) (z)->_mp_d[0])
+#define mpz_even_p(z) (! mpz_odd_p (z))
+
+/* Sign and comparisons against long, unsigned long, another mpz_t and
+   double.  The cmpabs variants presumably compare absolute values, as
+   in GMP. */
+int mpz_sgn (const mpz_t);
+int mpz_cmp_si (const mpz_t, long);
+int mpz_cmp_ui (const mpz_t, unsigned long);
+int mpz_cmp (const mpz_t, const mpz_t);
+int mpz_cmpabs_ui (const mpz_t, unsigned long);
+int mpz_cmpabs (const mpz_t, const mpz_t);
+int mpz_cmp_d (const mpz_t, double);
+int mpz_cmpabs_d (const mpz_t, double);
+
+/* Sign manipulation and swapping; the first argument is the result. */
+void mpz_abs (mpz_t, const mpz_t);
+void mpz_neg (mpz_t, const mpz_t);
+void mpz_swap (mpz_t, mpz_t);
+
+/* Addition and subtraction.  The result operand comes first; the _ui
+   forms take one unsigned long operand. */
+void mpz_add_ui (mpz_t, const mpz_t, unsigned long);
+void mpz_add (mpz_t, const mpz_t, const mpz_t);
+void mpz_sub_ui (mpz_t, const mpz_t, unsigned long);
+void mpz_ui_sub (mpz_t, unsigned long, const mpz_t);
+void mpz_sub (mpz_t, const mpz_t, const mpz_t);
+
+/* Multiplication, including multiplication by a power of two. */
+void mpz_mul_si (mpz_t, const mpz_t, long int);
+void mpz_mul_ui (mpz_t, const mpz_t, unsigned long int);
+void mpz_mul (mpz_t, const mpz_t, const mpz_t);
+void mpz_mul_2exp (mpz_t, const mpz_t, mp_bitcnt_t);
+
+/* Division.  In GMP the c/f/t prefixes select ceiling, floor and
+   truncating rounding of the quotient; _qr produces quotient and
+   remainder, _q and _r only one of them (see the GMP manual). */
+void mpz_cdiv_qr (mpz_t, mpz_t, const mpz_t, const mpz_t);
+void mpz_fdiv_qr (mpz_t, mpz_t, const mpz_t, const mpz_t);
+void mpz_tdiv_qr (mpz_t, mpz_t, const mpz_t, const mpz_t);
+void mpz_cdiv_q (mpz_t, const mpz_t, const mpz_t);
+void mpz_fdiv_q (mpz_t, const mpz_t, const mpz_t);
+void mpz_tdiv_q (mpz_t, const mpz_t, const mpz_t);
+void mpz_cdiv_r (mpz_t, const mpz_t, const mpz_t);
+void mpz_fdiv_r (mpz_t, const mpz_t, const mpz_t);
+void mpz_tdiv_r (mpz_t, const mpz_t, const mpz_t);
+
+/* Division by a power of two, given as a bit count. */
+void mpz_cdiv_q_2exp (mpz_t, const mpz_t, mp_bitcnt_t);
+void mpz_fdiv_q_2exp (mpz_t, const mpz_t, mp_bitcnt_t);
+void mpz_tdiv_q_2exp (mpz_t, const mpz_t, mp_bitcnt_t);
+void mpz_cdiv_r_2exp (mpz_t, const mpz_t, mp_bitcnt_t);
+void mpz_fdiv_r_2exp (mpz_t, const mpz_t, mp_bitcnt_t);
+void mpz_tdiv_r_2exp (mpz_t, const mpz_t, mp_bitcnt_t);
+
+void mpz_mod (mpz_t, const mpz_t, const mpz_t);
+
+void mpz_divexact (mpz_t, const mpz_t, const mpz_t);
+
+int mpz_divisible_p (const mpz_t, const mpz_t);
+
+/* Division by an unsigned long divisor.  Every variant returns an
+   unsigned long; the last three take no result operand at all. */
+unsigned long mpz_cdiv_qr_ui (mpz_t, mpz_t, const mpz_t, unsigned long);
+unsigned long mpz_fdiv_qr_ui (mpz_t, mpz_t, const mpz_t, unsigned long);
+unsigned long mpz_tdiv_qr_ui (mpz_t, mpz_t, const mpz_t, unsigned long);
+unsigned long mpz_cdiv_q_ui (mpz_t, const mpz_t, unsigned long);
+unsigned long mpz_fdiv_q_ui (mpz_t, const mpz_t, unsigned long);
+unsigned long mpz_tdiv_q_ui (mpz_t, const mpz_t, unsigned long);
+unsigned long mpz_cdiv_r_ui (mpz_t, const mpz_t, unsigned long);
+unsigned long mpz_fdiv_r_ui (mpz_t, const mpz_t, unsigned long);
+unsigned long mpz_tdiv_r_ui (mpz_t, const mpz_t, unsigned long);
+unsigned long mpz_cdiv_ui (const mpz_t, unsigned long);
+unsigned long mpz_fdiv_ui (const mpz_t, unsigned long);
+unsigned long mpz_tdiv_ui (const mpz_t, unsigned long);
+
+unsigned long mpz_mod_ui (mpz_t, const mpz_t, unsigned long);
+
+void mpz_divexact_ui (mpz_t, const mpz_t, unsigned long);
+
+int mpz_divisible_ui_p (const mpz_t, unsigned long);
+
+/* Greatest common divisor (plain and extended), least common multiple
+   and modular inverse.  mpz_invert returns an int status. */
+unsigned long mpz_gcd_ui (mpz_t, const mpz_t, unsigned long);
+void mpz_gcd (mpz_t, const mpz_t, const mpz_t);
+void mpz_gcdext (mpz_t, mpz_t, mpz_t, const mpz_t, const mpz_t);
+void mpz_lcm_ui (mpz_t, const mpz_t, unsigned long);
+void mpz_lcm (mpz_t, const mpz_t, const mpz_t);
+int mpz_invert (mpz_t, const mpz_t, const mpz_t);
+
+/* Square roots, with and without remainder. */
+void mpz_sqrtrem (mpz_t, mpz_t, const mpz_t);
+void mpz_sqrt (mpz_t, const mpz_t);
+
+/* Powers and modular powers. */
+void mpz_pow_ui (mpz_t, const mpz_t, unsigned long);
+void mpz_ui_pow_ui (mpz_t, unsigned long, unsigned long);
+void mpz_powm (mpz_t, const mpz_t, const mpz_t, const mpz_t);
+void mpz_powm_ui (mpz_t, const mpz_t, unsigned long, const mpz_t);
+
+/* General roots; the unsigned long argument is the root degree. */
+void mpz_rootrem (mpz_t, mpz_t, const mpz_t, unsigned long);
+int mpz_root (mpz_t, const mpz_t, unsigned long);
+
+/* Factorial and binomial coefficient of unsigned long arguments. */
+void mpz_fac_ui (mpz_t, unsigned long);
+void mpz_bin_uiui (mpz_t, unsigned long, unsigned long);
+
+/* Access to individual bits, addressed by an mp_bitcnt_t index. */
+int mpz_tstbit (const mpz_t, mp_bitcnt_t);
+void mpz_setbit (mpz_t, mp_bitcnt_t);
+void mpz_clrbit (mpz_t, mp_bitcnt_t);
+void mpz_combit (mpz_t, mp_bitcnt_t);
+
+/* Bitwise logical operations. */
+void mpz_com (mpz_t, const mpz_t);
+void mpz_and (mpz_t, const mpz_t, const mpz_t);
+void mpz_ior (mpz_t, const mpz_t, const mpz_t);
+void mpz_xor (mpz_t, const mpz_t, const mpz_t);
+
+/* Bit counting and scanning; all return an mp_bitcnt_t. */
+mp_bitcnt_t mpz_popcount (const mpz_t);
+mp_bitcnt_t mpz_hamdist (const mpz_t, const mpz_t);
+mp_bitcnt_t mpz_scan0 (const mpz_t, mp_bitcnt_t);
+mp_bitcnt_t mpz_scan1 (const mpz_t, mp_bitcnt_t);
+
+/* Range checks and conversion to C types, plus limb-level access. */
+int mpz_fits_slong_p (const mpz_t);
+int mpz_fits_ulong_p (const mpz_t);
+long int mpz_get_si (const mpz_t);
+unsigned long int mpz_get_ui (const mpz_t);
+double mpz_get_d (const mpz_t);
+size_t mpz_size (const mpz_t);
+mp_limb_t mpz_getlimbn (const mpz_t, mp_size_t);
+
+/* Assignment from C types and from another mpz_t. */
+void mpz_set_si (mpz_t, signed long int);
+void mpz_set_ui (mpz_t, unsigned long int);
+void mpz_set (mpz_t, const mpz_t);
+void mpz_set_d (mpz_t, double);
+
+/* Combined initialise-and-assign forms of the functions above. */
+void mpz_init_set_si (mpz_t, signed long int);
+void mpz_init_set_ui (mpz_t, unsigned long int);
+void mpz_init_set (mpz_t, const mpz_t);
+void mpz_init_set_d (mpz_t, double);
+
+/* String conversion; the int argument is the base.  mpz_set_str and
+   mpz_init_set_str return an int status. */
+size_t mpz_sizeinbase (const mpz_t, int);
+char *mpz_get_str (char *, int, const mpz_t);
+int mpz_set_str (mpz_t, const char *, int);
+int mpz_init_set_str (mpz_t, const char *, int);
+
+/* mpz_out_str takes a FILE *, so it is declared only when one of the
+   macros below shows that <stdio.h> has already been included.  Code
+   that wants mpz_out_str must therefore include <stdio.h> before this
+   header. */
+/* This long list taken from gmp.h. */
+/* For reference, "defined(EOF)" cannot be used here. In g++ 2.95.4,
+ <iostream> defines EOF but not FILE. */
+#if defined (FILE) \
+ || defined (H_STDIO) \
+ || defined (_H_STDIO) /* AIX */ \
+ || defined (_STDIO_H) /* glibc, Sun, SCO */ \
+ || defined (_STDIO_H_) /* BSD, OSF */ \
+ || defined (__STDIO_H) /* Borland */ \
+ || defined (__STDIO_H__) /* IRIX */ \
+ || defined (_STDIO_INCLUDED) /* HPUX */ \
+ || defined (__dj_include_stdio_h_) /* DJGPP */ \
+ || defined (_FILE_DEFINED) /* Microsoft */ \
+ || defined (__STDIO__) /* Apple MPW MrC */ \
+ || defined (_MSL_STDIO_H) /* Metrowerks */ \
+ || defined (_STDIO_H_INCLUDED) /* QNX4 */ \
+ || defined (_ISO_STDIO_ISO_H) /* Sun C++ */ \
+ || defined (__STDIO_LOADED) /* VMS */
+size_t mpz_out_str (FILE *, int, const mpz_t);
+#endif
+
+/* Conversion between an mpz_t and a caller-supplied array of words.
+   hex-random.c calls mpz_export as
+   (dst, countp, order, size, endian, 0, x). */
+void mpz_import (mpz_t, size_t, int, size_t, int, size_t, const void *);
+void *mpz_export (void *, size_t *, int, size_t, int, size_t, const mpz_t);
+
+#if defined (__cplusplus)
+}
+#endif
+#endif /* __MINI_GMP_H__ */
--- /dev/null
+# Note: Requires GNU make
+
+# Copyright 2011, 2012 Free Software Foundation, Inc.
+#
+# This file is part of the GNU MP Library test suite.
+#
+# The GNU MP Library test suite is free software; you can redistribute it
+# and/or modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 3 of the License,
+# or (at your option) any later version.
+#
+# The GNU MP Library test suite is distributed in the hope that it will be
+# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+# Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/.
+
+# srcdir locates the run-tests script; MINI_GMP_DIR locates mini-gmp.h
+# and mini-gmp.c.
+srcdir=.
+MINI_GMP_DIR=..
+
+CC = gcc
+EXTRA_CFLAGS = -O -Wall -g
+CFLAGS = $(EXTRA_CFLAGS) -I$(MINI_GMP_DIR)
+LDFLAGS =
+
+# Every test program is linked against the full GMP library as well
+# (hex-random.c includes gmp.h to produce reference values).
+LIBS = -lgmp -lm -lmcheck
+
+# Programs built and run by "make check".
+CHECK_PROGRAMS = t-add t-sub t-mul t-invert t-div t-div_2exp \
+ t-double t-cmp_d t-gcd t-lcm t-import t-comb t-signed \
+ t-sqrt t-root t-powm t-logops t-bitops t-scan t-str \
+ t-reuse
+
+# Support objects linked into every test program.
+MISC_OBJS = hex-random.o mini-random.o testutils.o
+
+# These targets name actions, not files.  Declaring them phony keeps a
+# stray file called "all", "clean" or "check" from masking the target.
+.PHONY: all clean check
+
+# The default target intentionally builds nothing; use "make check".
+all:
+
+# Remove object files and the test executables.
+clean:
+	rm -f *.o $(CHECK_PROGRAMS)
+
+# A pattern rule without a recipe cancels the matching built-in rule in
+# GNU make, so executables are never built straight from a .c file and
+# always go through the %.o rules below.  The ".c:" suffix form is
+# presumably there for the same purpose -- TODO confirm it has any
+# effect.
+%: %.c
+.c:
+
+# Keep object files
+.PRECIOUS: %.o
+
+# Every object depends on the mini-gmp header and on both random-helper
+# headers.
+%.o: %.c $(MINI_GMP_DIR)/mini-gmp.h hex-random.h mini-random.h
+ $(CC) $(CFLAGS) -c $< -o $@
+
+# testutils.o additionally depends on mini-gmp.c (presumably because
+# testutils.c includes it directly -- verify against testutils.c).
+testutils.o: $(MINI_GMP_DIR)/mini-gmp.c
+
+# Link each test from its own object plus the shared support objects.
+%: %.o $(MISC_OBJS)
+ $(CC) $(LDFLAGS) $^ $(LIBS) -o $@
+
+# Missing tests include:
+# mpz_popcount, mpz_hamdist, mpz_ui_pow_ui
+
+# Build every test program, then hand the list to the run-tests driver,
+# which runs each one and prints a PASS/FAIL summary.
+check: $(CHECK_PROGRAMS)
+ $(srcdir)/run-tests $(CHECK_PROGRAMS)
--- /dev/null
+/*
+
+Copyright 2011, Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <time.h>
+#include <unistd.h>
+
+#include "gmp.h"
+
+#include "hex-random.h"
+
+static gmp_randstate_t state;
+
+/* Seed the file-local random state.
+
+   With GMP_CHECK_RANDOMIZE unset or empty, the fixed seed 4711 is used.
+   A value that parses (strtoul, base auto-detected) to a non-zero
+   number is used as the seed itself.  A value that parses to zero
+   selects a seed from the clock and the process id; it is printed so
+   that a failing run can be reproduced. */
+void
+hex_random_init (void)
+{
+  const char *env_seed = getenv ("GMP_CHECK_RANDOMIZE");
+  unsigned long seed = 4711;
+
+  if (env_seed != NULL && env_seed[0] != '\0')
+    {
+      seed = strtoul (env_seed, NULL, 0);
+      if (seed != 0)
+        printf ("Re-seeding with GMP_CHECK_RANDOMIZE=%lu\n", seed);
+      else
+        {
+          seed = time (NULL) + getpid ();
+          printf ("Seed GMP_CHECK_RANDOMIZE=%lu (include this in bug reports)\n", seed);
+        }
+    }
+
+  gmp_randinit_default (state);
+  gmp_randseed_ui (state, seed);
+}
+
+/* Draw a value with mpz_urandomb (BITS bits) from the shared state and
+   return it as a hex string allocated by gmp_asprintf.  The caller owns
+   the returned string. */
+char *
+hex_urandomb (unsigned long bits)
+{
+  mpz_t value;
+  char *hex;
+
+  mpz_init (value);
+  mpz_urandomb (value, state, bits);
+  gmp_asprintf (&hex, "%Zx", value);
+  mpz_clear (value);
+
+  return hex;
+}
+
+/* Same as hex_urandomb, but the value comes from mpz_rrandomb.  The
+   caller owns the returned hex string. */
+char *
+hex_rrandomb (unsigned long bits)
+{
+  mpz_t value;
+  char *hex;
+
+  mpz_init (value);
+  mpz_rrandomb (value, state, bits);
+  gmp_asprintf (&hex, "%Zx", value);
+  mpz_clear (value);
+
+  return hex;
+}
+
+/* Draw a value with mpz_rrandomb (BITS bits), return it as a hex string
+   and also write it to DST through mpz_export using the given ORDER,
+   word SIZE and ENDIAN (nails fixed to 0).  *COUNTP is set by
+   mpz_export.  The caller owns the returned string. */
+char *
+hex_rrandomb_export (void *dst, size_t *countp,
+                     int order, size_t size, int endian, unsigned long bits)
+{
+  mpz_t value;
+  char *hex;
+
+  mpz_init (value);
+  mpz_rrandomb (value, state, bits);
+
+  gmp_asprintf (&hex, "%Zx", value);
+  mpz_export (dst, countp, order, size, endian, 0, value);
+
+  mpz_clear (value);
+  return hex;
+}
+
+/* Produce a random operand and the reference result of a one-operand
+   operation, both as hex strings allocated by gmp_asprintf.  Only
+   OP_SQR is supported (*rp = a * a); any other op aborts.  The operand
+   has fewer than MAXBITS bits and a random sign. */
+void
+hex_random_op2 (enum hex_random_op op, unsigned long maxbits,
+                char **ap, char **rp)
+{
+  mpz_t operand, result;
+  unsigned long nbits;
+
+  mpz_init (operand);
+  mpz_init (result);
+
+  nbits = gmp_urandomb_ui (state, 32) % maxbits;
+  mpz_rrandomb (operand, state, nbits);
+
+  /* One random bit decides the sign. */
+  if (gmp_urandomb_ui (state, 1) & 1)
+    mpz_neg (operand, operand);
+
+  if (op != OP_SQR)
+    abort ();
+  mpz_mul (result, operand, operand);
+
+  gmp_asprintf (ap, "%Zx", operand);
+  gmp_asprintf (rp, "%Zx", result);
+
+  mpz_clear (operand);
+  mpz_clear (result);
+}
+
+/* Multiply both A and B by one random factor of fewer than MAXBITS
+   bits, so that the pair shares a large common divisor.  R is used as
+   scratch and holds the factor on return. */
+static void
+hex_random_common_factor (mpz_t a, mpz_t b, mpz_t r, unsigned long maxbits)
+{
+  unsigned long gbits = gmp_urandomb_ui (state, 32) % maxbits;
+
+  mpz_rrandomb (r, state, gbits);
+  mpz_mul (a, a, r);
+  mpz_mul (b, b, r);
+}
+
+/* Produce two random operands and the reference result of a binary
+   operation (add, sub, mul, gcd, lcm, and, ior, xor), all as hex
+   strings allocated by gmp_asprintf.  Each operand has fewer than
+   MAXBITS bits and a random sign.  For gcd and lcm the operands are, in
+   half of the cases, first scaled by a common random factor.  Any
+   other op aborts. */
+void
+hex_random_op3 (enum hex_random_op op, unsigned long maxbits,
+                char **ap, char **bp, char **rp)
+{
+  mpz_t a, b, r;
+  unsigned long abits, bbits;
+  unsigned flags;
+
+  mpz_init (a);
+  mpz_init (b);
+  mpz_init (r);
+
+  abits = gmp_urandomb_ui (state, 32) % maxbits;
+  bbits = gmp_urandomb_ui (state, 32) % maxbits;
+
+  mpz_rrandomb (a, state, abits);
+  mpz_rrandomb (b, state, bbits);
+
+  /* Bit 0: negate a.  Bit 1: negate b.  Bit 2: common factor for
+     gcd/lcm. */
+  flags = gmp_urandomb_ui (state, 3);
+  if (flags & 1)
+    mpz_neg (a, a);
+  if (flags & 2)
+    mpz_neg (b, b);
+
+  switch (op)
+    {
+    case OP_ADD:
+      mpz_add (r, a, b);
+      break;
+    case OP_SUB:
+      mpz_sub (r, a, b);
+      break;
+    case OP_MUL:
+      mpz_mul (r, a, b);
+      break;
+    case OP_GCD:
+    case OP_LCM:
+      if (flags & 4)
+        hex_random_common_factor (a, b, r, maxbits);
+      if (op == OP_GCD)
+        mpz_gcd (r, a, b);
+      else
+        mpz_lcm (r, a, b);
+      break;
+    case OP_AND:
+      mpz_and (r, a, b);
+      break;
+    case OP_IOR:
+      mpz_ior (r, a, b);
+      break;
+    case OP_XOR:
+      mpz_xor (r, a, b);
+      break;
+    default:
+      abort ();
+    }
+
+  gmp_asprintf (ap, "%Zx", a);
+  gmp_asprintf (bp, "%Zx", b);
+  gmp_asprintf (rp, "%Zx", r);
+
+  mpz_clear (a);
+  mpz_clear (b);
+  mpz_clear (r);
+}
+
+/* Generate random operands and reference results for an operation
+   involving four values, returning each as a hex string allocated by
+   gmp_asprintf.
+
+   OP_POWM: *ap = base, *bp = exponent, *cp = modulus, *dp = the result
+   of mpz_powm (d, a, b, c).
+   OP_CDIV, OP_FDIV, OP_TDIV: *ap = dividend, *bp = divisor,
+   *cp = quotient, *dp = remainder from the matching mpz_?div_qr call.
+   Any other op aborts. */
+void
+hex_random_op4 (enum hex_random_op op, unsigned long maxbits,
+ char **ap, char **bp, char **cp, char **dp)
+{
+ mpz_t a, b, c, d;
+ unsigned long abits, bbits;
+ unsigned signs;
+
+ mpz_init (a);
+ mpz_init (b);
+ mpz_init (c);
+ mpz_init (d);
+
+ if (op == OP_POWM)
+ {
+ unsigned long cbits;
+ /* The exponent gets at least 1 bit and the modulus at least 2. */
+ abits = gmp_urandomb_ui (state, 32) % maxbits;
+ bbits = 1 + gmp_urandomb_ui (state, 32) % maxbits;
+ cbits = 2 + gmp_urandomb_ui (state, 32) % maxbits;
+
+ mpz_rrandomb (a, state, abits);
+ mpz_rrandomb (b, state, bbits);
+ mpz_rrandomb (c, state, cbits);
+
+ /* Bit 0: negate the base.  Bit 1: negate the exponent.  Bit 2:
+ negate the modulus. */
+ signs = gmp_urandomb_ui (state, 3);
+ if (signs & 1)
+ mpz_neg (a, a);
+ if (signs & 2)
+ {
+ mpz_t g;
+
+ /* If we negate the exponent, must make sure that gcd(a, c) = 1 */
+ if (mpz_sgn (a) == 0)
+ mpz_set_ui (a, 1);
+ else
+ {
+ mpz_init (g);
+
+ /* Strip common factors from a until it is coprime to c. */
+ for (;;)
+ {
+ mpz_gcd (g, a, c);
+ if (mpz_cmp_ui (g, 1) == 0)
+ break;
+ mpz_divexact (a, a, g);
+ }
+ mpz_clear (g);
+ }
+ mpz_neg (b, b);
+ }
+ if (signs & 4)
+ mpz_neg (c, c);
+
+ mpz_powm (d, a, b, c);
+ }
+ else
+ {
+ unsigned long qbits;
+ /* The divisor gets at least 1 bit.  The dividend size is
+ bbits + qbits reduced by 30, clamped at 0. */
+ bbits = 1 + gmp_urandomb_ui (state, 32) % maxbits;
+ qbits = gmp_urandomb_ui (state, 32) % maxbits;
+ abits = bbits + qbits;
+ if (abits > 30)
+ abits -= 30;
+ else
+ abits = 0;
+
+ mpz_rrandomb (a, state, abits);
+ mpz_rrandomb (b, state, bbits);
+
+ /* Bit 0: negate the dividend.  Bit 1: negate the divisor. */
+ signs = gmp_urandomb_ui (state, 2);
+ if (signs & 1)
+ mpz_neg (a, a);
+ if (signs & 2)
+ mpz_neg (b, b);
+
+ switch (op)
+ {
+ default:
+ abort ();
+ case OP_CDIV:
+ mpz_cdiv_qr (c, d, a, b);
+ break;
+ case OP_FDIV:
+ mpz_fdiv_qr (c, d, a, b);
+ break;
+ case OP_TDIV:
+ mpz_tdiv_qr (c, d, a, b);
+ break;
+ }
+ }
+ gmp_asprintf (ap, "%Zx", a);
+ gmp_asprintf (bp, "%Zx", b);
+ gmp_asprintf (cp, "%Zx", c);
+ gmp_asprintf (dp, "%Zx", d);
+
+ mpz_clear (a);
+ mpz_clear (b);
+ mpz_clear (c);
+ mpz_clear (d);
+}
+
+/* Produce a random operand, a random bit index and the reference result
+   of a bit-indexed operation: setbit / clrbit / combit applied to a
+   copy of the operand, or one of the six {c,f,t}div_{q,r}_2exp
+   divisions.  The operand has fewer than MAXBITS bits and a random
+   sign; the index is below MAXBITS + 100.  *ap and *rp receive hex
+   strings allocated by gmp_asprintf, *b the index.  Any other op
+   aborts. */
+void
+hex_random_bit_op (enum hex_random_op op, unsigned long maxbits,
+                   char **ap, unsigned long *b, char **rp)
+{
+  mpz_t a, r;
+  unsigned long abits, index;
+
+  mpz_init (a);
+  mpz_init (r);
+
+  abits = gmp_urandomb_ui (state, 32) % maxbits;
+  index = gmp_urandomb_ui (state, 32) % (maxbits + 100);
+
+  mpz_rrandomb (a, state, abits);
+
+  /* One random bit decides the sign. */
+  if (gmp_urandomb_ui (state, 1) & 1)
+    mpz_neg (a, a);
+
+  switch (op)
+    {
+    case OP_SETBIT:
+    case OP_CLRBIT:
+    case OP_COMBIT:
+      /* The single-bit operations work in place on a copy of a. */
+      mpz_set (r, a);
+      if (op == OP_SETBIT)
+        mpz_setbit (r, index);
+      else if (op == OP_CLRBIT)
+        mpz_clrbit (r, index);
+      else
+        mpz_combit (r, index);
+      break;
+
+    case OP_CDIV_Q_2:
+      mpz_cdiv_q_2exp (r, a, index);
+      break;
+    case OP_CDIV_R_2:
+      mpz_cdiv_r_2exp (r, a, index);
+      break;
+    case OP_FDIV_Q_2:
+      mpz_fdiv_q_2exp (r, a, index);
+      break;
+    case OP_FDIV_R_2:
+      mpz_fdiv_r_2exp (r, a, index);
+      break;
+    case OP_TDIV_Q_2:
+      mpz_tdiv_q_2exp (r, a, index);
+      break;
+    case OP_TDIV_R_2:
+      mpz_tdiv_r_2exp (r, a, index);
+      break;
+
+    default:
+      abort ();
+    }
+
+  gmp_asprintf (ap, "%Zx", a);
+  *b = index;
+  gmp_asprintf (rp, "%Zx", r);
+
+  mpz_clear (a);
+  mpz_clear (r);
+}
+
+/* Produce a random operand, a random starting bit index and the
+   reference result of mpz_scan0 (OP_SCAN0) or mpz_scan1 (OP_SCAN1).
+   The operand has fewer than MAXBITS bits and a random sign; the index
+   is below MAXBITS + 100.  *ap receives a hex string allocated by
+   gmp_asprintf, *b the index and *r the scan result.  Any other op
+   aborts. */
+void
+hex_random_scan_op (enum hex_random_op op, unsigned long maxbits,
+                    char **ap, unsigned long *b, unsigned long *r)
+{
+  mpz_t a;
+  unsigned long abits, start;
+
+  mpz_init (a);
+
+  abits = gmp_urandomb_ui (state, 32) % maxbits;
+  start = gmp_urandomb_ui (state, 32) % (maxbits + 100);
+
+  mpz_rrandomb (a, state, abits);
+
+  /* One random bit decides the sign. */
+  if (gmp_urandomb_ui (state, 1) & 1)
+    mpz_neg (a, a);
+
+  if (op == OP_SCAN0)
+    *r = mpz_scan0 (a, start);
+  else if (op == OP_SCAN1)
+    *r = mpz_scan1 (a, start);
+  else
+    abort ();
+
+  gmp_asprintf (ap, "%Zx", a);
+  *b = start;
+
+  mpz_clear (a);
+}
+
+/* Produce a random value with fewer than MAXBITS bits and a random
+   sign, and return it twice via mpz_get_str: in base 16 through *ap
+   and in BASE through *rp.  Both strings are allocated by mpz_get_str
+   and owned by the caller. */
+void
+hex_random_str_op (unsigned long maxbits,
+                   int base, char **ap, char **rp)
+{
+  mpz_t value;
+  unsigned long nbits;
+  unsigned flags;
+
+  mpz_init (value);
+
+  nbits = gmp_urandomb_ui (state, 32) % maxbits;
+  mpz_rrandomb (value, state, nbits);
+
+  /* Two random bits are drawn; only bit 0 is used, for the sign. */
+  flags = gmp_urandomb_ui (state, 2);
+  if (flags & 1)
+    mpz_neg (value, value);
+
+  *ap = mpz_get_str (NULL, 16, value);
+  *rp = mpz_get_str (NULL, base, value);
+
+  mpz_clear (value);
+}
--- /dev/null
+/*
+
+Copyright 2011, Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
+
+enum hex_random_op
+ {
+ OP_ADD, OP_SUB, OP_MUL, OP_SQR,
+ OP_CDIV, OP_FDIV, OP_TDIV,
+ OP_CDIV_Q_2, OP_CDIV_R_2,
+ OP_FDIV_Q_2, OP_FDIV_R_2,
+ OP_TDIV_Q_2, OP_TDIV_R_2,
+ OP_GCD, OP_LCM, OP_POWM, OP_AND, OP_IOR, OP_XOR,
+ OP_SETBIT, OP_CLRBIT, OP_COMBIT,
+ OP_SCAN0, OP_SCAN1,
+ };
+
+void hex_random_init (void);
+char *hex_urandomb (unsigned long bits);
+char *hex_rrandomb (unsigned long bits);
+char *hex_rrandomb_export (void *dst, size_t *countp,
+ int order, size_t size, int endian,
+ unsigned long bits);
+
+void hex_random_op2 (enum hex_random_op op, unsigned long maxbits,
+ char **ap, char **rp);
+void hex_random_op3 (enum hex_random_op op, unsigned long maxbits,
+ char **ap, char **bp, char **rp);
+void hex_random_op4 (enum hex_random_op op, unsigned long maxbits,
+ char **ap, char **bp, char **rp, char **qp);
+void hex_random_bit_op (enum hex_random_op op, unsigned long maxbits,
+ char **ap, unsigned long *b, char **rp);
+void hex_random_scan_op (enum hex_random_op op, unsigned long maxbits,
+ char **ap, unsigned long *b, unsigned long *r);
+void hex_random_str_op (unsigned long maxbits,
+ int base, char **ap, char **rp);
--- /dev/null
+/*
+
+Copyright 2011, 2013, Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "mini-random.h"
+
+/* Parse the hexadecimal string S into R.  If mpz_set_str reports
+   failure, print the offending input on stderr and abort. */
+static void
+set_str (mpz_t r, const char *s)
+{
+  if (mpz_set_str (r, s, 16) == 0)
+    return;
+
+  fprintf (stderr, "mpz_set_str failed on input %s\n", s);
+  abort ();
+}
+
+/* Set R to the value produced by hex_urandomb (bits), passing it
+   through its hex string form. */
+void
+mini_urandomb (mpz_t r, unsigned long bits)
+{
+  char *hex = hex_urandomb (bits);
+
+  set_str (r, hex);
+  free (hex);
+}
+
+/* Set R to the value produced by hex_rrandomb (bits), passing it
+   through its hex string form. */
+void
+mini_rrandomb (mpz_t r, unsigned long bits)
+{
+  char *hex = hex_rrandomb (bits);
+
+  set_str (r, hex);
+  free (hex);
+}
+
+/* Set R to the value produced by hex_rrandomb_export, which also fills
+   DST and *COUNTP with the exported form of the same value. */
+void
+mini_rrandomb_export (mpz_t r, void *dst, size_t *countp,
+                      int order, size_t size, int endian, unsigned long bits)
+{
+  char *hex = hex_rrandomb_export (dst, countp, order, size, endian, bits);
+
+  set_str (r, hex);
+  free (hex);
+}
+
+/* Fetch the operand and reference result from hex_random_op2 and
+   convert both hex strings into A and R. */
+void
+mini_random_op2 (enum hex_random_op op, unsigned long maxbits,
+                 mpz_t a, mpz_t r)
+{
+  char *a_hex, *r_hex;
+
+  hex_random_op2 (op, maxbits, &a_hex, &r_hex);
+
+  set_str (a, a_hex);
+  set_str (r, r_hex);
+
+  free (a_hex);
+  free (r_hex);
+}
+
+/* Fetch both operands and the reference result from hex_random_op3 and
+   convert the hex strings into A, B and R. */
+void
+mini_random_op3 (enum hex_random_op op, unsigned long maxbits,
+                 mpz_t a, mpz_t b, mpz_t r)
+{
+  char *a_hex, *b_hex, *r_hex;
+
+  hex_random_op3 (op, maxbits, &a_hex, &b_hex, &r_hex);
+
+  set_str (a, a_hex);
+  set_str (b, b_hex);
+  set_str (r, r_hex);
+
+  free (a_hex);
+  free (b_hex);
+  free (r_hex);
+}
+
+/* Fetch the four values produced by hex_random_op4 and convert the hex
+   strings into A, B, C and D, in that order. */
+void
+mini_random_op4 (enum hex_random_op op, unsigned long maxbits,
+                 mpz_t a, mpz_t b, mpz_t c, mpz_t d)
+{
+  char *a_hex, *b_hex, *c_hex, *d_hex;
+
+  hex_random_op4 (op, maxbits, &a_hex, &b_hex, &c_hex, &d_hex);
+
+  set_str (a, a_hex);
+  set_str (b, b_hex);
+  set_str (c, c_hex);
+  set_str (d, d_hex);
+
+  free (a_hex);
+  free (b_hex);
+  free (c_hex);
+  free (d_hex);
+}
+
+/* Fetch operand, bit index and reference result from
+   hex_random_bit_op.  The index is stored directly through B; the hex
+   strings are converted into A and R. */
+void
+mini_random_bit_op (enum hex_random_op op, unsigned long maxbits,
+                    mpz_t a, mp_bitcnt_t *b, mpz_t r)
+{
+  char *a_hex, *r_hex;
+
+  hex_random_bit_op (op, maxbits, &a_hex, b, &r_hex);
+
+  set_str (a, a_hex);
+  set_str (r, r_hex);
+
+  free (a_hex);
+  free (r_hex);
+}
+
+/* Fetch operand, starting index and reference scan result from
+   hex_random_scan_op.  B and R are filled in directly; the operand's
+   hex string is converted into A. */
+void
+mini_random_scan_op (enum hex_random_op op, unsigned long maxbits,
+                     mpz_t a, mp_bitcnt_t *b, mp_bitcnt_t *r)
+{
+  char *a_hex;
+
+  hex_random_scan_op (op, maxbits, &a_hex, b, r);
+
+  set_str (a, a_hex);
+  free (a_hex);
+}
--- /dev/null
+/*
+
+Copyright 2011, Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
+
+#ifndef MINI_RANDOM_H
+#define MINI_RANDOM_H
+
+/* The guard matters: hex-random.h defines an enum, which must not be
+   seen twice in one translation unit. */
+#include "mini-gmp.h"
+#include "hex-random.h"
+
+/* Each function obtains values from the corresponding hex_* generator
+   in hex-random.c as hex strings and parses them into mini-gmp mpz_t
+   variables. */
+void mini_urandomb (mpz_t, unsigned long);
+void mini_rrandomb (mpz_t, unsigned long);
+void mini_rrandomb_export (mpz_t r, void *dst, size_t *countp,
+                           int order, size_t size, int endian,
+                           unsigned long bits);
+
+void mini_random_op2 (enum hex_random_op, unsigned long, mpz_t, mpz_t);
+void mini_random_op3 (enum hex_random_op, unsigned long, mpz_t, mpz_t, mpz_t);
+void mini_random_op4 (enum hex_random_op, unsigned long, mpz_t, mpz_t, mpz_t, mpz_t);
+void mini_random_scan_op (enum hex_random_op, unsigned long, mpz_t, mp_bitcnt_t *, mp_bitcnt_t *);
+void mini_random_bit_op (enum hex_random_op, unsigned long, mpz_t, mp_bitcnt_t *, mpz_t);
+
+#endif /* MINI_RANDOM_H */
--- /dev/null
+#! /bin/sh
+
+# Copyright (C) 2000, 2001, 2002, 2004, 2005, 2011, 2012 Niels Möller
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+# Counters: tests that failed, and tests that ran (passed or failed;
+# skipped tests are not counted).
+failed=0
+all=0
+
+# Option state: --debug sets debug=yes (which skips teardown-env at the
+# end); -v sets testflags, which is passed to every test program.
+debug='no'
+testflags=''
+
+# Default srcdir to the current directory and export it to the tests.
+if [ -z "$srcdir" ] ; then
+ srcdir=`pwd`
+fi
+
+export srcdir
+
+# Print the path to use for running program $1.  When used in make
+# rules, we sometimes get the filenames VPATH expanded, but usually
+# not: a name that already contains a slash is printed unchanged, an
+# executable in the current directory gets a "./" prefix, and anything
+# else is looked up under $srcdir.
+find_program () {
+    case "$1" in
+        */*)
+            echo "$1"
+            return
+            ;;
+    esac
+
+    if [ -x "$1" ] ; then
+        echo "./$1"
+    else
+        echo "$srcdir/$1"
+    fi
+}
+
+# Run the optional environment hook $1.  A hook that is missing or not
+# executable is silently ignored; a hook that fails terminates the
+# whole script with status 1.
+env_program () {
+    if [ ! -x "$1" ] ; then
+        return 0
+    fi
+
+    "$1" && return 0
+
+    echo FAIL: $1
+    exit 1
+}
+
+test_program () {
+ testname=`basename "$1" .exe`
+ testname=`basename "$testname" -test`
+ if [ -z "$EMULATOR" ] || head -1 "$1" | grep '^#!' > /dev/null; then
+ "$1" $testflags
+ else
+ $EMULATOR "$1" $testflags
+ fi
+ case "$?" in
+ 0)
+ echo PASS: $testname
+ all=`expr $all + 1`
+ ;;
+ 77)
+ echo SKIP: $testname
+ ;;
+ *)
+ echo FAIL: $testname
+ failed=`expr $failed + 1`
+ all=`expr $all + 1`
+ ;;
+ esac
+}
+
+# Optional setup hook; a failing hook aborts the run.
+env_program `find_program setup-env`
+
+# Parse leading options; the first non-option argument ends parsing.
+while test $# != 0
+do
+ case "$1" in
+ --debug)
+ debug=yes
+ ;;
+ -v)
+ testflags='-v'
+ ;;
+ -*)
+ echo >&2 'Unknown option `'"$1'"
+ exit 1
+ ;;
+ *)
+ break
+ ;;
+ esac
+ shift
+done
+
+# With no test names given, run every file matching *-test in the
+# current directory; otherwise run exactly the programs named.
+if [ $# -eq 0 ] ; then
+ for f in *-test; do test_program "./$f"; done
+else
+ for f in "$@" ; do test_program `find_program "$f"`; done
+fi
+
+# Print a summary framed by lines of '=' as wide as the banner.
+if [ $failed -eq 0 ] ; then
+ banner="All $all tests passed"
+else
+ banner="$failed of $all tests failed"
+fi
+dashes=`echo "$banner" | sed s/./=/g`
+echo "$dashes"
+echo "$banner"
+echo "$dashes"
+
+# The teardown hook is skipped when --debug was given.
+if [ "x$debug" = xno ] ; then
+ env_program `find_program teardown-env`
+fi
+
+# The exit status of the script is zero only if no test failed.
+[ "$failed" -eq 0 ]
--- /dev/null
+/*
+
+Copyright 2012, Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "testutils.h"
+
+#define MAXBITS 400
+#define COUNT 10000
+
+/* Print "LABEL: <x in hex>" on stderr. */
+static void
+dump (const char *label, const mpz_t x)
+{
+  char *hex = mpz_get_str (NULL, 16, x);
+
+  fprintf (stderr, "%s: %s\n", label, hex);
+  testfree (hex);
+}
+
+/* Check mpz_add against COUNT random reference sums supplied by
+   mini_random_op3.  On the first mismatch the operands and both
+   results are dumped and the program aborts. */
+void
+testmain (int argc, char **argv)
+{
+  mpz_t a, b, res, ref;
+  unsigned round;
+
+  mpz_init (a);
+  mpz_init (b);
+  mpz_init (res);
+  mpz_init (ref);
+
+  for (round = 0; round < COUNT; round++)
+    {
+      mini_random_op3 (OP_ADD, MAXBITS, a, b, ref);
+      mpz_add (res, a, b);
+
+      if (mpz_cmp (res, ref) == 0)
+        continue;
+
+      fprintf (stderr, "mpz_add failed:\n");
+      dump ("a", a);
+      dump ("b", b);
+      dump ("r", res);
+      dump ("ref", ref);
+      abort ();
+    }
+
+  mpz_clear (a);
+  mpz_clear (b);
+  mpz_clear (res);
+  mpz_clear (ref);
+}
--- /dev/null
+/*
+
+Copyright 2012, Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
+
+#include <limits.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "testutils.h"
+
+#define MAXBITS 400
+#define COUNT 10000
+
+/* Print "LABEL: <x in hex>" on stderr. */
+static void
+dump (const char *label, const mpz_t x)
+{
+  char *hex = mpz_get_str (NULL, 16, x);
+
+  fprintf (stderr, "%s: %s\n", label, hex);
+  testfree (hex);
+}
+
+/* Check mpz_setbit, mpz_clrbit and mpz_combit against reference results
+   from mini_random_bit_op, and check mpz_tstbit for consistency with
+   each of them.  COUNT rounds are run; the first failure dumps the
+   values involved and aborts.
+
+   The mpz_tstbit failure reports dump the value that was actually
+   tested (res); the combit report also dumps the input a, since that
+   check compares the bit in both. */
+void
+testmain (int argc, char **argv)
+{
+  unsigned i;
+  mpz_t a, res, ref;
+  mp_bitcnt_t b;
+
+  mpz_init (a);
+  mpz_init (res);
+  mpz_init (ref);
+
+  for (i = 0; i < COUNT; i++)
+    {
+      /* Set bit b: result must match the reference and bit b must
+         read back as 1. */
+      mini_random_bit_op (OP_SETBIT, MAXBITS, a, &b, ref);
+      mpz_set (res, a);
+      mpz_setbit (res, b);
+      if (mpz_cmp (res, ref))
+        {
+          fprintf (stderr, "mpz_setbit failed:\n");
+          dump ("a", a);
+          fprintf (stderr, "b: %lu\n", (unsigned long) b);
+          dump ("r", res);
+          dump ("ref", ref);
+          abort ();
+        }
+      if (!mpz_tstbit (res, b))
+        {
+          fprintf (stderr, "mpz_tstbit failed (after mpz_setbit):\n");
+          dump ("res", res);
+          fprintf (stderr, "b: %lu\n", (unsigned long) b);
+          abort ();
+        }
+
+      /* Clear bit b: result must match the reference and bit b must
+         read back as 0. */
+      mini_random_bit_op (OP_CLRBIT, MAXBITS, a, &b, ref);
+      mpz_set (res, a);
+      mpz_clrbit (res, b);
+      if (mpz_cmp (res, ref))
+        {
+          fprintf (stderr, "mpz_clrbit failed:\n");
+          dump ("a", a);
+          fprintf (stderr, "b: %lu\n", (unsigned long) b);
+          dump ("r", res);
+          dump ("ref", ref);
+          abort ();
+        }
+      if (mpz_tstbit (res, b))
+        {
+          fprintf (stderr, "mpz_tstbit failed (after mpz_clrbit):\n");
+          dump ("res", res);
+          fprintf (stderr, "b: %lu\n", (unsigned long) b);
+          abort ();
+        }
+
+      /* Complement bit b: result must match the reference and bit b
+         must differ between input and result. */
+      mini_random_bit_op (OP_COMBIT, MAXBITS, a, &b, ref);
+      mpz_set (res, a);
+      mpz_combit (res, b);
+      if (mpz_cmp (res, ref))
+        {
+          fprintf (stderr, "mpz_combit failed:\n");
+          dump ("a", a);
+          fprintf (stderr, "b: %lu\n", (unsigned long) b);
+          dump ("r", res);
+          dump ("ref", ref);
+          abort ();
+        }
+      if (mpz_tstbit (res, b) == mpz_tstbit (a, b))
+        {
+          fprintf (stderr, "mpz_tstbit failed (after mpz_combit):\n");
+          dump ("a", a);
+          dump ("res", res);
+          fprintf (stderr, "b: %lu\n", (unsigned long) b);
+          abort ();
+        }
+    }
+
+  mpz_clear (a);
+  mpz_clear (res);
+  mpz_clear (ref);
+}
--- /dev/null
+/* Test mpz_cmp_d and mpz_cmpabs_d.
+
+Copyright 2001, 2002, 2003, 2005, 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
+
+#include <math.h>
+
+#include "testutils.h"
+
+/* FIXME: Not sure if the tests here are exhaustive. Ought to try to get
+ each possible exit from mpz_cmp_d (and mpz_cmpabs_d) exercised. */
+
+
+/* Sign of N as 1, -1 or 0.  N may be evaluated twice, so pass only
+   expressions without side effects. */
+#define SGN(n) ((n) > 0 ? 1 : (n) < 0 ? -1 : 0)
+
+
+/* Print X (in decimal and in hex), Y (with %g, twice, as the original
+   report did) and the raw bytes of Y, then abort. */
+static void
+check_one_fail (mpz_srcptr x, double y)
+{
+  const unsigned char *bytes = (const unsigned char *) &y;
+  size_t k;
+
+  printf (" x=");
+  mpz_out_str (stdout, 10, x);
+  printf ("\n y %g\n", y);
+  printf (" x=0x");
+  mpz_out_str (stdout, -16, x);
+  printf ("\n y %g\n", y);
+  printf (" y");
+  for (k = 0; k < sizeof (y); k++)
+    printf (" %02X", (unsigned) bytes[k]);
+  printf ("\n");
+  abort ();
+}
+
+/* Verify that the sign of mpz_cmp_d (x, y) equals CMP and the sign of
+   mpz_cmpabs_d (x, y) equals CMPABS.  NAME identifies the calling check
+   and is shown in the mpz_cmp_d failure report.  Any mismatch prints
+   the values and aborts. */
+void
+check_one (const char *name, mpz_srcptr x, double y, int cmp, int cmpabs)
+{
+  int got;
+
+  got = mpz_cmp_d (x, y);
+  if (SGN (got) != cmp)
+    {
+      printf ("mpz_cmp_d wrong (from %s)\n", name);
+      printf (" got %d\n", got);
+      printf (" want %d\n", cmp);
+      check_one_fail (x, y);
+    }
+
+  got = mpz_cmpabs_d (x, y);
+  if (SGN (got) != cmpabs)
+    {
+      printf ("mpz_cmpabs_d wrong\n");
+      printf (" got %d\n", got);
+      printf (" want %d\n", cmpabs);
+      check_one_fail (x, y);
+    }
+}
+
+/* Set Z from STR in the given BASE; if mpz_set_str reports failure,
+   print the string and base on stderr and abort. */
+static void
+mpz_set_str_or_abort (mpz_ptr z, const char *str, int base)
+{
+  if (mpz_set_str (z, str, base) == 0)
+    return;
+
+  fprintf (stderr, "ERROR: mpz_set_str failed\n");
+  fprintf (stderr, " str = \"%s\"\n", str);
+  fprintf (stderr, " base = %d\n", base);
+  abort ();
+}
+
+/* Run check_one over a fixed table of (integer string, double,
+   expected mpz_cmp_d sign, expected mpz_cmpabs_d sign) entries.  The
+   strings are parsed with base 0, so the "0x" prefixed entries are
+   hexadecimal. */
+void
+check_data (void)
+{
+  static const struct {
+    const char *x;
+    double y;
+    int cmp, cmpabs;
+
+  } data[] = {
+
+    { "0", 0.0, 0, 0 },
+
+    { "1", 0.0, 1, 1 },
+    { "-1", 0.0, -1, 1 },
+
+    { "1", 0.5, 1, 1 },
+    { "-1", -0.5, -1, 1 },
+
+    { "0", 1.0, -1, -1 },
+    { "0", -1.0, 1, -1 },
+
+    { "0x1000000000000000000000000000000000000000000000000", 1.0, 1, 1 },
+    { "-0x1000000000000000000000000000000000000000000000000", 1.0, -1, 1 },
+
+    { "0", 1e100, -1, -1 },
+    { "0", -1e100, 1, -1 },
+
+    { "2", 1.5, 1, 1 },
+    { "2", -1.5, 1, 1 },
+    { "-2", 1.5, -1, 1 },
+    { "-2", -1.5, -1, 1 },
+  };
+
+  mpz_t value;
+  int row;
+
+  mpz_init (value);
+
+  for (row = 0; row < numberof (data); row++)
+    {
+      mpz_set_str_or_abort (value, data[row].x, 0);
+      check_one ("check_data", value, data[row].y,
+                 data[row].cmp, data[row].cmpabs);
+    }
+
+  mpz_clear (value);
+}
+
+
+/* Equality of integers made of 1, 2, 3, ... consecutive one bits with
+   their double conversion.  The loop ends as soon as the conversion to
+   double and back no longer reproduces the integer (or after 512
+   rounds). */
+void
+check_onebits (void)
+{
+  mpz_t ones, roundtrip;
+  int round;
+
+  mpz_init_set_ui (ones, 0L);
+  mpz_init (roundtrip);
+
+  for (round = 0; round < 512; round++)
+    {
+      double y;
+
+      /* Append one more set bit: ones = 2 * ones + 1. */
+      mpz_mul_2exp (ones, ones, 1);
+      mpz_add_ui (ones, ones, 1L);
+
+      y = mpz_get_d (ones);
+      mpz_set_d (roundtrip, y);
+
+      /* stop if any truncation is occurring */
+      if (mpz_cmp (ones, roundtrip) != 0)
+        break;
+
+      /* Positive integer against +y and -y ... */
+      check_one ("check_onebits", ones, y, 0, 0);
+      check_one ("check_onebits", ones, -y, 1, 0);
+
+      /* ... then the negated integer, restored afterwards. */
+      mpz_neg (ones, ones);
+      check_one ("check_onebits", ones, y, -1, 0);
+      check_one ("check_onebits", ones, -y, 0, 0);
+      mpz_neg (ones, ones);
+    }
+
+  mpz_clear (ones);
+  mpz_clear (roundtrip);
+}
+
+
+/* Compare 2^n, 2^n - 1 and 2^n + 1 (and their negations) against the
+   double 2^n.  The integers differ from the double by at most 1, in a
+   limb position that may lie below the limbs the double covers.  */
+void
+check_low_z_one (void)
+{
+  mpz_t x;
+  double y;
+  unsigned long shift;
+
+  mpz_init (x);
+
+  /* FIXME: It'd be better to base this on the float format. */
+#if defined (__vax) || defined (__vax__)
+#define LIM 127 /* vax fp numbers have limited range */
+#else
+#define LIM 512
+#endif
+
+  for (shift = 1; shift < LIM; shift++)
+    {
+      /* x = 2^shift, y = the same value as a double */
+      mpz_set_ui (x, 1L);
+      mpz_mul_2exp (x, x, shift);
+      y = mpz_get_d (x);
+
+      /* |x| == |y| */
+      check_one ("check_low_z_one", x, y, 0, 0);
+      check_one ("check_low_z_one", x, -y, 1, 0);
+      mpz_neg (x, x);
+      check_one ("check_low_z_one", x, y, -1, 0);
+      check_one ("check_low_z_one", x, -y, 0, 0);
+      mpz_neg (x, x);
+
+      /* x = 2^shift - 1, so |x| < |y| */
+      mpz_sub_ui (x, x, 1);
+
+      check_one ("check_low_z_one", x, y, -1, -1);
+      check_one ("check_low_z_one", x, -y, 1, -1);
+      mpz_neg (x, x);
+      check_one ("check_low_z_one", x, y, -1, -1);
+      check_one ("check_low_z_one", x, -y, 1, -1);
+      mpz_neg (x, x);
+
+      /* x = 2^shift + 1, so |x| > |y| */
+      mpz_add_ui (x, x, 2);
+
+      check_one ("check_low_z_one", x, y, 1, 1);
+      check_one ("check_low_z_one", x, -y, 1, 1);
+      mpz_neg (x, x);
+      check_one ("check_low_z_one", x, y, -1, 1);
+      check_one ("check_low_z_one", x, -y, -1, 1);
+      mpz_neg (x, x);
+    }
+
+  mpz_clear (x);
+}
+
+/* Compare the integers 1 and -1 against 1+2^-n and -(1+2^-n), for every
+   n at which 1+2^-n is still distinguishable from 1.
+
+   "y" is volatile so that gcc stores and reloads it, which forces it to
+   a 64-bit double; on x86 it could otherwise stay on the float stack as
+   an 80-bit long double.  */
+void
+check_one_2exp (void)
+{
+  double eps;
+  mpz_t x;
+  volatile double y;
+  int step;
+
+  mpz_init (x);
+
+  eps = 1.0;
+  for (step = 0; step < 128; step++)
+    {
+      eps /= 2.0;
+      y = 1.0 + eps;
+
+      /* eps has dropped below the precision of y; nothing left to test */
+      if (y == 1.0)
+        break;
+
+      /* |1| < |y| in every combination of signs */
+      mpz_set_ui (x, 1L);
+      check_one ("check_one_2exp", x, y, -1, -1);
+      check_one ("check_one_2exp", x, -y, 1, -1);
+
+      mpz_set_si (x, -1L);
+      check_one ("check_one_2exp", x, y, -1, -1);
+      check_one ("check_one_2exp", x, -y, 1, -1);
+    }
+
+  mpz_clear (x);
+}
+
+/* Compare a selection of integers against +infinity and -infinity.
+   Every finite integer must be below +inf, above -inf, and smaller in
+   absolute value than either.  The test is skipped when HUGE_VAL does
+   not behave like an infinity (inf == 2*inf).  */
+void
+check_infinity (void)
+{
+  mpz_t x;
+  double inf = HUGE_VAL;
+
+  if (inf != 2*inf)
+    return;
+
+  mpz_init (x);
+
+  /* 0 cmp inf */
+  mpz_set_ui (x, 0L);
+  check_one ("check_infinity", x, inf, -1, -1);
+  check_one ("check_infinity", x, -inf, 1, -1);
+
+  /* 123 cmp inf */
+  mpz_set_ui (x, 123L);
+  check_one ("check_infinity", x, inf, -1, -1);
+  check_one ("check_infinity", x, -inf, 1, -1);
+
+  /* -123 cmp inf */
+  mpz_set_si (x, -123L);
+  check_one ("check_infinity", x, inf, -1, -1);
+  check_one ("check_infinity", x, -inf, 1, -1);
+
+  /* 2^5000 cmp inf */
+  mpz_set_ui (x, 1L);
+  mpz_mul_2exp (x, x, 5000L);
+  check_one ("check_infinity", x, inf, -1, -1);
+  check_one ("check_infinity", x, -inf, 1, -1);
+
+  /* -2^5000 cmp inf */
+  mpz_neg (x, x);
+  check_one ("check_infinity", x, inf, -1, -1);
+  check_one ("check_infinity", x, -inf, 1, -1);
+
+  mpz_clear (x);
+}
+
+void
+testmain (int argc, char *argv[])
+{ /* Runs each check defined in this file once, in a fixed order; argc
+     and argv are not used.  */
+ check_data (); /* fixed table of hand-picked cases */
+ check_onebits (); /* all-ones integers that convert to double exactly */
+ check_low_z_one (); /* 2^n and 2^n +/- 1 against the double 2^n */
+ check_one_2exp (); /* +/-1 against 1 + 2^-n */
+ check_infinity (); /* finite integers against +/- infinity */
+}
--- /dev/null
+/* Exercise mpz_fac_ui and mpz_bin_uiui.
+
+Copyright 2000, 2001, 2002, 2012, 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "testutils.h"
+
+/* Usage: t-fac_ui [x|num]
+
+ With no arguments testing goes up to the initial value of "limit" below.
+ With a number argument tests are carried that far, or with a literal "x"
+ tests are continued without limit (this being meant only for development
+ purposes). */
+
+/* Compute bin(n,k) with mpz_bin_uiui and compare it with WANT.  On a
+   mismatch print n, k and both values on stdout and abort.  */
+void
+try_mpz_bin_uiui (mpz_srcptr want, unsigned long n, unsigned long k)
+{
+  mpz_t got;
+
+  mpz_init (got);
+  mpz_bin_uiui (got, n, k);
+
+  if (mpz_cmp (got, want) == 0)
+    {
+      mpz_clear (got);
+      return;
+    }
+
+  printf ("mpz_bin_uiui wrong\n");
+  printf (" n=%lu\n", n);
+  printf (" k=%lu\n", k);
+  printf (" got=");
+  mpz_out_str (stdout, 10, got);
+  printf ("\n");
+  printf (" want=");
+  mpz_out_str (stdout, 10, want);
+  printf ("\n");
+  abort ();
+}
+
+/* Test all bin(n,k) cases, with 0 <= k <= n + 1 <= count.
+
+   The reference value is built incrementally along each row with
+   bin(n,k+1) = bin(n,k) * (n-k) / (k+1), starting from bin(n,0) = 1.
+   The final call of each row checks bin(n,n+1), whose reference value
+   has by then become 0.  */
+void
+bin_smallexaustive (unsigned int count)
+{
+  mpz_t want;
+  unsigned long n, k;
+
+  mpz_init (want);
+
+  for (n = 0; n < count; n++)
+    {
+      mpz_set_ui (want, 1);
+      k = 0;
+      do
+        {
+          try_mpz_bin_uiui (want, n, k);
+          mpz_mul_ui (want, want, n - k);
+          mpz_fdiv_q_ui (want, want, k + 1);
+          k++;
+        }
+      while (k <= n);
+
+      /* here k == n + 1 */
+      try_mpz_bin_uiui (want, n, k);
+    }
+
+  mpz_clear (want);
+}
+
+/* Test all fac(n) cases, with 0 <= n < limit.  The reference value is
+   carried from one n to the next by multiplying with n+1.  */
+void
+fac_smallexaustive (unsigned int limit)
+{
+  mpz_t want, got;
+  unsigned long n;
+
+  mpz_init_set_si (want, 1); /* 0! = 1 */
+  mpz_init (got);
+
+  n = 0;
+  while (n < limit)
+    {
+      mpz_fac_ui (got, n);
+
+      if (mpz_cmp (want, got) != 0)
+        {
+          printf ("mpz_fac_ui(%lu) wrong\n", n);
+          printf (" got ");
+          mpz_out_str (stdout, 10, got);
+          printf("\n");
+          printf (" want ");
+          mpz_out_str (stdout, 10, want);
+          printf("\n");
+          abort ();
+        }
+
+      /* (n+1)! = n! * (n+1) */
+      n++;
+      mpz_mul_ui (want, want, n);
+    }
+
+  mpz_clear (want);
+  mpz_clear (got);
+}
+
+/* Set F = (n-1)! and check that F mod n == n-1, which is what Wilson's
+   theorem gives when n is prime.  Aborts with a message otherwise.  F
+   is left holding (n-1)! for the caller.  */
+void checkWilson (mpz_t f, unsigned long n)
+{
+  unsigned long residue;
+
+  mpz_fac_ui (f, n - 1);
+  residue = mpz_fdiv_ui (f, n);
+
+  if (residue == n - 1)
+    return;
+
+  printf ("mpz_fac_ui(%lu) wrong\n", n - 1);
+  printf (" Wilson's theorem not verified: got %lu, expected %lu.\n",
+          residue, n - 1);
+  abort ();
+}
+
+/* Given p1, p2, p3 with p1-1 == (p2-1) + (p3-1), run checkWilson on
+   each of them and then check that
+   (p1-1)! / ((p2-1)! (p3-1)!) equals mpz_bin_uiui (p1-1, p2-1).
+   Aborts with a message if the arguments do not satisfy the relation
+   or if the two values disagree.  */
+void
+checkprimes (unsigned long p1, unsigned long p2, unsigned long p3)
+{
+  mpz_t quot, tmp;
+
+  if (p1 - 1 != p2 - 1 + p3 - 1)
+    {
+      printf ("checkprimes(%lu, %lu, %lu) wrong\n", p1, p2, p3);
+      printf (" %lu - 1 != %lu - 1 + %lu - 1 \n", p1, p2, p3);
+      abort ();
+    }
+
+  mpz_init (quot);
+  mpz_init (tmp);
+
+  /* quot = (p1-1)! / ((p2-1)! (p3-1)!), one factorial at a time */
+  checkWilson (quot, p1);          /* quot = (p1-1)! */
+  checkWilson (tmp, p2);           /* tmp = (p2-1)! */
+  mpz_divexact (quot, quot, tmp);
+  checkWilson (tmp, p3);           /* tmp = (p3-1)! */
+  mpz_divexact (quot, quot, tmp);
+
+  /* tmp = the same value computed as a binomial coefficient */
+  mpz_bin_uiui (tmp, p1 - 1, p2 - 1);
+
+  if (mpz_cmp (tmp, quot) != 0)
+    {
+      printf ("checkprimes(%lu, %lu, %lu) wrong\n", p1, p2, p3);
+      printf (" got ");
+      mpz_out_str (stdout, 10, quot);
+      printf("\n");
+      printf (" want ");
+      mpz_out_str (stdout, 10, tmp);
+      printf("\n");
+      abort ();
+    }
+
+  mpz_clear (quot);
+  mpz_clear (tmp);
+
+}
+
+/* Pick the test limit (default 128, a number given as argv[1], or
+   "x..." for the bitwise complement of the default, i.e. effectively no
+   limit), then run the Wilson/binomial consistency check and the two
+   exhaustive small-argument tests.  */
+void
+testmain (int argc, char *argv[])
+{
+  unsigned long limit = 128;
+
+  if (argc > 1)
+    {
+      if (argv[1][0] == 'x')
+        limit = ~ limit;
+      else
+        limit = atoi (argv[1]);
+    }
+
+  checkprimes(1009, 733, 277);
+  fac_smallexaustive (limit);
+  bin_smallexaustive (limit);
+}
--- /dev/null
+/*
+
+Copyright 2012, 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
+
+#include <assert.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "testutils.h"
+
+#define MAXBITS 400
+#define COUNT 10000
+
+/* Print "LABEL: X" on stderr with X in hexadecimal.  The string comes
+   from mpz_get_str and is handed to testfree afterwards.  */
+static void
+dump (const char *label, const mpz_t x)
+{
+  char *hex;
+
+  hex = mpz_get_str (NULL, 16, x);
+  fprintf (stderr, "%s: %s\n", label, hex);
+  testfree (hex);
+}
+
+typedef void div_qr_func (mpz_t, mpz_t, const mpz_t, const mpz_t); /* shape of mpz_{c,f,t}div_qr */
+typedef unsigned long div_qr_ui_func (mpz_t, mpz_t, const mpz_t, unsigned long); /* shape of mpz_{c,f,t}div_qr_ui */
+typedef void div_func (mpz_t, const mpz_t, const mpz_t); /* shape of mpz_{c,f,t}div_q and _r */
+typedef unsigned long div_x_ui_func (mpz_t, const mpz_t, unsigned long); /* shape of mpz_{c,f,t}div_q_ui and _r_ui */
+typedef unsigned long div_ui_func (const mpz_t, unsigned long); /* shape of mpz_{c,f,t}div_ui */
+
+/* Exercise the mpz division functions in all three rounding modes
+   (ceil, floor, truncate).
+
+   Each iteration gets operands a, b and reference quotient/remainder
+   rq, rr from mini_random_op4 (presumably computed independently of the
+   code under test), then checks:
+     - mpz_{c,f,t}div_qr, _q and _r against rq / rr;
+     - mpz_divisible_p against "remainder is zero" (once per operand
+       pair);
+     - mpz_mod, in the two cases where its result coincides with the
+       ceil resp. floor remainder;
+     - when b fits an unsigned long: the _qr_ui, _q_ui, _r_ui and _ui
+       variants, mpz_divisible_ui_p and mpz_mod_ui.
+   Any mismatch dumps the operands on stderr and aborts.  Each failure
+   message names the function that was actually called.  */
+void
+testmain (int argc, char **argv)
+{
+  unsigned i;
+  mpz_t a, b, q, r, rq, rr;
+  int div_p;
+
+  mpz_init (a);
+  mpz_init (b);
+  mpz_init (r);
+  mpz_init (q);
+  mpz_init (rr);
+  mpz_init (rq);
+
+  for (i = 0; i < COUNT; i++)
+    {
+      unsigned j;
+      for (j = 0; j < 3; j++)
+        {
+          /* Index 0 = ceil, 1 = floor, 2 = truncate in every table.  */
+          static const enum hex_random_op ops[3] = { OP_CDIV, OP_FDIV, OP_TDIV };
+          static const char name[3] = { 'c', 'f', 't'};
+          static div_qr_func * const div_qr [3] =
+            {
+              mpz_cdiv_qr, mpz_fdiv_qr, mpz_tdiv_qr
+            };
+          static div_qr_ui_func * const div_qr_ui[3] =
+            {
+              mpz_cdiv_qr_ui, mpz_fdiv_qr_ui, mpz_tdiv_qr_ui
+            };
+          static div_func * const div_q [3] =
+            {
+              mpz_cdiv_q, mpz_fdiv_q, mpz_tdiv_q
+            };
+          static div_x_ui_func * const div_q_ui[3] =
+            {
+              mpz_cdiv_q_ui, mpz_fdiv_q_ui, mpz_tdiv_q_ui
+            };
+          static div_func * const div_r [3] =
+            {
+              mpz_cdiv_r, mpz_fdiv_r, mpz_tdiv_r
+            };
+          static div_x_ui_func * const div_r_ui[3] =
+            {
+              mpz_cdiv_r_ui, mpz_fdiv_r_ui, mpz_tdiv_r_ui
+            };
+          static div_ui_func * const div_ui[3] =
+            {
+              mpz_cdiv_ui, mpz_fdiv_ui, mpz_tdiv_ui
+            };
+
+          mini_random_op4 (ops[j], MAXBITS, a, b, rq, rr);
+          div_qr[j] (q, r, a, b);
+          if (mpz_cmp (r, rr) || mpz_cmp (q, rq))
+            {
+              fprintf (stderr, "mpz_%cdiv_qr failed:\n", name[j]);
+              dump ("a", a);
+              dump ("b", b);
+              dump ("r ", r);
+              dump ("rref", rr);
+              dump ("q ", q);
+              dump ("qref", rq);
+              abort ();
+            }
+          /* Preload the destination with junk so that a function which
+             leaves it untouched cannot pass by accident.  */
+          mpz_set_si (q, -5);
+          div_q[j] (q, a, b);
+          if (mpz_cmp (q, rq))
+            {
+              fprintf (stderr, "mpz_%cdiv_q failed:\n", name[j]);
+              dump ("a", a);
+              dump ("b", b);
+              dump ("q ", q);
+              dump ("qref", rq);
+              abort ();
+            }
+          mpz_set_ui (r, ~5);
+          div_r[j] (r, a, b);
+          if (mpz_cmp (r, rr))
+            {
+              fprintf (stderr, "mpz_%cdiv_r failed:\n", name[j]);
+              dump ("a", a);
+              dump ("b", b);
+              dump ("r ", r);
+              dump ("rref", rr);
+              abort ();
+            }
+
+          if (j == 0) /* do this once, not for all roundings */
+            {
+              div_p = mpz_divisible_p (a, b);
+              if ((mpz_sgn (r) == 0) ^ (div_p != 0))
+                {
+                  fprintf (stderr, "mpz_divisible_p failed:\n");
+                  dump ("a", a);
+                  dump ("b", b);
+                  dump ("r ", r);
+                  abort ();
+                }
+            }
+
+          if (j == 0 && mpz_sgn (b) < 0) /* ceil, negative divisor */
+            {
+              mpz_mod (r, a, b);
+              if (mpz_cmp (r, rr))
+                {
+                  fprintf (stderr, "mpz_mod failed:\n");
+                  dump ("a", a);
+                  dump ("b", b);
+                  dump ("r ", r);
+                  dump ("rref", rr);
+                  abort ();
+                }
+            }
+
+          if (j == 1 && mpz_sgn (b) > 0) /* floor, positive divisor */
+            {
+              mpz_mod (r, a, b);
+              if (mpz_cmp (r, rr))
+                {
+                  fprintf (stderr, "mpz_mod failed:\n");
+                  dump ("a", a);
+                  dump ("b", b);
+                  dump ("r ", r);
+                  dump ("rref", rr);
+                  abort ();
+                }
+            }
+
+          if (mpz_fits_ulong_p (b))
+            {
+              mp_limb_t rl;
+
+              rl = div_qr_ui[j] (q, r, a, mpz_get_ui (b));
+              if (rl != mpz_get_ui (rr)
+                  || mpz_cmp (r, rr) || mpz_cmp (q, rq))
+                {
+                  fprintf (stderr, "mpz_%cdiv_qr_ui failed:\n", name[j]);
+                  dump ("a", a);
+                  dump ("b", b);
+                  fprintf(stderr, "rl = %lx\n", rl);
+                  dump ("r ", r);
+                  dump ("rref", rr);
+                  dump ("q ", q);
+                  dump ("qref", rq);
+                  abort ();
+                }
+
+              mpz_set_si (q, 3);
+              rl = div_q_ui[j] (q, a, mpz_get_ui (b));
+              if (rl != mpz_get_ui (rr) || mpz_cmp (q, rq))
+                {
+                  fprintf (stderr, "mpz_%cdiv_q_ui failed:\n", name[j]);
+                  dump ("a", a);
+                  dump ("b", b);
+                  fprintf(stderr, "rl = %lx\n", rl);
+                  dump ("rref", rr);
+                  dump ("q ", q);
+                  dump ("qref", rq);
+                  abort ();
+                }
+
+              mpz_set_ui (r, 7);
+              rl = div_r_ui[j] (r, a, mpz_get_ui (b));
+              if (rl != mpz_get_ui (rr) || mpz_cmp (r, rr))
+                {
+                  fprintf (stderr, "mpz_%cdiv_r_ui failed:\n", name[j]);
+                  dump ("a", a);
+                  dump ("b", b);
+                  fprintf(stderr, "rl = %lx\n", rl);
+                  dump ("r ", r);
+                  dump ("rref", rr);
+                  abort ();
+                }
+
+              rl = div_ui[j] (a, mpz_get_ui (b));
+              if (rl != mpz_get_ui (rr))
+                {
+                  fprintf (stderr, "mpz_%cdiv_ui failed:\n", name[j]);
+                  dump ("a", a);
+                  dump ("b", b);
+                  fprintf(stderr, "rl = %lx\n", rl);
+                  dump ("rref", rr);
+                  abort ();
+                }
+
+              if (j == 0) /* do this once, not for all roundings */
+                {
+                  /* r still holds the remainder checked just above */
+                  div_p = mpz_divisible_ui_p (a, mpz_get_ui (b));
+                  if ((mpz_sgn (r) == 0) ^ (div_p != 0))
+                    {
+                      fprintf (stderr, "mpz_divisible_ui_p failed:\n");
+                      dump ("a", a);
+                      dump ("b", b);
+                      dump ("r ", r);
+                      abort ();
+                    }
+                }
+
+              if (j == 1) /* floor */
+                {
+                  mpz_mod_ui (r, a, mpz_get_ui (b));
+                  if (mpz_cmp (r, rr))
+                    {
+                      fprintf (stderr, "mpz_mod_ui failed:\n");
+                      dump ("a", a);
+                      dump ("b", b);
+                      dump ("r ", r);
+                      dump ("rref", rr);
+                      abort ();
+                    }
+                }
+            }
+        }
+    }
+  mpz_clear (a);
+  mpz_clear (b);
+  mpz_clear (r);
+  mpz_clear (q);
+  mpz_clear (rr);
+  mpz_clear (rq);
+}
--- /dev/null
+/*
+
+Copyright 2012, Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
+
+#include <assert.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "testutils.h"
+
+#define MAXBITS 400
+#define COUNT 10000
+
+/* Print "LABEL: X" on stderr with X in hexadecimal.  The string comes
+   from mpz_get_str and is handed to testfree afterwards.  */
+static void
+dump (const char *label, const mpz_t x)
+{
+  char *hex;
+
+  hex = mpz_get_str (NULL, 16, x);
+  fprintf (stderr, "%s: %s\n", label, hex);
+  testfree (hex);
+}
+
+typedef void div_func (mpz_t, const mpz_t, mp_bitcnt_t); /* shape shared by the mpz_{c,f,t}div_{q,r}_2exp functions */
+
+/* For each of the six mpz_{c,f,t}div_{q,r}_2exp functions, take an
+   operand a, a bit count b and a reference result from
+   mini_random_bit_op, call the function, and abort with a dump of the
+   operands if its result differs from the reference.  */
+void
+testmain (int argc, char **argv)
+{
+  unsigned iter;
+  mpz_t a, res, ref;
+  mp_bitcnt_t b;
+
+  mpz_init (a);
+  mpz_init (res);
+  mpz_init (ref);
+
+  for (iter = 0; iter < COUNT; iter++)
+    {
+      unsigned k;
+      for (k = 0; k < 6; k++)
+        {
+          /* The three tables below are indexed in parallel.  */
+          static const enum hex_random_op ops[6] =
+            {
+              OP_CDIV_Q_2, OP_CDIV_R_2,
+              OP_FDIV_Q_2, OP_FDIV_R_2,
+              OP_TDIV_Q_2, OP_TDIV_R_2
+            };
+          static const char *name[6] =
+            {
+              "cdiv_q", "cdiv_r",
+              "fdiv_q", "fdiv_r",
+              "tdiv_q", "tdiv_r"
+            };
+          static div_func * const div [6] =
+            {
+              mpz_cdiv_q_2exp, mpz_cdiv_r_2exp,
+              mpz_fdiv_q_2exp, mpz_fdiv_r_2exp,
+              mpz_tdiv_q_2exp, mpz_tdiv_r_2exp
+            };
+
+          mini_random_bit_op (ops[k], MAXBITS, a, &b, ref);
+          div[k] (res, a, b);
+          if (mpz_cmp (ref, res) == 0)
+            continue;
+
+          fprintf (stderr, "mpz_%s_2exp failed:\n", name[k]);
+          dump ("a", a);
+          fprintf (stderr, "b: %lu\n", b);
+          dump ("r", res);
+          dump ("ref", ref);
+          abort ();
+        }
+    }
+  mpz_clear (a);
+  mpz_clear (res);
+  mpz_clear (ref);
+}
--- /dev/null
+/*
+
+Copyright 2012, 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
+
+#include <limits.h>
+#include <math.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "testutils.h"
+
+#define GMP_LIMB_BITS (sizeof(mp_limb_t) * CHAR_BIT)
+
+#define COUNT 10000
+
+/* Print "LABEL: X" on stderr with X in hexadecimal.  The string comes
+   from mpz_get_str and is handed to testfree afterwards.  */
+static void
+dump (const char *label, const mpz_t x)
+{
+  char *hex;
+
+  hex = mpz_get_str (NULL, 16, x);
+  fprintf (stderr, "%s: %s\n", label, hex);
+  testfree (hex);
+}
+
+static const struct
+{
+ double d; /* input handed to mpz_init_set_d */
+ const char *s; /* expected integer, as a base-16 string (testmain
+                   compares against mpz_get_str (NULL, 16, x)) */
+} values[] = {
+ { 0.0, "0" },
+ { 0.3, "0" }, /* fractions are expected to be dropped ... */
+ { -0.3, "0" }, /* ... towards zero, for either sign */
+ { M_PI, "3" },
+ { M_PI*1e15, "b29430a256d21" },
+ { -M_PI*1e15, "-b29430a256d21" },
+ /* 17 * 2^{200} =
+ 27317946752402834684213355569799764242877450894307478200123392 */
+ {0.2731794675240283468421335556979976424288e62,
+ "1100000000000000000000000000000000000000000000000000" },
+ { 0.0, NULL } /* terminator: testmain stops at the first NULL s */
+};
+
+void
+testmain (int argc, char **argv)
+{ /* Two parts: first the fixed table "values" is run through
+     mpz_init_set_d, then COUNT random doubles are run through
+     mpz_set_d, mpz_get_d and mpz_cmp_d.  argc and argv are unused.  */
+ unsigned i;
+ mpz_t x;
+
+ for (i = 0; values[i].s; i++) /* stops at the { 0.0, NULL } terminator */
+ {
+ char *s;
+ mpz_init_set_d (x, values[i].d);
+ s = mpz_get_str (NULL, 16, x); /* hex, to match the table's strings */
+ if (strcmp (s, values[i].s) != 0)
+ {
+ fprintf (stderr, "mpz_set_d failed:\n"
+ "d = %.20g\n"
+ "s = %s\n"
+ "r = %s\n",
+ values[i].d, s, values[i].s); /* s = result obtained, r = reference */
+ abort ();
+ }
+ testfree (s);
+ mpz_clear (x); /* x is initialised afresh on every pass */
+ }
+
+ mpz_init (x);
+
+ for (i = 0; i < COUNT; i++)
+ {
+ /* Use volatile, to avoid extended precision in floating point
+ registers, e.g., on m68k and 80387. */
+ volatile double d, f;
+ unsigned long m;
+ int e;
+
+ mini_rrandomb (x, GMP_LIMB_BITS); /* random mantissa, taken via mpz_get_ui */
+ m = mpz_get_ui (x);
+ mini_urandomb (x, 8); /* random 8-bit value, shifted down by 100 to give the exponent */
+ e = mpz_get_ui (x) - 100;
+
+ d = ldexp ((double) m, e); /* d = m * 2^e, never negative since m is unsigned */
+ mpz_set_d (x, d);
+ f = mpz_get_d (x);
+ if (f != floor (d)) /* for d >= 0 the integer part is floor (d) */
+ {
+ fprintf (stderr, "mpz_set_d/mpz_get_d failed:\n");
+ goto dumperror;
+ }
+ if ((f == d) ? (mpz_cmp_d (x, d) != 0) : (mpz_cmp_d (x, d) >= 0)) /* x == d when d is integral, otherwise x < d */
+ {
+ fprintf (stderr, "mpz_cmp_d (x, d) failed:\n");
+ goto dumperror;
+ }
+ f = d + 1.0;
+ if (f > d && ! (mpz_cmp_d (x, f) < 0)) /* checked only when adding 1.0 really changed the value */
+ {
+ fprintf (stderr, "mpz_cmp_d (x, f) failed:\n");
+ goto dumperror;
+ }
+
+ d = - d; /* repeat the same checks, mirrored, for the negated value */
+
+ mpz_set_d (x, d);
+ f = mpz_get_d (x);
+ if (f != ceil (d)) /* for d <= 0 the integer part is ceil (d) */
+ {
+ fprintf (stderr, "mpz_set_d/mpz_get_d failed:\n");
+ dumperror: /* shared failure exit: the other checks in this loop
+               print their own headline and then jump into this block */
+ dump ("x", x);
+ fprintf (stderr, "m = %lx, e = %i\n", m, e);
+ fprintf (stderr, "d = %.15g\n", d);
+ fprintf (stderr, "f = %.15g\n", f);
+ fprintf (stderr, "f - d = %.5g\n", f - d);
+ abort ();
+ }
+ if ((f == d) ? (mpz_cmp_d (x, d) != 0) : (mpz_cmp_d (x, d) <= 0)) /* x == d when d is integral, otherwise x > d */
+ {
+ fprintf (stderr, "mpz_cmp_d (x, d) failed:\n");
+ goto dumperror;
+ }
+ f = d - 1.0;
+ if (f < d && ! (mpz_cmp_d (x, f) > 0)) /* checked only when subtracting 1.0 really changed the value */
+ {
+ fprintf (stderr, "mpz_cmp_d (x, f) failed:\n");
+ goto dumperror;
+ }
+ }
+
+ mpz_clear (x);
+}
--- /dev/null
+/*
+
+Copyright 2012, Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
+
+#include <limits.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "testutils.h"
+
+#define MAXBITS 400
+#define COUNT 10000
+
+/* Print "LABEL: X" on stderr with X in hexadecimal.  The string comes
+   from mpz_get_str and is handed to testfree afterwards.  */
+static void
+dump (const char *label, const mpz_t x)
+{
+  char *hex;
+
+  hex = mpz_get_str (NULL, 16, x);
+  fprintf (stderr, "%s: %s\n", label, hex);
+  testfree (hex);
+}
+
+/* Called when g is supposed to be gcd(a,b), and g = s a + t b.
+
+   Returns 1 if (g, s, t) is an acceptable result for the inputs (a, b)
+   and 0 otherwise.  Acceptable means: g >= 0; the special forms
+   described below when a or b is zero; and otherwise g > 0,
+   g == s a + t b, g divides both a and b, and the cofactors are small
+   (2|s| <= |b/g| or |s| <= 1, and likewise for t against a/g).
+
+   Every exit taken after the temporaries have been initialised releases
+   them, including the final cofactor-bound failure.  */
+static int
+gcdext_valid_p (const mpz_t a, const mpz_t b,
+                const mpz_t g, const mpz_t s, const mpz_t t)
+{
+  mpz_t ta, tb, r;
+
+  /* It's not clear that gcd(0,0) is well defined, but we allow it and
+     require that gcd(0,0) = 0. */
+  if (mpz_sgn (g) < 0)
+    return 0;
+
+  if (mpz_sgn (a) == 0)
+    {
+      /* Must have g == abs (b). Any value for s is in some sense "correct",
+         but it makes sense to require that s == 0. */
+      return mpz_cmpabs (g, b) == 0 && mpz_sgn (s) == 0;
+    }
+  else if (mpz_sgn (b) == 0)
+    {
+      /* Must have g == abs (a), s == sign (a) */
+      return mpz_cmpabs (g, a) == 0 && mpz_cmp_si (s, mpz_sgn (a)) == 0;
+    }
+
+  /* Both inputs are non-zero, so the gcd must be strictly positive.  */
+  if (mpz_sgn (g) <= 0)
+    return 0;
+
+  mpz_init (ta);
+  mpz_init (tb);
+  mpz_init (r);
+
+  /* Check the linear combination: s a + t b == g.  */
+  mpz_mul (ta, s, a);
+  mpz_mul (tb, t, b);
+  mpz_add (ta, ta, tb);
+
+  if (mpz_cmp (ta, g) != 0)
+    {
+    fail:
+      mpz_clear (ta);
+      mpz_clear (tb);
+      mpz_clear (r);
+      return 0;
+    }
+
+  /* g must divide a; ta is left holding a/g.  */
+  mpz_tdiv_qr (ta, r, a, g);
+  if (mpz_sgn (r) != 0)
+    goto fail;
+
+  /* g must divide b; tb is left holding b/g.  */
+  mpz_tdiv_qr (tb, r, b, g);
+  if (mpz_sgn (r) != 0)
+    goto fail;
+
+  /* Require that 2 |s| <= |b/g|, or |s| <= 1. */
+  if (mpz_cmpabs_ui (s, 1) > 0)
+    {
+      mpz_mul_2exp (r, s, 1);
+      if (mpz_cmpabs (r, tb) > 0)
+        goto fail;
+    }
+
+  /* Require that 2 |t| <= |a/g|, or |t| <= 1. */
+  if (mpz_cmpabs_ui (t, 1) > 0)
+    {
+      mpz_mul_2exp (r, t, 1);
+      if (mpz_cmpabs (r, ta) > 0)
+        goto fail;
+    }
+
+  mpz_clear (ta);
+  mpz_clear (tb);
+  mpz_clear (r);
+
+  return 1;
+}
+
+/* Two rounds of COUNT tests each.
+
+   Round 1: mpz_gcd against the reference value mini_random_op3 returns
+   for OP_GCD.
+
+   Round 2: mpz_gcdext on random operands (occasionally forced to share
+   a large factor, and with random signs), validated by gcdext_valid_p;
+   mpz_gcd on the same operands must then give the same g.  */
+void
+testmain (int argc, char **argv)
+{
+  unsigned iter;
+  mpz_t a, b, g, s, t;
+
+  mpz_init (a);
+  mpz_init (b);
+  mpz_init (g);
+  mpz_init (s);
+  mpz_init (t);
+
+  for (iter = 0; iter < COUNT; iter++)
+    {
+      mini_random_op3 (OP_GCD, MAXBITS, a, b, s);
+      mpz_gcd (g, a, b);
+      if (mpz_cmp (g, s) == 0)
+        continue;
+
+      fprintf (stderr, "mpz_gcd failed:\n");
+      dump ("a", a);
+      dump ("b", b);
+      dump ("r", g);
+      dump ("ref", s);
+      abort ();
+    }
+
+  for (iter = 0; iter < COUNT; iter++)
+    {
+      unsigned flags;
+
+      /* 32 random bits steer the shape of this test case */
+      mini_urandomb (a, 32);
+      flags = mpz_get_ui (a);
+
+      mini_rrandomb (a, MAXBITS);
+      mini_rrandomb (b, MAXBITS);
+
+      /* Now and then make one operand a multiple of the other */
+      if (flags % 37 == 0)
+        mpz_mul (a, a, b);
+      if (flags % 37 == 1)
+        mpz_mul (b, a, b);
+
+      /* Random signs */
+      if (flags & 1)
+        mpz_neg (a, a);
+      if (flags & 2)
+        mpz_neg (b, b);
+
+      mpz_gcdext (g, s, t, a, b);
+      if (!gcdext_valid_p (a, b, g, s, t))
+        {
+          fprintf (stderr, "mpz_gcdext failed:\n");
+          dump ("a", a);
+          dump ("b", b);
+          dump ("g", g);
+          dump ("s", s);
+          dump ("t", t);
+          abort ();
+        }
+
+      /* s is reused for the plain gcd, which must agree with g */
+      mpz_gcd (s, a, b);
+      if (mpz_cmp (g, s))
+        {
+          fprintf (stderr, "mpz_gcd failed:\n");
+          dump ("a", a);
+          dump ("b", b);
+          dump ("r", g);
+          dump ("ref", s);
+          abort ();
+        }
+    }
+  mpz_clear (a);
+  mpz_clear (b);
+  mpz_clear (g);
+  mpz_clear (s);
+  mpz_clear (t);
+}
--- /dev/null
+/*
+
+Copyright 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "testutils.h"
+
+#define MAX_WORDS 20
+#define MAX_WORD_SIZE 10
+
+/* Print "LABEL: X" on stderr with X in hexadecimal.  The string comes
+   from mpz_get_str and is handed to testfree afterwards.  */
+static void
+dump (const char *label, const mpz_t x)
+{
+  char *hex;
+
+  hex = mpz_get_str (NULL, 16, x);
+  fprintf (stderr, "%s: %s\n", label, hex);
+  testfree (hex);
+}
+
+/* Print "LABEL:" followed by the N bytes at S as two-digit hex values
+   on stderr, starting a new line after every 16 bytes.  */
+static void
+dump_bytes (const char *label, const unsigned char *s, size_t n)
+{
+  size_t pos = 0;
+
+  fprintf (stderr, "%s:", label);
+  while (pos < n)
+    {
+      /* break the line before bytes 16, 32, ... but not before byte 0 */
+      if (pos != 0 && pos % 16 == 0)
+        fprintf (stderr, "\n");
+      fprintf (stderr, " %02x", s[pos]);
+      pos++;
+    }
+  fprintf (stderr, "\n");
+}
+
+/* Tests both mpz_import and mpz_export.
+
+   For every combination of word size (0..MAX_WORD_SIZE), word count
+   (0..MAX_WORDS), endianness (-1, 0, 1) and word order (-1, 1),
+   mini_rrandomb_export is expected to supply a value A together with
+   its byte image INPUT and word count IN_COUNT.  Then:
+     - mpz_import of INPUT must give A back;
+     - mpz_export of A must produce IN_COUNT words identical to INPUT,
+       and must leave the guard bytes (value 17) placed immediately
+       before and after the export area untouched.
+
+   The mpz_import failure message reports only in_count: out_count has
+   not been written by mpz_export at that point (on the first pass it
+   is not initialised at all).  */
+void
+testmain (int argc, char **argv)
+{
+  unsigned char input[MAX_WORDS * MAX_WORD_SIZE];
+  unsigned char output[MAX_WORDS * MAX_WORD_SIZE + 2];
+  size_t count, in_count, out_count, size;
+  int endian, order;
+
+  mpz_t a, res;
+
+  mpz_init (a);
+  mpz_init (res);
+
+  for (size = 0; size <= MAX_WORD_SIZE; size++)
+    for (count = 0; count <= MAX_WORDS; count++)
+      for (endian = -1; endian <= 1; endian++)
+        for (order = -1; order <= 1; order += 2)
+          {
+            mini_rrandomb_export (a, input, &in_count,
+                                  order, size, endian, size*count * 8);
+            mpz_import (res, in_count, order, size, endian, 0, input);
+            if (mpz_cmp (a, res))
+              {
+                fprintf (stderr, "mpz_import failed:\n"
+                         "in_count %lu, endian = %d, order = %d\n",
+                         (unsigned long) in_count, endian, order);
+                dump ("a", a);
+                dump ("res", res);
+                abort ();
+              }
+
+            /* Guard bytes on both sides of the area mpz_export may use */
+            output[0] = 17;
+            output[1+in_count*size] = 17;
+
+            mpz_export (output+1, &out_count, order, size, endian, 0, a);
+            if (out_count != in_count
+                || memcmp (output+1, input, in_count * size)
+                || output[0] != 17
+                || output[1+in_count*size] != 17)
+              {
+                fprintf (stderr, "mpz_export failed:\n"
+                         "in_count %lu, out_count %lu, endian = %d, order = %d\n",
+                         (unsigned long) in_count, (unsigned long) out_count, endian, order);
+                dump_bytes ("input", input, in_count * size);
+                dump_bytes ("output", output+1, out_count * size);
+                if (output[0] != 17)
+                  fprintf (stderr, "Overwrite at -1, value %02x\n", output[0]);
+                if (output[1+in_count*size] != 17)
+                  fprintf (stderr, "Overwrite at %lu, value %02x\n",
+                           (unsigned long) (in_count*size), output[1+in_count*size]);
+
+                abort ();
+              }
+          }
+  mpz_clear (a);
+  mpz_clear (res);
+}
--- /dev/null
+/*
+
+Copyright 2012, Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
+
+#include <limits.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "testutils.h"
+
+#define GMP_LIMB_BITS (sizeof(mp_limb_t) * CHAR_BIT)
+
+#define COUNT 10000
+
+/* Print "LABEL: X" on stderr with X in hexadecimal.  The string comes
+   from mpz_get_str and is handed to testfree afterwards.  */
+static void
+dump (const char *label, const mpz_t x)
+{
+  char *hex;
+
+  hex = mpz_get_str (NULL, 16, x);
+  fprintf (stderr, "%s: %s\n", label, hex);
+  testfree (hex);
+}
+
+/* Check mpn_invert_limb and mpn_invert_3by2 on COUNT random inputs
+   each.  With B = 2^GMP_LIMB_BITS, u a divisor whose top bit is set,
+   and m = B + (value returned), the test requires
+     0 < B^2 - m u <= u   for the one-limb u of mpn_invert_limb, and
+     0 < B^3 - m u <= u   for the two-limb u of mpn_invert_3by2.  */
+void
+testmain (int argc, char **argv)
+{
+  unsigned iter;
+  mpz_t u, m, p, t;
+
+  mpz_init (u);
+  mpz_init (m);
+  mpz_init (p);
+  mpz_init (t);
+
+  for (iter = 0; iter < COUNT; iter++)
+    {
+      /* one-limb u with its most significant bit set */
+      mini_urandomb (u, GMP_LIMB_BITS);
+      mpz_setbit (u, GMP_LIMB_BITS -1);
+
+      /* m = B + mpn_invert_limb (u) */
+      mpz_set_ui (m, mpn_invert_limb (u->_mp_d[0]));
+      mpz_setbit (m, GMP_LIMB_BITS);
+
+      mpz_mul (p, m, u);
+
+      /* t = B^2 - m u */
+      mpz_set_ui (t, 0);
+      mpz_setbit (t, 2* GMP_LIMB_BITS);
+      mpz_sub (t, t, p);
+
+      /* Should have 0 < B^2 - m u <= u */
+      if (mpz_sgn (t) > 0 && mpz_cmp (t, u) <= 0)
+        continue;
+
+      fprintf (stderr, "mpn_invert_limb failed:\n");
+      dump ("u", u);
+      dump ("m", m);
+      dump ("p", p);
+      dump ("t", t);
+      abort ();
+    }
+
+  for (iter = 0; iter < COUNT; iter++)
+    {
+      /* two-limb u with its most significant bit set */
+      mini_urandomb (u, 2*GMP_LIMB_BITS);
+      mpz_setbit (u, 2*GMP_LIMB_BITS -1);
+
+      /* m = B + mpn_invert_3by2 (high limb, low limb) */
+      mpz_set_ui (m, mpn_invert_3by2 (u->_mp_d[1], u->_mp_d[0]));
+
+      mpz_setbit (m, GMP_LIMB_BITS);
+
+      mpz_mul (p, m, u);
+
+      /* t = B^3 - m u */
+      mpz_set_ui (t, 0);
+      mpz_setbit (t, 3 * GMP_LIMB_BITS);
+      mpz_sub (t, t, p);
+
+      /* Should have 0 < B^3 - m u <= u */
+      if (mpz_sgn (t) > 0 && mpz_cmp (t, u) <= 0)
+        continue;
+
+      fprintf (stderr, "mpn_invert_3by2 failed:\n");
+      dump ("u", u);
+      dump ("m", m);
+      dump ("p", p);
+      dump ("t", t);
+      abort ();
+    }
+
+  mpz_clear (u);
+  mpz_clear (m);
+  mpz_clear (p);
+  mpz_clear (t);
+}
--- /dev/null
+/*
+
+Copyright 2012, Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
+
+#include <limits.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "testutils.h"
+
+#define MAXBITS 400
+#define COUNT 10000
+
+/* Print "LABEL: X" on stderr with X in hexadecimal.  The string comes
+   from mpz_get_str and is handed to testfree afterwards.  */
+static void
+dump (const char *label, const mpz_t x)
+{
+  char *hex;
+
+  hex = mpz_get_str (NULL, 16, x);
+  fprintf (stderr, "%s: %s\n", label, hex);
+  testfree (hex);
+}
+
+/* COUNT rounds: check mpz_lcm against the reference value that
+   mini_random_op3 returns for OP_LCM and, whenever b fits an unsigned
+   long, check mpz_lcm_ui against the same reference.  */
+void
+testmain (int argc, char **argv)
+{
+  unsigned iter;
+  mpz_t a, b, g, s;
+
+  mpz_init (a);
+  mpz_init (b);
+  mpz_init (g);
+  mpz_init (s);
+
+  for (iter = 0; iter < COUNT; iter++)
+    {
+      mini_random_op3 (OP_LCM, MAXBITS, a, b, s);
+      mpz_lcm (g, a, b);
+      if (mpz_cmp (g, s))
+        {
+          fprintf (stderr, "mpz_lcm failed:\n");
+          dump ("a", a);
+          dump ("b", b);
+          dump ("r", g);
+          dump ("ref", s);
+          abort ();
+        }
+
+      if (!mpz_fits_ulong_p (b))
+        continue;
+
+      /* Wipe the previous result so mpz_lcm_ui has to produce it anew */
+      mpz_set_si (g, 0);
+      mpz_lcm_ui (g, a, mpz_get_ui (b));
+      if (mpz_cmp (g, s))
+        {
+          fprintf (stderr, "mpz_lcm_ui failed:\n");
+          dump ("a", a);
+          dump ("b", b);
+          dump ("r", g);
+          dump ("ref", s);
+          abort ();
+        }
+    }
+
+  mpz_clear (a);
+  mpz_clear (b);
+  mpz_clear (g);
+  mpz_clear (s);
+}
--- /dev/null
+/*
+
+Copyright 2012, 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
+
+#include <limits.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "testutils.h"
+
+#define MAXBITS 400
+#define COUNT 10000
+
+/* Print "LABEL: X" on stderr with X in hexadecimal.  The string comes
+   from mpz_get_str and is handed to testfree afterwards.  */
+static void
+dump (const char *label, const mpz_t x)
+{
+  char *hex;
+
+  hex = mpz_get_str (NULL, 16, x);
+  fprintf (stderr, "%s: %s\n", label, hex);
+  testfree (hex);
+}
+
+void
+testlogops (int count)
+{ /* Runs COUNT rounds.  Each round checks mpz_and, mpz_ior and mpz_xor
+     against reference values from mini_random_op3, then cross-checks
+     mpz_popcount against mpz_hamdist using the xor result.  */
+ unsigned i;
+ mpz_t a, b, res, ref;
+ mp_bitcnt_t c; /* correction added to popcount(res) in the final check */
+
+ mpz_init (a);
+ mpz_init (b);
+ mpz_init (res);
+ mpz_init (ref);
+
+ for (i = 0; i < count; i++)
+ {
+ mini_random_op3 (OP_AND, MAXBITS, a, b, ref);
+ mpz_and (res, a, b);
+ if (mpz_cmp (res, ref))
+ {
+ fprintf (stderr, "mpz_and failed:\n");
+ dump ("a", a);
+ dump ("b", b);
+ dump ("r", res);
+ dump ("ref", ref);
+ abort ();
+ }
+
+ mini_random_op3 (OP_IOR, MAXBITS, a, b, ref);
+ mpz_ior (res, a, b);
+ if (mpz_cmp (res, ref))
+ {
+ fprintf (stderr, "mpz_ior failed:\n");
+ dump ("a", a);
+ dump ("b", b);
+ dump ("r", res);
+ dump ("ref", ref);
+ abort ();
+ }
+
+ mini_random_op3 (OP_XOR, MAXBITS, a, b, ref);
+ mpz_xor (res, a, b);
+ if (mpz_cmp (res, ref))
+ {
+ fprintf (stderr, "mpz_xor failed:\n");
+ dump ("a", a);
+ dump ("b", b);
+ dump ("r", res);
+ dump ("ref", ref);
+ abort ();
+ }
+
+ if (i % 8) { /* res == a XOR b here; each branch below alters res and
+                 sets c so that popcount (res) + c is still expected to
+                 equal hamdist (a, b) */
+ c = 0;
+ mpz_mul_2exp (res, res, i % 8); /* a left shift only appends zero bits */
+ } else if (mpz_sgn (res) >= 0) {
+ c = mpz_odd_p (res) != 0; /* c remembers the low bit that the shift below drops */
+ mpz_tdiv_q_2exp (res, res, 1);
+ } else { /* res < 0: replace it by a value with exactly three bits set
+            (11 is binary 1011) and let c make up the difference to
+            the all-ones mp_bitcnt_t; presumably that is what
+            mpz_hamdist yields for operands of opposite sign -- see the
+            GMP manual */
+ c = (~ (mp_bitcnt_t) 0) - 3;
+ mpz_set_ui (res, 11 << ((i >> 3)%4)); /* set 3 bits */
+ }
+
+ if (mpz_popcount (res) + c != mpz_hamdist (a, b))
+ {
+ fprintf (stderr, "mpz_popcount(r) + %lu and mpz_hamdist(a,b) differ:\n", c);
+ dump ("a", a);
+ dump ("b", b);
+ dump ("r", res);
+ fprintf (stderr, "mpz_popcount(r) = %lu:\n", mpz_popcount (res));
+ fprintf (stderr, "mpz_hamdist(a,b) = %lu:\n", mpz_hamdist (a, b));
+ abort ();
+ }
+ }
+ mpz_clear (a);
+ mpz_clear (b);
+ mpz_clear (res);
+ mpz_clear (ref);
+}
+
+/* Test entry point: two thirds of COUNT go through testhalves, the
+   remaining third is run by calling testlogops directly.  */
+void
+testmain (int argc, char **argv)
+{
+  testhalves (COUNT*2/3, testlogops);
+
+  testlogops (COUNT/3);
+}
--- /dev/null
+/*
+
+Copyright 2012, Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
+
+#include <limits.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "testutils.h"
+
+/* Size argument handed to mini_random_op3 / mini_random_op2.  */
+#define MAXBITS 400
+/* Number of iterations of the main test loop.  */
+#define COUNT 10000
+
+/* Bits per limb, computed from the size of mp_limb_t.  */
+#define GMP_LIMB_BITS (sizeof(mp_limb_t) * CHAR_BIT)
+/* MAXBITS divided by GMP_LIMB_BITS, rounded up; sizes the scratch array.  */
+#define MAXLIMBS ((MAXBITS + GMP_LIMB_BITS - 1) / GMP_LIMB_BITS)
+
+/* Write "LABEL: VALUE" to stderr, with the value of X rendered in base 16.
+   The string returned by mpz_get_str is released with testfree.  */
+static void
+dump (const char *label, const mpz_t x)
+{
+  char *hex;
+
+  hex = mpz_get_str (NULL, 16, x);
+  fprintf (stderr, "%s: %s\n", label, hex);
+  testfree (hex);
+}
+
+/* Test entry point.  Each of the COUNT iterations checks, against the
+   reference values produced by the mini_random_op helpers:
+     - mpz_mul;
+     - mpn_mul_n, when both operands have the same number of limbs;
+     - mpz_mul_si, when the second operand fits in a signed long;
+     - mpn_sqr.
+   Any mismatch dumps the operands to stderr and aborts.  */
+void
+testmain (int argc, char **argv)
+{
+  unsigned i;
+  mpz_t a, b, res, res_ui, ref;
+  mp_limb_t t[2*MAXLIMBS];
+  mp_size_t an, rn;
+
+  mpz_init (a);
+  mpz_init (b);
+  mpz_init (res);
+  mpz_init (res_ui);
+  mpz_init (ref);
+
+  for (i = 0; i < COUNT; i++)
+    {
+      mini_random_op3 (OP_MUL, MAXBITS, a, b, ref);
+      mpz_mul (res, a, b);
+      if (mpz_cmp (res, ref))
+        {
+          fprintf (stderr, "mpz_mul failed:\n");
+          dump ("a", a);
+          dump ("b", b);
+          dump ("r", res);
+          dump ("ref", ref);
+          abort ();
+        }
+      if (mpz_size (a) == mpz_size (b))
+        {
+          memset (t, 0, sizeof(t));
+          an = mpz_size (a);
+          if (an > 0)
+            {
+              mpn_mul_n (t, a->_mp_d, b->_mp_d, an);
+              /* Normalize using the top limb of the mpn result itself.
+                 Looking at res instead would read a limb that may lie
+                 above the used size of res, and would not examine what
+                 mpn_mul_n actually wrote.  */
+              rn = 2*an - (t[2*an-1] == 0);
+              if (rn != mpz_size (ref) || mpn_cmp (t, ref->_mp_d, rn))
+                {
+                  fprintf (stderr, "mpn_mul_n failed:\n");
+                  dump ("a", a);
+                  dump ("b", b);
+                  dump ("ref", ref);
+                  abort ();
+                }
+            }
+        }
+      if (mpz_fits_slong_p (b))
+        {
+          mpz_mul_si (res_ui, a, mpz_get_si (b));
+          if (mpz_cmp (res_ui, ref))
+            {
+              fprintf (stderr, "mpz_mul_si failed:\n");
+              dump ("a", a);
+              dump ("b", b);
+              dump ("r", res_ui);
+              dump ("ref", ref);
+              abort ();
+            }
+        }
+      mini_random_op2 (OP_SQR, MAXBITS, a, ref);
+      an = mpz_size (a);
+      if (an > 0)
+        {
+          memset (t, 0, sizeof(t));
+          mpn_sqr (t, a->_mp_d, an);
+
+          /* The product of an limbs by an limbs occupies 2*an limbs, of
+             which the top one may be zero.  */
+          rn = 2*an - (t[2*an-1] == 0);
+          if (rn != mpz_size (ref) || mpn_cmp (t, ref->_mp_d, rn))
+            {
+              fprintf (stderr, "mpn (squaring) failed:\n");
+              dump ("a", a);
+              dump ("ref", ref);
+              abort ();
+            }
+        }
+    }
+  mpz_clear (a);
+  mpz_clear (b);
+  mpz_clear (res);
+  mpz_clear (res_ui);
+  mpz_clear (ref);
+}
--- /dev/null
+/*
+
+Copyright 2012, Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
+
+#include <limits.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "testutils.h"
+
+/* Size argument handed to mini_random_op4 when generating operands.  */
+#define MAXBITS 400
+/* Number of iterations of the test loop.  */
+#define COUNT 1000
+
+/* Write "LABEL: VALUE" to stderr, with the value of X rendered in base 16.
+   The string returned by mpz_get_str is released with testfree.  */
+static void
+dump (const char *label, const mpz_t x)
+{
+  char *hex;
+
+  hex = mpz_get_str (NULL, 16, x);
+  fprintf (stderr, "%s: %s\n", label, hex);
+  testfree (hex);
+}
+
+/* Test entry point: compare mpz_powm with the reference value produced by
+   mini_random_op4 for COUNT random (base, exponent, modulus) triples.
+   The first mismatch is dumped to stderr and aborts the process.  */
+void
+testmain (int argc, char **argv)
+{
+  mpz_t b, e, m, res, ref;
+  unsigned round;
+
+  mpz_init (b);
+  mpz_init (e);
+  mpz_init (m);
+  mpz_init (res);
+  mpz_init (ref);
+
+  for (round = 0; round < COUNT; round++)
+    {
+      mini_random_op4 (OP_POWM, MAXBITS, b, e, m, ref);
+      mpz_powm (res, b, e, m);
+      if (mpz_cmp (res, ref) == 0)
+        continue;
+
+      fprintf (stderr, "mpz_powm failed:\n");
+      dump ("b", b);
+      dump ("e", e);
+      dump ("m", m);
+      dump ("r", res);
+      dump ("ref", ref);
+      abort ();
+    }
+
+  mpz_clear (b);
+  mpz_clear (e);
+  mpz_clear (m);
+  mpz_clear (res);
+  mpz_clear (ref);
+}
--- /dev/null
+/* Test that routines allow reusing a source variable as destination.
+
+Copyright 1996, 1999, 2000, 2001, 2002, 2009, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "testutils.h"
+
+/* Number of passes of the main loop in testmain.  */
+#define COUNT 100
+
+/* Defined after testmain in this file.  */
+void dump (const char *, mpz_t, mpz_t, mpz_t);
+void mpz_check_format (const mpz_t);
+
+/* Signatures of the function families exercised by the tables below.  In
+   each, the leading non-const mpz_t parameters are outputs and the
+   remaining parameters are inputs.  */
+typedef void (*dss_func) (mpz_t, const mpz_t, const mpz_t);
+typedef void (*dsi_func) (mpz_t, const mpz_t, unsigned long int);
+typedef unsigned long int (*dsi_div_func) (mpz_t, const mpz_t, unsigned long int);
+typedef unsigned long int (*ddsi_div_func) (mpz_t, mpz_t, const mpz_t, unsigned long int);
+typedef void (*ddss_div_func) (mpz_t, mpz_t, const mpz_t, const mpz_t);
+typedef void (*ds_func) (mpz_t, const mpz_t);
+
+
+/* mpz_invert adapted to the dss_func signature: the return value is
+   dropped, and R is set to zero whenever mpz_invert returns 0.  */
+void
+mpz_xinvert (mpz_t r, const mpz_t a, const mpz_t b)
+{
+  if (!mpz_invert (r, a, b))
+    mpz_set_ui (r, 0);
+}
+
+/* Functions taking one output and two mpz inputs.  The FAIL macro zeroes
+   an entry after a failure, and testmain skips zeroed entries.  */
+dss_func dss_funcs[] =
+{
+ mpz_add, mpz_sub, mpz_mul,
+ mpz_cdiv_q, mpz_cdiv_r, mpz_fdiv_q, mpz_fdiv_r, mpz_tdiv_q, mpz_tdiv_r,
+ mpz_xinvert,
+ mpz_gcd, mpz_lcm, mpz_and, mpz_ior, mpz_xor
+};
+/* Names printed on failure; must stay index-aligned with dss_funcs.  */
+const char *dss_func_names[] =
+{
+ "mpz_add", "mpz_sub", "mpz_mul",
+ "mpz_cdiv_q", "mpz_cdiv_r", "mpz_fdiv_q", "mpz_fdiv_r", "mpz_tdiv_q", "mpz_tdiv_r",
+ "mpz_xinvert",
+ "mpz_gcd", "mpz_lcm", "mpz_and", "mpz_ior", "mpz_xor"
+};
+/* Nonzero for the entries of dss_funcs that testmain skips when the second
+   operand is zero; index-aligned with dss_funcs.  */
+char dss_func_division[] = {0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0};
+
+/* Functions taking one output, one mpz input and one unsigned long.  The
+   FAIL macro zeroes an entry after a failure, and testmain skips zeroed
+   entries.  */
+dsi_func dsi_funcs[] =
+{
+ /* Don't change order here without changing the code in main(). */
+ mpz_add_ui, mpz_mul_ui, mpz_sub_ui,
+ mpz_fdiv_q_2exp, mpz_fdiv_r_2exp,
+ mpz_cdiv_q_2exp, mpz_cdiv_r_2exp,
+ mpz_tdiv_q_2exp, mpz_tdiv_r_2exp,
+ mpz_mul_2exp,
+ mpz_pow_ui
+};
+/* Names printed on failure; must stay index-aligned with dsi_funcs.
+   testmain also compares these names with strcmp to decide when to reduce
+   the integer operand.  */
+const char *dsi_func_names[] =
+{
+ "mpz_add_ui", "mpz_mul_ui", "mpz_sub_ui",
+ "mpz_fdiv_q_2exp", "mpz_fdiv_r_2exp",
+ "mpz_cdiv_q_2exp", "mpz_cdiv_r_2exp",
+ "mpz_tdiv_q_2exp", "mpz_tdiv_r_2exp",
+ "mpz_mul_2exp",
+ "mpz_pow_ui"
+};
+
+/* Division functions taking one output, one mpz input and an unsigned long
+   divisor, and returning an unsigned long.  The FAIL macro zeroes an entry
+   after a failure.  */
+dsi_div_func dsi_div_funcs[] =
+{
+ mpz_cdiv_q_ui, mpz_cdiv_r_ui,
+ mpz_fdiv_q_ui, mpz_fdiv_r_ui,
+ mpz_tdiv_q_ui, mpz_tdiv_r_ui
+};
+/* Names printed on failure; must stay index-aligned with dsi_div_funcs.  */
+const char *dsi_div_func_names[] =
+{
+ "mpz_cdiv_q_ui", "mpz_cdiv_r_ui",
+ "mpz_fdiv_q_ui", "mpz_fdiv_r_ui",
+ "mpz_tdiv_q_ui", "mpz_tdiv_r_ui"
+};
+
+/* Division functions taking two outputs, one mpz input and an unsigned
+   long divisor, and returning an unsigned long.  The FAIL macro zeroes an
+   entry after a failure.  */
+ddsi_div_func ddsi_div_funcs[] =
+{
+ mpz_cdiv_qr_ui,
+ mpz_fdiv_qr_ui,
+ mpz_tdiv_qr_ui
+};
+/* Names printed on failure; must stay index-aligned with ddsi_div_funcs.  */
+const char *ddsi_div_func_names[] =
+{
+ "mpz_cdiv_qr_ui",
+ "mpz_fdiv_qr_ui",
+ "mpz_tdiv_qr_ui"
+};
+
+/* Division functions taking two outputs and two mpz inputs.  The FAIL
+   macro zeroes an entry after a failure, and testmain skips zeroed
+   entries.  */
+ddss_div_func ddss_div_funcs[] =
+{
+ mpz_cdiv_qr,
+ mpz_fdiv_qr,
+ mpz_tdiv_qr
+};
+/* Names printed on failure; must stay index-aligned with ddss_div_funcs.  */
+const char *ddss_div_func_names[] =
+{
+ "mpz_cdiv_qr",
+ "mpz_fdiv_qr",
+ "mpz_tdiv_qr"
+};
+
+/* Functions taking one output and one mpz input.  The FAIL macro zeroes an
+   entry after a failure, and testmain skips zeroed entries.  */
+ds_func ds_funcs[] =
+{
+ mpz_abs, mpz_com, mpz_neg, mpz_sqrt
+};
+/* Names printed on failure; must stay index-aligned with ds_funcs.
+   testmain uses the "mpz_sqrt" name to skip negative operands.  */
+const char *ds_func_names[] =
+{
+ "mpz_abs", "mpz_com", "mpz_neg", "mpz_sqrt"
+};
+
+
+/* Record a failure of entry INDX in the CLASS##_funcs table: the entry is
+   zeroed, the operands are printed through dump under the name taken from
+   CLASS##_func_names, and the `failures' variable of the enclosing scope
+   is incremented.  */
+#define FAIL(class,indx,op1,op2,op3) \
+ do { \
+ class##_funcs[indx] = 0; \
+ dump (class##_func_names[indx], op1, op2, op3); \
+ failures++; \
+ } while (0)
+/* Record a failure of a function that is not in a table: the operands are
+   printed under the stringified FNAME and `failures' is incremented.  */
+#define FAIL2(fname,op1,op2,op3) \
+ do { \
+ dump (#fname, op1, op2, op3); \
+ failures++; \
+ } while (0)
+
+/* Test entry point.  For COUNT passes, generate three random operands
+   (in1, in2, in3) with random signs and check, for every function under
+   test, that calling it with an output variable that is also one of the
+   inputs gives the same result as calling it with distinct variables.
+   Every output is also passed through mpz_check_format.
+
+   Mismatches are counted rather than aborting; at the end a summary is
+   printed to stderr and the process exits with status 1 if there was any
+   failure.  A table entry that fails is zeroed by FAIL and skipped in
+   later passes.  */
+void
+testmain (int argc, char **argv)
+{
+  int i;
+  int pass, reps = COUNT;
+  mpz_t in1, in2, in3;
+  unsigned long int in2i;
+  mp_size_t size;
+  mpz_t res1, res2, res3;
+  mpz_t ref1, ref2, ref3;
+  mpz_t t;
+  unsigned long int r1, r2;
+  long failures = 0;
+  mpz_t bs;
+  unsigned long bsi, size_range;
+
+  mpz_init (bs);
+
+  mpz_init (in1);
+  mpz_init (in2);
+  mpz_init (in3);
+  mpz_init (ref1);
+  mpz_init (ref2);
+  mpz_init (ref3);
+  mpz_init (res1);
+  mpz_init (res2);
+  mpz_init (res3);
+  mpz_init (t);
+
+  for (pass = 1; pass <= reps; pass++)
+    {
+      /* Pick a size range for this pass, then three operand sizes.  */
+      mini_urandomb (bs, 32);
+      size_range = mpz_get_ui (bs) % 12 + 2;
+
+      mini_urandomb (bs, size_range);
+      size = mpz_get_ui (bs);
+      mini_rrandomb (in1, size);
+
+      mini_urandomb (bs, size_range);
+      size = mpz_get_ui (bs);
+      mini_rrandomb (in2, size);
+
+      mini_urandomb (bs, size_range);
+      size = mpz_get_ui (bs);
+      mini_rrandomb (in3, size);
+
+      /* Three random bits select which operands are negated.  */
+      mini_urandomb (bs, 3);
+      bsi = mpz_get_ui (bs);
+      if ((bsi & 1) != 0)
+        mpz_neg (in1, in1);
+      if ((bsi & 2) != 0)
+        mpz_neg (in2, in2);
+      if ((bsi & 4) != 0)
+        mpz_neg (in3, in3);
+
+      for (i = 0; i < sizeof (dss_funcs) / sizeof (dss_func); i++)
+        {
+          if (dss_funcs[i] == 0)
+            continue;
+          if (dss_func_division[i] && mpz_sgn (in2) == 0)
+            continue;
+
+          (dss_funcs[i]) (ref1, in1, in2);
+          mpz_check_format (ref1);
+
+          mpz_set (res1, in1);
+          (dss_funcs[i]) (res1, res1, in2);
+          mpz_check_format (res1);
+          if (mpz_cmp (ref1, res1) != 0)
+            FAIL (dss, i, in1, in2, NULL);
+
+          mpz_set (res1, in2);
+          (dss_funcs[i]) (res1, in1, res1);
+          mpz_check_format (res1);
+          if (mpz_cmp (ref1, res1) != 0)
+            FAIL (dss, i, in1, in2, NULL);
+        }
+
+      for (i = 0; i < sizeof (ddss_div_funcs) / sizeof (ddss_div_func); i++)
+        {
+          if (ddss_div_funcs[i] == 0)
+            continue;
+          if (mpz_sgn (in2) == 0)
+            continue;
+
+          (ddss_div_funcs[i]) (ref1, ref2, in1, in2);
+          mpz_check_format (ref1);
+          mpz_check_format (ref2);
+
+          mpz_set (res1, in1);
+          (ddss_div_funcs[i]) (res1, res2, res1, in2);
+          mpz_check_format (res1);
+          mpz_check_format (res2);
+          if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0)
+            FAIL (ddss_div, i, in1, in2, NULL);
+
+          mpz_set (res2, in1);
+          (ddss_div_funcs[i]) (res1, res2, res2, in2);
+          mpz_check_format (res1);
+          mpz_check_format (res2);
+          if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0)
+            FAIL (ddss_div, i, in1, in2, NULL);
+
+          mpz_set (res1, in2);
+          (ddss_div_funcs[i]) (res1, res2, in1, res1);
+          mpz_check_format (res1);
+          mpz_check_format (res2);
+          if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0)
+            FAIL (ddss_div, i, in1, in2, NULL);
+
+          mpz_set (res2, in2);
+          (ddss_div_funcs[i]) (res1, res2, in1, res2);
+          mpz_check_format (res1);
+          mpz_check_format (res2);
+          if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0)
+            FAIL (ddss_div, i, in1, in2, NULL);
+        }
+
+      for (i = 0; i < sizeof (ds_funcs) / sizeof (ds_func); i++)
+        {
+          if (ds_funcs[i] == 0)
+            continue;
+          if (strcmp (ds_func_names[i], "mpz_sqrt") == 0
+              && mpz_sgn (in1) < 0)
+            continue;
+
+          (ds_funcs[i]) (ref1, in1);
+          mpz_check_format (ref1);
+
+          mpz_set (res1, in1);
+          (ds_funcs[i]) (res1, res1);
+          mpz_check_format (res1);
+          if (mpz_cmp (ref1, res1) != 0)
+            FAIL (ds, i, in1, in2, NULL);
+        }
+
+      in2i = mpz_get_ui (in2);
+
+      for (i = 0; i < sizeof (dsi_funcs) / sizeof (dsi_func); i++)
+        {
+          if (dsi_funcs[i] == 0)
+            continue;
+          /* The reductions below are cumulative: in2i stays reduced for
+             all later table entries and for the tests after this loop.  */
+          if (strcmp (dsi_func_names[i], "mpz_fdiv_q_2exp") == 0)
+            /* Limit exponent to something reasonable for the division
+               functions.  Without this, we'd normally shift things off
+               the end and just generate the trivial values 1, 0, -1.  */
+            in2i %= 0x1000;
+          if (strcmp (dsi_func_names[i], "mpz_mul_2exp") == 0)
+            /* Limit exponent more for mpz_mul_2exp to save time.  */
+            in2i %= 0x100;
+          if (strcmp (dsi_func_names[i], "mpz_pow_ui") == 0)
+            /* Limit exponent yet more for mpz_pow_ui to save time.  */
+            in2i %= 0x10;
+
+          (dsi_funcs[i]) (ref1, in1, in2i);
+          mpz_check_format (ref1);
+
+          mpz_set (res1, in1);
+          (dsi_funcs[i]) (res1, res1, in2i);
+          mpz_check_format (res1);
+          if (mpz_cmp (ref1, res1) != 0)
+            FAIL (dsi, i, in1, in2, NULL);
+        }
+
+      if (in2i != 0)	  /* Don't divide by 0.  */
+        {
+          /* The element count divides by the element type; dividing the
+             array size by itself would limit the loop to entry 0.  */
+          for (i = 0; i < sizeof (dsi_div_funcs) / sizeof (dsi_div_func); i++)
+            {
+              /* Entries zeroed by FAIL must not be called again.  */
+              if (dsi_div_funcs[i] == 0)
+                continue;
+
+              r1 = (dsi_div_funcs[i]) (ref1, in1, in2i);
+              mpz_check_format (ref1);
+
+              mpz_set (res1, in1);
+              r2 = (dsi_div_funcs[i]) (res1, res1, in2i);
+              mpz_check_format (res1);
+              if (mpz_cmp (ref1, res1) != 0 || r1 != r2)
+                FAIL (dsi_div, i, in1, in2, NULL);
+            }
+
+          for (i = 0; i < sizeof (ddsi_div_funcs) / sizeof (ddsi_div_func); i++)
+            {
+              /* Entries zeroed by FAIL must not be called again.  */
+              if (ddsi_div_funcs[i] == 0)
+                continue;
+
+              r1 = (ddsi_div_funcs[i]) (ref1, ref2, in1, in2i);
+              mpz_check_format (ref1);
+
+              mpz_set (res1, in1);
+              r2 = (ddsi_div_funcs[i]) (res1, res2, res1, in2i);
+              mpz_check_format (res1);
+              if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0 || r1 != r2)
+                {
+                  FAIL (ddsi_div, i, in1, in2, NULL);
+                  /* The entry is now zero; skip the second variant.  */
+                  continue;
+                }
+
+              mpz_set (res2, in1);
+              /* Capture the return value so the r1 != r2 comparison below
+                 refers to this call rather than the previous one.  */
+              r2 = (ddsi_div_funcs[i]) (res1, res2, res2, in2i);
+              mpz_check_format (res1);
+              if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0 || r1 != r2)
+                FAIL (ddsi_div, i, in1, in2, NULL);
+            }
+        }
+
+      if (mpz_sgn (in1) >= 0)
+        {
+          mpz_sqrtrem (ref1, ref2, in1);
+          mpz_check_format (ref1);
+          mpz_check_format (ref2);
+
+          mpz_set (res1, in1);
+          mpz_sqrtrem (res1, res2, res1);
+          mpz_check_format (res1);
+          mpz_check_format (res2);
+          if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0)
+            FAIL2 (mpz_sqrtrem, in1, NULL, NULL);
+
+          mpz_set (res2, in1);
+          mpz_sqrtrem (res1, res2, res2);
+          mpz_check_format (res1);
+          mpz_check_format (res2);
+          if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0)
+            FAIL2 (mpz_sqrtrem, in1, NULL, NULL);
+        }
+
+      if (mpz_sgn (in1) >= 0)
+        {
+          mpz_root (ref1, in1, in2i % 0x1000 + 1);
+          mpz_check_format (ref1);
+
+          mpz_set (res1, in1);
+          mpz_root (res1, res1, in2i % 0x1000 + 1);
+          mpz_check_format (res1);
+          if (mpz_cmp (ref1, res1) != 0)
+            FAIL2 (mpz_root, in1, in2, NULL);
+        }
+
+      if (mpz_sgn (in1) >= 0)
+        {
+          mpz_rootrem (ref1, ref2, in1, in2i % 0x1000 + 1);
+          mpz_check_format (ref1);
+          mpz_check_format (ref2);
+
+          mpz_set (res1, in1);
+          mpz_rootrem (res1, res2, res1, in2i % 0x1000 + 1);
+          mpz_check_format (res1);
+          mpz_check_format (res2);
+          if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0)
+            FAIL2 (mpz_rootrem, in1, in2, NULL);
+
+          mpz_set (res2, in1);
+          mpz_rootrem (res1, res2, res2, in2i % 0x1000 + 1);
+          mpz_check_format (res1);
+          mpz_check_format (res2);
+          if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0)
+            FAIL2 (mpz_rootrem, in1, in2, NULL);
+        }
+
+      if (pass < reps / 2)	/* run fewer tests since gcdext takes lots of time */
+        {
+          mpz_gcdext (ref1, ref2, ref3, in1, in2);
+          mpz_check_format (ref1);
+          mpz_check_format (ref2);
+          mpz_check_format (ref3);
+
+          mpz_set (res1, in1);
+          mpz_gcdext (res1, res2, res3, res1, in2);
+          mpz_check_format (res1);
+          mpz_check_format (res2);
+          mpz_check_format (res3);
+          if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0
+              || mpz_cmp (ref3, res3) != 0)
+            FAIL2 (mpz_gcdext, in1, in2, NULL);
+
+          mpz_set (res2, in1);
+          mpz_gcdext (res1, res2, res3, res2, in2);
+          mpz_check_format (res1);
+          mpz_check_format (res2);
+          mpz_check_format (res3);
+          if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0
+              || mpz_cmp (ref3, res3) != 0)
+            FAIL2 (mpz_gcdext, in1, in2, NULL);
+
+          mpz_set (res3, in1);
+          mpz_gcdext (res1, res2, res3, res3, in2);
+          mpz_check_format (res1);
+          mpz_check_format (res2);
+          mpz_check_format (res3);
+          if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0
+              || mpz_cmp (ref3, res3) != 0)
+            FAIL2 (mpz_gcdext, in1, in2, NULL);
+
+          mpz_set (res1, in2);
+          mpz_gcdext (res1, res2, res3, in1, res1);
+          mpz_check_format (res1);
+          mpz_check_format (res2);
+          mpz_check_format (res3);
+          if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0
+              || mpz_cmp (ref3, res3) != 0)
+            FAIL2 (mpz_gcdext, in1, in2, NULL);
+
+          mpz_set (res2, in2);
+          mpz_gcdext (res1, res2, res3, in1, res2);
+          mpz_check_format (res1);
+          mpz_check_format (res2);
+          mpz_check_format (res3);
+          if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0
+              || mpz_cmp (ref3, res3) != 0)
+            FAIL2 (mpz_gcdext, in1, in2, NULL);
+
+          mpz_set (res3, in2);
+          mpz_gcdext (res1, res2, res3, in1, res3);
+          mpz_check_format (res1);
+          mpz_check_format (res2);
+          mpz_check_format (res3);
+          if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0
+              || mpz_cmp (ref3, res3) != 0)
+            FAIL2 (mpz_gcdext, in1, in2, NULL);
+
+          /* Same again with the third output omitted.  res3 is not
+             written by these calls and still holds the value from the
+             last three-output call above.  */
+          mpz_set (res1, in1);
+          mpz_gcdext (res1, res2, NULL, res1, in2);
+          mpz_check_format (res1);
+          mpz_check_format (res2);
+          if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0
+              || mpz_cmp (ref3, res3) != 0)
+            FAIL2 (mpz_gcdext, in1, in2, NULL);
+
+          mpz_set (res2, in1);
+          mpz_gcdext (res1, res2, NULL, res2, in2);
+          mpz_check_format (res1);
+          mpz_check_format (res2);
+          if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0
+              || mpz_cmp (ref3, res3) != 0)
+            FAIL2 (mpz_gcdext, in1, in2, NULL);
+
+          mpz_set (res1, in2);
+          mpz_gcdext (res1, res2, NULL, in1, res1);
+          mpz_check_format (res1);
+          mpz_check_format (res2);
+          if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0
+              || mpz_cmp (ref3, res3) != 0)
+            FAIL2 (mpz_gcdext, in1, in2, NULL);
+
+          mpz_set (res2, in2);
+          mpz_gcdext (res1, res2, NULL, in1, res2);
+          mpz_check_format (res1);
+          mpz_check_format (res2);
+          if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0
+              || mpz_cmp (ref3, res3) != 0)
+            FAIL2 (mpz_gcdext, in1, in2, NULL);
+        }
+
+      /* Don't run mpz_powm for huge exponents or when undefined.  */
+      if (mpz_sizeinbase (in2, 2) < 250 && mpz_sgn (in3) != 0
+          && (mpz_sgn (in2) >= 0 || mpz_invert (t, in1, in3)))
+        {
+          mpz_powm (ref1, in1, in2, in3);
+          mpz_check_format (ref1);
+
+          mpz_set (res1, in1);
+          mpz_powm (res1, res1, in2, in3);
+          mpz_check_format (res1);
+          if (mpz_cmp (ref1, res1) != 0)
+            FAIL2 (mpz_powm, in1, in2, in3);
+
+          mpz_set (res1, in2);
+          mpz_powm (res1, in1, res1, in3);
+          mpz_check_format (res1);
+          if (mpz_cmp (ref1, res1) != 0)
+            FAIL2 (mpz_powm, in1, in2, in3);
+
+          mpz_set (res1, in3);
+          mpz_powm (res1, in1, in2, res1);
+          mpz_check_format (res1);
+          if (mpz_cmp (ref1, res1) != 0)
+            FAIL2 (mpz_powm, in1, in2, in3);
+        }
+
+      /* Don't run mpz_powm_ui when undefined.  */
+      if (mpz_sgn (in3) != 0)
+        {
+          mpz_powm_ui (ref1, in1, in2i, in3);
+          mpz_check_format (ref1);
+
+          mpz_set (res1, in1);
+          mpz_powm_ui (res1, res1, in2i, in3);
+          mpz_check_format (res1);
+          if (mpz_cmp (ref1, res1) != 0)
+            FAIL2 (mpz_powm_ui, in1, in2, in3);
+
+          mpz_set (res1, in3);
+          mpz_powm_ui (res1, in1, in2i, res1);
+          mpz_check_format (res1);
+          if (mpz_cmp (ref1, res1) != 0)
+            FAIL2 (mpz_powm_ui, in1, in2, in3);
+        }
+
+      {
+        r1 = mpz_gcd_ui (ref1, in1, in2i);
+        mpz_check_format (ref1);
+
+        mpz_set (res1, in1);
+        r2 = mpz_gcd_ui (res1, res1, in2i);
+        mpz_check_format (res1);
+        /* Both the stored result and the return value must agree.  */
+        if (mpz_cmp (ref1, res1) != 0 || r1 != r2)
+          FAIL2 (mpz_gcd_ui, in1, in2, NULL);
+      }
+#if 0
+      if (mpz_cmp_ui (in2, 1L) > 0 && mpz_sgn (in1) != 0)
+        {
+          /* Test mpz_remove */
+          mpz_remove (ref1, in1, in2);
+          mpz_check_format (ref1);
+
+          mpz_set (res1, in1);
+          mpz_remove (res1, res1, in2);
+          mpz_check_format (res1);
+          if (mpz_cmp (ref1, res1) != 0)
+            FAIL2 (mpz_remove, in1, in2, NULL);
+
+          mpz_set (res1, in2);
+          mpz_remove (res1, in1, res1);
+          mpz_check_format (res1);
+          if (mpz_cmp (ref1, res1) != 0)
+            FAIL2 (mpz_remove, in1, in2, NULL);
+        }
+#endif
+      if (mpz_sgn (in2) != 0)
+        {
+          /* Test mpz_divexact */
+          mpz_mul (t, in1, in2);
+          mpz_divexact (ref1, t, in2);
+          mpz_check_format (ref1);
+
+          mpz_set (res1, t);
+          mpz_divexact (res1, res1, in2);
+          mpz_check_format (res1);
+          if (mpz_cmp (ref1, res1) != 0)
+            FAIL2 (mpz_divexact, t, in2, NULL);
+
+          mpz_set (res1, in2);
+          mpz_divexact (res1, t, res1);
+          mpz_check_format (res1);
+          if (mpz_cmp (ref1, res1) != 0)
+            FAIL2 (mpz_divexact, t, in2, NULL);
+        }
+
+#if 0
+      if (mpz_sgn (in2) > 0)
+        {
+          /* Test mpz_divexact_gcd, same as mpz_divexact */
+          mpz_mul (t, in1, in2);
+          mpz_divexact_gcd (ref1, t, in2);
+          mpz_check_format (ref1);
+
+          mpz_set (res1, t);
+          mpz_divexact_gcd (res1, res1, in2);
+          mpz_check_format (res1);
+          if (mpz_cmp (ref1, res1) != 0)
+            FAIL2 (mpz_divexact_gcd, t, in2, NULL);
+
+          mpz_set (res1, in2);
+          mpz_divexact_gcd (res1, t, res1);
+          mpz_check_format (res1);
+          if (mpz_cmp (ref1, res1) != 0)
+            FAIL2 (mpz_divexact_gcd, t, in2, NULL);
+        }
+#endif
+    }
+
+  if (failures != 0)
+    {
+      fprintf (stderr, "mpz/reuse: %ld error%s\n", failures,
+               failures == 1 ? "" : "s");
+      exit (1);
+    }
+
+  mpz_clear (bs);
+  mpz_clear (in1);
+  mpz_clear (in2);
+  mpz_clear (in3);
+  mpz_clear (ref1);
+  mpz_clear (ref2);
+  mpz_clear (ref3);
+  mpz_clear (res1);
+  mpz_clear (res2);
+  mpz_clear (res3);
+  mpz_clear (t);
+}
+
+/* Print V to stdout in upper-case hexadecimal, preceded by a space.  Does
+   nothing when V is NULL.  */
+static void
+dump_optional_operand (mpz_t v)
+{
+  if (v == NULL)
+    return;
+
+  printf (" ");
+  mpz_out_str (stdout, -16, v);
+}
+
+/* Print a one-line failure report for NAME to stdout, listing IN1 and
+   whichever of IN2 and IN3 are non-NULL in upper-case hexadecimal.  */
+void
+dump (const char *name, mpz_t in1, mpz_t in2, mpz_t in3)
+{
+  printf ("failure in %s (", name);
+  mpz_out_str (stdout, -16, in1);
+  dump_optional_operand (in2);
+  dump_optional_operand (in3);
+  printf (")\n");
+}
+
+/* Abort with a message on stderr if X is malformed: its limb count (the
+   absolute value of _mp_size) exceeds _mp_alloc, or its most significant
+   limb is zero.  */
+void
+mpz_check_format (const mpz_t x)
+{
+  mp_size_t limbs = x->_mp_size < 0 ? - x->_mp_size : x->_mp_size;
+
+  if (limbs > x->_mp_alloc)
+    {
+      fprintf (stderr, "mpz_t size exceeds allocation!\n");
+      abort ();
+    }
+
+  if (limbs <= 0)
+    return;
+
+  if (x->_mp_d[limbs - 1] == 0)
+    {
+      fprintf (stderr, "Unnormalized mpz_t!\n");
+      abort ();
+    }
+}
--- /dev/null
+/*
+
+Copyright 2012, 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
+
+#include <limits.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "testutils.h"
+
+/* Size argument handed to mini_rrandomb when generating the radicand.  */
+#define MAXBITS 400
+/* Number of iterations of the test loop.  */
+#define COUNT 10000
+
+/* Write "LABEL: VALUE" to stderr, with the value of X rendered in base 16.
+   The string returned by mpz_get_str is released with testfree.  */
+static void
+dump (const char *label, const mpz_t x)
+{
+  char *hex;
+
+  hex = mpz_get_str (NULL, 16, x);
+  fprintf (stderr, "%s: %s\n", label, hex);
+  testfree (hex);
+}
+
+/* Called when s is supposed to be the z-th root of u truncated towards
+   zero, and r = u - s^z.
+
+   Returns 1 when all of the following hold, 0 otherwise:
+     - u - s^z equals r;
+     - that remainder is zero or has the same sign as u;
+     - moving s one step further from zero overshoots, i.e.
+       |(s +/- 1)^z| > |u|.  */
+static int
+rootrem_valid_p (const mpz_t u, const mpz_t s, const mpz_t r, unsigned long z)
+{
+  mpz_t t;
+  int ok;
+
+  mpz_init (t);
+
+  /* t = s^z, computed through mpz_ui_pow_ui whenever s fits an unsigned
+     long.  */
+  if (mpz_fits_ulong_p (s))
+    mpz_ui_pow_ui (t, mpz_get_ui (s), z);
+  else
+    mpz_pow_ui (t, s, z);
+  mpz_sub (t, u, t);
+
+  /* A zero remainder is valid whatever the sign of u: that is the case
+     when u is an exact z-th power.  */
+  ok = (mpz_sgn (t) == 0 || mpz_sgn (t) == mpz_sgn (u))
+    && mpz_cmp (t, r) == 0;
+
+  if (ok)
+    {
+      /* Step s away from zero and check that its power exceeds |u|.  */
+      if (mpz_sgn (s) > 0)
+        mpz_add_ui (t, s, 1);
+      else
+        mpz_sub_ui (t, s, 1);
+      mpz_pow_ui (t, t, z);
+      ok = mpz_cmpabs (t, u) > 0;
+    }
+
+  mpz_clear (t);
+  return ok;
+}
+
+/* Test entry point: for COUNT random values u and random exponents e,
+   call mpz_rootrem and validate its outputs with rootrem_valid_p.  The
+   first invalid result is dumped to stderr and aborts the process.  */
+void
+testmain (int argc, char **argv)
+{
+  mpz_t u, s, r, bs;
+  unsigned long e;
+  unsigned round;
+
+  mpz_init (u);
+  mpz_init (s);
+  mpz_init (r);
+  mpz_init (bs);
+
+  for (round = 0; round < COUNT; round++)
+    {
+      mini_rrandomb (u, MAXBITS);
+      mini_rrandomb (bs, 12);
+
+      /* Exponent in the range 2 .. (bit length of u) + 1.  */
+      e = mpz_getlimbn (bs, 0) % mpz_sizeinbase (u, 2) + 2;
+
+      /* Negative radicands are only used with odd exponents, and then
+         only when bit 10 of the random limb is set.  */
+      if ((e & 1) != 0 && (mpz_getlimbn (bs, 0) & (1L<<10)) != 0)
+        mpz_neg (u, u);
+
+      mpz_rootrem (s, r, u, e);
+      if (rootrem_valid_p (u, s, r, e))
+        continue;
+
+      fprintf (stderr, "mpz_rootrem(%lu-th) failed:\n", e);
+      dump ("u", u);
+      dump ("root", s);
+      dump ("rem", r);
+      abort ();
+    }
+
+  mpz_clear (bs);
+  mpz_clear (u);
+  mpz_clear (s);
+  mpz_clear (r);
+}
--- /dev/null
+/*
+
+Copyright 2012, Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
+
+#include <limits.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "testutils.h"
+
+/* Size argument handed to mini_random_scan_op.  */
+#define MAXBITS 400
+/* Number of iterations of the test loop.  */
+#define COUNT 10000
+
+/* Write "LABEL: VALUE" to stderr, with the value of X rendered in base 16.
+   The string returned by mpz_get_str is released with testfree.  */
+static void
+dump (const char *label, const mpz_t x)
+{
+  char *hex;
+
+  hex = mpz_get_str (NULL, 16, x);
+  fprintf (stderr, "%s: %s\n", label, hex);
+  testfree (hex);
+}
+
+/* Report a scan mismatch: HEADLINE is written to stderr verbatim, followed
+   by the operand, the starting bit, the computed result and the reference
+   value; the process is then aborted.  */
+static void
+scan_mismatch (const char *headline, const mpz_t a, mp_bitcnt_t b,
+               mp_bitcnt_t res, mp_bitcnt_t ref)
+{
+  fputs (headline, stderr);
+  dump ("a", a);
+  fprintf (stderr, "b: %lu\n", b);
+  fprintf (stderr, "r: %lu\n", res);
+  fprintf (stderr, "ref: %lu\n", ref);
+  abort ();
+}
+
+/* Test entry point: compare mpz_scan0 and mpz_scan1 with the reference
+   values produced by mini_random_scan_op, COUNT times each.  */
+void
+testmain (int argc, char **argv)
+{
+  mp_bitcnt_t b, res, ref;
+  mpz_t a;
+  unsigned round;
+
+  mpz_init (a);
+
+  for (round = 0; round < COUNT; round++)
+    {
+      mini_random_scan_op (OP_SCAN0, MAXBITS, a, &b, &ref);
+      res = mpz_scan0 (a, b);
+      if (res != ref)
+        scan_mismatch ("mpz_scan0 failed:\n", a, b, res, ref);
+
+      mini_random_scan_op (OP_SCAN1, MAXBITS, a, &b, &ref);
+      res = mpz_scan1 (a, b);
+      if (res != ref)
+        scan_mismatch ("mpz_scan1 failed:\n", a, b, res, ref);
+    }
+
+  mpz_clear (a);
+}
--- /dev/null
+/* Exercise some mpz_..._si functions.
+
+Copyright 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "testutils.h"
+
+/* Cross-check the signed-long interfaces on SZ, whose expected value is SI,
+   against a neighbouring value OZ / OI.  C is the expected result of
+   comparing SZ with OI.
+
+   Aborts if mpz_cmp_si (sz, oi) differs from C.  Returns 1, without
+   further checks, if the plain long comparison of SI and OI does not give
+   C.  Otherwise checks mpz_cmp_si, mpz_fits_slong_p, mpz_get_si and
+   mpz_init_set_si against SI, aborting on any mismatch, and returns 0.  */
+int
+check_si (mpz_t sz, mpz_t oz, long si, long oi, int c)
+{
+  mpz_t t;
+  int fail;
+  int order;
+
+  if (mpz_cmp_si (sz, oi) != c)
+    {
+      printf ("mpz_cmp_si (sz, %ld) != %i.\n", oi, c);
+      printf (" sz="); mpz_out_str (stdout, 10, sz); printf ("\n");
+      abort ();
+    }
+
+  /* Three-way comparison of the two longs: -1, 0 or 1.  */
+  order = (si > oi) - (si < oi);
+  if (order != c)
+    return 1;
+
+  mpz_init_set_si (t, si);
+
+  fail = mpz_cmp_si (sz, si);
+  if (fail != 0)
+    printf ("mpz_cmp_si (sz, %ld) != 0.\n", si);
+
+  if (mpz_cmp_si (oz, si) != -c)
+    {
+      printf ("mpz_cmp_si (oz, %ld) != %i.\n", si, -c);
+      fail = 1;
+    }
+
+  if (! mpz_fits_slong_p (sz))
+    {
+      printf ("mpz_fits_slong_p (sz) != 1.\n");
+      fail = 1;
+    }
+
+  if (mpz_get_si (sz) != si)
+    {
+      printf ("mpz_get_si (sz) != %ld.\n", si);
+      fail = 1;
+    }
+
+  if (mpz_cmp (t, sz) != 0)
+    {
+      printf ("mpz_init_set_si (%ld) failed.\n", si);
+      printf (" got="); mpz_out_str (stdout, 10, t); printf ("\n");
+      fail = 1;
+    }
+
+  mpz_clear (t);
+
+  if (fail == 0)
+    return 0;
+
+  printf (" sz="); mpz_out_str (stdout, 10, sz); printf ("\n");
+  printf (" oz="); mpz_out_str (stdout, 10, oz); printf ("\n");
+  printf (" si=%ld\n", si);
+  abort ();
+}
+
+/* Walk the values c * 2^k, c * (2^k + 1) and c * (2^(k+1) - 1) for
+   growing k, where C is 1 or -1, running check_si on each until it returns
+   nonzero.  The mpz value left in oz at that point is then required not to
+   fit a signed long and to compare as C against -C, both as is and after
+   doubling.  Any violation prints a diagnostic and aborts.
+
+   NOTE(review): the long arithmetic on si and oi is not guarded against
+   exceeding the range of long; loop termination appears to depend on
+   check_si detecting the resulting mismatch -- confirm.  */
+void
+try_op_si (int c)
+{
+  long si, oi;
+  mpz_t sz, oz;
+
+  si = c;
+  mpz_init_set_si (sz, si);
+
+  oi = si;
+  mpz_init_set (oz, sz);
+
+  do {
+    si *= 2;			/* c * 2^k */
+    mpz_mul_2exp (sz, sz, 1);
+
+    if (check_si (sz, oz, si, oi, c))
+      {
+        mpz_set (oz, sz);
+        break;
+      }
+
+    oi = si + c;		/* c * (2^k + 1) */
+    if (c == -1)
+      mpz_sub_ui (oz, sz, 1);
+    else
+      mpz_add_ui (oz, sz, 1);
+
+    if (check_si (oz, sz, oi, si, c))
+      break;
+
+    oi = (si - c) * 2 + c;	/* c * (2^K - 1) */
+    mpz_mul_si (oz, sz, 2*c);
+    if (c == -1)
+      mpz_ui_sub (oz, 1, oz);	/* oz = sz * 2 + 1 */
+    else
+      mpz_sub_ui (oz, oz, 1);	/* oz = sz * 2 - 1 */
+  } while (check_si (oz, sz, oi, si, c) == 0);
+
+  mpz_clear (sz);
+
+  if (mpz_fits_slong_p (oz))
+    {
+      printf ("Should not fit a signed long any more.\n");
+      printf (" oz="); mpz_out_str (stdout, 10, oz); printf ("\n");
+      abort ();
+    }
+
+  if (mpz_cmp_si (oz, -c) != c)
+    {
+      /* Report the operand actually compared against, which is -c.  */
+      printf ("mpz_cmp_si (oz, %i) != %i.\n", -c, c);
+      printf (" oz="); mpz_out_str (stdout, 10, oz); printf ("\n");
+      abort ();
+    }
+
+  mpz_mul_2exp (oz, oz, 1);
+  if (mpz_cmp_si (oz, -c) != c)
+    {
+      /* Report the operand actually compared against, which is -c.  */
+      printf ("mpz_cmp_si (oz, %i) != %i.\n", -c, c);
+      printf (" oz="); mpz_out_str (stdout, 10, oz); printf ("\n");
+      abort ();
+    }
+
+  mpz_clear (oz);
+}
+
+/* Test entry point: run try_op_si for -1 and then for 1.  */
+void
+testmain (int argc, char *argv[])
+{
+  static const int signs[] = { -1, 1 };
+  unsigned k;
+
+  for (k = 0; k < sizeof (signs) / sizeof (signs[0]); k++)
+    try_op_si (signs[k]);
+}
--- /dev/null
+/*
+
+Copyright 2012, Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
+
+#include <limits.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "testutils.h"
+
+/* Size argument handed to mini_rrandomb when generating the radicand.  */
+#define MAXBITS 400
+/* Number of iterations of the test loop.  */
+#define COUNT 10000
+
+/* Write "LABEL: VALUE" to stderr, with the value of X rendered in base 16.
+   The string returned by mpz_get_str is released with testfree.  */
+static void
+dump (const char *label, const mpz_t x)
+{
+  char *hex;
+
+  hex = mpz_get_str (NULL, 16, x);
+  fprintf (stderr, "%s: %s\n", label, hex);
+  testfree (hex);
+}
+
+/* Called when s is supposed to be floor(sqrt(u)), and r = u - s^2.
+   Returns 1 when u - s^2 is non-negative and equal to r, and
+   (s + 1)^2 > u; returns 0 otherwise.  */
+static int
+sqrtrem_valid_p (const mpz_t u, const mpz_t s, const mpz_t r)
+{
+  mpz_t t;
+  int ok;
+
+  mpz_init (t);
+
+  /* t = u - s^2 must match the reported remainder.  */
+  mpz_mul (t, s, s);
+  mpz_sub (t, u, t);
+  ok = mpz_sgn (t) >= 0 && mpz_cmp (t, r) == 0;
+
+  if (ok)
+    {
+      /* s must be the largest such value: (s + 1)^2 has to exceed u.  */
+      mpz_add_ui (t, s, 1);
+      mpz_mul (t, t, t);
+      ok = mpz_cmp (t, u) > 0;
+    }
+
+  mpz_clear (t);
+  return ok;
+}
+
+/* Test entry point: for COUNT random values u, call mpz_sqrtrem and
+   validate its outputs with sqrtrem_valid_p.  The first invalid result is
+   dumped to stderr and aborts the process.  */
+void
+testmain (int argc, char **argv)
+{
+  mpz_t u, s, r;
+  unsigned round;
+
+  mpz_init (u);
+  mpz_init (s);
+  mpz_init (r);
+
+  for (round = 0; round < COUNT; round++)
+    {
+      mini_rrandomb (u, MAXBITS);
+      mpz_sqrtrem (s, r, u);
+
+      if (sqrtrem_valid_p (u, s, r))
+        continue;
+
+      fprintf (stderr, "mpz_sqrtrem failed:\n");
+      dump ("u", u);
+      dump ("sqrt", s);
+      dump ("rem", r);
+      abort ();
+    }
+
+  mpz_clear (u);
+  mpz_clear (s);
+  mpz_clear (r);
+}
--- /dev/null
+/*
+
+Copyright 2012, 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
+
+#include <assert.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "testutils.h"
+
+#define MAXBITS 400
+#define COUNT 2000
+
+#define GMP_LIMB_BITS (sizeof(mp_limb_t) * CHAR_BIT)
+#define MAXLIMBS ((MAXBITS + GMP_LIMB_BITS - 1) / GMP_LIMB_BITS)
+
+/* Write "LABEL: <value of X in hexadecimal>" followed by a newline to
+   stderr.  The temporary string is released with testfree.  */
+static void
+dump (const char *label, const mpz_t x)
+{
+  char *hex;
+
+  hex = mpz_get_str (NULL, 16, x);
+  fprintf (stderr, "%s: %s\n", label, hex);
+  testfree (hex);
+}
+
+/* Table-driven test of mpz_init_set_str with base 0 (prefix-selected
+   base).  Each entry pairs an input string with the expected value
+   written in decimal, or with NULL when the input must be rejected
+   (return value -1).  The table is terminated by a NULL input.  Any
+   mismatch prints a diagnostic and aborts.  */
+static void
+test_small (void)
+{
+  static const struct {
+    const char *input;
+    const char *decimal;
+  } data[] = {
+    { "183407", "183407" },
+    { " 763959", "763959" },
+    { "9 81999", "981999" },
+    { "10\t7398", "107398" },
+    { "-9585 44", "-00958544" },
+    { "-0", "0000" },
+    { " -000 ", "0" },
+    { "0704436", "231710" },
+    { " 02503517", "689999" },
+    { "0 1312143", "365667" },
+    { "-03 274062", "-882738" },
+    { "012\t242", "005282" },
+    { "0b11010111110010001111", "883855" },
+    { " 0b11001010010100001", "103585" },
+    { "-0b101010110011101111", "-175343" },
+    { "0b 1111111011011100110", "521958" },
+    { "0b1 1111110111001000011", "1044035" },
+    { " 0x53dfc", "343548" },
+    { "0xfA019", "1024025" },
+    { "0x 642d1", "410321" },
+    { "0x5 8067", "360551" },
+    { "-0xd6Be6", "-879590" },
+    { "\t0B1110000100000000011", "460803" },
+    { "0B\t1111110010010100101", "517285" },
+    { "0B1\t010111101101110100", "359284" },
+    { "-0B101\t1001101111111001", "-367609" },
+    { "0B10001001010111110000", "562672" },
+    { "0Xe4B7e", "936830" },
+    { "0X1E4bf", "124095" },
+    { "-0Xfdb90", "-1039248" },
+    { "0X7fc47", "523335" },
+    { "0X8167c", "530044" },
+    /* Some invalid inputs */
+    { "0ab", NULL },
+    { "10x0", NULL },
+    { "0xxab", NULL },
+    { "ab", NULL },
+    { "0%#", NULL },
+    { "$foo", NULL },
+    { NULL, NULL }
+  };
+  unsigned i;
+  mpz_t a, b;
+  mpz_init (b);
+
+  for (i = 0; data[i].input; i++)
+    {
+      /* a is initialized by this call and cleared at the bottom of
+         every iteration, whether or not parsing succeeded.  */
+      int res = mpz_init_set_str (a, data[i].input, 0);
+      if (data[i].decimal)
+        {
+          if (res != 0)
+            {
+              fprintf (stderr, "mpz_set_str returned -1, input: %s\n",
+                       data[i].input);
+              abort ();
+            }
+          if (mpz_set_str (b, data[i].decimal, 10) != 0)
+            {
+              /* Report the string that actually failed to parse, i.e.
+                 the decimal reference, not the test input.  */
+              fprintf (stderr, "mpz_set_str returned -1, decimal input: %s\n",
+                       data[i].decimal);
+              abort ();
+            }
+          if (mpz_cmp (a, b) != 0)
+            {
+              fprintf (stderr, "mpz_set_str failed for input: %s\n",
+                       data[i].input);
+
+              dump ("got", a);
+              dump ("ref", b);
+              abort ();
+            }
+        }
+      else if (res != -1)
+        {
+          fprintf (stderr, "mpz_set_str returned %d, invalid input: %s\n",
+                   res, data[i].input);
+          abort ();
+        }
+      mpz_clear (a);
+    }
+
+  mpz_clear (b);
+}
+
+/* Randomized round-trip tests of the string conversion functions.
+
+   test_small runs first.  Then, for each of COUNT rounds and each base
+   in 0, 2, 3, ..., 36, hex_random_str_op supplies two strings: AP, which
+   is parsed below as base 16, and RP, the reference that mpz_get_str is
+   expected to reproduce in the requested base (the base is negated on
+   even rounds).  Checked in turn: mpz_sizeinbase, mpz_get_str,
+   mpz_out_str (first 20 rounds only, and only when a temporary file
+   could be created), mpz_set_str, and the mpn_get_str / mpn_set_str
+   pair.  Any mismatch prints diagnostics and aborts.  */
+void
+testmain (int argc, char **argv)
+{
+  unsigned i;
+  char *ap;
+  char *bp;
+  char *rp;
+  size_t bn, rn, arn;
+
+  mpz_t a, b;
+
+  FILE *tmp;
+
+  test_small ();
+
+  mpz_init (a);
+  mpz_init (b);
+
+  tmp = tmpfile ();
+  if (!tmp)
+    fprintf (stderr,
+             "Failed to create temporary file. Skipping mpz_out_str tests.\n");
+
+  for (i = 0; i < COUNT; i++)
+    {
+      int base;
+      /* Base 0 is followed directly by base 2; base 1 is skipped.  */
+      for (base = 0; base <= 36; base += 1 + (base == 0))
+        {
+          hex_random_str_op (MAXBITS, i&1 ? base: -base, &ap, &rp);
+          if (mpz_set_str (a, ap, 16) != 0)
+            {
+              fprintf (stderr, "mpz_set_str failed on input %s\n", ap);
+              abort ();
+            }
+
+          /* rn counts every character of the reference, arn only the
+             digits (a leading '-' is excluded).  */
+          rn = strlen (rp);
+          arn = rn - (rp[0] == '-');
+
+          /* mpz_sizeinbase may overestimate by one digit, never
+             underestimate.  */
+          bn = mpz_sizeinbase (a, base ? base : 10);
+          if (bn < arn || bn > (arn + 1))
+            {
+              fprintf (stderr, "mpz_sizeinbase failed:\n");
+              dump ("a", a);
+              fprintf (stderr, "r = %s\n", rp);
+              fprintf (stderr, " base %d, correct size %u, got %u\n",
+                       base, (unsigned) arn, (unsigned)bn);
+              abort ();
+            }
+          bp = mpz_get_str (NULL, i&1 ? base: -base, a);
+          if (strcmp (bp, rp))
+            {
+              fprintf (stderr, "mpz_get_str failed:\n");
+              dump ("a", a);
+              fprintf (stderr, "b = %s\n", bp);
+              fprintf (stderr, " base = %d\n", base);
+              fprintf (stderr, "r = %s\n", rp);
+              abort ();
+            }
+
+          /* Just a few tests with file i/o. */
+          if (tmp && i < 20)
+            {
+              size_t tn;
+              rewind (tmp);
+              tn = mpz_out_str (tmp, i&1 ? base: -base, a);
+              if (tn != rn)
+                {
+                  fprintf (stderr, "mpz_out_str, bad return value:\n");
+                  dump ("a", a);
+                  fprintf (stderr, "r = %s\n", rp);
+                  fprintf (stderr, " base %d, correct size %u, got %u\n",
+                           base, (unsigned) rn, (unsigned)tn);
+                  abort ();
+                }
+              rewind (tmp);
+              /* bp is reused as the read buffer; it holds rn characters
+                 because it compared equal to rp above.  */
+              memset (bp, 0, rn);
+              tn = fread (bp, 1, rn, tmp);
+              if (tn != rn)
+                {
+                  fprintf (stderr,
+                           "fread failed, expected %lu bytes, got only %lu.\n",
+                           (unsigned long) rn, (unsigned long) tn);
+                  abort ();
+                }
+
+              if (memcmp (bp, rp, rn) != 0)
+                {
+                  fprintf (stderr, "mpz_out_str failed:\n");
+                  dump ("a", a);
+                  fprintf (stderr, "b = %s\n", bp);
+                  fprintf (stderr, " base = %d\n", base);
+                  fprintf (stderr, "r = %s\n", rp);
+                  abort ();
+                }
+            }
+
+          mpz_set_str (b, rp, base);
+
+          if (mpz_cmp (a, b))
+            {
+              fprintf (stderr, "mpz_set_str failed:\n");
+              fprintf (stderr, "r = %s\n", rp);
+              fprintf (stderr, " base = %d\n", base);
+              fprintf (stderr, "r = %s\n", ap);
+              fprintf (stderr, " base = 16\n");
+              dump ("b", b);
+              dump ("r", a);
+              abort ();
+            }
+
+          /* Test mpn interface */
+          if (base && mpz_sgn (a))
+            {
+              size_t j;
+              const char *absr;
+              mp_limb_t t[MAXLIMBS];
+              mp_size_t tn = mpz_size (a);
+
+              assert (tn <= MAXLIMBS);
+              /* Work on a copy of the limbs rather than on a's own
+                 storage.  */
+              mpn_copyi (t, a->_mp_d, tn);
+
+              bn = mpn_get_str (bp, base, t, tn);
+              if (bn != arn)
+                {
+                  fprintf (stderr, "mpn_get_str failed:\n");
+                  fprintf (stderr, "returned length: %lu (bad)\n", (unsigned long) bn);
+                  fprintf (stderr, "expected: %lu\n", (unsigned long) arn);
+                  fprintf (stderr, " base = %d\n", base);
+                  fprintf (stderr, "r = %s\n", ap);
+                  fprintf (stderr, " base = 16\n");
+                  dump ("b", b);
+                  dump ("r", a);
+                  abort ();
+                }
+              absr = rp + (rp[0] == '-');
+
+              /* mpn_get_str yields raw digit values, not characters, so
+                 map each reference character to its value first.  */
+              for (j = 0; j < bn; j++)
+                {
+                  unsigned char digit = absr[j];
+                  unsigned value;
+                  if (digit >= '0' && digit <= '9')
+                    value = digit - '0';
+                  else if (digit >= 'a' && digit <= 'z')
+                    value = digit - 'a' + 10;
+                  else if (digit >= 'A' && digit <= 'Z')
+                    value = digit - 'A' + 10;
+                  else
+                    {
+                      fprintf (stderr, "Internal error in test.\n");
+                      abort();
+                    }
+                  if (bp[j] != value)
+                    {
+                      fprintf (stderr, "mpn_get_str failed:\n");
+                      fprintf (stderr, "digit %lu: %d (bad)\n", (unsigned long) j, bp[j]);
+                      fprintf (stderr, "expected: %d\n", value);
+                      fprintf (stderr, " base = %d\n", base);
+                      fprintf (stderr, "r = %s\n", ap);
+                      fprintf (stderr, " base = 16\n");
+                      dump ("b", b);
+                      dump ("r", a);
+                      abort ();
+                    }
+                }
+              tn = mpn_set_str (t, bp, bn, base);
+              if (tn != mpz_size (a) || mpn_cmp (t, a->_mp_d, tn))
+                {
+                  fprintf (stderr, "mpn_set_str failed:\n");
+                  fprintf (stderr, "r = %s\n", rp);
+                  fprintf (stderr, " base = %d\n", base);
+                  fprintf (stderr, "r = %s\n", ap);
+                  fprintf (stderr, " base = 16\n");
+                  dump ("r", a);
+                  abort ();
+                }
+            }
+          free (ap);
+          /* rp is produced by the same hex_random_str_op call as ap and
+             was previously never released.  NOTE(review): assumes it is
+             allocated the same way as ap -- confirm in hex-random.c.  */
+          free (rp);
+          testfree (bp);
+        }
+    }
+
+  /* Close the scratch file instead of relying on process exit.  */
+  if (tmp)
+    fclose (tmp);
+
+  mpz_clear (a);
+  mpz_clear (b);
+}
--- /dev/null
+/*
+
+Copyright 2012, 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "testutils.h"
+
+#define MAXBITS 400
+#define COUNT 10000
+
+/* Write "LABEL: <value of X in hexadecimal>" followed by a newline to
+   stderr.
+
+   The string returned by mpz_get_str comes from the allocation
+   functions installed by the test harness, which hand out pointers
+   offset past a size header.  It must therefore be released with
+   testfree, not with plain free.  */
+static void
+dump (const char *label, const mpz_t x)
+{
+  char *buf = mpz_get_str (NULL, 16, x);
+  fprintf (stderr, "%s: %s\n", label, buf);
+  testfree (buf);
+}
+
+/* Run COUNT rounds of subtraction tests.  mini_random_op3 supplies
+   operands a and b together with the reference result; mpz_sub is
+   checked every round, and mpz_ui_sub additionally whenever a fits in
+   an unsigned long.  A mismatch dumps the values and aborts.  */
+void
+testmain (int argc, char **argv)
+{
+  mpz_t a, b, res, res_ui, ref;
+  unsigned round;
+
+  mpz_init (a);
+  mpz_init (b);
+  mpz_init (res);
+  mpz_init (res_ui);
+  mpz_init (ref);
+
+  for (round = 0; round < COUNT; round++)
+    {
+      mini_random_op3 (OP_SUB, MAXBITS, a, b, ref);
+
+      mpz_sub (res, a, b);
+      if (mpz_cmp (res, ref) != 0)
+        {
+          fprintf (stderr, "mpz_sub failed:\n");
+          dump ("a", a);
+          dump ("b", b);
+          dump ("r", res);
+          dump ("ref", ref);
+          abort ();
+        }
+
+      /* The unsigned long variant applies only to operands that fit.  */
+      if (!mpz_fits_ulong_p (a))
+        continue;
+
+      mpz_ui_sub (res_ui, mpz_get_ui (a), b);
+      if (mpz_cmp (res_ui, ref) != 0)
+        {
+          fprintf (stderr, "mpz_ui_sub failed:\n");
+          dump ("a", a);
+          dump ("b", b);
+          dump ("r", res_ui);
+          dump ("ref", ref);
+          abort ();
+        }
+    }
+
+  mpz_clear (a);
+  mpz_clear (b);
+  mpz_clear (res);
+  mpz_clear (res_ui);
+  mpz_clear (ref);
+}
--- /dev/null
+/*
+
+Copyright 2013, Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
+
+#include "testutils.h"
+
+/* Include it here, so we could tweak, e.g., how MPZ_REALLOC
+ works. */
+#include "../mini-gmp.c"
+
+/* Sum of the payload sizes of all blocks currently live in the tracking
+   allocator: block_init adds to it, block_check subtracts from it.  */
+static size_t total_alloc = 0;
+
+/* Custom memory allocation to track memory usage, and add a small red
+ zone.
+
+ About alignment: In general, getting a block from malloc, and
+ incrementing it by sizeof(size_t), like we do here, might give a
+ pointer which is not properly aligned for all types. But the
+ largest type we allocate space for is unsigned long (mp_limb_t),
+ which shouldn't have stricter alignment requirements than
+ size_t. */
+
+/* Red-zone pattern written directly after the payload of every
+   allocated block and verified when the block is checked.  Declared
+   unsigned because several of the byte values do not fit in a signed
+   char, and const because it is only ever read.  */
+static const unsigned char block_end[8] =
+  { 0x7c, 0x37, 0xd6, 0x12, 0xa8, 0x6c, 0x01, 0xd1 };
+
+/* Set up a freshly (re)allocated raw block: store SIZE in the leading
+   size_t header, write the red-zone pattern right after the SIZE
+   payload bytes, account for the payload in total_alloc, and return the
+   address of the payload.  */
+static void *
+block_init (size_t *block, size_t size)
+{
+  char *payload = (char *) (block + 1);
+
+  block[0] = size;
+  memcpy (payload + size, block_end, sizeof(block_end));
+  total_alloc += size;
+
+  return payload;
+}
+
+/* Validate the red zone that follows the payload P and drop the payload
+   size from total_alloc.  Aborts if the red zone was overwritten.
+   Returns the address of the underlying malloced block, i.e. the size
+   header that sits just before P.  */
+static size_t *
+block_check (char *p)
+{
+  size_t *header = ((size_t *) p) - 1;
+  size_t payload_size = *header;
+
+  if (memcmp (p + payload_size, block_end, sizeof(block_end)))
+    {
+      fprintf (stderr, "red zone overwritten.\n");
+      abort ();
+    }
+
+  total_alloc -= payload_size;
+  return header;
+}
+
+/* Allocation hook: obtain room for a size header, SIZE payload bytes
+   and the trailing red zone, then let block_init prepare the block.
+   Aborts when malloc fails.  */
+static void *
+tu_alloc (size_t size)
+{
+  size_t raw_size = sizeof(size_t) + size + sizeof(block_end);
+  size_t *raw = malloc (raw_size);
+
+  if (raw == NULL)
+    {
+      fprintf (stderr, "Virtual memory exhausted.\n");
+      abort ();
+    }
+
+  return block_init (raw, size);
+}
+
+/* Reallocation hook.  The old block's red zone is verified (and its
+   size un-accounted) before the underlying block is resized to hold
+   NEW_SIZE payload bytes; block_init then rewrites header and red zone.
+   OLD_SIZE is not used: the size stored in the header is used instead.
+   Aborts when realloc fails.  */
+static void *
+tu_realloc (void *p, size_t old_size, size_t new_size)
+{
+  size_t raw_size = sizeof(size_t) + new_size + sizeof(block_end);
+  size_t *raw = block_check (p);
+
+  raw = realloc (raw, raw_size);
+  if (raw == NULL)
+    {
+      fprintf (stderr, "Virtual memory exhausted.\n");
+      abort ();
+    }
+
+  return block_init (raw, new_size);
+}
+
+/* Deallocation hook: verify the red zone of P, then release the
+   underlying malloced block.  OLD_SIZE is not used.  */
+static void
+tu_free (void *p, size_t old_size)
+{
+  size_t *raw = block_check (p);
+
+  free (raw);
+}
+
+/* Release P, which was obtained from a mini-gmp allocation function,
+   by calling whichever free function is currently installed.  The size
+   argument is passed as 0.  */
+void
+testfree (void *p)
+{
+  void (*release) (void *, size_t);
+
+  mp_get_memory_functions (NULL, NULL, &release);
+  (*release) (p, 0);
+}
+
+/* Common entry point of the test programs: initialize the reference
+   random generator, install the tracking allocator, run the test's
+   testmain, and abort if any tracked memory is still allocated
+   afterwards.  */
+int
+main (int argc, char **argv)
+{
+  hex_random_init ();
+
+  mp_set_memory_functions (tu_alloc, tu_realloc, tu_free);
+
+  /* Currently, t-comb seems to be the only program accepting any
+     arguments. It might make sense to parse common arguments here. */
+  testmain (argc, argv);
+
+  if (total_alloc == 0)
+    return 0;
+
+  fprintf (stderr, "Memory leaked: %lu bytes.\n",
+           (unsigned long) total_alloc);
+  abort ();
+}
+
+/* Run TESTED_FUN twice, each time with COUNT / 2 as its argument.
+
+   The first run uses the memory functions currently installed and must
+   leave total_alloc unchanged, otherwise the leak is reported and the
+   program aborts.  The second run is made after calling
+   mp_set_memory_functions (NULL, NULL, NULL) -- presumably this selects
+   mini-gmp's default allocation functions; confirm in mini-gmp.c --
+   and the previously installed functions are restored afterwards.  */
+void
+testhalves (int count, void (*tested_fun) (int))
+{
+  void (*freefunc) (void *, size_t);
+  void *(*reallocfunc) (void *, size_t, size_t);
+  void *(*allocfunc) (size_t);
+  size_t initial_alloc;
+
+  mp_get_memory_functions (&allocfunc, &reallocfunc, &freefunc);
+  initial_alloc = total_alloc;
+  (*tested_fun) (count / 2);
+  if (initial_alloc != total_alloc)
+    {
+      /* Cast the difference as a whole: casting only the first operand
+         would leave a size_t argument for the %lu conversion.  */
+      fprintf (stderr, "First half, memory leaked: %lu bytes.\n",
+               (unsigned long) (total_alloc - initial_alloc));
+      abort ();
+    }
+  mp_set_memory_functions (NULL, NULL, NULL);
+  (*tested_fun) (count / 2);
+  mp_set_memory_functions (allocfunc, reallocfunc, freefunc);
+}
--- /dev/null
+/*
+
+Copyright 2013, Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "mini-random.h"
+
+/* Number of elements of the array x.  Only meaningful for a true array,
+   not for a pointer.  */
+#define numberof(x) (sizeof (x) / sizeof ((x)[0]))
+
+/* Body of a test program.  Called with the program's argc and argv by
+   the main function in testutils.c, after the tracking memory functions
+   have been installed.  */
+void testmain (int argc, char **argv);
+
+/* Call tested_fun (count / 2) twice: first with the memory functions
+   currently installed, aborting if the tracked allocation total
+   changed, then again after mp_set_memory_functions (NULL, NULL, NULL).
+   The original memory functions are restored before returning.  */
+void testhalves (int count, void (*tested_fun) (int));
+
+/* Free memory allocated via a mini-gmp allocation function, using the
+   free function currently installed.  */
+void testfree (void *p);
+++ /dev/null
-/* mp-h.in -- Definitions for the GNU multiple precision library -*-mode:c-*-
- BSD mp compatible functions.
-
-Copyright 1991, 1993, 1994, 1995, 1996, 2000, 2001, 2002, 2004 Free Software
-Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
-
-#ifndef __MP_H__
-
-
-/* The following (everything under ifndef __GNU_MP__) must be identical in
- gmp.h and mp.h to allow both to be included in an application or during
- the library build. Use the t-gmp-mp-h.pl script to check. */
-#ifndef __GNU_MP__
-#define __GNU_MP__ 5
-
-#define __need_size_t /* tell gcc stddef.h we only want size_t */
-#if defined (__cplusplus)
-#include <cstddef> /* for size_t */
-#else
-#include <stddef.h> /* for size_t */
-#endif
-#undef __need_size_t
-
-/* The following instantiated by configure, for internal use only */
-#if ! defined (__GMP_WITHIN_CONFIGURE)
-@DEFN_LONG_LONG_LIMB@
-#define __GMP_LIBGMP_DLL @LIBGMP_DLL@
-#endif
-
-#if defined (__STDC__) \
- || defined (__cplusplus) \
- || defined (_AIX) \
- || defined (__DECC) \
- || (defined (__mips) && defined (_SYSTYPE_SVR4)) \
- || defined (_MSC_VER) \
- || defined (_WIN32)
-#define __GMP_HAVE_CONST 1
-#define __GMP_HAVE_PROTOTYPES 1
-#define __GMP_HAVE_TOKEN_PASTE 1
-#else
-#define __GMP_HAVE_CONST 0
-#define __GMP_HAVE_PROTOTYPES 0
-#define __GMP_HAVE_TOKEN_PASTE 0
-#endif
-
-
-#if __GMP_HAVE_CONST
-#define __gmp_const const
-#define __gmp_signed signed
-#else
-#define __gmp_const
-#define __gmp_signed
-#endif
-
-#if defined (__GNUC__)
-#define __GMP_DECLSPEC_EXPORT __declspec(__dllexport__)
-#define __GMP_DECLSPEC_IMPORT __declspec(__dllimport__)
-#endif
-#if defined (_MSC_VER) || defined (__BORLANDC__)
-#define __GMP_DECLSPEC_EXPORT __declspec(dllexport)
-#define __GMP_DECLSPEC_IMPORT __declspec(dllimport)
-#endif
-#ifdef __WATCOMC__
-#define __GMP_DECLSPEC_EXPORT __export
-#define __GMP_DECLSPEC_IMPORT __import
-#endif
-#ifdef __IBMC__
-#define __GMP_DECLSPEC_EXPORT _Export
-#define __GMP_DECLSPEC_IMPORT _Import
-#endif
-
-#if __GMP_LIBGMP_DLL
-#if __GMP_WITHIN_GMP
-#define __GMP_DECLSPEC __GMP_DECLSPEC_EXPORT
-#else
-#define __GMP_DECLSPEC __GMP_DECLSPEC_IMPORT
-#endif
-#else
-#define __GMP_DECLSPEC
-#endif
-
-#ifdef __GMP_SHORT_LIMB
-typedef unsigned int mp_limb_t;
-typedef int mp_limb_signed_t;
-#else
-#ifdef _LONG_LONG_LIMB
-typedef unsigned long long int mp_limb_t;
-typedef long long int mp_limb_signed_t;
-#else
-typedef unsigned long int mp_limb_t;
-typedef long int mp_limb_signed_t;
-#endif
-#endif
-typedef unsigned long int mp_bitcnt_t;
-
-typedef struct
-{
- int _mp_alloc; /* Number of *limbs* allocated and pointed
- to by the _mp_d field. */
- int _mp_size; /* abs(_mp_size) is the number of limbs the
- last field points to. If _mp_size is
- negative this is a negative number. */
- mp_limb_t *_mp_d; /* Pointer to the limbs. */
-} __mpz_struct;
-
-#endif /* __GNU_MP__ */
-
-/* User-visible types. */
-typedef __mpz_struct MINT;
-
-
-#if __GMP_HAVE_PROTOTYPES
-#define __GMP_PROTO(x) x
-#else
-#define __GMP_PROTO(x) ()
-#endif
-
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-#define mp_set_memory_functions __gmp_set_memory_functions
-__GMP_DECLSPEC void mp_set_memory_functions __GMP_PROTO ((void *(*) (size_t),
- void *(*) (void *, size_t, size_t),
- void (*) (void *, size_t)));
-__GMP_DECLSPEC MINT *itom __GMP_PROTO ((signed short int));
-__GMP_DECLSPEC MINT *xtom __GMP_PROTO ((const char *));
-__GMP_DECLSPEC void move __GMP_PROTO ((const MINT *, MINT *));
-__GMP_DECLSPEC void madd __GMP_PROTO ((const MINT *, const MINT *, MINT *));
-__GMP_DECLSPEC void msub __GMP_PROTO ((const MINT *, const MINT *, MINT *));
-__GMP_DECLSPEC void mult __GMP_PROTO ((const MINT *, const MINT *, MINT *));
-__GMP_DECLSPEC void mdiv __GMP_PROTO ((const MINT *, const MINT *, MINT *, MINT *));
-__GMP_DECLSPEC void sdiv __GMP_PROTO ((const MINT *, signed short int, MINT *, signed short int *));
-__GMP_DECLSPEC void msqrt __GMP_PROTO ((const MINT *, MINT *, MINT *));
-__GMP_DECLSPEC void pow __GMP_PROTO ((const MINT *, const MINT *, const MINT *, MINT *));
-__GMP_DECLSPEC void rpow __GMP_PROTO ((const MINT *, signed short int, MINT *));
-__GMP_DECLSPEC void gcd __GMP_PROTO ((const MINT *, const MINT *, MINT *));
-__GMP_DECLSPEC int mcmp __GMP_PROTO ((const MINT *, const MINT *));
-__GMP_DECLSPEC void min __GMP_PROTO ((MINT *));
-__GMP_DECLSPEC void mout __GMP_PROTO ((const MINT *));
-__GMP_DECLSPEC char *mtox __GMP_PROTO ((const MINT *));
-__GMP_DECLSPEC void mfree __GMP_PROTO ((MINT *));
-
-#if defined (__cplusplus)
-}
-#endif
-
-#define __MP_H__
-#endif /* __MP_H__ */
#ifdef COUNT_LEADING_ZEROS_NEED_CLZ_TAB
const
-unsigned char __clz_tab[128] =
+unsigned char __clz_tab[129] =
{
1,2,3,3,4,4,4,4,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
- 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8
+ 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
+ 9
};
#endif
+++ /dev/null
-## Process this file with automake to generate Makefile.in
-
-# Copyright 1996, 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
-#
-# This file is part of the GNU MP Library.
-#
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 3 of the License, or (at your
-# option) any later version.
-#
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-# License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
-
-
-# -I$(top_srcdir)/mpz is for #includes done by mpz .c files. Perhaps most
-# compilers are smart enough to look in the same directory as the .c file
-# already, but lets make absolutely sure.
-#
-INCLUDES = -DBERKELEY_MP -D__GMP_WITHIN_GMP -D__gmpz_realloc=_mp_realloc \
- -I$(top_srcdir) -I$(top_srcdir)/mpz
-
-# The mpz sources here all know to look for -DBERKELEY_MP to compile to in
-# mpbsd form.
-#
-libmpbsd_la_SOURCES = itom.c mfree.c min.c mout.c mtox.c rpow.c sdiv.c xtom.c \
- ../mpz/add.c ../mpz/cmp.c ../mpz/gcd.c ../mpz/mul.c ../mpz/powm.c \
- ../mpz/realloc.c ../mpz/set.c ../mpz/sqrtrem.c ../mpz/sub.c ../mpz/tdiv_qr.c
-
-if WANT_MPBSD
-noinst_LTLIBRARIES = libmpbsd.la
-endif
+++ /dev/null
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
-# @configure_input@
-
-# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
-# This Makefile.in is free software; the Free Software Foundation
-# gives unlimited permission to copy and/or distribute it,
-# with or without modifications, as long as this notice is preserved.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
-# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
-# PARTICULAR PURPOSE.
-
-@SET_MAKE@
-
-# Copyright 1996, 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
-#
-# This file is part of the GNU MP Library.
-#
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 3 of the License, or (at your
-# option) any later version.
-#
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-# License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
-
-VPATH = @srcdir@
-pkgdatadir = $(datadir)/@PACKAGE@
-pkgincludedir = $(includedir)/@PACKAGE@
-pkglibdir = $(libdir)/@PACKAGE@
-pkglibexecdir = $(libexecdir)/@PACKAGE@
-am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
-install_sh_DATA = $(install_sh) -c -m 644
-install_sh_PROGRAM = $(install_sh) -c
-install_sh_SCRIPT = $(install_sh) -c
-INSTALL_HEADER = $(INSTALL_DATA)
-transform = $(program_transform_name)
-NORMAL_INSTALL = :
-PRE_INSTALL = :
-POST_INSTALL = :
-NORMAL_UNINSTALL = :
-PRE_UNINSTALL = :
-POST_UNINSTALL = :
-build_triplet = @build@
-host_triplet = @host@
-ANSI2KNR = $(top_builddir)/ansi2knr
-subdir = mpbsd
-DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
-ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
-am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
- $(top_srcdir)/configure.in
-am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
- $(ACLOCAL_M4)
-mkinstalldirs = $(install_sh) -d
-CONFIG_HEADER = $(top_builddir)/config.h
-CONFIG_CLEAN_FILES =
-CONFIG_CLEAN_VPATH_FILES =
-LTLIBRARIES = $(noinst_LTLIBRARIES)
-libmpbsd_la_LIBADD =
-am_libmpbsd_la_OBJECTS = itom$U.lo mfree$U.lo min$U.lo mout$U.lo \
- mtox$U.lo rpow$U.lo sdiv$U.lo xtom$U.lo add$U.lo cmp$U.lo \
- gcd$U.lo mul$U.lo powm$U.lo realloc$U.lo set$U.lo sqrtrem$U.lo \
- sub$U.lo tdiv_qr$U.lo
-libmpbsd_la_OBJECTS = $(am_libmpbsd_la_OBJECTS)
-@WANT_MPBSD_TRUE@am_libmpbsd_la_rpath =
-DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
-depcomp =
-am__depfiles_maybe =
-COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
- $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
-LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
- --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
- $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
-CCLD = $(CC)
-LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
- --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \
- $(LDFLAGS) -o $@
-SOURCES = $(libmpbsd_la_SOURCES)
-DIST_SOURCES = $(libmpbsd_la_SOURCES)
-ETAGS = etags
-CTAGS = ctags
-DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
-ABI = @ABI@
-ACLOCAL = @ACLOCAL@
-AMTAR = @AMTAR@
-AR = @AR@
-AS = @AS@
-ASMFLAGS = @ASMFLAGS@
-AUTOCONF = @AUTOCONF@
-AUTOHEADER = @AUTOHEADER@
-AUTOMAKE = @AUTOMAKE@
-AWK = @AWK@
-CALLING_CONVENTIONS_OBJS = @CALLING_CONVENTIONS_OBJS@
-CC = @CC@
-CCAS = @CCAS@
-CC_FOR_BUILD = @CC_FOR_BUILD@
-CFLAGS = @CFLAGS@
-CPP = @CPP@
-CPPFLAGS = @CPPFLAGS@
-CPP_FOR_BUILD = @CPP_FOR_BUILD@
-CXX = @CXX@
-CXXCPP = @CXXCPP@
-CXXFLAGS = @CXXFLAGS@
-CYGPATH_W = @CYGPATH_W@
-DEFN_LONG_LONG_LIMB = @DEFN_LONG_LONG_LIMB@
-DEFS = @DEFS@
-DLLTOOL = @DLLTOOL@
-DSYMUTIL = @DSYMUTIL@
-DUMPBIN = @DUMPBIN@
-ECHO_C = @ECHO_C@
-ECHO_N = @ECHO_N@
-ECHO_T = @ECHO_T@
-EGREP = @EGREP@
-EXEEXT = @EXEEXT@
-EXEEXT_FOR_BUILD = @EXEEXT_FOR_BUILD@
-FGREP = @FGREP@
-GMP_LDFLAGS = @GMP_LDFLAGS@
-GMP_LIMB_BITS = @GMP_LIMB_BITS@
-GMP_NAIL_BITS = @GMP_NAIL_BITS@
-GREP = @GREP@
-HAVE_CLOCK_01 = @HAVE_CLOCK_01@
-HAVE_CPUTIME_01 = @HAVE_CPUTIME_01@
-HAVE_GETRUSAGE_01 = @HAVE_GETRUSAGE_01@
-HAVE_GETTIMEOFDAY_01 = @HAVE_GETTIMEOFDAY_01@
-HAVE_HOST_CPU_FAMILY_power = @HAVE_HOST_CPU_FAMILY_power@
-HAVE_HOST_CPU_FAMILY_powerpc = @HAVE_HOST_CPU_FAMILY_powerpc@
-HAVE_SIGACTION_01 = @HAVE_SIGACTION_01@
-HAVE_SIGALTSTACK_01 = @HAVE_SIGALTSTACK_01@
-HAVE_SIGSTACK_01 = @HAVE_SIGSTACK_01@
-HAVE_STACK_T_01 = @HAVE_STACK_T_01@
-HAVE_SYS_RESOURCE_H_01 = @HAVE_SYS_RESOURCE_H_01@
-INSTALL = @INSTALL@
-INSTALL_DATA = @INSTALL_DATA@
-INSTALL_PROGRAM = @INSTALL_PROGRAM@
-INSTALL_SCRIPT = @INSTALL_SCRIPT@
-INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
-LD = @LD@
-LDFLAGS = @LDFLAGS@
-LEX = @LEX@
-LEXLIB = @LEXLIB@
-LEX_OUTPUT_ROOT = @LEX_OUTPUT_ROOT@
-LIBCURSES = @LIBCURSES@
-LIBGMPXX_LDFLAGS = @LIBGMPXX_LDFLAGS@
-LIBGMP_DLL = @LIBGMP_DLL@
-LIBGMP_LDFLAGS = @LIBGMP_LDFLAGS@
-LIBM = @LIBM@
-LIBM_FOR_BUILD = @LIBM_FOR_BUILD@
-LIBOBJS = @LIBOBJS@
-LIBREADLINE = @LIBREADLINE@
-LIBS = @LIBS@
-LIBTOOL = @LIBTOOL@
-LIPO = @LIPO@
-LN_S = @LN_S@
-LTLIBOBJS = @LTLIBOBJS@
-M4 = @M4@
-MAINT = @MAINT@
-MAKEINFO = @MAKEINFO@
-MANIFEST_TOOL = @MANIFEST_TOOL@
-MKDIR_P = @MKDIR_P@
-NM = @NM@
-NMEDIT = @NMEDIT@
-OBJDUMP = @OBJDUMP@
-OBJEXT = @OBJEXT@
-OTOOL = @OTOOL@
-OTOOL64 = @OTOOL64@
-PACKAGE = @PACKAGE@
-PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
-PACKAGE_NAME = @PACKAGE_NAME@
-PACKAGE_STRING = @PACKAGE_STRING@
-PACKAGE_TARNAME = @PACKAGE_TARNAME@
-PACKAGE_URL = @PACKAGE_URL@
-PACKAGE_VERSION = @PACKAGE_VERSION@
-PATH_SEPARATOR = @PATH_SEPARATOR@
-RANLIB = @RANLIB@
-SED = @SED@
-SET_MAKE = @SET_MAKE@
-SHELL = @SHELL@
-SPEED_CYCLECOUNTER_OBJ = @SPEED_CYCLECOUNTER_OBJ@
-STRIP = @STRIP@
-TAL_OBJECT = @TAL_OBJECT@
-TUNE_SQR_OBJ = @TUNE_SQR_OBJ@
-U = @U@
-U_FOR_BUILD = @U_FOR_BUILD@
-VERSION = @VERSION@
-WITH_READLINE_01 = @WITH_READLINE_01@
-YACC = @YACC@
-YFLAGS = @YFLAGS@
-abs_builddir = @abs_builddir@
-abs_srcdir = @abs_srcdir@
-abs_top_builddir = @abs_top_builddir@
-abs_top_srcdir = @abs_top_srcdir@
-ac_ct_AR = @ac_ct_AR@
-ac_ct_CC = @ac_ct_CC@
-ac_ct_CXX = @ac_ct_CXX@
-ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
-am__leading_dot = @am__leading_dot@
-am__tar = @am__tar@
-am__untar = @am__untar@
-bindir = @bindir@
-build = @build@
-build_alias = @build_alias@
-build_cpu = @build_cpu@
-build_os = @build_os@
-build_vendor = @build_vendor@
-builddir = @builddir@
-datadir = @datadir@
-datarootdir = @datarootdir@
-docdir = @docdir@
-dvidir = @dvidir@
-exec_prefix = @exec_prefix@
-gmp_srclinks = @gmp_srclinks@
-host = @host@
-host_alias = @host_alias@
-host_cpu = @host_cpu@
-host_os = @host_os@
-host_vendor = @host_vendor@
-htmldir = @htmldir@
-includedir = @includedir@
-infodir = @infodir@
-install_sh = @install_sh@
-libdir = @libdir@
-libexecdir = @libexecdir@
-localedir = @localedir@
-localstatedir = @localstatedir@
-mandir = @mandir@
-mkdir_p = @mkdir_p@
-mpn_objects = @mpn_objects@
-mpn_objs_in_libgmp = @mpn_objs_in_libgmp@
-mpn_objs_in_libmp = @mpn_objs_in_libmp@
-oldincludedir = @oldincludedir@
-pdfdir = @pdfdir@
-prefix = @prefix@
-program_transform_name = @program_transform_name@
-psdir = @psdir@
-sbindir = @sbindir@
-sharedstatedir = @sharedstatedir@
-srcdir = @srcdir@
-sysconfdir = @sysconfdir@
-target_alias = @target_alias@
-top_build_prefix = @top_build_prefix@
-top_builddir = @top_builddir@
-top_srcdir = @top_srcdir@
-
-# -I$(top_srcdir)/mpz is for #includes done by mpz .c files. Perhaps most
-# compilers are smart enough to look in the same directory as the .c file
-# already, but lets make absolutely sure.
-#
-INCLUDES = -DBERKELEY_MP -D__GMP_WITHIN_GMP -D__gmpz_realloc=_mp_realloc \
- -I$(top_srcdir) -I$(top_srcdir)/mpz
-
-
-# The mpz sources here all know to look for -DBERKELEY_MP to compile to in
-# mpbsd form.
-#
-libmpbsd_la_SOURCES = itom.c mfree.c min.c mout.c mtox.c rpow.c sdiv.c xtom.c \
- ../mpz/add.c ../mpz/cmp.c ../mpz/gcd.c ../mpz/mul.c ../mpz/powm.c \
- ../mpz/realloc.c ../mpz/set.c ../mpz/sqrtrem.c ../mpz/sub.c ../mpz/tdiv_qr.c
-
-@WANT_MPBSD_TRUE@noinst_LTLIBRARIES = libmpbsd.la
-all: all-am
-
-.SUFFIXES:
-.SUFFIXES: .c .lo .o .obj
-$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps)
- @for dep in $?; do \
- case '$(am__configure_deps)' in \
- *$$dep*) \
- ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
- && { if test -f $@; then exit 0; else break; fi; }; \
- exit 1;; \
- esac; \
- done; \
- echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu --ignore-deps mpbsd/Makefile'; \
- $(am__cd) $(top_srcdir) && \
- $(AUTOMAKE) --gnu --ignore-deps mpbsd/Makefile
-.PRECIOUS: Makefile
-Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
- @case '$?' in \
- *config.status*) \
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
- *) \
- echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
- cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
- esac;
-
-$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-
-$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(am__aclocal_m4_deps):
-
-clean-noinstLTLIBRARIES:
- -test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES)
- @list='$(noinst_LTLIBRARIES)'; for p in $$list; do \
- dir="`echo $$p | sed -e 's|/[^/]*$$||'`"; \
- test "$$dir" != "$$p" || dir=.; \
- echo "rm -f \"$${dir}/so_locations\""; \
- rm -f "$${dir}/so_locations"; \
- done
-libmpbsd.la: $(libmpbsd_la_OBJECTS) $(libmpbsd_la_DEPENDENCIES)
- $(LINK) $(am_libmpbsd_la_rpath) $(libmpbsd_la_OBJECTS) $(libmpbsd_la_LIBADD) $(LIBS)
-
-mostlyclean-compile:
- -rm -f *.$(OBJEXT)
-
-distclean-compile:
- -rm -f *.tab.c
-$(top_builddir)/ansi2knr:
- $(am__cd) $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) ./ansi2knr
-
-mostlyclean-kr:
- -test "$U" = "" || rm -f *_.c
-
-.c.o:
- $(COMPILE) -c $<
-
-.c.obj:
- $(COMPILE) -c `$(CYGPATH_W) '$<'`
-
-.c.lo:
- $(LTCOMPILE) -c -o $@ $<
-
-add_.lo: add_.c
- $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o add_.lo `test -f 'add_.c' || echo '$(srcdir)/'`add_.c
-
-add.lo: ../mpz/add.c
- $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o add.lo `test -f '../mpz/add.c' || echo '$(srcdir)/'`../mpz/add.c
-
-cmp_.lo: cmp_.c
- $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o cmp_.lo `test -f 'cmp_.c' || echo '$(srcdir)/'`cmp_.c
-
-cmp.lo: ../mpz/cmp.c
- $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o cmp.lo `test -f '../mpz/cmp.c' || echo '$(srcdir)/'`../mpz/cmp.c
-
-gcd_.lo: gcd_.c
- $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o gcd_.lo `test -f 'gcd_.c' || echo '$(srcdir)/'`gcd_.c
-
-gcd.lo: ../mpz/gcd.c
- $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o gcd.lo `test -f '../mpz/gcd.c' || echo '$(srcdir)/'`../mpz/gcd.c
-
-mul_.lo: mul_.c
- $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o mul_.lo `test -f 'mul_.c' || echo '$(srcdir)/'`mul_.c
-
-mul.lo: ../mpz/mul.c
- $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o mul.lo `test -f '../mpz/mul.c' || echo '$(srcdir)/'`../mpz/mul.c
-
-powm_.lo: powm_.c
- $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o powm_.lo `test -f 'powm_.c' || echo '$(srcdir)/'`powm_.c
-
-powm.lo: ../mpz/powm.c
- $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o powm.lo `test -f '../mpz/powm.c' || echo '$(srcdir)/'`../mpz/powm.c
-
-realloc_.lo: realloc_.c
- $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o realloc_.lo `test -f 'realloc_.c' || echo '$(srcdir)/'`realloc_.c
-
-realloc.lo: ../mpz/realloc.c
- $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o realloc.lo `test -f '../mpz/realloc.c' || echo '$(srcdir)/'`../mpz/realloc.c
-
-set_.lo: set_.c
- $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o set_.lo `test -f 'set_.c' || echo '$(srcdir)/'`set_.c
-
-set.lo: ../mpz/set.c
- $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o set.lo `test -f '../mpz/set.c' || echo '$(srcdir)/'`../mpz/set.c
-
-sqrtrem_.lo: sqrtrem_.c
- $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o sqrtrem_.lo `test -f 'sqrtrem_.c' || echo '$(srcdir)/'`sqrtrem_.c
-
-sqrtrem.lo: ../mpz/sqrtrem.c
- $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o sqrtrem.lo `test -f '../mpz/sqrtrem.c' || echo '$(srcdir)/'`../mpz/sqrtrem.c
-
-sub_.lo: sub_.c
- $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o sub_.lo `test -f 'sub_.c' || echo '$(srcdir)/'`sub_.c
-
-sub.lo: ../mpz/sub.c
- $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o sub.lo `test -f '../mpz/sub.c' || echo '$(srcdir)/'`../mpz/sub.c
-
-tdiv_qr_.lo: tdiv_qr_.c
- $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o tdiv_qr_.lo `test -f 'tdiv_qr_.c' || echo '$(srcdir)/'`tdiv_qr_.c
-
-tdiv_qr.lo: ../mpz/tdiv_qr.c
- $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o tdiv_qr.lo `test -f '../mpz/tdiv_qr.c' || echo '$(srcdir)/'`../mpz/tdiv_qr.c
-add_.c: ../mpz/add.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/../mpz/add.c; then echo $(srcdir)/../mpz/add.c; else echo ../mpz/add.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-cmp_.c: ../mpz/cmp.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/../mpz/cmp.c; then echo $(srcdir)/../mpz/cmp.c; else echo ../mpz/cmp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-gcd_.c: ../mpz/gcd.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/../mpz/gcd.c; then echo $(srcdir)/../mpz/gcd.c; else echo ../mpz/gcd.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-itom_.c: itom.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/itom.c; then echo $(srcdir)/itom.c; else echo itom.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mfree_.c: mfree.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mfree.c; then echo $(srcdir)/mfree.c; else echo mfree.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-min_.c: min.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/min.c; then echo $(srcdir)/min.c; else echo min.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mout_.c: mout.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mout.c; then echo $(srcdir)/mout.c; else echo mout.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mtox_.c: mtox.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mtox.c; then echo $(srcdir)/mtox.c; else echo mtox.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mul_.c: ../mpz/mul.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/../mpz/mul.c; then echo $(srcdir)/../mpz/mul.c; else echo ../mpz/mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-powm_.c: ../mpz/powm.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/../mpz/powm.c; then echo $(srcdir)/../mpz/powm.c; else echo ../mpz/powm.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-realloc_.c: ../mpz/realloc.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/../mpz/realloc.c; then echo $(srcdir)/../mpz/realloc.c; else echo ../mpz/realloc.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-rpow_.c: rpow.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/rpow.c; then echo $(srcdir)/rpow.c; else echo rpow.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-sdiv_.c: sdiv.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sdiv.c; then echo $(srcdir)/sdiv.c; else echo sdiv.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-set_.c: ../mpz/set.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/../mpz/set.c; then echo $(srcdir)/../mpz/set.c; else echo ../mpz/set.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-sqrtrem_.c: ../mpz/sqrtrem.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/../mpz/sqrtrem.c; then echo $(srcdir)/../mpz/sqrtrem.c; else echo ../mpz/sqrtrem.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-sub_.c: ../mpz/sub.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/../mpz/sub.c; then echo $(srcdir)/../mpz/sub.c; else echo ../mpz/sub.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-tdiv_qr_.c: ../mpz/tdiv_qr.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/../mpz/tdiv_qr.c; then echo $(srcdir)/../mpz/tdiv_qr.c; else echo ../mpz/tdiv_qr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-xtom_.c: xtom.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/xtom.c; then echo $(srcdir)/xtom.c; else echo xtom.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-add_.$(OBJEXT) add_.lo cmp_.$(OBJEXT) cmp_.lo gcd_.$(OBJEXT) gcd_.lo \
-itom_.$(OBJEXT) itom_.lo mfree_.$(OBJEXT) mfree_.lo min_.$(OBJEXT) \
-min_.lo mout_.$(OBJEXT) mout_.lo mtox_.$(OBJEXT) mtox_.lo \
-mul_.$(OBJEXT) mul_.lo powm_.$(OBJEXT) powm_.lo realloc_.$(OBJEXT) \
-realloc_.lo rpow_.$(OBJEXT) rpow_.lo sdiv_.$(OBJEXT) sdiv_.lo \
-set_.$(OBJEXT) set_.lo sqrtrem_.$(OBJEXT) sqrtrem_.lo sub_.$(OBJEXT) \
-sub_.lo tdiv_qr_.$(OBJEXT) tdiv_qr_.lo xtom_.$(OBJEXT) xtom_.lo : \
-$(ANSI2KNR)
-
-mostlyclean-libtool:
- -rm -f *.lo
-
-clean-libtool:
- -rm -rf .libs _libs
-
-ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
- list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
- unique=`for i in $$list; do \
- if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
- done | \
- $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
- END { if (nonempty) { for (i in files) print i; }; }'`; \
- mkid -fID $$unique
-tags: TAGS
-
-TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
- $(TAGS_FILES) $(LISP)
- set x; \
- here=`pwd`; \
- list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
- unique=`for i in $$list; do \
- if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
- done | \
- $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
- END { if (nonempty) { for (i in files) print i; }; }'`; \
- shift; \
- if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
- test -n "$$unique" || unique=$$empty_fix; \
- if test $$# -gt 0; then \
- $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
- "$$@" $$unique; \
- else \
- $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
- $$unique; \
- fi; \
- fi
-ctags: CTAGS
-CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
- $(TAGS_FILES) $(LISP)
- list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
- unique=`for i in $$list; do \
- if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
- done | \
- $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
- END { if (nonempty) { for (i in files) print i; }; }'`; \
- test -z "$(CTAGS_ARGS)$$unique" \
- || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
- $$unique
-
-GTAGS:
- here=`$(am__cd) $(top_builddir) && pwd` \
- && $(am__cd) $(top_srcdir) \
- && gtags -i $(GTAGS_ARGS) "$$here"
-
-distclean-tags:
- -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
-
-distdir: $(DISTFILES)
- @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
- topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
- list='$(DISTFILES)'; \
- dist_files=`for file in $$list; do echo $$file; done | \
- sed -e "s|^$$srcdirstrip/||;t" \
- -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
- case $$dist_files in \
- */*) $(MKDIR_P) `echo "$$dist_files" | \
- sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
- sort -u` ;; \
- esac; \
- for file in $$dist_files; do \
- if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
- if test -d $$d/$$file; then \
- dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
- if test -d "$(distdir)/$$file"; then \
- find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
- fi; \
- if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
- cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
- find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
- fi; \
- cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
- else \
- test -f "$(distdir)/$$file" \
- || cp -p $$d/$$file "$(distdir)/$$file" \
- || exit 1; \
- fi; \
- done
-check-am: all-am
-check: check-am
-all-am: Makefile $(LTLIBRARIES)
-installdirs:
-install: install-am
-install-exec: install-exec-am
-install-data: install-data-am
-uninstall: uninstall-am
-
-install-am: all-am
- @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
-
-installcheck: installcheck-am
-install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
-mostlyclean-generic:
-
-clean-generic:
-
-distclean-generic:
- -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
- -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
-
-maintainer-clean-generic:
- @echo "This command is intended for maintainers to use"
- @echo "it deletes files that may require special tools to rebuild."
-clean: clean-am
-
-clean-am: clean-generic clean-libtool clean-noinstLTLIBRARIES \
- mostlyclean-am
-
-distclean: distclean-am
- -rm -f Makefile
-distclean-am: clean-am distclean-compile distclean-generic \
- distclean-tags
-
-dvi: dvi-am
-
-dvi-am:
-
-html: html-am
-
-html-am:
-
-info: info-am
-
-info-am:
-
-install-data-am:
-
-install-dvi: install-dvi-am
-
-install-dvi-am:
-
-install-exec-am:
-
-install-html: install-html-am
-
-install-html-am:
-
-install-info: install-info-am
-
-install-info-am:
-
-install-man:
-
-install-pdf: install-pdf-am
-
-install-pdf-am:
-
-install-ps: install-ps-am
-
-install-ps-am:
-
-installcheck-am:
-
-maintainer-clean: maintainer-clean-am
- -rm -f Makefile
-maintainer-clean-am: distclean-am maintainer-clean-generic
-
-mostlyclean: mostlyclean-am
-
-mostlyclean-am: mostlyclean-compile mostlyclean-generic mostlyclean-kr \
- mostlyclean-libtool
-
-pdf: pdf-am
-
-pdf-am:
-
-ps: ps-am
-
-ps-am:
-
-uninstall-am:
-
-.MAKE: $(top_builddir)/ansi2knr install-am install-strip
-
-.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
- clean-libtool clean-noinstLTLIBRARIES ctags distclean \
- distclean-compile distclean-generic distclean-libtool \
- distclean-tags distdir dvi dvi-am html html-am info info-am \
- install install-am install-data install-data-am install-dvi \
- install-dvi-am install-exec install-exec-am install-html \
- install-html-am install-info install-info-am install-man \
- install-pdf install-pdf-am install-ps install-ps-am \
- install-strip installcheck installcheck-am installdirs \
- maintainer-clean maintainer-clean-generic mostlyclean \
- mostlyclean-compile mostlyclean-generic mostlyclean-kr \
- mostlyclean-libtool pdf pdf-am ps ps-am tags uninstall \
- uninstall-am
-
-
-# Tell versions [3.59,3.63) of GNU make to not export all variables.
-# Otherwise a system limit (for SysV at least) may be exceeded.
-.NOEXPORT:
+++ /dev/null
-/* itom -- BSD compatible allocate and initiate a MINT.
-
-Copyright 1991, 1994, 1995, 2000, 2001 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
-
-#include "mp.h"
-#include "gmp.h"
-#include "gmp-impl.h"
-
-MINT *
-itom (signed short int n)
-{
- MINT *x;
- mp_ptr xp;
-
- x = (MINT *) (*__gmp_allocate_func) (sizeof (MINT));
- x->_mp_alloc = 1;
- x->_mp_d = xp = (mp_ptr) (*__gmp_allocate_func) (BYTES_PER_MP_LIMB);
- if (n > 0)
- {
- x->_mp_size = 1;
- xp[0] = n;
- }
- else if (n < 0)
- {
- x->_mp_size = -1;
- xp[0] = (unsigned short) -n;
- }
- else
- x->_mp_size = 0;
-
- return x;
-}
+++ /dev/null
-/* mfree -- BSD compatible mfree.
-
-Copyright 1991, 1994, 1995, 2000, 2001 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
-
-#include "mp.h"
-#include "gmp.h"
-#include "gmp-impl.h"
-
-void
-mfree (MINT *m)
-{
- (*__gmp_free_func) (m->_mp_d, m->_mp_alloc * BYTES_PER_MP_LIMB);
- (*__gmp_free_func) (m, sizeof (MINT));
-}
+++ /dev/null
-/* min(MINT) -- Do decimal input from standard input and store result in
- MINT.
-
-Copyright 1991, 1994, 1996, 2000, 2001 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
-
-#include <stdio.h>
-#include <ctype.h>
-#include "mp.h"
-#include "gmp.h"
-#include "gmp-impl.h"
-
-extern const unsigned char __gmp_digit_value_tab[];
-#define digit_value_tab __gmp_digit_value_tab
-
-void
-min (MINT *dest)
-{
- char *str;
- size_t alloc_size, str_size;
- int c;
- int negative;
- mp_size_t dest_size;
- const unsigned char *digit_value;
-
- digit_value = digit_value_tab;
-
- alloc_size = 100;
- str = (char *) (*__gmp_allocate_func) (alloc_size);
- str_size = 0;
-
- /* Skip whitespace. */
- do
- c = getc (stdin);
- while (isspace (c));
-
- negative = 0;
- if (c == '-')
- {
- negative = 1;
- c = getc (stdin);
- }
-
- if (c == EOF || digit_value[c] >= 10)
- return; /* error if no digits */
-
- do
- {
- int dig;
- dig = digit_value[c];
- if (dig >= 10)
- break;
- if (str_size >= alloc_size)
- {
- size_t old_alloc_size = alloc_size;
- alloc_size = alloc_size * 3 / 2;
- str = (char *) (*__gmp_reallocate_func) (str, old_alloc_size, alloc_size);
- }
- str[str_size++] = dig;
- c = getc (stdin);
- }
- while (c != EOF);
-
- ungetc (c, stdin);
-
- dest_size = str_size / mp_bases[10].chars_per_limb + 1;
- if (dest->_mp_alloc < dest_size)
- _mp_realloc (dest, dest_size);
-
- dest_size = mpn_set_str (dest->_mp_d, (unsigned char *) str, str_size, 10);
- dest->_mp_size = negative ? -dest_size : dest_size;
-
- (*__gmp_free_func) (str, alloc_size);
- return;
-}
+++ /dev/null
-/* mout(MINT) -- Do decimal output of MINT to standard output.
-
-Copyright 1991, 1994, 1996, 2000, 2001, 2002, 2005 Free Software Foundation,
-Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
-
-#include <stdio.h>
-#include <string.h>
-#include "mp.h"
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "longlong.h"
-
-void
-mout (const MINT *x)
-{
- mp_ptr xp;
- mp_srcptr x_ptr;
- mp_size_t x_size;
- unsigned char *str;
- size_t str_size;
- int i;
- TMP_DECL;
-
- x_size = x->_mp_size;
- if (x_size == 0)
- {
- fputc ('0', stdout);
- fputc ('\n', stdout);
- return;
- }
- if (x_size < 0)
- {
- fputc ('-', stdout);
- x_size = -x_size;
- }
-
- TMP_MARK;
- x_ptr = x->_mp_d;
- MPN_SIZEINBASE (str_size, x_ptr, x_size, 10);
- str_size += 2;
- str = (unsigned char *) TMP_ALLOC (str_size);
-
- /* mpn_get_str clobbers its argument */
- xp = TMP_ALLOC_LIMBS (x_size);
- MPN_COPY (xp, x_ptr, x_size);
-
- str_size = mpn_get_str (str, 10, xp, x_size);
-
- /* mpn_get_str might make a leading zero, skip it. */
- str_size -= (*str == 0);
- str += (*str == 0);
- ASSERT (*str != 0);
-
- /* Translate to printable chars. */
- for (i = 0; i < str_size; i++)
- str[i] = "0123456789"[str[i]];
- str[str_size] = 0;
-
- str_size = strlen ((char *) str);
- if (str_size % 10 != 0)
- {
- fwrite (str, 1, str_size % 10, stdout);
- str += str_size % 10;
- str_size -= str_size % 10;
- if (str_size != 0)
- fputc (' ', stdout);
- }
- for (i = 0; i < str_size; i += 10)
- {
- fwrite (str, 1, 10, stdout);
- str += 10;
- if (i + 10 < str_size)
- fputc (' ', stdout);
- }
- fputc ('\n', stdout);
- TMP_FREE;
-}
+++ /dev/null
-/* mtox -- Convert OPERAND to hexadecimal and return a malloc'ed string
- with the result of the conversion.
-
-Copyright 1991, 1994, 2000, 2001, 2002 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
-
-#include <string.h>
-#include "mp.h"
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "longlong.h"
-
-char *
-mtox (const MINT *x)
-{
- mp_size_t xsize = x->_mp_size;
- mp_ptr xp;
- mp_size_t xsign;
- unsigned char *str, *s;
- size_t str_size, alloc_size, i;
-
- xsign = xsize;
- if (xsize < 0)
- xsize = -xsize;
-
- /* digits, plus '\0', plus possible '-', for an exact size */
- xp = x->_mp_d;
- MPN_SIZEINBASE_16 (alloc_size, xp, xsize);
- alloc_size += 1 + (xsign < 0);
-
- str = (unsigned char *) (*__gmp_allocate_func) (alloc_size);
- s = str;
-
- if (xsign < 0)
- *s++ = '-';
-
- str_size = mpn_get_str (s, 16, xp, xsize);
- ASSERT (str_size <= alloc_size - (xsign < 0));
- ASSERT (str_size == 1 || *s != 0);
-
- for (i = 0; i < str_size; i++)
- s[i] = "0123456789abcdef"[s[i]];
- s[str_size] = 0;
-
- ASSERT (strlen (str) + 1 == alloc_size);
- return (char *) str;
-}
+++ /dev/null
-/* rpow -- MINT raised to short. */
-
-/*
-Copyright 2001 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
-
-#include "mp.h"
-#include "gmp.h"
-#include "gmp-impl.h"
-
-void
-rpow (const MINT *b, short e, MINT *r)
-{
- if (e >= 0)
- mpz_n_pow_ui (r, PTR(b), (mp_size_t) SIZ(b), (unsigned long) e);
- else
- SIZ(r) = 0;
-}
+++ /dev/null
-/* sdiv -- Divide a MINT by a short integer. Produce a MINT quotient
- and a short remainder.
-
-Copyright 1991, 1994, 1995, 2000, 2001 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
-
-#include "mp.h"
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "longlong.h"
-
-void
-sdiv (const MINT *dividend, signed short int divisor_short, MINT *quot, short *rem_ptr)
-{
- mp_size_t sign_dividend;
- signed long int sign_divisor;
- mp_size_t dividend_size, quot_size;
- mp_ptr dividend_ptr, quot_ptr;
- mp_limb_t divisor_limb;
- mp_limb_t remainder_limb;
-
- sign_dividend = dividend->_mp_size;
- dividend_size = ABS (dividend->_mp_size);
-
- if (dividend_size == 0)
- {
- quot->_mp_size = 0;
- *rem_ptr = 0;
- return;
- }
-
- sign_divisor = divisor_short;
- divisor_limb = (unsigned short) ABS (divisor_short);
-
- /* No need for temporary allocation and copying even if QUOT == DIVIDEND
- as the divisor is just one limb, and thus no intermediate remainders
- need to be stored. */
-
- if (quot->_mp_alloc < dividend_size)
- _mp_realloc (quot, dividend_size);
-
- quot_ptr = quot->_mp_d;
- dividend_ptr = dividend->_mp_d;
-
- remainder_limb = mpn_divmod_1 (quot_ptr,
- dividend_ptr, dividend_size, divisor_limb);
-
- *rem_ptr = sign_dividend >= 0 ? remainder_limb : -remainder_limb;
- /* The quotient is DIVIDEND_SIZE limbs, but the most significant
- might be zero. Set QUOT_SIZE properly. */
- quot_size = dividend_size - (quot_ptr[dividend_size - 1] == 0);
- quot->_mp_size = (sign_divisor ^ sign_dividend) >= 0 ? quot_size : -quot_size;
-}
+++ /dev/null
-/* xtom -- convert a hexadecimal string to a MINT, and return a pointer to
- the MINT.
-
-Copyright 1991, 1994, 1995, 1996, 2000, 2001, 2002, 2005 Free Software
-Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
-
-#include <string.h>
-#include <ctype.h>
-#include "mp.h"
-#include "gmp.h"
-#include "gmp-impl.h"
-
-extern const unsigned char __gmp_digit_value_tab[];
-#define digit_value __gmp_digit_value_tab
-
-MINT *
-xtom (const char *str)
-{
- size_t str_size;
- char *s, *begs;
- size_t i;
- mp_size_t xsize;
- int c;
- int negative;
- MINT *x = (MINT *) (*__gmp_allocate_func) (sizeof (MINT));
- TMP_DECL;
-
- /* Skip whitespace. */
- do
- c = (unsigned char) *str++;
- while (isspace (c));
-
- negative = 0;
- if (c == '-')
- {
- negative = 1;
- c = (unsigned char) *str++;
- }
-
- if (digit_value[c] >= 16)
- return 0; /* error if no digits */
-
- TMP_MARK;
- str_size = strlen (str - 1);
- s = begs = (char *) TMP_ALLOC (str_size + 1);
-
- for (i = 0; i < str_size; i++)
- {
- if (!isspace (c))
- {
- int dig = digit_value[c];
- if (dig >= 16)
- {
- TMP_FREE;
- return 0;
- }
- *s++ = dig;
- }
- c = (unsigned char) *str++;
- }
-
- str_size = s - begs;
-
- xsize = str_size / mp_bases[16].chars_per_limb + 1;
- x->_mp_alloc = xsize;
- x->_mp_d = (mp_ptr) (*__gmp_allocate_func) (xsize * BYTES_PER_MP_LIMB);
-
- xsize = mpn_set_str (x->_mp_d, (unsigned char *) begs, str_size, 16);
- x->_mp_size = negative ? -xsize : xsize;
-
- TMP_FREE;
- return x;
-}
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.6 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
# along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
VPATH = @srcdir@
+am__make_dryrun = \
+ { \
+ am__dry=no; \
+ case $$MAKEFLAGS in \
+ *\\[\ \ ]*) \
+ echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \
+ | grep '^AM OK$$' >/dev/null || am__dry=yes;; \
+ *) \
+ for am__flg in $$MAKEFLAGS; do \
+ case $$am__flg in \
+ *=*|--*) ;; \
+ *n*) am__dry=yes; break;; \
+ esac; \
+ done;; \
+ esac; \
+ test $$am__dry = yes; \
+ }
pkgdatadir = $(datadir)/@PACKAGE@
pkgincludedir = $(includedir)/@PACKAGE@
pkglibdir = $(libdir)/@PACKAGE@
POST_UNINSTALL = :
build_triplet = @build@
host_triplet = @host@
-ANSI2KNR = $(top_builddir)/ansi2knr
subdir = mpf
DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
- $(top_srcdir)/configure.in
+ $(top_srcdir)/configure.ac
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
$(ACLOCAL_M4)
mkinstalldirs = $(install_sh) -d
CONFIG_CLEAN_VPATH_FILES =
LTLIBRARIES = $(noinst_LTLIBRARIES)
libmpf_la_LIBADD =
-am_libmpf_la_OBJECTS = init$U.lo init2$U.lo inits$U.lo set$U.lo \
- set_ui$U.lo set_si$U.lo set_str$U.lo set_d$U.lo set_z$U.lo \
- set_q$U.lo iset$U.lo iset_ui$U.lo iset_si$U.lo iset_str$U.lo \
- iset_d$U.lo clear$U.lo clears$U.lo get_str$U.lo dump$U.lo \
- size$U.lo eq$U.lo reldiff$U.lo sqrt$U.lo random2$U.lo \
- inp_str$U.lo out_str$U.lo add$U.lo add_ui$U.lo sub$U.lo \
- sub_ui$U.lo ui_sub$U.lo mul$U.lo mul_ui$U.lo div$U.lo \
- div_ui$U.lo cmp$U.lo cmp_d$U.lo cmp_si$U.lo cmp_ui$U.lo \
- mul_2exp$U.lo div_2exp$U.lo abs$U.lo neg$U.lo get_d$U.lo \
- get_d_2exp$U.lo set_dfl_prec$U.lo set_prc$U.lo \
- set_prc_raw$U.lo get_dfl_prec$U.lo get_prc$U.lo ui_div$U.lo \
- sqrt_ui$U.lo pow_ui$U.lo urandomb$U.lo swap$U.lo get_si$U.lo \
- get_ui$U.lo int_p$U.lo ceilfloor$U.lo trunc$U.lo \
- fits_sint$U.lo fits_slong$U.lo fits_sshort$U.lo fits_uint$U.lo \
- fits_ulong$U.lo fits_ushort$U.lo
+am_libmpf_la_OBJECTS = init.lo init2.lo inits.lo set.lo set_ui.lo \
+ set_si.lo set_str.lo set_d.lo set_z.lo set_q.lo iset.lo \
+ iset_ui.lo iset_si.lo iset_str.lo iset_d.lo clear.lo clears.lo \
+ get_str.lo dump.lo size.lo eq.lo reldiff.lo sqrt.lo random2.lo \
+ inp_str.lo out_str.lo add.lo add_ui.lo sub.lo sub_ui.lo \
+ ui_sub.lo mul.lo mul_ui.lo div.lo div_ui.lo cmp.lo cmp_d.lo \
+ cmp_si.lo cmp_ui.lo mul_2exp.lo div_2exp.lo abs.lo neg.lo \
+ get_d.lo get_d_2exp.lo set_dfl_prec.lo set_prc.lo \
+ set_prc_raw.lo get_dfl_prec.lo get_prc.lo ui_div.lo sqrt_ui.lo \
+ pow_ui.lo urandomb.lo swap.lo get_si.lo get_ui.lo int_p.lo \
+ ceilfloor.lo trunc.lo fits_sint.lo fits_slong.lo \
+ fits_sshort.lo fits_uint.lo fits_ulong.lo fits_ushort.lo
libmpf_la_OBJECTS = $(am_libmpf_la_OBJECTS)
DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
depcomp =
$(LDFLAGS) -o $@
SOURCES = $(libmpf_la_SOURCES)
DIST_SOURCES = $(libmpf_la_SOURCES)
+am__can_run_installinfo = \
+ case $$AM_UPDATE_INFO_DIR in \
+ n|no|NO) false;; \
+ *) (install-info --version) >/dev/null 2>&1;; \
+ esac
ETAGS = etags
CTAGS = ctags
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
SPEED_CYCLECOUNTER_OBJ = @SPEED_CYCLECOUNTER_OBJ@
STRIP = @STRIP@
TAL_OBJECT = @TAL_OBJECT@
+TUNE_LIBS = @TUNE_LIBS@
TUNE_SQR_OBJ = @TUNE_SQR_OBJ@
-U = @U@
U_FOR_BUILD = @U_FOR_BUILD@
VERSION = @VERSION@
WITH_READLINE_01 = @WITH_READLINE_01@
mkdir_p = @mkdir_p@
mpn_objects = @mpn_objects@
mpn_objs_in_libgmp = @mpn_objs_in_libgmp@
-mpn_objs_in_libmp = @mpn_objs_in_libmp@
oldincludedir = @oldincludedir@
pdfdir = @pdfdir@
prefix = @prefix@
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-libmpf.la: $(libmpf_la_OBJECTS) $(libmpf_la_DEPENDENCIES)
+libmpf.la: $(libmpf_la_OBJECTS) $(libmpf_la_DEPENDENCIES) $(EXTRA_libmpf_la_DEPENDENCIES)
$(LINK) $(libmpf_la_OBJECTS) $(libmpf_la_LIBADD) $(LIBS)
mostlyclean-compile:
distclean-compile:
-rm -f *.tab.c
-$(top_builddir)/ansi2knr:
- $(am__cd) $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) ./ansi2knr
-
-mostlyclean-kr:
- -test "$U" = "" || rm -f *_.c
.c.o:
$(COMPILE) -c $<
.c.lo:
$(LTCOMPILE) -c -o $@ $<
-abs_.c: abs.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/abs.c; then echo $(srcdir)/abs.c; else echo abs.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-add_.c: add.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/add.c; then echo $(srcdir)/add.c; else echo add.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-add_ui_.c: add_ui.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/add_ui.c; then echo $(srcdir)/add_ui.c; else echo add_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-ceilfloor_.c: ceilfloor.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/ceilfloor.c; then echo $(srcdir)/ceilfloor.c; else echo ceilfloor.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-clear_.c: clear.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/clear.c; then echo $(srcdir)/clear.c; else echo clear.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-clears_.c: clears.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/clears.c; then echo $(srcdir)/clears.c; else echo clears.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-cmp_.c: cmp.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/cmp.c; then echo $(srcdir)/cmp.c; else echo cmp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-cmp_d_.c: cmp_d.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/cmp_d.c; then echo $(srcdir)/cmp_d.c; else echo cmp_d.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-cmp_si_.c: cmp_si.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/cmp_si.c; then echo $(srcdir)/cmp_si.c; else echo cmp_si.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-cmp_ui_.c: cmp_ui.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/cmp_ui.c; then echo $(srcdir)/cmp_ui.c; else echo cmp_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-div_.c: div.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/div.c; then echo $(srcdir)/div.c; else echo div.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-div_2exp_.c: div_2exp.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/div_2exp.c; then echo $(srcdir)/div_2exp.c; else echo div_2exp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-div_ui_.c: div_ui.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/div_ui.c; then echo $(srcdir)/div_ui.c; else echo div_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-dump_.c: dump.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/dump.c; then echo $(srcdir)/dump.c; else echo dump.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-eq_.c: eq.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/eq.c; then echo $(srcdir)/eq.c; else echo eq.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-fits_sint_.c: fits_sint.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/fits_sint.c; then echo $(srcdir)/fits_sint.c; else echo fits_sint.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-fits_slong_.c: fits_slong.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/fits_slong.c; then echo $(srcdir)/fits_slong.c; else echo fits_slong.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-fits_sshort_.c: fits_sshort.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/fits_sshort.c; then echo $(srcdir)/fits_sshort.c; else echo fits_sshort.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-fits_uint_.c: fits_uint.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/fits_uint.c; then echo $(srcdir)/fits_uint.c; else echo fits_uint.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-fits_ulong_.c: fits_ulong.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/fits_ulong.c; then echo $(srcdir)/fits_ulong.c; else echo fits_ulong.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-fits_ushort_.c: fits_ushort.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/fits_ushort.c; then echo $(srcdir)/fits_ushort.c; else echo fits_ushort.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-get_d_.c: get_d.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/get_d.c; then echo $(srcdir)/get_d.c; else echo get_d.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-get_d_2exp_.c: get_d_2exp.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/get_d_2exp.c; then echo $(srcdir)/get_d_2exp.c; else echo get_d_2exp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-get_dfl_prec_.c: get_dfl_prec.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/get_dfl_prec.c; then echo $(srcdir)/get_dfl_prec.c; else echo get_dfl_prec.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-get_prc_.c: get_prc.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/get_prc.c; then echo $(srcdir)/get_prc.c; else echo get_prc.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-get_si_.c: get_si.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/get_si.c; then echo $(srcdir)/get_si.c; else echo get_si.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-get_str_.c: get_str.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/get_str.c; then echo $(srcdir)/get_str.c; else echo get_str.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-get_ui_.c: get_ui.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/get_ui.c; then echo $(srcdir)/get_ui.c; else echo get_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-init_.c: init.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/init.c; then echo $(srcdir)/init.c; else echo init.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-init2_.c: init2.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/init2.c; then echo $(srcdir)/init2.c; else echo init2.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-inits_.c: inits.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/inits.c; then echo $(srcdir)/inits.c; else echo inits.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-inp_str_.c: inp_str.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/inp_str.c; then echo $(srcdir)/inp_str.c; else echo inp_str.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-int_p_.c: int_p.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/int_p.c; then echo $(srcdir)/int_p.c; else echo int_p.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-iset_.c: iset.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/iset.c; then echo $(srcdir)/iset.c; else echo iset.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-iset_d_.c: iset_d.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/iset_d.c; then echo $(srcdir)/iset_d.c; else echo iset_d.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-iset_si_.c: iset_si.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/iset_si.c; then echo $(srcdir)/iset_si.c; else echo iset_si.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-iset_str_.c: iset_str.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/iset_str.c; then echo $(srcdir)/iset_str.c; else echo iset_str.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-iset_ui_.c: iset_ui.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/iset_ui.c; then echo $(srcdir)/iset_ui.c; else echo iset_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mul_.c: mul.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mul.c; then echo $(srcdir)/mul.c; else echo mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mul_2exp_.c: mul_2exp.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mul_2exp.c; then echo $(srcdir)/mul_2exp.c; else echo mul_2exp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mul_ui_.c: mul_ui.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mul_ui.c; then echo $(srcdir)/mul_ui.c; else echo mul_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-neg_.c: neg.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/neg.c; then echo $(srcdir)/neg.c; else echo neg.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-out_str_.c: out_str.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/out_str.c; then echo $(srcdir)/out_str.c; else echo out_str.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-pow_ui_.c: pow_ui.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/pow_ui.c; then echo $(srcdir)/pow_ui.c; else echo pow_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-random2_.c: random2.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/random2.c; then echo $(srcdir)/random2.c; else echo random2.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-reldiff_.c: reldiff.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/reldiff.c; then echo $(srcdir)/reldiff.c; else echo reldiff.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-set_.c: set.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set.c; then echo $(srcdir)/set.c; else echo set.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-set_d_.c: set_d.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_d.c; then echo $(srcdir)/set_d.c; else echo set_d.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-set_dfl_prec_.c: set_dfl_prec.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_dfl_prec.c; then echo $(srcdir)/set_dfl_prec.c; else echo set_dfl_prec.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-set_prc_.c: set_prc.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_prc.c; then echo $(srcdir)/set_prc.c; else echo set_prc.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-set_prc_raw_.c: set_prc_raw.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_prc_raw.c; then echo $(srcdir)/set_prc_raw.c; else echo set_prc_raw.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-set_q_.c: set_q.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_q.c; then echo $(srcdir)/set_q.c; else echo set_q.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-set_si_.c: set_si.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_si.c; then echo $(srcdir)/set_si.c; else echo set_si.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-set_str_.c: set_str.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_str.c; then echo $(srcdir)/set_str.c; else echo set_str.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-set_ui_.c: set_ui.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_ui.c; then echo $(srcdir)/set_ui.c; else echo set_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-set_z_.c: set_z.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_z.c; then echo $(srcdir)/set_z.c; else echo set_z.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-size_.c: size.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/size.c; then echo $(srcdir)/size.c; else echo size.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-sqrt_.c: sqrt.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sqrt.c; then echo $(srcdir)/sqrt.c; else echo sqrt.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-sqrt_ui_.c: sqrt_ui.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sqrt_ui.c; then echo $(srcdir)/sqrt_ui.c; else echo sqrt_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-sub_.c: sub.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sub.c; then echo $(srcdir)/sub.c; else echo sub.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-sub_ui_.c: sub_ui.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sub_ui.c; then echo $(srcdir)/sub_ui.c; else echo sub_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-swap_.c: swap.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/swap.c; then echo $(srcdir)/swap.c; else echo swap.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-trunc_.c: trunc.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/trunc.c; then echo $(srcdir)/trunc.c; else echo trunc.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-ui_div_.c: ui_div.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/ui_div.c; then echo $(srcdir)/ui_div.c; else echo ui_div.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-ui_sub_.c: ui_sub.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/ui_sub.c; then echo $(srcdir)/ui_sub.c; else echo ui_sub.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-urandomb_.c: urandomb.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/urandomb.c; then echo $(srcdir)/urandomb.c; else echo urandomb.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-abs_.$(OBJEXT) abs_.lo add_.$(OBJEXT) add_.lo add_ui_.$(OBJEXT) \
-add_ui_.lo ceilfloor_.$(OBJEXT) ceilfloor_.lo clear_.$(OBJEXT) \
-clear_.lo clears_.$(OBJEXT) clears_.lo cmp_.$(OBJEXT) cmp_.lo \
-cmp_d_.$(OBJEXT) cmp_d_.lo cmp_si_.$(OBJEXT) cmp_si_.lo \
-cmp_ui_.$(OBJEXT) cmp_ui_.lo div_.$(OBJEXT) div_.lo \
-div_2exp_.$(OBJEXT) div_2exp_.lo div_ui_.$(OBJEXT) div_ui_.lo \
-dump_.$(OBJEXT) dump_.lo eq_.$(OBJEXT) eq_.lo fits_sint_.$(OBJEXT) \
-fits_sint_.lo fits_slong_.$(OBJEXT) fits_slong_.lo \
-fits_sshort_.$(OBJEXT) fits_sshort_.lo fits_uint_.$(OBJEXT) \
-fits_uint_.lo fits_ulong_.$(OBJEXT) fits_ulong_.lo \
-fits_ushort_.$(OBJEXT) fits_ushort_.lo get_d_.$(OBJEXT) get_d_.lo \
-get_d_2exp_.$(OBJEXT) get_d_2exp_.lo get_dfl_prec_.$(OBJEXT) \
-get_dfl_prec_.lo get_prc_.$(OBJEXT) get_prc_.lo get_si_.$(OBJEXT) \
-get_si_.lo get_str_.$(OBJEXT) get_str_.lo get_ui_.$(OBJEXT) get_ui_.lo \
-init_.$(OBJEXT) init_.lo init2_.$(OBJEXT) init2_.lo inits_.$(OBJEXT) \
-inits_.lo inp_str_.$(OBJEXT) inp_str_.lo int_p_.$(OBJEXT) int_p_.lo \
-iset_.$(OBJEXT) iset_.lo iset_d_.$(OBJEXT) iset_d_.lo \
-iset_si_.$(OBJEXT) iset_si_.lo iset_str_.$(OBJEXT) iset_str_.lo \
-iset_ui_.$(OBJEXT) iset_ui_.lo mul_.$(OBJEXT) mul_.lo \
-mul_2exp_.$(OBJEXT) mul_2exp_.lo mul_ui_.$(OBJEXT) mul_ui_.lo \
-neg_.$(OBJEXT) neg_.lo out_str_.$(OBJEXT) out_str_.lo \
-pow_ui_.$(OBJEXT) pow_ui_.lo random2_.$(OBJEXT) random2_.lo \
-reldiff_.$(OBJEXT) reldiff_.lo set_.$(OBJEXT) set_.lo set_d_.$(OBJEXT) \
-set_d_.lo set_dfl_prec_.$(OBJEXT) set_dfl_prec_.lo set_prc_.$(OBJEXT) \
-set_prc_.lo set_prc_raw_.$(OBJEXT) set_prc_raw_.lo set_q_.$(OBJEXT) \
-set_q_.lo set_si_.$(OBJEXT) set_si_.lo set_str_.$(OBJEXT) set_str_.lo \
-set_ui_.$(OBJEXT) set_ui_.lo set_z_.$(OBJEXT) set_z_.lo \
-size_.$(OBJEXT) size_.lo sqrt_.$(OBJEXT) sqrt_.lo sqrt_ui_.$(OBJEXT) \
-sqrt_ui_.lo sub_.$(OBJEXT) sub_.lo sub_ui_.$(OBJEXT) sub_ui_.lo \
-swap_.$(OBJEXT) swap_.lo trunc_.$(OBJEXT) trunc_.lo ui_div_.$(OBJEXT) \
-ui_div_.lo ui_sub_.$(OBJEXT) ui_sub_.lo urandomb_.$(OBJEXT) \
-urandomb_.lo : $(ANSI2KNR)
mostlyclean-libtool:
-rm -f *.lo
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
mostlyclean: mostlyclean-am
-mostlyclean-am: mostlyclean-compile mostlyclean-generic mostlyclean-kr \
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
mostlyclean-libtool
pdf: pdf-am
uninstall-am:
-.MAKE: $(top_builddir)/ansi2knr install-am install-strip
+.MAKE: install-am install-strip
.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
clean-libtool clean-noinstLTLIBRARIES ctags distclean \
install-pdf install-pdf-am install-ps install-ps-am \
install-strip installcheck installcheck-am installdirs \
maintainer-clean maintainer-clean-generic mostlyclean \
- mostlyclean-compile mostlyclean-generic mostlyclean-kr \
- mostlyclean-libtool pdf pdf-am ps ps-am tags uninstall \
- uninstall-am
+ mostlyclean-compile mostlyclean-generic mostlyclean-libtool \
+ pdf pdf-am ps ps-am tags uninstall uninstall-am
# Tell versions [3.59,3.63) of GNU make to not export all variables.
/* mpf_ceil, mpf_floor -- round an mpf to an integer.
-Copyright 2001, 2004 Free Software Foundation, Inc.
+Copyright 2001, 2004, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
Notice the use of prec+1 ensures mpf_ceil and mpf_floor are equivalent to
mpf_set if u is already an integer. */
-static void __gmpf_ceil_or_floor __GMP_PROTO ((REGPARM_2_1 (mpf_ptr, mpf_srcptr, int))) REGPARM_ATTR (1);
+static void __gmpf_ceil_or_floor (REGPARM_2_1 (mpf_ptr, mpf_srcptr, int)) REGPARM_ATTR (1);
#define mpf_ceil_or_floor(r,u,dir) __gmpf_ceil_or_floor (REGPARM_2_1 (r, u, dir))
REGPARM_ATTR (1) static void
/* mpf_cmp_si -- Compare a float with a signed integer.
-Copyright 1993, 1994, 1995, 1999, 2000, 2001, 2002, 2004 Free Software
+Copyright 1993, 1994, 1995, 1999, 2000, 2001, 2002, 2004, 2012 Free Software
Foundation, Inc.
This file is part of the GNU MP Library.
/* mpf_div -- Divide two floats.
-Copyright 1993, 1994, 1996, 2000, 2001, 2002, 2004, 2005, 2010 Free Software
-Foundation, Inc.
+Copyright 1993, 1994, 1996, 2000, 2001, 2002, 2004, 2005, 2010, 2012 Free
+Software Foundation, Inc.
This file is part of the GNU MP Library.
to save one limb in the division.
If r==u but the size is enough bigger than prec that there won't be an
- overlap between quotient and dividend in mpn_tdiv_qr, then we can avoid
+ overlap between quotient and dividend in mpn_div_q, then we can avoid
copying up,usize. This would only arise from a prec reduced with
mpf_set_prec_raw and will be pretty unusual, but might be worthwhile if
it could be worked into the copy_u decision cleanly. */
usize = SIZ(u);
vsize = SIZ(v);
- sign_quotient = usize ^ vsize;
- usize = ABS (usize);
- vsize = ABS (vsize);
- prec = PREC(r);
- if (vsize == 0)
+ if (UNLIKELY (vsize == 0))
DIVIDE_BY_ZERO;
if (usize == 0)
return;
}
+ sign_quotient = usize ^ vsize;
+ usize = ABS (usize);
+ vsize = ABS (vsize);
+ prec = PREC(r);
+
TMP_MARK;
rexp = EXP(u) - EXP(v) + 1;
/* mpf_div_ui -- Divide a float with an unsigned integer.
-Copyright 1993, 1994, 1996, 2000, 2001, 2002, 2004, 2005 Free Software
+Copyright 1993, 1994, 1996, 2000, 2001, 2002, 2004, 2005, 2012 Free Software
Foundation, Inc.
This file is part of the GNU MP Library.
}
#endif
- usize = u->_mp_size;
- sign_quotient = usize;
- usize = ABS (usize);
- prec = r->_mp_prec;
-
- if (v == 0)
+ if (UNLIKELY (v == 0))
DIVIDE_BY_ZERO;
+ usize = u->_mp_size;
+
if (usize == 0)
{
r->_mp_size = 0;
return;
}
+ sign_quotient = usize;
+ usize = ABS (usize);
+ prec = r->_mp_prec;
+
TMP_MARK;
rp = r->_mp_d;
/* mpf_eq -- Compare two floats up to a specified bit #.
-Copyright 1993, 1995, 1996, 2001, 2002, 2008, 2009 Free Software Foundation,
-Inc.
+Copyright 1993, 1995, 1996, 2001, 2002, 2008, 2009, 2012 Free Software
+Foundation, Inc.
This file is part of the GNU MP Library.
example, the number 3.1416 would be returned as "31416" in DIGIT_PTR and
1 in EXP.
-Copyright 1993, 1994, 1995, 1996, 1997, 2000, 2001, 2002, 2003, 2005, 2006 Free
-Software Foundation, Inc.
+Copyright 1993, 1994, 1995, 1996, 1997, 2000, 2001, 2002, 2003, 2005, 2006, 2011
+Free Software Foundation, Inc.
This file is part of the GNU MP Library.
if (base >= 0)
{
num_to_text = "0123456789abcdefghijklmnopqrstuvwxyz";
- if (base == 0)
+ if (base <= 1)
base = 10;
else if (base > 36)
{
else
{
base = -base;
+ if (base <= 1)
+ base = 10;
+ else if (base > 36)
+ return NULL;
num_to_text = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
}
conversion.) */
tstr = (unsigned char *) TMP_ALLOC (n_digits + 2 * GMP_LIMB_BITS + 3);
- n_limbs_needed = 2 + (mp_size_t)
- (n_digits / (GMP_NUMB_BITS * mp_bases[base].chars_per_bit_exactly));
+ LIMBS_PER_DIGIT_IN_BASE (n_limbs_needed, n_digits, base);
if (ue <= n_limbs_needed)
{
unsigned long e;
n_more_limbs_needed = n_limbs_needed - ue;
- e = (unsigned long) n_more_limbs_needed * (GMP_NUMB_BITS * mp_bases[base].chars_per_bit_exactly);
+ DIGITS_IN_BASE_PER_LIMB (e, n_more_limbs_needed, base);
if (un > n_limbs_needed)
{
mp_ptr dummyp, xp;
n_less_limbs_needed = ue - n_limbs_needed;
- e = (unsigned long) n_less_limbs_needed * (GMP_NUMB_BITS * mp_bases[base].chars_per_bit_exactly);
+ DIGITS_IN_BASE_PER_LIMB (e, n_less_limbs_needed, base);
if (un > n_limbs_needed)
{
/* mpf_init_set_si() -- Initialize a float and assign it from a signed int.
-Copyright 1993, 1994, 1995, 2000, 2001, 2003, 2004 Free Software Foundation,
-Inc.
+Copyright 1993, 1994, 1995, 2000, 2001, 2003, 2004, 2012 Free Software
+Foundation, Inc.
This file is part of the GNU MP Library.
the float OP to STREAM in base BASE. Return the number of characters
written, or 0 if an error occurred.
-Copyright 1996, 1997, 2001, 2002, 2005 Free Software Foundation, Inc.
+Copyright 1996, 1997, 2001, 2002, 2005, 2011 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
#include "gmp.h"
#include "gmp-impl.h"
+#include "longlong.h"
size_t
/* mpf_pow_ui -- Compute b^e.
-Copyright 1998, 1999, 2001 Free Software Foundation, Inc.
+Copyright 1998, 1999, 2001, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
mpf_pow_ui (mpf_ptr r, mpf_srcptr b, unsigned long int e)
{
mpf_t b2;
- unsigned long int e2;
mpf_init2 (b2, mpf_get_prec (r));
mpf_set (b2, b);
- mpf_set_ui (r, 1);
if ((e & 1) != 0)
- mpf_set (r, b2);
- for (e2 = e >> 1; e2 != 0; e2 >>= 1)
+ mpf_set (r, b);
+ else
+ mpf_set_ui (r, 1);
+ while (e >>= 1)
{
mpf_mul (b2, b2, b2);
- if ((e2 & 1) != 0)
+ if ((e & 1) != 0)
mpf_mul (r, r, b2);
}
/* The precision we use for d = x-y is based on what mpf_div will want from
- the dividend. It calls mpn_tdiv_qr to produce a quotient of rprec+1
- limbs. So rprec+1 == dsize - xsize + 1, hence dprec = rprec+xsize. */
+ the dividend. It calls mpn_div_q to produce a quotient of rprec+1 limbs.
+ So rprec+1 == dsize - xsize + 1, hence dprec = rprec+xsize. */
void
mpf_reldiff (mpf_t rdiff, mpf_srcptr x, mpf_srcptr y)
/* mpf_set_si() -- Assign a float from a signed int.
-Copyright 1993, 1994, 1995, 2000, 2001, 2002, 2004 Free Software Foundation,
-Inc.
+Copyright 1993, 1994, 1995, 2000, 2001, 2002, 2004, 2012 Free Software
+Foundation, Inc.
This file is part of the GNU MP Library.
of STRING is used to figure out the base.
Copyright 1993, 1994, 1995, 1996, 1997, 2000, 2001, 2002, 2003, 2005, 2007,
-2008 Free Software Foundation, Inc.
+2008, 2011 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
This still needs work, as suggested by some FIXME comments.
1. Don't depend on superfluous mantissa digits.
2. Allocate temp space more cleverly.
- 3. Use mpn_tdiv_qr instead of mpn_lshift+mpn_divrem.
+ 3. Use mpn_div_q instead of mpn_lshift+mpn_divrem.
*/
#define _GNU_SOURCE /* for DECIMAL_POINT in langinfo.h */
#include "gmp-impl.h"
#include "longlong.h"
-extern const unsigned char __gmp_digit_value_tab[];
+
#define digit_value_tab __gmp_digit_value_tab
/* Compute base^exp and return the most significant prec limbs in rp[].
{
/* not a digit, must be a decimal point */
for (i = 0; i < pointlen; i++)
- if (str[i] != point[i])
- return -1;
+ if (str[i] != point[i])
+ return -1;
if (digit_value[(unsigned char) str[pointlen]] >= (base == 0 ? 10 : base))
return -1;
}
{
int dig;
- for (j = 0; j < pointlen; j++)
- if (str[j] != point[j])
- goto not_point;
- if (1)
+ for (j = 0; j < pointlen; j++)
+ if (str[j] != point[j])
+ goto not_point;
+ if (1)
{
if (dotpos != 0)
{
}
else
{
- not_point:
+ not_point:
dig = digit_value[c];
if (dig >= base)
{
/* This breaks things like 0.000...0001. To safely ignore superfluous
digits, we need to skip over leading zeros. */
/* Just consider the relevant leading digits of the mantissa. */
- n_chars_needed = 2 + (size_t)
- (((size_t) prec * GMP_NUMB_BITS) * mp_bases[base].chars_per_bit_exactly);
+ LIMBS_PER_DIGIT_IN_BASE (n_chars_needed, prec, base);
if (str_size > n_chars_needed)
str_size = n_chars_needed;
#endif
- ma = 2 + (mp_size_t)
- (str_size / (GMP_NUMB_BITS * mp_bases[base].chars_per_bit_exactly));
+ LIMBS_PER_DIGIT_IN_BASE (ma, str_size, base);
mp = TMP_ALLOC_LIMBS (ma);
mn = mpn_set_str (mp, (unsigned char *) begs, str_size, base);
if (divflag)
{
#if 0
- /* FIXME: Should use mpn_tdiv here. */
- mpn_tdiv_qr (qp, mp, 0L, mp, mn, rp, rn);
+ /* FIXME: Should use mpn_div_q here. */
+ ...
+ mpn_div_q (tp, mp, mn, rp, rn, scratch);
+ ...
#else
mp_ptr qp;
mp_limb_t qlimb;
/* mpf_sqrt -- Compute the square root of a float.
-Copyright 1993, 1994, 1996, 2000, 2001, 2004, 2005 Free Software Foundation,
-Inc.
+Copyright 1993, 1994, 1996, 2000, 2001, 2004, 2005, 2012 Free Software
+Foundation, Inc.
This file is part of the GNU MP Library.
TMP_DECL;
usize = u->_mp_size;
- if (usize <= 0)
+ if (UNLIKELY (usize <= 0))
{
if (usize < 0)
SQRT_OF_NEGATIVE;
/* mpf_ui_div -- Divide an unsigned integer with a float.
-Copyright 1993, 1994, 1995, 1996, 2000, 2001, 2002, 2004, 2005 Free Software
-Foundation, Inc.
+Copyright 1993, 1994, 1995, 1996, 2000, 2001, 2002, 2004, 2005, 2012 Free
+Software Foundation, Inc.
This file is part of the GNU MP Library.
vsize = v->_mp_size;
sign_quotient = vsize;
- vsize = ABS (vsize);
- prec = r->_mp_prec;
if (UNLIKELY (vsize == 0))
DIVIDE_BY_ZERO;
return;
}
+ vsize = ABS (vsize);
+ prec = r->_mp_prec;
+
TMP_MARK;
rexp = 1 - v->_mp_exp + 1;
OFILES = @mpn_objects@
-
-# All possible mpn normal and optional function files are listed here, to
-# get automake to generate ansi2knr rules for each. Such rules will be
-# ignored for any that are instead implemented with a .asm (or whatever) for
-# a particular target.
-#
-nodist_EXTRA_libmpn_la_SOURCES = \
- add.c add_1.c add_n.c \
- addmul_1.c addmul_2.c addmul_3.c addmul_4.c addmul_5.c addmul_6.c \
- addmul_7.c addmul_8.c \
- and_n.c andn_n.c \
- cmp.c com.c copyd.c copyi.c \
- dive_1.c diveby3.c divis.c divrem.c divrem_1.c divrem_2.c \
- sbpi1_bdiv_qr.c sbpi1_bdiv_q.c \
- sbpi1_div_qr.c sbpi1_div_q.c sbpi1_divappr_q.c \
- dcpi1_bdiv_qr.c dcpi1_bdiv_q.c \
- dcpi1_div_qr.c dcpi1_div_q.c dcpi1_divappr_q.c \
- dump.c fib2_ui.c gcd.c \
- gcd_1.c gcdext.c get_d.c get_str.c \
- hamdist.c hgcd2.c hgcd.c invert_limb.c \
- ior_n.c iorn_n.c jacbase.c lshift.c \
- matrix22_mul.c mod_1.c mod_34lsub1.c mode1o.c \
- mod_1_1.c mod_1_2.c mod_1_3.c mod_1_4.c \
- mul.c mul_1.c mul_2.c mul_3.c mul_4.c mul_fft.c mul_n.c mul_basecase.c \
- nussbaumer_mul.c \
- toom22_mul.c toom32_mul.c toom42_mul.c toom52_mul.c toom62_mul.c \
- toom33_mul.c toom43_mul.c toom53_mul.c toom63_mul.c \
- toom44_mul.c \
- toom6h_mul.c toom6_sqr.c toom8h_mul.c toom8_sqr.c \
- toom_couple_handling.c \
- toom2_sqr.c toom3_sqr.c toom4_sqr.c \
- toom_eval_dgr3_pm1.c toom_eval_dgr3_pm2.c \
- toom_eval_pm1.c toom_eval_pm1.c toom_eval_pm2exp.c toom_eval_pm2rexp.c \
- toom_interpolate_5pts.c toom_interpolate_6pts.c toom_interpolate_7pts.c \
- toom_interpolate_8pts.c toom_interpolate_12pts.c toom_interpolate_16pts.c \
- invertappr.c invert.c binvert.c mulmod_bnm1.c sqrmod_bnm1.c \
- mullo_n.c mullo_basecase.c nand_n.c neg.c nior_n.c perfsqr.c \
- popcount.c pre_divrem_1.c pre_mod_1.c pow_1.c random.c random2.c rshift.c \
- rootrem.c scan0.c scan1.c set_str.c \
- sqr_basecase.c sqr_diagonal.c \
- sqrtrem.c sub.c sub_1.c sub_n.c submul_1.c \
- tdiv_qr.c udiv_qrnnd.c udiv_w_sdiv.c xor_n.c xnor_n.c
-
noinst_LTLIBRARIES = libmpn.la
nodist_libmpn_la_SOURCES = fib_table.c mp_bases.c
libmpn_la_LIBADD = $(OFILES)
TARG_DIST = a29k alpha arm clipper cray generic i960 ia64 lisp m68k m88k \
minithres mips32 mips64 ns32k pa32 pa64 power powerpc32 powerpc64 pyr \
- s390_32 s390_64 sh sparc32 sparc64 vax x86 x86_64 z8000 z8000x
+ s390_32 s390_64 sh sparc32 sparc64 thumb vax x86 x86_64 z8000
EXTRA_DIST = asm-defs.m4 cpp-ccas m4-ccas $(TARG_DIST)
perfsqr.h:
cd ..; $(MAKE) $(AM_MAKEFLAGS) mpn/perfsqr.h
-tune-gcd-p: gcd.c
- $(COMPILE) -g -O1 -I $(top_srcdir)/tune -DTUNE_GCD_P=1 gcd.c -o tune-gcd-p -L ../.libs -L../tune/.libs -lspeed -lgmp -lm
-
include Makeasm.am
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.6 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
# along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
VPATH = @srcdir@
+am__make_dryrun = \
+ { \
+ am__dry=no; \
+ case $$MAKEFLAGS in \
+ *\\[\ \ ]*) \
+ echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \
+ | grep '^AM OK$$' >/dev/null || am__dry=yes;; \
+ *) \
+ for am__flg in $$MAKEFLAGS; do \
+ case $$am__flg in \
+ *=*|--*) ;; \
+ *n*) am__dry=yes; break;; \
+ esac; \
+ done;; \
+ esac; \
+ test $$am__dry = yes; \
+ }
pkgdatadir = $(datadir)/@PACKAGE@
pkgincludedir = $(includedir)/@PACKAGE@
pkglibdir = $(libdir)/@PACKAGE@
POST_UNINSTALL = :
build_triplet = @build@
host_triplet = @host@
-ANSI2KNR = $(top_builddir)/ansi2knr
DIST_COMMON = README $(srcdir)/Makeasm.am $(srcdir)/Makefile.am \
$(srcdir)/Makefile.in
subdir = mpn
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
- $(top_srcdir)/configure.in
+ $(top_srcdir)/configure.ac
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
$(ACLOCAL_M4)
mkinstalldirs = $(install_sh) -d
CONFIG_CLEAN_VPATH_FILES =
LTLIBRARIES = $(noinst_LTLIBRARIES)
am__DEPENDENCIES_1 =
-nodist_libmpn_la_OBJECTS = fib_table$U.lo mp_bases$U.lo
+nodist_libmpn_la_OBJECTS = fib_table.lo mp_bases.lo
libmpn_la_OBJECTS = $(nodist_libmpn_la_OBJECTS)
DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
depcomp =
LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
--mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \
$(LDFLAGS) -o $@
-SOURCES = $(nodist_libmpn_la_SOURCES) \
- $(nodist_EXTRA_libmpn_la_SOURCES)
+SOURCES = $(nodist_libmpn_la_SOURCES)
DIST_SOURCES =
+am__can_run_installinfo = \
+ case $$AM_UPDATE_INFO_DIR in \
+ n|no|NO) false;; \
+ *) (install-info --version) >/dev/null 2>&1;; \
+ esac
ETAGS = etags
CTAGS = ctags
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
SPEED_CYCLECOUNTER_OBJ = @SPEED_CYCLECOUNTER_OBJ@
STRIP = @STRIP@
TAL_OBJECT = @TAL_OBJECT@
+TUNE_LIBS = @TUNE_LIBS@
TUNE_SQR_OBJ = @TUNE_SQR_OBJ@
-U = @U@
U_FOR_BUILD = @U_FOR_BUILD@
VERSION = @VERSION@
WITH_READLINE_01 = @WITH_READLINE_01@
mkdir_p = @mkdir_p@
mpn_objects = @mpn_objects@
mpn_objs_in_libgmp = @mpn_objs_in_libgmp@
-mpn_objs_in_libmp = @mpn_objs_in_libmp@
oldincludedir = @oldincludedir@
pdfdir = @pdfdir@
prefix = @prefix@
-DOPERATION_`echo $* | sed 's/_$$//'`
OFILES = @mpn_objects@
-
-# All possible mpn normal and optional function files are listed here, to
-# get automake to generate ansi2knr rules for each. Such rules will be
-# ignored for any that are instead implemented with a .asm (or whatever) for
-# a particular target.
-#
-nodist_EXTRA_libmpn_la_SOURCES = \
- add.c add_1.c add_n.c \
- addmul_1.c addmul_2.c addmul_3.c addmul_4.c addmul_5.c addmul_6.c \
- addmul_7.c addmul_8.c \
- and_n.c andn_n.c \
- cmp.c com.c copyd.c copyi.c \
- dive_1.c diveby3.c divis.c divrem.c divrem_1.c divrem_2.c \
- sbpi1_bdiv_qr.c sbpi1_bdiv_q.c \
- sbpi1_div_qr.c sbpi1_div_q.c sbpi1_divappr_q.c \
- dcpi1_bdiv_qr.c dcpi1_bdiv_q.c \
- dcpi1_div_qr.c dcpi1_div_q.c dcpi1_divappr_q.c \
- dump.c fib2_ui.c gcd.c \
- gcd_1.c gcdext.c get_d.c get_str.c \
- hamdist.c hgcd2.c hgcd.c invert_limb.c \
- ior_n.c iorn_n.c jacbase.c lshift.c \
- matrix22_mul.c mod_1.c mod_34lsub1.c mode1o.c \
- mod_1_1.c mod_1_2.c mod_1_3.c mod_1_4.c \
- mul.c mul_1.c mul_2.c mul_3.c mul_4.c mul_fft.c mul_n.c mul_basecase.c \
- nussbaumer_mul.c \
- toom22_mul.c toom32_mul.c toom42_mul.c toom52_mul.c toom62_mul.c \
- toom33_mul.c toom43_mul.c toom53_mul.c toom63_mul.c \
- toom44_mul.c \
- toom6h_mul.c toom6_sqr.c toom8h_mul.c toom8_sqr.c \
- toom_couple_handling.c \
- toom2_sqr.c toom3_sqr.c toom4_sqr.c \
- toom_eval_dgr3_pm1.c toom_eval_dgr3_pm2.c \
- toom_eval_pm1.c toom_eval_pm1.c toom_eval_pm2exp.c toom_eval_pm2rexp.c \
- toom_interpolate_5pts.c toom_interpolate_6pts.c toom_interpolate_7pts.c \
- toom_interpolate_8pts.c toom_interpolate_12pts.c toom_interpolate_16pts.c \
- invertappr.c invert.c binvert.c mulmod_bnm1.c sqrmod_bnm1.c \
- mullo_n.c mullo_basecase.c nand_n.c neg.c nior_n.c perfsqr.c \
- popcount.c pre_divrem_1.c pre_mod_1.c pow_1.c random.c random2.c rshift.c \
- rootrem.c scan0.c scan1.c set_str.c \
- sqr_basecase.c sqr_diagonal.c \
- sqrtrem.c sub.c sub_1.c sub_n.c submul_1.c \
- tdiv_qr.c udiv_qrnnd.c udiv_w_sdiv.c xor_n.c xnor_n.c
-
noinst_LTLIBRARIES = libmpn.la
nodist_libmpn_la_SOURCES = fib_table.c mp_bases.c
libmpn_la_LIBADD = $(OFILES)
libmpn_la_DEPENDENCIES = $(OFILES)
TARG_DIST = a29k alpha arm clipper cray generic i960 ia64 lisp m68k m88k \
minithres mips32 mips64 ns32k pa32 pa64 power powerpc32 powerpc64 pyr \
- s390_32 s390_64 sh sparc32 sparc64 vax x86 x86_64 z8000 z8000x
+ s390_32 s390_64 sh sparc32 sparc64 thumb vax x86 x86_64 z8000
EXTRA_DIST = asm-defs.m4 cpp-ccas m4-ccas $(TARG_DIST)
echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
esac;
+$(srcdir)/Makeasm.am:
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-libmpn.la: $(libmpn_la_OBJECTS) $(libmpn_la_DEPENDENCIES)
+libmpn.la: $(libmpn_la_OBJECTS) $(libmpn_la_DEPENDENCIES) $(EXTRA_libmpn_la_DEPENDENCIES)
$(LINK) $(libmpn_la_OBJECTS) $(libmpn_la_LIBADD) $(LIBS)
mostlyclean-compile:
distclean-compile:
-rm -f *.tab.c
-$(top_builddir)/ansi2knr:
- $(am__cd) $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) ./ansi2knr
-
-mostlyclean-kr:
- -test "$U" = "" || rm -f *_.c
.c.o:
$(COMPILE) -c $<
.c.lo:
$(LTCOMPILE) -c -o $@ $<
-add_.c: add.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/add.c; then echo $(srcdir)/add.c; else echo add.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-add_1_.c: add_1.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/add_1.c; then echo $(srcdir)/add_1.c; else echo add_1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-add_n_.c: add_n.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/add_n.c; then echo $(srcdir)/add_n.c; else echo add_n.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-addmul_1_.c: addmul_1.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/addmul_1.c; then echo $(srcdir)/addmul_1.c; else echo addmul_1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-addmul_2_.c: addmul_2.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/addmul_2.c; then echo $(srcdir)/addmul_2.c; else echo addmul_2.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-addmul_3_.c: addmul_3.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/addmul_3.c; then echo $(srcdir)/addmul_3.c; else echo addmul_3.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-addmul_4_.c: addmul_4.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/addmul_4.c; then echo $(srcdir)/addmul_4.c; else echo addmul_4.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-addmul_5_.c: addmul_5.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/addmul_5.c; then echo $(srcdir)/addmul_5.c; else echo addmul_5.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-addmul_6_.c: addmul_6.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/addmul_6.c; then echo $(srcdir)/addmul_6.c; else echo addmul_6.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-addmul_7_.c: addmul_7.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/addmul_7.c; then echo $(srcdir)/addmul_7.c; else echo addmul_7.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-addmul_8_.c: addmul_8.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/addmul_8.c; then echo $(srcdir)/addmul_8.c; else echo addmul_8.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-and_n_.c: and_n.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/and_n.c; then echo $(srcdir)/and_n.c; else echo and_n.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-andn_n_.c: andn_n.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/andn_n.c; then echo $(srcdir)/andn_n.c; else echo andn_n.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-binvert_.c: binvert.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/binvert.c; then echo $(srcdir)/binvert.c; else echo binvert.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-cmp_.c: cmp.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/cmp.c; then echo $(srcdir)/cmp.c; else echo cmp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-com_.c: com.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/com.c; then echo $(srcdir)/com.c; else echo com.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-copyd_.c: copyd.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/copyd.c; then echo $(srcdir)/copyd.c; else echo copyd.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-copyi_.c: copyi.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/copyi.c; then echo $(srcdir)/copyi.c; else echo copyi.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-dcpi1_bdiv_q_.c: dcpi1_bdiv_q.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/dcpi1_bdiv_q.c; then echo $(srcdir)/dcpi1_bdiv_q.c; else echo dcpi1_bdiv_q.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-dcpi1_bdiv_qr_.c: dcpi1_bdiv_qr.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/dcpi1_bdiv_qr.c; then echo $(srcdir)/dcpi1_bdiv_qr.c; else echo dcpi1_bdiv_qr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-dcpi1_div_q_.c: dcpi1_div_q.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/dcpi1_div_q.c; then echo $(srcdir)/dcpi1_div_q.c; else echo dcpi1_div_q.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-dcpi1_div_qr_.c: dcpi1_div_qr.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/dcpi1_div_qr.c; then echo $(srcdir)/dcpi1_div_qr.c; else echo dcpi1_div_qr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-dcpi1_divappr_q_.c: dcpi1_divappr_q.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/dcpi1_divappr_q.c; then echo $(srcdir)/dcpi1_divappr_q.c; else echo dcpi1_divappr_q.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-dive_1_.c: dive_1.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/dive_1.c; then echo $(srcdir)/dive_1.c; else echo dive_1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-diveby3_.c: diveby3.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/diveby3.c; then echo $(srcdir)/diveby3.c; else echo diveby3.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-divis_.c: divis.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/divis.c; then echo $(srcdir)/divis.c; else echo divis.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-divrem_.c: divrem.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/divrem.c; then echo $(srcdir)/divrem.c; else echo divrem.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-divrem_1_.c: divrem_1.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/divrem_1.c; then echo $(srcdir)/divrem_1.c; else echo divrem_1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-divrem_2_.c: divrem_2.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/divrem_2.c; then echo $(srcdir)/divrem_2.c; else echo divrem_2.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-dump_.c: dump.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/dump.c; then echo $(srcdir)/dump.c; else echo dump.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-fib2_ui_.c: fib2_ui.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/fib2_ui.c; then echo $(srcdir)/fib2_ui.c; else echo fib2_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-fib_table_.c: fib_table.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/fib_table.c; then echo $(srcdir)/fib_table.c; else echo fib_table.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-gcd_.c: gcd.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/gcd.c; then echo $(srcdir)/gcd.c; else echo gcd.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-gcd_1_.c: gcd_1.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/gcd_1.c; then echo $(srcdir)/gcd_1.c; else echo gcd_1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-gcdext_.c: gcdext.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/gcdext.c; then echo $(srcdir)/gcdext.c; else echo gcdext.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-get_d_.c: get_d.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/get_d.c; then echo $(srcdir)/get_d.c; else echo get_d.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-get_str_.c: get_str.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/get_str.c; then echo $(srcdir)/get_str.c; else echo get_str.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-hamdist_.c: hamdist.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/hamdist.c; then echo $(srcdir)/hamdist.c; else echo hamdist.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-hgcd_.c: hgcd.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/hgcd.c; then echo $(srcdir)/hgcd.c; else echo hgcd.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-hgcd2_.c: hgcd2.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/hgcd2.c; then echo $(srcdir)/hgcd2.c; else echo hgcd2.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-invert_.c: invert.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/invert.c; then echo $(srcdir)/invert.c; else echo invert.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-invert_limb_.c: invert_limb.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/invert_limb.c; then echo $(srcdir)/invert_limb.c; else echo invert_limb.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-invertappr_.c: invertappr.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/invertappr.c; then echo $(srcdir)/invertappr.c; else echo invertappr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-ior_n_.c: ior_n.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/ior_n.c; then echo $(srcdir)/ior_n.c; else echo ior_n.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-iorn_n_.c: iorn_n.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/iorn_n.c; then echo $(srcdir)/iorn_n.c; else echo iorn_n.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-jacbase_.c: jacbase.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/jacbase.c; then echo $(srcdir)/jacbase.c; else echo jacbase.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-lshift_.c: lshift.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/lshift.c; then echo $(srcdir)/lshift.c; else echo lshift.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-matrix22_mul_.c: matrix22_mul.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/matrix22_mul.c; then echo $(srcdir)/matrix22_mul.c; else echo matrix22_mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mod_1_.c: mod_1.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mod_1.c; then echo $(srcdir)/mod_1.c; else echo mod_1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mod_1_1_.c: mod_1_1.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mod_1_1.c; then echo $(srcdir)/mod_1_1.c; else echo mod_1_1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mod_1_2_.c: mod_1_2.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mod_1_2.c; then echo $(srcdir)/mod_1_2.c; else echo mod_1_2.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mod_1_3_.c: mod_1_3.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mod_1_3.c; then echo $(srcdir)/mod_1_3.c; else echo mod_1_3.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mod_1_4_.c: mod_1_4.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mod_1_4.c; then echo $(srcdir)/mod_1_4.c; else echo mod_1_4.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mod_34lsub1_.c: mod_34lsub1.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mod_34lsub1.c; then echo $(srcdir)/mod_34lsub1.c; else echo mod_34lsub1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mode1o_.c: mode1o.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mode1o.c; then echo $(srcdir)/mode1o.c; else echo mode1o.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mp_bases_.c: mp_bases.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mp_bases.c; then echo $(srcdir)/mp_bases.c; else echo mp_bases.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mul_.c: mul.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mul.c; then echo $(srcdir)/mul.c; else echo mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mul_1_.c: mul_1.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mul_1.c; then echo $(srcdir)/mul_1.c; else echo mul_1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mul_2_.c: mul_2.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mul_2.c; then echo $(srcdir)/mul_2.c; else echo mul_2.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mul_3_.c: mul_3.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mul_3.c; then echo $(srcdir)/mul_3.c; else echo mul_3.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mul_4_.c: mul_4.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mul_4.c; then echo $(srcdir)/mul_4.c; else echo mul_4.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mul_basecase_.c: mul_basecase.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mul_basecase.c; then echo $(srcdir)/mul_basecase.c; else echo mul_basecase.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mul_fft_.c: mul_fft.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mul_fft.c; then echo $(srcdir)/mul_fft.c; else echo mul_fft.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mul_n_.c: mul_n.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mul_n.c; then echo $(srcdir)/mul_n.c; else echo mul_n.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mullo_basecase_.c: mullo_basecase.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mullo_basecase.c; then echo $(srcdir)/mullo_basecase.c; else echo mullo_basecase.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mullo_n_.c: mullo_n.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mullo_n.c; then echo $(srcdir)/mullo_n.c; else echo mullo_n.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mulmod_bnm1_.c: mulmod_bnm1.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mulmod_bnm1.c; then echo $(srcdir)/mulmod_bnm1.c; else echo mulmod_bnm1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-nand_n_.c: nand_n.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/nand_n.c; then echo $(srcdir)/nand_n.c; else echo nand_n.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-neg_.c: neg.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/neg.c; then echo $(srcdir)/neg.c; else echo neg.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-nior_n_.c: nior_n.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/nior_n.c; then echo $(srcdir)/nior_n.c; else echo nior_n.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-nussbaumer_mul_.c: nussbaumer_mul.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/nussbaumer_mul.c; then echo $(srcdir)/nussbaumer_mul.c; else echo nussbaumer_mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-perfsqr_.c: perfsqr.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/perfsqr.c; then echo $(srcdir)/perfsqr.c; else echo perfsqr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-popcount_.c: popcount.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/popcount.c; then echo $(srcdir)/popcount.c; else echo popcount.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-pow_1_.c: pow_1.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/pow_1.c; then echo $(srcdir)/pow_1.c; else echo pow_1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-pre_divrem_1_.c: pre_divrem_1.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/pre_divrem_1.c; then echo $(srcdir)/pre_divrem_1.c; else echo pre_divrem_1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-pre_mod_1_.c: pre_mod_1.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/pre_mod_1.c; then echo $(srcdir)/pre_mod_1.c; else echo pre_mod_1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-random_.c: random.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/random.c; then echo $(srcdir)/random.c; else echo random.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-random2_.c: random2.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/random2.c; then echo $(srcdir)/random2.c; else echo random2.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-rootrem_.c: rootrem.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/rootrem.c; then echo $(srcdir)/rootrem.c; else echo rootrem.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-rshift_.c: rshift.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/rshift.c; then echo $(srcdir)/rshift.c; else echo rshift.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-sbpi1_bdiv_q_.c: sbpi1_bdiv_q.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sbpi1_bdiv_q.c; then echo $(srcdir)/sbpi1_bdiv_q.c; else echo sbpi1_bdiv_q.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-sbpi1_bdiv_qr_.c: sbpi1_bdiv_qr.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sbpi1_bdiv_qr.c; then echo $(srcdir)/sbpi1_bdiv_qr.c; else echo sbpi1_bdiv_qr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-sbpi1_div_q_.c: sbpi1_div_q.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sbpi1_div_q.c; then echo $(srcdir)/sbpi1_div_q.c; else echo sbpi1_div_q.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-sbpi1_div_qr_.c: sbpi1_div_qr.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sbpi1_div_qr.c; then echo $(srcdir)/sbpi1_div_qr.c; else echo sbpi1_div_qr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-sbpi1_divappr_q_.c: sbpi1_divappr_q.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sbpi1_divappr_q.c; then echo $(srcdir)/sbpi1_divappr_q.c; else echo sbpi1_divappr_q.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-scan0_.c: scan0.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/scan0.c; then echo $(srcdir)/scan0.c; else echo scan0.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-scan1_.c: scan1.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/scan1.c; then echo $(srcdir)/scan1.c; else echo scan1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-set_str_.c: set_str.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_str.c; then echo $(srcdir)/set_str.c; else echo set_str.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-sqr_basecase_.c: sqr_basecase.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sqr_basecase.c; then echo $(srcdir)/sqr_basecase.c; else echo sqr_basecase.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-sqr_diagonal_.c: sqr_diagonal.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sqr_diagonal.c; then echo $(srcdir)/sqr_diagonal.c; else echo sqr_diagonal.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-sqrmod_bnm1_.c: sqrmod_bnm1.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sqrmod_bnm1.c; then echo $(srcdir)/sqrmod_bnm1.c; else echo sqrmod_bnm1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-sqrtrem_.c: sqrtrem.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sqrtrem.c; then echo $(srcdir)/sqrtrem.c; else echo sqrtrem.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-sub_.c: sub.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sub.c; then echo $(srcdir)/sub.c; else echo sub.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-sub_1_.c: sub_1.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sub_1.c; then echo $(srcdir)/sub_1.c; else echo sub_1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-sub_n_.c: sub_n.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sub_n.c; then echo $(srcdir)/sub_n.c; else echo sub_n.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-submul_1_.c: submul_1.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/submul_1.c; then echo $(srcdir)/submul_1.c; else echo submul_1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-tdiv_qr_.c: tdiv_qr.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/tdiv_qr.c; then echo $(srcdir)/tdiv_qr.c; else echo tdiv_qr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-toom22_mul_.c: toom22_mul.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom22_mul.c; then echo $(srcdir)/toom22_mul.c; else echo toom22_mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-toom2_sqr_.c: toom2_sqr.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom2_sqr.c; then echo $(srcdir)/toom2_sqr.c; else echo toom2_sqr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-toom32_mul_.c: toom32_mul.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom32_mul.c; then echo $(srcdir)/toom32_mul.c; else echo toom32_mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-toom33_mul_.c: toom33_mul.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom33_mul.c; then echo $(srcdir)/toom33_mul.c; else echo toom33_mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-toom3_sqr_.c: toom3_sqr.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom3_sqr.c; then echo $(srcdir)/toom3_sqr.c; else echo toom3_sqr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-toom42_mul_.c: toom42_mul.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom42_mul.c; then echo $(srcdir)/toom42_mul.c; else echo toom42_mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-toom43_mul_.c: toom43_mul.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom43_mul.c; then echo $(srcdir)/toom43_mul.c; else echo toom43_mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-toom44_mul_.c: toom44_mul.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom44_mul.c; then echo $(srcdir)/toom44_mul.c; else echo toom44_mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-toom4_sqr_.c: toom4_sqr.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom4_sqr.c; then echo $(srcdir)/toom4_sqr.c; else echo toom4_sqr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-toom52_mul_.c: toom52_mul.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom52_mul.c; then echo $(srcdir)/toom52_mul.c; else echo toom52_mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-toom53_mul_.c: toom53_mul.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom53_mul.c; then echo $(srcdir)/toom53_mul.c; else echo toom53_mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-toom62_mul_.c: toom62_mul.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom62_mul.c; then echo $(srcdir)/toom62_mul.c; else echo toom62_mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-toom63_mul_.c: toom63_mul.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom63_mul.c; then echo $(srcdir)/toom63_mul.c; else echo toom63_mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-toom6_sqr_.c: toom6_sqr.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom6_sqr.c; then echo $(srcdir)/toom6_sqr.c; else echo toom6_sqr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-toom6h_mul_.c: toom6h_mul.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom6h_mul.c; then echo $(srcdir)/toom6h_mul.c; else echo toom6h_mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-toom8_sqr_.c: toom8_sqr.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom8_sqr.c; then echo $(srcdir)/toom8_sqr.c; else echo toom8_sqr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-toom8h_mul_.c: toom8h_mul.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom8h_mul.c; then echo $(srcdir)/toom8h_mul.c; else echo toom8h_mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-toom_couple_handling_.c: toom_couple_handling.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom_couple_handling.c; then echo $(srcdir)/toom_couple_handling.c; else echo toom_couple_handling.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-toom_eval_dgr3_pm1_.c: toom_eval_dgr3_pm1.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom_eval_dgr3_pm1.c; then echo $(srcdir)/toom_eval_dgr3_pm1.c; else echo toom_eval_dgr3_pm1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-toom_eval_dgr3_pm2_.c: toom_eval_dgr3_pm2.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom_eval_dgr3_pm2.c; then echo $(srcdir)/toom_eval_dgr3_pm2.c; else echo toom_eval_dgr3_pm2.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-toom_eval_pm1_.c: toom_eval_pm1.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom_eval_pm1.c; then echo $(srcdir)/toom_eval_pm1.c; else echo toom_eval_pm1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-toom_eval_pm2exp_.c: toom_eval_pm2exp.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom_eval_pm2exp.c; then echo $(srcdir)/toom_eval_pm2exp.c; else echo toom_eval_pm2exp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-toom_eval_pm2rexp_.c: toom_eval_pm2rexp.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom_eval_pm2rexp.c; then echo $(srcdir)/toom_eval_pm2rexp.c; else echo toom_eval_pm2rexp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-toom_interpolate_12pts_.c: toom_interpolate_12pts.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom_interpolate_12pts.c; then echo $(srcdir)/toom_interpolate_12pts.c; else echo toom_interpolate_12pts.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-toom_interpolate_16pts_.c: toom_interpolate_16pts.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom_interpolate_16pts.c; then echo $(srcdir)/toom_interpolate_16pts.c; else echo toom_interpolate_16pts.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-toom_interpolate_5pts_.c: toom_interpolate_5pts.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom_interpolate_5pts.c; then echo $(srcdir)/toom_interpolate_5pts.c; else echo toom_interpolate_5pts.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-toom_interpolate_6pts_.c: toom_interpolate_6pts.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom_interpolate_6pts.c; then echo $(srcdir)/toom_interpolate_6pts.c; else echo toom_interpolate_6pts.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-toom_interpolate_7pts_.c: toom_interpolate_7pts.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom_interpolate_7pts.c; then echo $(srcdir)/toom_interpolate_7pts.c; else echo toom_interpolate_7pts.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-toom_interpolate_8pts_.c: toom_interpolate_8pts.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom_interpolate_8pts.c; then echo $(srcdir)/toom_interpolate_8pts.c; else echo toom_interpolate_8pts.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-udiv_qrnnd_.c: udiv_qrnnd.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/udiv_qrnnd.c; then echo $(srcdir)/udiv_qrnnd.c; else echo udiv_qrnnd.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-udiv_w_sdiv_.c: udiv_w_sdiv.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/udiv_w_sdiv.c; then echo $(srcdir)/udiv_w_sdiv.c; else echo udiv_w_sdiv.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-xnor_n_.c: xnor_n.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/xnor_n.c; then echo $(srcdir)/xnor_n.c; else echo xnor_n.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-xor_n_.c: xor_n.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/xor_n.c; then echo $(srcdir)/xor_n.c; else echo xor_n.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-add_.$(OBJEXT) add_.lo add_1_.$(OBJEXT) add_1_.lo add_n_.$(OBJEXT) \
-add_n_.lo addmul_1_.$(OBJEXT) addmul_1_.lo addmul_2_.$(OBJEXT) \
-addmul_2_.lo addmul_3_.$(OBJEXT) addmul_3_.lo addmul_4_.$(OBJEXT) \
-addmul_4_.lo addmul_5_.$(OBJEXT) addmul_5_.lo addmul_6_.$(OBJEXT) \
-addmul_6_.lo addmul_7_.$(OBJEXT) addmul_7_.lo addmul_8_.$(OBJEXT) \
-addmul_8_.lo and_n_.$(OBJEXT) and_n_.lo andn_n_.$(OBJEXT) andn_n_.lo \
-binvert_.$(OBJEXT) binvert_.lo cmp_.$(OBJEXT) cmp_.lo com_.$(OBJEXT) \
-com_.lo copyd_.$(OBJEXT) copyd_.lo copyi_.$(OBJEXT) copyi_.lo \
-dcpi1_bdiv_q_.$(OBJEXT) dcpi1_bdiv_q_.lo dcpi1_bdiv_qr_.$(OBJEXT) \
-dcpi1_bdiv_qr_.lo dcpi1_div_q_.$(OBJEXT) dcpi1_div_q_.lo \
-dcpi1_div_qr_.$(OBJEXT) dcpi1_div_qr_.lo dcpi1_divappr_q_.$(OBJEXT) \
-dcpi1_divappr_q_.lo dive_1_.$(OBJEXT) dive_1_.lo diveby3_.$(OBJEXT) \
-diveby3_.lo divis_.$(OBJEXT) divis_.lo divrem_.$(OBJEXT) divrem_.lo \
-divrem_1_.$(OBJEXT) divrem_1_.lo divrem_2_.$(OBJEXT) divrem_2_.lo \
-dump_.$(OBJEXT) dump_.lo fib2_ui_.$(OBJEXT) fib2_ui_.lo \
-fib_table_.$(OBJEXT) fib_table_.lo gcd_.$(OBJEXT) gcd_.lo \
-gcd_1_.$(OBJEXT) gcd_1_.lo gcdext_.$(OBJEXT) gcdext_.lo \
-get_d_.$(OBJEXT) get_d_.lo get_str_.$(OBJEXT) get_str_.lo \
-hamdist_.$(OBJEXT) hamdist_.lo hgcd_.$(OBJEXT) hgcd_.lo \
-hgcd2_.$(OBJEXT) hgcd2_.lo invert_.$(OBJEXT) invert_.lo \
-invert_limb_.$(OBJEXT) invert_limb_.lo invertappr_.$(OBJEXT) \
-invertappr_.lo ior_n_.$(OBJEXT) ior_n_.lo iorn_n_.$(OBJEXT) iorn_n_.lo \
-jacbase_.$(OBJEXT) jacbase_.lo lshift_.$(OBJEXT) lshift_.lo \
-matrix22_mul_.$(OBJEXT) matrix22_mul_.lo mod_1_.$(OBJEXT) mod_1_.lo \
-mod_1_1_.$(OBJEXT) mod_1_1_.lo mod_1_2_.$(OBJEXT) mod_1_2_.lo \
-mod_1_3_.$(OBJEXT) mod_1_3_.lo mod_1_4_.$(OBJEXT) mod_1_4_.lo \
-mod_34lsub1_.$(OBJEXT) mod_34lsub1_.lo mode1o_.$(OBJEXT) mode1o_.lo \
-mp_bases_.$(OBJEXT) mp_bases_.lo mul_.$(OBJEXT) mul_.lo \
-mul_1_.$(OBJEXT) mul_1_.lo mul_2_.$(OBJEXT) mul_2_.lo mul_3_.$(OBJEXT) \
-mul_3_.lo mul_4_.$(OBJEXT) mul_4_.lo mul_basecase_.$(OBJEXT) \
-mul_basecase_.lo mul_fft_.$(OBJEXT) mul_fft_.lo mul_n_.$(OBJEXT) \
-mul_n_.lo mullo_basecase_.$(OBJEXT) mullo_basecase_.lo \
-mullo_n_.$(OBJEXT) mullo_n_.lo mulmod_bnm1_.$(OBJEXT) mulmod_bnm1_.lo \
-nand_n_.$(OBJEXT) nand_n_.lo neg_.$(OBJEXT) neg_.lo nior_n_.$(OBJEXT) \
-nior_n_.lo nussbaumer_mul_.$(OBJEXT) nussbaumer_mul_.lo \
-perfsqr_.$(OBJEXT) perfsqr_.lo popcount_.$(OBJEXT) popcount_.lo \
-pow_1_.$(OBJEXT) pow_1_.lo pre_divrem_1_.$(OBJEXT) pre_divrem_1_.lo \
-pre_mod_1_.$(OBJEXT) pre_mod_1_.lo random_.$(OBJEXT) random_.lo \
-random2_.$(OBJEXT) random2_.lo rootrem_.$(OBJEXT) rootrem_.lo \
-rshift_.$(OBJEXT) rshift_.lo sbpi1_bdiv_q_.$(OBJEXT) sbpi1_bdiv_q_.lo \
-sbpi1_bdiv_qr_.$(OBJEXT) sbpi1_bdiv_qr_.lo sbpi1_div_q_.$(OBJEXT) \
-sbpi1_div_q_.lo sbpi1_div_qr_.$(OBJEXT) sbpi1_div_qr_.lo \
-sbpi1_divappr_q_.$(OBJEXT) sbpi1_divappr_q_.lo scan0_.$(OBJEXT) \
-scan0_.lo scan1_.$(OBJEXT) scan1_.lo set_str_.$(OBJEXT) set_str_.lo \
-sqr_basecase_.$(OBJEXT) sqr_basecase_.lo sqr_diagonal_.$(OBJEXT) \
-sqr_diagonal_.lo sqrmod_bnm1_.$(OBJEXT) sqrmod_bnm1_.lo \
-sqrtrem_.$(OBJEXT) sqrtrem_.lo sub_.$(OBJEXT) sub_.lo sub_1_.$(OBJEXT) \
-sub_1_.lo sub_n_.$(OBJEXT) sub_n_.lo submul_1_.$(OBJEXT) submul_1_.lo \
-tdiv_qr_.$(OBJEXT) tdiv_qr_.lo toom22_mul_.$(OBJEXT) toom22_mul_.lo \
-toom2_sqr_.$(OBJEXT) toom2_sqr_.lo toom32_mul_.$(OBJEXT) \
-toom32_mul_.lo toom33_mul_.$(OBJEXT) toom33_mul_.lo \
-toom3_sqr_.$(OBJEXT) toom3_sqr_.lo toom42_mul_.$(OBJEXT) \
-toom42_mul_.lo toom43_mul_.$(OBJEXT) toom43_mul_.lo \
-toom44_mul_.$(OBJEXT) toom44_mul_.lo toom4_sqr_.$(OBJEXT) \
-toom4_sqr_.lo toom52_mul_.$(OBJEXT) toom52_mul_.lo \
-toom53_mul_.$(OBJEXT) toom53_mul_.lo toom62_mul_.$(OBJEXT) \
-toom62_mul_.lo toom63_mul_.$(OBJEXT) toom63_mul_.lo \
-toom6_sqr_.$(OBJEXT) toom6_sqr_.lo toom6h_mul_.$(OBJEXT) \
-toom6h_mul_.lo toom8_sqr_.$(OBJEXT) toom8_sqr_.lo \
-toom8h_mul_.$(OBJEXT) toom8h_mul_.lo toom_couple_handling_.$(OBJEXT) \
-toom_couple_handling_.lo toom_eval_dgr3_pm1_.$(OBJEXT) \
-toom_eval_dgr3_pm1_.lo toom_eval_dgr3_pm2_.$(OBJEXT) \
-toom_eval_dgr3_pm2_.lo toom_eval_pm1_.$(OBJEXT) toom_eval_pm1_.lo \
-toom_eval_pm2exp_.$(OBJEXT) toom_eval_pm2exp_.lo \
-toom_eval_pm2rexp_.$(OBJEXT) toom_eval_pm2rexp_.lo \
-toom_interpolate_12pts_.$(OBJEXT) toom_interpolate_12pts_.lo \
-toom_interpolate_16pts_.$(OBJEXT) toom_interpolate_16pts_.lo \
-toom_interpolate_5pts_.$(OBJEXT) toom_interpolate_5pts_.lo \
-toom_interpolate_6pts_.$(OBJEXT) toom_interpolate_6pts_.lo \
-toom_interpolate_7pts_.$(OBJEXT) toom_interpolate_7pts_.lo \
-toom_interpolate_8pts_.$(OBJEXT) toom_interpolate_8pts_.lo \
-udiv_qrnnd_.$(OBJEXT) udiv_qrnnd_.lo udiv_w_sdiv_.$(OBJEXT) \
-udiv_w_sdiv_.lo xnor_n_.$(OBJEXT) xnor_n_.lo xor_n_.$(OBJEXT) \
-xor_n_.lo : $(ANSI2KNR)
mostlyclean-libtool:
-rm -f *.lo
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
mostlyclean: mostlyclean-am
-mostlyclean-am: mostlyclean-compile mostlyclean-generic mostlyclean-kr \
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
mostlyclean-libtool
pdf: pdf-am
uninstall-am:
-.MAKE: $(top_builddir)/ansi2knr install-am install-strip
+.MAKE: install-am install-strip
.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
clean-libtool clean-noinstLTLIBRARIES ctags distclean \
install-pdf install-pdf-am install-ps install-ps-am \
install-strip installcheck installcheck-am installdirs \
maintainer-clean maintainer-clean-generic mostlyclean \
- mostlyclean-compile mostlyclean-generic mostlyclean-kr \
- mostlyclean-libtool pdf pdf-am ps ps-am tags uninstall \
- uninstall-am
+ mostlyclean-compile mostlyclean-generic mostlyclean-libtool \
+ pdf pdf-am ps ps-am tags uninstall uninstall-am
# These are BUILT_SOURCES at the top-level, so normally they're built before
perfsqr.h:
cd ..; $(MAKE) $(AM_MAKEFLAGS) mpn/perfsqr.h
-tune-gcd-p: gcd.c
- $(COMPILE) -g -O1 -I $(top_srcdir)/tune -DTUNE_GCD_P=1 gcd.c -o tune-gcd-p -L ../.libs -L../tune/.libs -lspeed -lgmp -lm
-
# .s assembler, no preprocessing.
#
.s.o:
dnl Alpha mpn_add_n -- Add two limb vectors of the same length > 0 and
dnl store sum in a third limb vector.
-dnl Copyright 1995, 1999, 2000, 2005 Free Software Foundation, Inc.
+dnl Copyright 1995, 1999, 2000, 2005, 2011 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
dnl size r19
ASM_START()
+PROLOGUE(mpn_add_nc)
+ bis r20,r31,r25
+ br L(com)
+EPILOGUE()
PROLOGUE(mpn_add_n)
bis r31,r31,r25 C clear cy
- subq r19,4,r19 C decr loop cnt
+L(com): subq r19,4,r19 C decr loop cnt
blt r19,$Lend2 C if less than 4 limbs, goto 2nd loop
C Start software pipeline for 1st loop
ldq r0,0(r18)
ldq r1,8(r18)
ldq r5,8(r17)
addq r17,32,r17 C update s1_ptr
+ addq r0,r4,r28 C 1st main add
ldq r2,16(r18)
- addq r0,r4,r20 C 1st main add
+ addq r25,r28,r20 C 1st carry add
ldq r3,24(r18)
- subq r19,4,r19 C decr loop cnt
+ cmpult r28,r4,r8 C compute cy from last add
ldq r6,-16(r17)
- cmpult r20,r0,r25 C compute cy from last add
+ cmpult r20,r28,r25 C compute cy from last add
ldq r7,-8(r17)
+ bis r8,r25,r25 C combine cy from the two adds
+ subq r19,4,r19 C decr loop cnt
addq r1,r5,r28 C 2nd main add
addq r18,32,r18 C update s2_ptr
addq r28,r25,r21 C 2nd carry add
$Lret: bis r25,r31,r0 C return cy
ret r31,(r26),1
-EPILOGUE(mpn_add_n)
+EPILOGUE()
ASM_END()
#define DIVREM_1_NORM_THRESHOLD 0 /* preinv always */
#define DIVREM_1_UNNORM_THRESHOLD 0 /* always */
+#define MOD_1_1P_METHOD 2
#define MOD_1_NORM_THRESHOLD 0 /* always */
#define MOD_1_UNNORM_THRESHOLD 0 /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD 32
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 6
#define MOD_1U_TO_MOD_1_1_THRESHOLD 2
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD 7
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD 12
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 73
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 78
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD 0 /* never mpn_mod_1s_2p */
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 15
#define USE_PREINV_DIVREM_1 1 /* preinv always */
+#define DIV_QR_2_PI2_THRESHOLD 25
#define DIVEXACT_1_THRESHOLD 0 /* always */
-#define BMOD_1_TO_MOD_1_THRESHOLD 87
+#define BMOD_1_TO_MOD_1_THRESHOLD 80
-#define MUL_TOOM22_THRESHOLD 16
-#define MUL_TOOM33_THRESHOLD 53
-#define MUL_TOOM44_THRESHOLD 121
-#define MUL_TOOM6H_THRESHOLD 173
+#define MUL_TOOM22_THRESHOLD 14
+#define MUL_TOOM33_THRESHOLD 66
+#define MUL_TOOM44_THRESHOLD 118
+#define MUL_TOOM6H_THRESHOLD 157
#define MUL_TOOM8H_THRESHOLD 236
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD 81
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD 91
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD 73
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 84
#define MUL_TOOM42_TO_TOOM53_THRESHOLD 81
#define MUL_TOOM42_TO_TOOM63_THRESHOLD 56
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD 66
#define SQR_BASECASE_THRESHOLD 5
-#define SQR_TOOM2_THRESHOLD 28
-#define SQR_TOOM3_THRESHOLD 78
-#define SQR_TOOM4_THRESHOLD 136
-#define SQR_TOOM6_THRESHOLD 180
+#define SQR_TOOM2_THRESHOLD 26
+#define SQR_TOOM3_THRESHOLD 77
+#define SQR_TOOM4_THRESHOLD 130
+#define SQR_TOOM6_THRESHOLD 173
#define SQR_TOOM8_THRESHOLD 260
+#define MULMID_TOOM42_THRESHOLD 20
+
#define MULMOD_BNM1_THRESHOLD 11
-#define SQRMOD_BNM1_THRESHOLD 17
+#define SQRMOD_BNM1_THRESHOLD 13
#define MUL_FFT_MODF_THRESHOLD 244 /* k = 5 */
#define MUL_FFT_TABLE3 \
#define MUL_FFT_TABLE3_SIZE 141
#define MUL_FFT_THRESHOLD 3008
-#define SQR_FFT_MODF_THRESHOLD 220 /* k = 5 */
+#define SQR_FFT_MODF_THRESHOLD 212 /* k = 5 */
#define SQR_FFT_TABLE3 \
{ { 220, 5}, { 13, 6}, { 15, 7}, { 8, 6}, \
{ 17, 7}, { 9, 6}, { 19, 7}, { 13, 8}, \
{ 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \
{2097152,22}, {4194304,23}, {8388608,24} }
#define SQR_FFT_TABLE3_SIZE 135
-#define SQR_FFT_THRESHOLD 2240
+#define SQR_FFT_THRESHOLD 1984
-#define MULLO_BASECASE_THRESHOLD 0 /* always */
-#define MULLO_DC_THRESHOLD 55
-#define MULLO_MUL_N_THRESHOLD 5558
+#define MULLO_BASECASE_THRESHOLD 2
+#define MULLO_DC_THRESHOLD 50
+#define MULLO_MUL_N_THRESHOLD 5397
-#define DC_DIV_QR_THRESHOLD 55
-#define DC_DIVAPPR_Q_THRESHOLD 192
+#define DC_DIV_QR_THRESHOLD 52
+#define DC_DIVAPPR_Q_THRESHOLD 172
#define DC_BDIV_QR_THRESHOLD 51
-#define DC_BDIV_Q_THRESHOLD 120
+#define DC_BDIV_Q_THRESHOLD 112
-#define INV_MULMOD_BNM1_THRESHOLD 61
-#define INV_NEWTON_THRESHOLD 174
+#define INV_MULMOD_BNM1_THRESHOLD 38
+#define INV_NEWTON_THRESHOLD 179
#define INV_APPR_THRESHOLD 180
-#define BINV_NEWTON_THRESHOLD 199
-#define REDC_1_TO_REDC_N_THRESHOLD 55
+#define BINV_NEWTON_THRESHOLD 197
+#define REDC_1_TO_REDC_N_THRESHOLD 51
-#define MU_DIV_QR_THRESHOLD 979
+#define MU_DIV_QR_THRESHOLD 998
#define MU_DIVAPPR_Q_THRESHOLD 998
#define MUPI_DIV_QR_THRESHOLD 90
-#define MU_BDIV_QR_THRESHOLD 792
-#define MU_BDIV_Q_THRESHOLD 942
-
-#define MATRIX22_STRASSEN_THRESHOLD 15
-#define HGCD_THRESHOLD 94
-#define GCD_DC_THRESHOLD 306
-#define GCDEXT_DC_THRESHOLD 210
-#define JACOBI_BASE_METHOD 2
-
-#define GET_STR_DC_THRESHOLD 16
-#define GET_STR_PRECOMPUTE_THRESHOLD 31
-#define SET_STR_DC_THRESHOLD 422
-#define SET_STR_PRECOMPUTE_THRESHOLD 1524
+#define MU_BDIV_QR_THRESHOLD 807
+#define MU_BDIV_Q_THRESHOLD 1078
+
+#define POWM_SEC_TABLE 2,17,188,393
+
+#define MATRIX22_STRASSEN_THRESHOLD 11
+#define HGCD_THRESHOLD 105
+#define HGCD_APPR_THRESHOLD 105
+#define HGCD_REDUCE_THRESHOLD 1494
+#define GCD_DC_THRESHOLD 285
+#define GCDEXT_DC_THRESHOLD 206
+#define JACOBI_BASE_METHOD 3
+
+#define GET_STR_DC_THRESHOLD 14
+#define GET_STR_PRECOMPUTE_THRESHOLD 29
+#define SET_STR_DC_THRESHOLD 426
+#define SET_STR_PRECOMPUTE_THRESHOLD 1535
+
+#define FAC_DSC_THRESHOLD 1502
+#define FAC_ODD_THRESHOLD 0 /* always */
#define DIVREM_1_NORM_THRESHOLD 0 /* preinv always */
#define DIVREM_1_UNNORM_THRESHOLD 0 /* always */
+#define MOD_1_1P_METHOD 2
#define MOD_1_NORM_THRESHOLD 0 /* always */
#define MOD_1_UNNORM_THRESHOLD 0 /* always */
#define MOD_1N_TO_MOD_1_1_THRESHOLD 3
#define MOD_1U_TO_MOD_1_1_THRESHOLD 2
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD 8
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD 30
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 10
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD 17
#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 7
#define USE_PREINV_DIVREM_1 1 /* preinv always */
+#define DIV_QR_2_PI2_THRESHOLD 8
#define DIVEXACT_1_THRESHOLD 0 /* always */
-#define BMOD_1_TO_MOD_1_THRESHOLD 18
-
-#define MUL_TOOM22_THRESHOLD 35
-#define MUL_TOOM33_THRESHOLD 74
-#define MUL_TOOM44_THRESHOLD 178
-#define MUL_TOOM6H_THRESHOLD 288
-#define MUL_TOOM8H_THRESHOLD 333
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD 75
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD 101
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD 105
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD 105
-
-#define SQR_BASECASE_THRESHOLD 5
-#define SQR_TOOM2_THRESHOLD 61
-#define SQR_TOOM3_THRESHOLD 107
-#define SQR_TOOM4_THRESHOLD 170
-#define SQR_TOOM6_THRESHOLD 309
-#define SQR_TOOM8_THRESHOLD 360
-
-#define MULMOD_BNM1_THRESHOLD 20
+#define BMOD_1_TO_MOD_1_THRESHOLD 19
+
+#define MUL_TOOM22_THRESHOLD 32
+#define MUL_TOOM33_THRESHOLD 105
+#define MUL_TOOM44_THRESHOLD 166
+#define MUL_TOOM6H_THRESHOLD 232
+#define MUL_TOOM8H_THRESHOLD 357
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD 96
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 110
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 93
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD 113
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD 133
+
+#define SQR_BASECASE_THRESHOLD 4
+#define SQR_TOOM2_THRESHOLD 60
+#define SQR_TOOM3_THRESHOLD 102
+#define SQR_TOOM4_THRESHOLD 155
+#define SQR_TOOM6_THRESHOLD 306
+#define SQR_TOOM8_THRESHOLD 333
+
+#define MULMID_TOOM42_THRESHOLD 52
+
+#define MULMOD_BNM1_THRESHOLD 15
#define SQRMOD_BNM1_THRESHOLD 23
-#define MUL_FFT_MODF_THRESHOLD 480 /* k = 5 */
+#define MUL_FFT_MODF_THRESHOLD 412 /* k = 5 */
#define MUL_FFT_TABLE3 \
{ { 480, 5}, { 18, 6}, { 10, 5}, { 21, 6}, \
{ 11, 5}, { 23, 6}, { 12, 5}, { 25, 6}, \
#define MUL_FFT_TABLE3_SIZE 151
#define MUL_FFT_THRESHOLD 5760
-#define SQR_FFT_MODF_THRESHOLD 476 /* k = 5 */
+#define SQR_FFT_MODF_THRESHOLD 412 /* k = 5 */
#define SQR_FFT_TABLE3 \
{ { 476, 5}, { 19, 6}, { 10, 5}, { 23, 6}, \
{ 12, 5}, { 25, 6}, { 27, 7}, { 14, 6}, \
{ 65536,17}, { 131072,18}, { 262144,19}, { 524288,20}, \
{1048576,21}, {2097152,22}, {4194304,23}, {8388608,24} }
#define SQR_FFT_TABLE3_SIZE 152
-#define SQR_FFT_THRESHOLD 3136
+#define SQR_FFT_THRESHOLD 4224
#define MULLO_BASECASE_THRESHOLD 0 /* always */
-#define MULLO_DC_THRESHOLD 101
-#define MULLO_MUL_N_THRESHOLD 15604
+#define MULLO_DC_THRESHOLD 113
+#define MULLO_MUL_N_THRESHOLD 11278
-#define DC_DIV_QR_THRESHOLD 119
+#define DC_DIV_QR_THRESHOLD 112
#define DC_DIVAPPR_Q_THRESHOLD 390
#define DC_BDIV_QR_THRESHOLD 110
-#define DC_BDIV_Q_THRESHOLD 318
+#define DC_BDIV_Q_THRESHOLD 286
-#define INV_MULMOD_BNM1_THRESHOLD 79
-#define INV_NEWTON_THRESHOLD 387
-#define INV_APPR_THRESHOLD 381
+#define INV_MULMOD_BNM1_THRESHOLD 62
+#define INV_NEWTON_THRESHOLD 393
+#define INV_APPR_THRESHOLD 375
-#define BINV_NEWTON_THRESHOLD 393
-#define REDC_1_TO_REDC_N_THRESHOLD 110
+#define BINV_NEWTON_THRESHOLD 390
+#define REDC_1_TO_REDC_N_THRESHOLD 124
-#define MU_DIV_QR_THRESHOLD 1718
-#define MU_DIVAPPR_Q_THRESHOLD 1895
-#define MUPI_DIV_QR_THRESHOLD 180
-#define MU_BDIV_QR_THRESHOLD 1387
+#define MU_DIV_QR_THRESHOLD 1652
+#define MU_DIVAPPR_Q_THRESHOLD 1685
+#define MUPI_DIV_QR_THRESHOLD 171
+#define MU_BDIV_QR_THRESHOLD 1470
#define MU_BDIV_Q_THRESHOLD 1652
-#define MATRIX22_STRASSEN_THRESHOLD 17
-#define HGCD_THRESHOLD 282
-#define GCD_DC_THRESHOLD 1138
-#define GCDEXT_DC_THRESHOLD 773
+#define POWM_SEC_TABLE 2,23,88,387,961,2578
+
+#define MATRIX22_STRASSEN_THRESHOLD 15
+#define HGCD_THRESHOLD 278
+#define HGCD_APPR_THRESHOLD 357
+#define HGCD_REDUCE_THRESHOLD 2899
+#define GCD_DC_THRESHOLD 1258
+#define GCDEXT_DC_THRESHOLD 777
#define JACOBI_BASE_METHOD 3
-#define GET_STR_DC_THRESHOLD 14
-#define GET_STR_PRECOMPUTE_THRESHOLD 19
-#define SET_STR_DC_THRESHOLD 3754
-#define SET_STR_PRECOMPUTE_THRESHOLD 8097
+#define GET_STR_DC_THRESHOLD 15
+#define GET_STR_PRECOMPUTE_THRESHOLD 24
+#define SET_STR_DC_THRESHOLD 3866
+#define SET_STR_PRECOMPUTE_THRESHOLD 7708
+
+#define FAC_DSC_THRESHOLD 1025
+#define FAC_ODD_THRESHOLD 24
--- /dev/null
+dnl Alpha mpn_mod_1s_4p
+
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
+dnl Copyright 2009, 2010 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C TODO:
+C * Optimise. 2.75 c/l should be possible.
+C * Write a proper mpn_mod_1s_4p_cps. The code below was compiler generated.
+C * Optimise feed-in code, starting the sw pipeline in switch code.
+C * Shorten software pipeline. The mul instructions are scheduled too far
+C from their users. Fixing this will allow us to use fewer registers.
+C * If we cannot reduce register usage, perhaps write a small-n basecase.
+C * Does this work for PIC?
+
+C cycles/limb
+C EV4: ?
+C EV5: 23
+C EV6: 3
+
+define(`ap', `r16')
+define(`n', `r17')
+define(`pl', `r24')
+define(`ph', `r25')
+define(`rl', `r6')
+define(`rh', `r7')
+define(`B1modb', `r1')
+define(`B2modb', `r2')
+define(`B3modb', `r3')
+define(`B4modb', `r4')
+define(`B5modb', `r5')
+
+ASM_START()
+C mpn_mod_1s_4p -- reduce the n-limb vector at ap to a single-limb remainder,
+C consuming four limbs per loop iteration.
+C
+C Register inputs, as used by the code below:
+C   r16 (ap)  pointer to the limb vector
+C   r17 (n)   number of limbs
+C   r18       the divisor b (see the "qh * b" step near the end)
+C   r19       pointer to a table of quadwords: bi at offset 0, cnt at 8 and
+C             B1modb..B5modb at 16..48 (presumably the table filled in by
+C             mpn_mod_1s_4p_cps)
+C The remainder is returned in r0.
+C
+C Throughout, rh:rl (r7:r6) is the running two-limb value and ph:pl (r25:r24)
+C is the partial sum being accumulated.
+PROLOGUE(mpn_mod_1s_4p)
+C Allocate a 64-byte frame and save r9-r13, which are used as temporaries and
+C restored before returning.  The five table constants are loaded in between.
+ lda r30, -64(r30)
+ stq r9, 8(r30)
+ ldq B1modb, 16(r19)
+ stq r10, 16(r30)
+ ldq B2modb, 24(r19)
+ stq r11, 24(r30)
+ ldq B3modb, 32(r19)
+ stq r12, 32(r30)
+ ldq B4modb, 40(r19)
+ stq r13, 40(r30)
+ ldq B5modb, 48(r19)
+ s8addq n, ap, ap C point ap at vector end
+
+C Dispatch on n mod 4.  Each feed-in case consumes the 1, 2, 3 or 4 most
+C significant limbs and leaves them (combined) in rh:rl.  n is pre-decremented
+C by 4, so at L(com) n > 0 exactly when at least one more full group of four
+C limbs remains.
+ and n, 3, r0
+ lda n, -4(n)
+ beq r0, L(b0)
+ lda r6, -2(r0)
+ blt r6, L(b1)
+ beq r6, L(b2)
+
+C n mod 4 = 3: rh:rl = ap[n-3] + ap[n-2]*B1modb + ap[n-1]*B2modb
+L(b3): ldq r21, -16(ap)
+ ldq r22, -8(ap)
+ ldq r20, -24(ap)
+ mulq r21, B1modb, r8
+ umulh r21, B1modb, r12
+ mulq r22, B2modb, r9
+ umulh r22, B2modb, r13
+ addq r8, r20, pl
+ cmpult pl, r8, r0
+ addq r0, r12, ph
+ addq r9, pl, rl
+ cmpult rl, r9, r0
+ addq r13, ph, ph
+ addq r0, ph, rh
+ lda ap, -56(ap)
+ br L(com)
+
+C n mod 4 = 0: rh:rl = ap[n-4] + ap[n-3]*B1modb + ap[n-2]*B2modb
+C                      + ap[n-1]*B3modb
+L(b0): ldq r21, -24(ap)
+ ldq r22, -16(ap)
+ ldq r23, -8(ap)
+ ldq r20, -32(ap)
+ mulq r21, B1modb, r8
+ umulh r21, B1modb, r12
+ mulq r22, B2modb, r9
+ umulh r22, B2modb, r13
+ mulq r23, B3modb, r10
+ umulh r23, B3modb, r27
+ addq r8, r20, pl
+ cmpult pl, r8, r0
+ addq r0, r12, ph
+ addq r9, pl, pl
+ cmpult pl, r9, r0
+ addq r13, ph, ph
+ addq r0, ph, ph
+ addq r10, pl, rl
+ cmpult rl, r10, r0
+ addq r27, ph, ph
+ addq r0, ph, rh
+ lda ap, -64(ap)
+ br L(com)
+
+C n mod 4 = 1: rh:rl = 0:ap[n-1]
+L(b1): bis r31, r31, rh
+ ldq rl, -8(ap)
+ lda ap, -40(ap)
+ br L(com)
+
+C n mod 4 = 2: rh:rl = ap[n-1]:ap[n-2]
+L(b2): ldq rh, -8(ap)
+ ldq rl, -16(ap)
+ lda ap, -48(ap)
+
+C Common entry.  If no full group remains, go straight to the final reduction.
+C Otherwise load the next four limbs (r20 is the least significant, r23 the
+C most significant) and start the software pipeline by issuing the products
+C for B1modb..B4modb.
+L(com): ble n, L(ed3)
+ ldq r21, 8(ap)
+ ldq r22, 16(ap)
+ ldq r23, 24(ap)
+ ldq r20, 0(ap)
+ lda n, -4(n)
+ lda ap, -32(ap)
+ mulq r21, B1modb, r8
+ umulh r21, B1modb, r12
+ mulq r22, B2modb, r9
+ umulh r22, B2modb, r13
+ mulq r23, B3modb, r10
+ umulh r23, B3modb, r27
+ mulq rl, B4modb, r11
+ umulh rl, B4modb, r28
+ ble n, L(ed2)
+
+C Main loop.  Each pass completes
+C   rh:rl = r20 + r21*B1modb + r22*B2modb + r23*B3modb + rl*B4modb + rh*B5modb
+C for the group whose products were issued earlier, while loading the next
+C four limbs and issuing their products.  Carries out of the low limb are
+C detected with cmpult and added into ph.
+ ALIGN(16)
+L(top): ldq r21, 8(ap)
+ mulq rh, B5modb, rl
+ addq r8, r20, pl
+ ldq r22, 16(ap)
+ cmpult pl, r8, r0
+ umulh rh, B5modb, rh
+ ldq r23, 24(ap)
+ addq r0, r12, ph
+ addq r9, pl, pl
+ mulq r21, B1modb, r8
+ cmpult pl, r9, r0
+ addq r13, ph, ph
+ umulh r21, B1modb, r12
+ lda ap, -32(ap)
+ addq r0, ph, ph
+ addq r10, pl, pl
+ mulq r22, B2modb, r9
+ cmpult pl, r10, r0
+ addq r27, ph, ph
+ addq r11, pl, pl
+ umulh r22, B2modb, r13
+ addq r0, ph, ph
+ cmpult pl, r11, r0
+ addq r28, ph, ph
+ mulq r23, B3modb, r10
+ ldq r20, 32(ap)
+ addq pl, rl, rl
+ umulh r23, B3modb, r27
+ addq r0, ph, ph
+ cmpult rl, pl, r0
+ mulq rl, B4modb, r11
+ addq ph, rh, rh
+ umulh rl, B4modb, r28
+ addq r0, rh, rh
+ lda n, -4(n)
+ bgt n, L(top)
+
+C Wind-down: finish the sum for the last group without loading or multiplying
+C any further limbs.
+L(ed2): mulq rh, B5modb, rl
+ addq r8, r20, pl
+ umulh rh, B5modb, rh
+ cmpult pl, r8, r0
+ addq r0, r12, ph
+ addq r9, pl, pl
+ cmpult pl, r9, r0
+ addq r13, ph, ph
+ addq r0, ph, ph
+ addq r10, pl, pl
+ cmpult pl, r10, r0
+ addq r27, ph, ph
+ addq r11, pl, pl
+ addq r0, ph, ph
+ cmpult pl, r11, r0
+ addq r28, ph, ph
+ addq pl, rl, rl
+ addq r0, ph, ph
+ cmpult rl, pl, r0
+ addq ph, rh, rh
+ addq r0, rh, rh
+
+C Final reduction: rh:rl = rh*B1modb + rl.
+L(ed3): mulq rh, B1modb, r8
+ umulh rh, B1modb, rh
+ addq r8, rl, rl
+ cmpult rl, r8, r0
+ addq r0, rh, rh
+
+C Shift rh:rl left by cnt.  r25 = -cnt is used as the right-shift count for
+C the bits moving from rl into rh.
+C NOTE(review): this relies on srl using only the low bits of its count (so
+C -cnt acts as 64-cnt) and on cnt being nonzero -- confirm both.
+C From here on r24/r25 (pl/ph) and r2 (B2modb) are reused as scratch.
+ ldq r24, 8(r19) C cnt
+ sll rh, r24, rh
+ subq r31, r24, r25
+ srl rl, r25, r2
+ sll rl, r24, rl
+ or r2, rh, rh
+
+C Divide rh:rl by b using the precomputed value bi: form the quotient estimate
+C qh:ql = rh*bi + (rh+1):rl, compute rl - qh*b, then apply the two conditional
+C corrections (add b if rl > ql, subtract b if rl >= b).  The corrections are
+C branch-free: the 0/1 compare result is negated into a mask and ANDed with b.
+ ldq r23, 0(r19) C bi
+ mulq rh, r23, r8
+ umulh rh, r23, r9
+ addq rh, 1, r7
+ addq r8, rl, r8 C ql
+ cmpult r8, rl, r0
+ addq r9, r7, r9
+ addq r0, r9, r9 C qh
+ mulq r9, r18, r21 C qh * b
+ subq rl, r21, rl
+ cmpult r8, rl, r0 C rl > ql
+ negq r0, r0
+ and r0, r18, r0
+ addq rl, r0, rl
+ cmpule r18, rl, r0 C rl >= b
+ negq r0, r0
+ and r0, r18, r0
+ subq rl, r0, rl
+
+C Undo the left shift by cnt; the result is the return value.
+ srl rl, r24, r0
+
+C Restore the saved registers, release the frame and return.
+ ldq r9, 8(r30)
+ ldq r10, 16(r30)
+ ldq r11, 24(r30)
+ ldq r12, 32(r30)
+ ldq r13, 40(r30)
+ lda r30, 64(r30)
+ ret r31, (r26), 1
+
+C mpn_mod_1s_4p_cps -- fill in the seven-quadword table at r16 for the divisor
+C in r17.  The table receives, in order: the value returned by
+C mpn_invert_limb (offset 0), the shift count (offset 8), and five constants
+C (offsets 16..48), each stored shifted right by the shift count.
+C
+C Registers held across the body: r11 = table pointer, r10 = shift count,
+C r9 = the divisor shifted left by r10, r0 = mpn_invert_limb result.
+PROLOGUE(mpn_mod_1s_4p_cps,gp)
+C Allocate a 32-byte frame; save the return address and r9-r11.
+ lda r30, -32(r30)
+ stq r26, 0(r30)
+ stq r9, 8(r30)
+ stq r10, 16(r30)
+ stq r11, 24(r30)
+ mov r16, r11
+C Compute the shift count in r10 from r17 with two __clz_tab lookups.
+C NOTE(review): presumably a count-leading-zeros sequence (cmpbge locates the
+C zero bytes, the table supplies the bit position) -- confirm against the
+C C source this was generated from.
+ LEA( r4, __clz_tab)
+ lda r10, 65(r31)
+ cmpbge r31, r17, r1
+ srl r1, 1, r1
+ xor r1, 127, r1
+ addq r1, r4, r1
+ ldq_u r2, 0(r1)
+ extbl r2, r1, r2
+ s8subq r2, 7, r2
+ srl r17, r2, r3
+ subq r10, r2, r10
+ addq r3, r4, r3
+ ldq_u r1, 0(r3)
+ extbl r1, r3, r1
+ subq r10, r1, r10
+C r9 = divisor shifted left by the count; pass it to mpn_invert_limb.
+ sll r17, r10, r9
+ mov r9, r16
+ jsr r26, mpn_invert_limb
+C The ldah here and the "lda r29, 0(r29)" further down rebuild r29 from r26
+C after the call.
+C NOTE(review): looks like a gp reload with literal 0 displacements -- confirm
+C that is what is intended.
+ ldah r29, 0(r26)
+C First constant: r2 = ((r0 >> -cnt) | (1 << cnt)) * -r9.  Its value shifted
+C right by cnt goes to offset 16.  The stores of r0 and cnt to offsets 0 and 8
+C are interleaved, as is the reload of the return address.
+ subq r31, r10, r2
+ lda r1, 1(r31)
+ sll r1, r10, r1
+ subq r31, r9, r3
+ srl r0, r2, r2
+ ldq r26, 0(r30)
+ bis r2, r1, r2
+ lda r29, 0(r29)
+ stq r0, 0(r11)
+ stq r10, 8(r11)
+ mulq r2, r3, r2
+ srl r2, r10, r3
+C Each of the four rounds below derives the next constant from the previous
+C one, x:
+C   t = (~umulh(x, r0) - x) * r9
+C   if t > mulq(x, r0) then t = t + r9
+C (cmpule yields 0 when t is greater, and cmoveq then selects t + r9.)
+C The store of the previous constant, shifted right by cnt, is interleaved
+C with each round.
+C
+C Round 1: previous constant in r2, result in r1; offset 16 is stored.
+ umulh r2, r0, r1
+ stq r3, 16(r11)
+ mulq r2, r0, r3
+ ornot r31, r1, r1
+ subq r1, r2, r1
+ mulq r1, r9, r1
+ addq r1, r9, r2
+ cmpule r1, r3, r3
+ cmoveq r3, r2, r1
+C Round 2: previous constant in r1, result in r2; offset 24 is stored.
+ srl r1, r10, r3
+ umulh r1, r0, r2
+ stq r3, 24(r11)
+ mulq r1, r0, r3
+ ornot r31, r2, r2
+ subq r2, r1, r2
+ mulq r2, r9, r2
+ addq r2, r9, r1
+ cmpule r2, r3, r3
+ cmoveq r3, r1, r2
+C Round 3: previous constant in r2, result in r3; offset 32 is stored.
+ srl r2, r10, r1
+ umulh r2, r0, r3
+ stq r1, 32(r11)
+ mulq r2, r0, r1
+ ornot r31, r3, r3
+ subq r3, r2, r3
+ mulq r3, r9, r3
+ addq r3, r9, r2
+ cmpule r3, r1, r1
+ cmoveq r1, r2, r3
+C Round 4: previous constant in r3, result in r1; offset 40 is stored.  r0 and
+C r9 are overwritten here since they are not needed afterwards.
+ srl r3, r10, r2
+ umulh r3, r0, r1
+ stq r2, 40(r11)
+ mulq r3, r0, r0
+ ornot r31, r1, r1
+ subq r1, r3, r1
+ mulq r1, r9, r1
+ addq r1, r9, r9
+ cmpule r1, r0, r0
+ cmoveq r0, r9, r1
+C Store the last constant at offset 48 while restoring r9-r11, then release
+C the frame and return.
+ ldq r9, 8(r30)
+ srl r1, r10, r1
+ ldq r10, 16(r30)
+ stq r1, 48(r11)
+ ldq r11, 24(r30)
+ lda r30, 32(r30)
+ ret r31, (r26), 1
+EPILOGUE()
#!/usr/bin/perl -w
-# Copyright 2000, 2001, 2003, 2004, 2005 Free Software Foundation, Inc.
+# Copyright 2000, 2001, 2003, 2004, 2005, 2011 Free Software Foundation, Inc.
#
# This file is part of the GNU MP Library.
#
(
'addq' => 'E',
'and' => 'E',
+ 'andnot' => 'E',
'beq' => 'U',
'bge' => 'U',
'bgt' => 'U',
+ 'bic' => 'E',
+ 'bis' => 'E',
'blt' => 'U',
'bne' => 'U',
'br' => 'L',
'ldt' => 'L',
'ret' => 'L',
'mov' => 'E',
+ 'mull' => 'U',
'mulq' => 'U',
'negq' => 'E',
'nop' => 'E',
dnl Alpha mpn_invert_limb -- Invert a normalized limb.
-dnl Copyright 1996, 2000, 2001, 2002, 2003, 2007 Free Software Foundation,
-dnl Inc.
+dnl Copyright 1996, 2000, 2001, 2002, 2003, 2007, 2011 Free Software
+dnl Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
include(`../config.m4')
C cycles/limb
-C EV4: ~175
-C EV5: ~111-126
-C EV6: ~52-76
+C EV4: ?
+C EV5: 137/140 (with BWX/without BWX)
+C EV6: 71/72 (with BWX/without BWX)
-C This is based on ideas of Peter L. Montgomery.
+C This was compiler generated, with minimal manual edits. Surely several
+C cycles could be cut with some thought.
ASM_START()
-
-FLOAT64($C36,9223372036854775808.0) C 2^63
-
PROLOGUE(mpn_invert_limb,gp)
- lda r30,-16(r30)
- addq r16,r16,r1
- bne r1,$73
- lda r0,-1
- br r31,$Lend
-$73:
- srl r16,1,r1
- stq r1,0(r30)
- ldt f11,0(r30)
- cvtqt f11,f1
- LEA(r1,$C36)
- ldt f10,0(r1) C f10 = 2^63
- divt f10,f1,f10 C f10 = 2^63 / (u / 2)
- LEA(r2,$invtab-4096)
- srl r16,52,r1 C extract high 12 bits
- addq r1,r1,r1 C align ...0000bbbbbbbb0
- addq r1,r2,r1 C compute array offset
- ldq_u r2,0(r1) C load quadword containing our 16 bits
-bigend(`addq r1,1,r1')
- extwl r2,r1,r2 C extract desired 16 bits
- sll r2,48,r0
- umulh r16,r0,r1
- addq r16,r1,r3
- stq r3,0(r30)
- ldt f11,0(r30)
- cvtqt f11,f1
- mult f1,f10,f1
- cvttqc f1,f1
- stt f1,0(r30)
- ldq r4,0(r30)
- subq r0,r4,r0
- umulh r16,r0,r1
- mulq r16,r0,r2
- addq r16,r1,r3
- bge r3,$Loop2
-$Loop1: addq r2,r16,r2
- cmpult r2,r16,r1
- addq r3,r1,r3
- addq r0,1,r0
- blt r3,$Loop1
-$Loop2: cmpult r2,r16,r1
- subq r0,1,r0
- subq r3,r1,r3
- subq r2,r16,r2
- bge r3,$Loop2
-$Lend:
- lda r30,16(r30)
- ret r31,(r26),1
-EPILOGUE(mpn_invert_limb)
-DATASTART($invtab)
- .word 0xffff,0xffc0,0xff80,0xff40,0xff00,0xfec0,0xfe81,0xfe41
- .word 0xfe01,0xfdc2,0xfd83,0xfd43,0xfd04,0xfcc5,0xfc86,0xfc46
- .word 0xfc07,0xfbc8,0xfb8a,0xfb4b,0xfb0c,0xfacd,0xfa8e,0xfa50
- .word 0xfa11,0xf9d3,0xf994,0xf956,0xf918,0xf8d9,0xf89b,0xf85d
- .word 0xf81f,0xf7e1,0xf7a3,0xf765,0xf727,0xf6ea,0xf6ac,0xf66e
- .word 0xf631,0xf5f3,0xf5b6,0xf578,0xf53b,0xf4fd,0xf4c0,0xf483
- .word 0xf446,0xf409,0xf3cc,0xf38f,0xf352,0xf315,0xf2d8,0xf29c
- .word 0xf25f,0xf222,0xf1e6,0xf1a9,0xf16d,0xf130,0xf0f4,0xf0b8
- .word 0xf07c,0xf03f,0xf003,0xefc7,0xef8b,0xef4f,0xef14,0xeed8
- .word 0xee9c,0xee60,0xee25,0xede9,0xedae,0xed72,0xed37,0xecfb
- .word 0xecc0,0xec85,0xec4a,0xec0e,0xebd3,0xeb98,0xeb5d,0xeb22
- .word 0xeae8,0xeaad,0xea72,0xea37,0xe9fd,0xe9c2,0xe988,0xe94d
- .word 0xe913,0xe8d8,0xe89e,0xe864,0xe829,0xe7ef,0xe7b5,0xe77b
- .word 0xe741,0xe707,0xe6cd,0xe694,0xe65a,0xe620,0xe5e6,0xe5ad
- .word 0xe573,0xe53a,0xe500,0xe4c7,0xe48d,0xe454,0xe41b,0xe3e2
- .word 0xe3a9,0xe370,0xe336,0xe2fd,0xe2c5,0xe28c,0xe253,0xe21a
- .word 0xe1e1,0xe1a9,0xe170,0xe138,0xe0ff,0xe0c7,0xe08e,0xe056
- .word 0xe01e,0xdfe5,0xdfad,0xdf75,0xdf3d,0xdf05,0xdecd,0xde95
- .word 0xde5d,0xde25,0xdded,0xddb6,0xdd7e,0xdd46,0xdd0f,0xdcd7
- .word 0xdca0,0xdc68,0xdc31,0xdbf9,0xdbc2,0xdb8b,0xdb54,0xdb1d
- .word 0xdae6,0xdaae,0xda78,0xda41,0xda0a,0xd9d3,0xd99c,0xd965
- .word 0xd92f,0xd8f8,0xd8c1,0xd88b,0xd854,0xd81e,0xd7e8,0xd7b1
- .word 0xd77b,0xd745,0xd70e,0xd6d8,0xd6a2,0xd66c,0xd636,0xd600
- .word 0xd5ca,0xd594,0xd55f,0xd529,0xd4f3,0xd4bd,0xd488,0xd452
- .word 0xd41d,0xd3e7,0xd3b2,0xd37c,0xd347,0xd312,0xd2dd,0xd2a7
- .word 0xd272,0xd23d,0xd208,0xd1d3,0xd19e,0xd169,0xd134,0xd100
- .word 0xd0cb,0xd096,0xd061,0xd02d,0xcff8,0xcfc4,0xcf8f,0xcf5b
- .word 0xcf26,0xcef2,0xcebe,0xce89,0xce55,0xce21,0xcded,0xcdb9
- .word 0xcd85,0xcd51,0xcd1d,0xcce9,0xccb5,0xcc81,0xcc4e,0xcc1a
- .word 0xcbe6,0xcbb3,0xcb7f,0xcb4c,0xcb18,0xcae5,0xcab1,0xca7e
- .word 0xca4b,0xca17,0xc9e4,0xc9b1,0xc97e,0xc94b,0xc918,0xc8e5
- .word 0xc8b2,0xc87f,0xc84c,0xc819,0xc7e7,0xc7b4,0xc781,0xc74f
- .word 0xc71c,0xc6e9,0xc6b7,0xc684,0xc652,0xc620,0xc5ed,0xc5bb
- .word 0xc589,0xc557,0xc524,0xc4f2,0xc4c0,0xc48e,0xc45c,0xc42a
- .word 0xc3f8,0xc3c7,0xc395,0xc363,0xc331,0xc300,0xc2ce,0xc29c
- .word 0xc26b,0xc239,0xc208,0xc1d6,0xc1a5,0xc174,0xc142,0xc111
- .word 0xc0e0,0xc0af,0xc07e,0xc04d,0xc01c,0xbfeb,0xbfba,0xbf89
- .word 0xbf58,0xbf27,0xbef6,0xbec5,0xbe95,0xbe64,0xbe33,0xbe03
- .word 0xbdd2,0xbda2,0xbd71,0xbd41,0xbd10,0xbce0,0xbcb0,0xbc80
- .word 0xbc4f,0xbc1f,0xbbef,0xbbbf,0xbb8f,0xbb5f,0xbb2f,0xbaff
- .word 0xbacf,0xba9f,0xba6f,0xba40,0xba10,0xb9e0,0xb9b1,0xb981
- .word 0xb951,0xb922,0xb8f2,0xb8c3,0xb894,0xb864,0xb835,0xb806
- .word 0xb7d6,0xb7a7,0xb778,0xb749,0xb71a,0xb6eb,0xb6bc,0xb68d
- .word 0xb65e,0xb62f,0xb600,0xb5d1,0xb5a2,0xb574,0xb545,0xb516
- .word 0xb4e8,0xb4b9,0xb48a,0xb45c,0xb42e,0xb3ff,0xb3d1,0xb3a2
- .word 0xb374,0xb346,0xb318,0xb2e9,0xb2bb,0xb28d,0xb25f,0xb231
- .word 0xb203,0xb1d5,0xb1a7,0xb179,0xb14b,0xb11d,0xb0f0,0xb0c2
- .word 0xb094,0xb067,0xb039,0xb00b,0xafde,0xafb0,0xaf83,0xaf55
- .word 0xaf28,0xaefb,0xaecd,0xaea0,0xae73,0xae45,0xae18,0xadeb
- .word 0xadbe,0xad91,0xad64,0xad37,0xad0a,0xacdd,0xacb0,0xac83
- .word 0xac57,0xac2a,0xabfd,0xabd0,0xaba4,0xab77,0xab4a,0xab1e
- .word 0xaaf1,0xaac5,0xaa98,0xaa6c,0xaa40,0xaa13,0xa9e7,0xa9bb
- .word 0xa98e,0xa962,0xa936,0xa90a,0xa8de,0xa8b2,0xa886,0xa85a
- .word 0xa82e,0xa802,0xa7d6,0xa7aa,0xa77e,0xa753,0xa727,0xa6fb
- .word 0xa6d0,0xa6a4,0xa678,0xa64d,0xa621,0xa5f6,0xa5ca,0xa59f
- .word 0xa574,0xa548,0xa51d,0xa4f2,0xa4c6,0xa49b,0xa470,0xa445
- .word 0xa41a,0xa3ef,0xa3c4,0xa399,0xa36e,0xa343,0xa318,0xa2ed
- .word 0xa2c2,0xa297,0xa26d,0xa242,0xa217,0xa1ed,0xa1c2,0xa197
- .word 0xa16d,0xa142,0xa118,0xa0ed,0xa0c3,0xa098,0xa06e,0xa044
- .word 0xa01a,0x9fef,0x9fc5,0x9f9b,0x9f71,0x9f47,0x9f1c,0x9ef2
- .word 0x9ec8,0x9e9e,0x9e74,0x9e4b,0x9e21,0x9df7,0x9dcd,0x9da3
- .word 0x9d79,0x9d50,0x9d26,0x9cfc,0x9cd3,0x9ca9,0x9c80,0x9c56
- .word 0x9c2d,0x9c03,0x9bda,0x9bb0,0x9b87,0x9b5e,0x9b34,0x9b0b
- .word 0x9ae2,0x9ab9,0x9a8f,0x9a66,0x9a3d,0x9a14,0x99eb,0x99c2
- .word 0x9999,0x9970,0x9947,0x991e,0x98f6,0x98cd,0x98a4,0x987b
- .word 0x9852,0x982a,0x9801,0x97d8,0x97b0,0x9787,0x975f,0x9736
- .word 0x970e,0x96e5,0x96bd,0x9695,0x966c,0x9644,0x961c,0x95f3
- .word 0x95cb,0x95a3,0x957b,0x9553,0x952b,0x9503,0x94db,0x94b3
- .word 0x948b,0x9463,0x943b,0x9413,0x93eb,0x93c3,0x939b,0x9374
- .word 0x934c,0x9324,0x92fd,0x92d5,0x92ad,0x9286,0x925e,0x9237
- .word 0x920f,0x91e8,0x91c0,0x9199,0x9172,0x914a,0x9123,0x90fc
- .word 0x90d4,0x90ad,0x9086,0x905f,0x9038,0x9011,0x8fea,0x8fc3
- .word 0x8f9c,0x8f75,0x8f4e,0x8f27,0x8f00,0x8ed9,0x8eb2,0x8e8b
- .word 0x8e65,0x8e3e,0x8e17,0x8df1,0x8dca,0x8da3,0x8d7d,0x8d56
- .word 0x8d30,0x8d09,0x8ce3,0x8cbc,0x8c96,0x8c6f,0x8c49,0x8c23
- .word 0x8bfc,0x8bd6,0x8bb0,0x8b8a,0x8b64,0x8b3d,0x8b17,0x8af1
- .word 0x8acb,0x8aa5,0x8a7f,0x8a59,0x8a33,0x8a0d,0x89e7,0x89c1
- .word 0x899c,0x8976,0x8950,0x892a,0x8904,0x88df,0x88b9,0x8893
- .word 0x886e,0x8848,0x8823,0x87fd,0x87d8,0x87b2,0x878d,0x8767
- .word 0x8742,0x871d,0x86f7,0x86d2,0x86ad,0x8687,0x8662,0x863d
- .word 0x8618,0x85f3,0x85ce,0x85a9,0x8583,0x855e,0x8539,0x8514
- .word 0x84f0,0x84cb,0x84a6,0x8481,0x845c,0x8437,0x8412,0x83ee
- .word 0x83c9,0x83a4,0x8380,0x835b,0x8336,0x8312,0x82ed,0x82c9
- .word 0x82a4,0x8280,0x825b,0x8237,0x8212,0x81ee,0x81ca,0x81a5
- .word 0x8181,0x815d,0x8138,0x8114,0x80f0,0x80cc,0x80a8,0x8084
- .word 0x8060,0x803c,0x8018,0x7ff4,0x7fd0,0x7fac,0x7f88,0x7f64
- .word 0x7f40,0x7f1c,0x7ef8,0x7ed4,0x7eb1,0x7e8d,0x7e69,0x7e45
- .word 0x7e22,0x7dfe,0x7ddb,0x7db7,0x7d93,0x7d70,0x7d4c,0x7d29
- .word 0x7d05,0x7ce2,0x7cbf,0x7c9b,0x7c78,0x7c55,0x7c31,0x7c0e
- .word 0x7beb,0x7bc7,0x7ba4,0x7b81,0x7b5e,0x7b3b,0x7b18,0x7af5
- .word 0x7ad2,0x7aaf,0x7a8c,0x7a69,0x7a46,0x7a23,0x7a00,0x79dd
- .word 0x79ba,0x7997,0x7975,0x7952,0x792f,0x790c,0x78ea,0x78c7
- .word 0x78a4,0x7882,0x785f,0x783c,0x781a,0x77f7,0x77d5,0x77b2
- .word 0x7790,0x776e,0x774b,0x7729,0x7706,0x76e4,0x76c2,0x76a0
- .word 0x767d,0x765b,0x7639,0x7617,0x75f5,0x75d2,0x75b0,0x758e
- .word 0x756c,0x754a,0x7528,0x7506,0x74e4,0x74c2,0x74a0,0x747e
- .word 0x745d,0x743b,0x7419,0x73f7,0x73d5,0x73b4,0x7392,0x7370
- .word 0x734f,0x732d,0x730b,0x72ea,0x72c8,0x72a7,0x7285,0x7264
- .word 0x7242,0x7221,0x71ff,0x71de,0x71bc,0x719b,0x717a,0x7158
- .word 0x7137,0x7116,0x70f5,0x70d3,0x70b2,0x7091,0x7070,0x704f
- .word 0x702e,0x700c,0x6feb,0x6fca,0x6fa9,0x6f88,0x6f67,0x6f46
- .word 0x6f26,0x6f05,0x6ee4,0x6ec3,0x6ea2,0x6e81,0x6e60,0x6e40
- .word 0x6e1f,0x6dfe,0x6dde,0x6dbd,0x6d9c,0x6d7c,0x6d5b,0x6d3a
- .word 0x6d1a,0x6cf9,0x6cd9,0x6cb8,0x6c98,0x6c77,0x6c57,0x6c37
- .word 0x6c16,0x6bf6,0x6bd6,0x6bb5,0x6b95,0x6b75,0x6b54,0x6b34
- .word 0x6b14,0x6af4,0x6ad4,0x6ab4,0x6a94,0x6a73,0x6a53,0x6a33
- .word 0x6a13,0x69f3,0x69d3,0x69b3,0x6993,0x6974,0x6954,0x6934
- .word 0x6914,0x68f4,0x68d4,0x68b5,0x6895,0x6875,0x6855,0x6836
- .word 0x6816,0x67f6,0x67d7,0x67b7,0x6798,0x6778,0x6758,0x6739
- .word 0x6719,0x66fa,0x66db,0x66bb,0x669c,0x667c,0x665d,0x663e
- .word 0x661e,0x65ff,0x65e0,0x65c0,0x65a1,0x6582,0x6563,0x6544
- .word 0x6524,0x6505,0x64e6,0x64c7,0x64a8,0x6489,0x646a,0x644b
- .word 0x642c,0x640d,0x63ee,0x63cf,0x63b0,0x6391,0x6373,0x6354
- .word 0x6335,0x6316,0x62f7,0x62d9,0x62ba,0x629b,0x627c,0x625e
- .word 0x623f,0x6221,0x6202,0x61e3,0x61c5,0x61a6,0x6188,0x6169
- .word 0x614b,0x612c,0x610e,0x60ef,0x60d1,0x60b3,0x6094,0x6076
- .word 0x6058,0x6039,0x601b,0x5ffd,0x5fdf,0x5fc0,0x5fa2,0x5f84
- .word 0x5f66,0x5f48,0x5f2a,0x5f0b,0x5eed,0x5ecf,0x5eb1,0x5e93
- .word 0x5e75,0x5e57,0x5e39,0x5e1b,0x5dfd,0x5de0,0x5dc2,0x5da4
- .word 0x5d86,0x5d68,0x5d4a,0x5d2d,0x5d0f,0x5cf1,0x5cd3,0x5cb6
- .word 0x5c98,0x5c7a,0x5c5d,0x5c3f,0x5c21,0x5c04,0x5be6,0x5bc9
- .word 0x5bab,0x5b8e,0x5b70,0x5b53,0x5b35,0x5b18,0x5afb,0x5add
- .word 0x5ac0,0x5aa2,0x5a85,0x5a68,0x5a4b,0x5a2d,0x5a10,0x59f3
- .word 0x59d6,0x59b8,0x599b,0x597e,0x5961,0x5944,0x5927,0x590a
- .word 0x58ed,0x58d0,0x58b3,0x5896,0x5879,0x585c,0x583f,0x5822
- .word 0x5805,0x57e8,0x57cb,0x57ae,0x5791,0x5775,0x5758,0x573b
- .word 0x571e,0x5702,0x56e5,0x56c8,0x56ac,0x568f,0x5672,0x5656
- .word 0x5639,0x561c,0x5600,0x55e3,0x55c7,0x55aa,0x558e,0x5571
- .word 0x5555,0x5538,0x551c,0x5500,0x54e3,0x54c7,0x54aa,0x548e
- .word 0x5472,0x5456,0x5439,0x541d,0x5401,0x53e5,0x53c8,0x53ac
- .word 0x5390,0x5374,0x5358,0x533c,0x5320,0x5304,0x52e8,0x52cb
- .word 0x52af,0x5293,0x5277,0x525c,0x5240,0x5224,0x5208,0x51ec
- .word 0x51d0,0x51b4,0x5198,0x517c,0x5161,0x5145,0x5129,0x510d
- .word 0x50f2,0x50d6,0x50ba,0x509f,0x5083,0x5067,0x504c,0x5030
- .word 0x5015,0x4ff9,0x4fdd,0x4fc2,0x4fa6,0x4f8b,0x4f6f,0x4f54
- .word 0x4f38,0x4f1d,0x4f02,0x4ee6,0x4ecb,0x4eb0,0x4e94,0x4e79
- .word 0x4e5e,0x4e42,0x4e27,0x4e0c,0x4df0,0x4dd5,0x4dba,0x4d9f
- .word 0x4d84,0x4d69,0x4d4d,0x4d32,0x4d17,0x4cfc,0x4ce1,0x4cc6
- .word 0x4cab,0x4c90,0x4c75,0x4c5a,0x4c3f,0x4c24,0x4c09,0x4bee
- .word 0x4bd3,0x4bb9,0x4b9e,0x4b83,0x4b68,0x4b4d,0x4b32,0x4b18
- .word 0x4afd,0x4ae2,0x4ac7,0x4aad,0x4a92,0x4a77,0x4a5d,0x4a42
- .word 0x4a27,0x4a0d,0x49f2,0x49d8,0x49bd,0x49a3,0x4988,0x496e
- .word 0x4953,0x4939,0x491e,0x4904,0x48e9,0x48cf,0x48b5,0x489a
- .word 0x4880,0x4865,0x484b,0x4831,0x4817,0x47fc,0x47e2,0x47c8
- .word 0x47ae,0x4793,0x4779,0x475f,0x4745,0x472b,0x4711,0x46f6
- .word 0x46dc,0x46c2,0x46a8,0x468e,0x4674,0x465a,0x4640,0x4626
- .word 0x460c,0x45f2,0x45d8,0x45be,0x45a5,0x458b,0x4571,0x4557
- .word 0x453d,0x4523,0x4509,0x44f0,0x44d6,0x44bc,0x44a2,0x4489
- .word 0x446f,0x4455,0x443c,0x4422,0x4408,0x43ef,0x43d5,0x43bc
- .word 0x43a2,0x4388,0x436f,0x4355,0x433c,0x4322,0x4309,0x42ef
- .word 0x42d6,0x42bc,0x42a3,0x428a,0x4270,0x4257,0x423d,0x4224
- .word 0x420b,0x41f2,0x41d8,0x41bf,0x41a6,0x418c,0x4173,0x415a
- .word 0x4141,0x4128,0x410e,0x40f5,0x40dc,0x40c3,0x40aa,0x4091
- .word 0x4078,0x405f,0x4046,0x402d,0x4014,0x3ffb,0x3fe2,0x3fc9
- .word 0x3fb0,0x3f97,0x3f7e,0x3f65,0x3f4c,0x3f33,0x3f1a,0x3f01
- .word 0x3ee8,0x3ed0,0x3eb7,0x3e9e,0x3e85,0x3e6c,0x3e54,0x3e3b
- .word 0x3e22,0x3e0a,0x3df1,0x3dd8,0x3dc0,0x3da7,0x3d8e,0x3d76
- .word 0x3d5d,0x3d45,0x3d2c,0x3d13,0x3cfb,0x3ce2,0x3cca,0x3cb1
- .word 0x3c99,0x3c80,0x3c68,0x3c50,0x3c37,0x3c1f,0x3c06,0x3bee
- .word 0x3bd6,0x3bbd,0x3ba5,0x3b8d,0x3b74,0x3b5c,0x3b44,0x3b2b
- .word 0x3b13,0x3afb,0x3ae3,0x3acb,0x3ab2,0x3a9a,0x3a82,0x3a6a
- .word 0x3a52,0x3a3a,0x3a22,0x3a09,0x39f1,0x39d9,0x39c1,0x39a9
- .word 0x3991,0x3979,0x3961,0x3949,0x3931,0x3919,0x3901,0x38ea
- .word 0x38d2,0x38ba,0x38a2,0x388a,0x3872,0x385a,0x3843,0x382b
- .word 0x3813,0x37fb,0x37e3,0x37cc,0x37b4,0x379c,0x3785,0x376d
- .word 0x3755,0x373e,0x3726,0x370e,0x36f7,0x36df,0x36c8,0x36b0
- .word 0x3698,0x3681,0x3669,0x3652,0x363a,0x3623,0x360b,0x35f4
- .word 0x35dc,0x35c5,0x35ae,0x3596,0x357f,0x3567,0x3550,0x3539
- .word 0x3521,0x350a,0x34f3,0x34db,0x34c4,0x34ad,0x3496,0x347e
- .word 0x3467,0x3450,0x3439,0x3422,0x340a,0x33f3,0x33dc,0x33c5
- .word 0x33ae,0x3397,0x3380,0x3368,0x3351,0x333a,0x3323,0x330c
- .word 0x32f5,0x32de,0x32c7,0x32b0,0x3299,0x3282,0x326c,0x3255
- .word 0x323e,0x3227,0x3210,0x31f9,0x31e2,0x31cb,0x31b5,0x319e
- .word 0x3187,0x3170,0x3159,0x3143,0x312c,0x3115,0x30fe,0x30e8
- .word 0x30d1,0x30ba,0x30a4,0x308d,0x3076,0x3060,0x3049,0x3033
- .word 0x301c,0x3005,0x2fef,0x2fd8,0x2fc2,0x2fab,0x2f95,0x2f7e
- .word 0x2f68,0x2f51,0x2f3b,0x2f24,0x2f0e,0x2ef8,0x2ee1,0x2ecb
- .word 0x2eb4,0x2e9e,0x2e88,0x2e71,0x2e5b,0x2e45,0x2e2e,0x2e18
- .word 0x2e02,0x2dec,0x2dd5,0x2dbf,0x2da9,0x2d93,0x2d7c,0x2d66
- .word 0x2d50,0x2d3a,0x2d24,0x2d0e,0x2cf8,0x2ce1,0x2ccb,0x2cb5
- .word 0x2c9f,0x2c89,0x2c73,0x2c5d,0x2c47,0x2c31,0x2c1b,0x2c05
- .word 0x2bef,0x2bd9,0x2bc3,0x2bad,0x2b97,0x2b81,0x2b6c,0x2b56
- .word 0x2b40,0x2b2a,0x2b14,0x2afe,0x2ae8,0x2ad3,0x2abd,0x2aa7
- .word 0x2a91,0x2a7c,0x2a66,0x2a50,0x2a3a,0x2a25,0x2a0f,0x29f9
- .word 0x29e4,0x29ce,0x29b8,0x29a3,0x298d,0x2977,0x2962,0x294c
- .word 0x2937,0x2921,0x290c,0x28f6,0x28e0,0x28cb,0x28b5,0x28a0
- .word 0x288b,0x2875,0x2860,0x284a,0x2835,0x281f,0x280a,0x27f5
- .word 0x27df,0x27ca,0x27b4,0x279f,0x278a,0x2774,0x275f,0x274a
- .word 0x2735,0x271f,0x270a,0x26f5,0x26e0,0x26ca,0x26b5,0x26a0
- .word 0x268b,0x2676,0x2660,0x264b,0x2636,0x2621,0x260c,0x25f7
- .word 0x25e2,0x25cd,0x25b8,0x25a2,0x258d,0x2578,0x2563,0x254e
- .word 0x2539,0x2524,0x250f,0x24fa,0x24e5,0x24d1,0x24bc,0x24a7
- .word 0x2492,0x247d,0x2468,0x2453,0x243e,0x2429,0x2415,0x2400
- .word 0x23eb,0x23d6,0x23c1,0x23ad,0x2398,0x2383,0x236e,0x235a
- .word 0x2345,0x2330,0x231c,0x2307,0x22f2,0x22dd,0x22c9,0x22b4
- .word 0x22a0,0x228b,0x2276,0x2262,0x224d,0x2239,0x2224,0x2210
- .word 0x21fb,0x21e6,0x21d2,0x21bd,0x21a9,0x2194,0x2180,0x216c
- .word 0x2157,0x2143,0x212e,0x211a,0x2105,0x20f1,0x20dd,0x20c8
- .word 0x20b4,0x20a0,0x208b,0x2077,0x2063,0x204e,0x203a,0x2026
- .word 0x2012,0x1ffd,0x1fe9,0x1fd5,0x1fc1,0x1fac,0x1f98,0x1f84
- .word 0x1f70,0x1f5c,0x1f47,0x1f33,0x1f1f,0x1f0b,0x1ef7,0x1ee3
- .word 0x1ecf,0x1ebb,0x1ea7,0x1e93,0x1e7f,0x1e6a,0x1e56,0x1e42
- .word 0x1e2e,0x1e1a,0x1e06,0x1df3,0x1ddf,0x1dcb,0x1db7,0x1da3
- .word 0x1d8f,0x1d7b,0x1d67,0x1d53,0x1d3f,0x1d2b,0x1d18,0x1d04
- .word 0x1cf0,0x1cdc,0x1cc8,0x1cb5,0x1ca1,0x1c8d,0x1c79,0x1c65
- .word 0x1c52,0x1c3e,0x1c2a,0x1c17,0x1c03,0x1bef,0x1bdb,0x1bc8
- .word 0x1bb4,0x1ba0,0x1b8d,0x1b79,0x1b66,0x1b52,0x1b3e,0x1b2b
- .word 0x1b17,0x1b04,0x1af0,0x1add,0x1ac9,0x1ab6,0x1aa2,0x1a8f
- .word 0x1a7b,0x1a68,0x1a54,0x1a41,0x1a2d,0x1a1a,0x1a06,0x19f3
- .word 0x19e0,0x19cc,0x19b9,0x19a5,0x1992,0x197f,0x196b,0x1958
- .word 0x1945,0x1931,0x191e,0x190b,0x18f8,0x18e4,0x18d1,0x18be
- .word 0x18ab,0x1897,0x1884,0x1871,0x185e,0x184b,0x1837,0x1824
- .word 0x1811,0x17fe,0x17eb,0x17d8,0x17c4,0x17b1,0x179e,0x178b
- .word 0x1778,0x1765,0x1752,0x173f,0x172c,0x1719,0x1706,0x16f3
- .word 0x16e0,0x16cd,0x16ba,0x16a7,0x1694,0x1681,0x166e,0x165b
- .word 0x1648,0x1635,0x1623,0x1610,0x15fd,0x15ea,0x15d7,0x15c4
- .word 0x15b1,0x159f,0x158c,0x1579,0x1566,0x1553,0x1541,0x152e
- .word 0x151b,0x1508,0x14f6,0x14e3,0x14d0,0x14bd,0x14ab,0x1498
- .word 0x1485,0x1473,0x1460,0x144d,0x143b,0x1428,0x1416,0x1403
- .word 0x13f0,0x13de,0x13cb,0x13b9,0x13a6,0x1394,0x1381,0x136f
- .word 0x135c,0x1349,0x1337,0x1325,0x1312,0x1300,0x12ed,0x12db
- .word 0x12c8,0x12b6,0x12a3,0x1291,0x127f,0x126c,0x125a,0x1247
- .word 0x1235,0x1223,0x1210,0x11fe,0x11ec,0x11d9,0x11c7,0x11b5
- .word 0x11a3,0x1190,0x117e,0x116c,0x1159,0x1147,0x1135,0x1123
- .word 0x1111,0x10fe,0x10ec,0x10da,0x10c8,0x10b6,0x10a4,0x1091
- .word 0x107f,0x106d,0x105b,0x1049,0x1037,0x1025,0x1013,0x1001
- .word 0x0fef,0x0fdc,0x0fca,0x0fb8,0x0fa6,0x0f94,0x0f82,0x0f70
- .word 0x0f5e,0x0f4c,0x0f3a,0x0f28,0x0f17,0x0f05,0x0ef3,0x0ee1
- .word 0x0ecf,0x0ebd,0x0eab,0x0e99,0x0e87,0x0e75,0x0e64,0x0e52
- .word 0x0e40,0x0e2e,0x0e1c,0x0e0a,0x0df9,0x0de7,0x0dd5,0x0dc3
- .word 0x0db2,0x0da0,0x0d8e,0x0d7c,0x0d6b,0x0d59,0x0d47,0x0d35
- .word 0x0d24,0x0d12,0x0d00,0x0cef,0x0cdd,0x0ccb,0x0cba,0x0ca8
- .word 0x0c97,0x0c85,0x0c73,0x0c62,0x0c50,0x0c3f,0x0c2d,0x0c1c
- .word 0x0c0a,0x0bf8,0x0be7,0x0bd5,0x0bc4,0x0bb2,0x0ba1,0x0b8f
- .word 0x0b7e,0x0b6c,0x0b5b,0x0b4a,0x0b38,0x0b27,0x0b15,0x0b04
- .word 0x0af2,0x0ae1,0x0ad0,0x0abe,0x0aad,0x0a9c,0x0a8a,0x0a79
- .word 0x0a68,0x0a56,0x0a45,0x0a34,0x0a22,0x0a11,0x0a00,0x09ee
- .word 0x09dd,0x09cc,0x09bb,0x09a9,0x0998,0x0987,0x0976,0x0965
- .word 0x0953,0x0942,0x0931,0x0920,0x090f,0x08fe,0x08ec,0x08db
- .word 0x08ca,0x08b9,0x08a8,0x0897,0x0886,0x0875,0x0864,0x0853
- .word 0x0842,0x0831,0x081f,0x080e,0x07fd,0x07ec,0x07db,0x07ca
- .word 0x07b9,0x07a8,0x0798,0x0787,0x0776,0x0765,0x0754,0x0743
- .word 0x0732,0x0721,0x0710,0x06ff,0x06ee,0x06dd,0x06cd,0x06bc
- .word 0x06ab,0x069a,0x0689,0x0678,0x0668,0x0657,0x0646,0x0635
- .word 0x0624,0x0614,0x0603,0x05f2,0x05e1,0x05d1,0x05c0,0x05af
- .word 0x059e,0x058e,0x057d,0x056c,0x055c,0x054b,0x053a,0x052a
- .word 0x0519,0x0508,0x04f8,0x04e7,0x04d6,0x04c6,0x04b5,0x04a5
- .word 0x0494,0x0484,0x0473,0x0462,0x0452,0x0441,0x0431,0x0420
- .word 0x0410,0x03ff,0x03ef,0x03de,0x03ce,0x03bd,0x03ad,0x039c
- .word 0x038c,0x037b,0x036b,0x035b,0x034a,0x033a,0x0329,0x0319
- .word 0x0309,0x02f8,0x02e8,0x02d7,0x02c7,0x02b7,0x02a6,0x0296
- .word 0x0286,0x0275,0x0265,0x0255,0x0245,0x0234,0x0224,0x0214
- .word 0x0204,0x01f3,0x01e3,0x01d3,0x01c3,0x01b2,0x01a2,0x0192
- .word 0x0182,0x0172,0x0161,0x0151,0x0141,0x0131,0x0121,0x0111
- .word 0x0101,0x00f0,0x00e0,0x00d0,0x00c0,0x00b0,0x00a0,0x0090
- .word 0x0080,0x0070,0x0060,0x0050,0x0040,0x0030,0x0020,0x0010
+ LEA( r2, approx_tab)
+ srl r16, 54, r1
+ srl r16, 24, r4
+ and r16, 1, r5
+ bic r1, 1, r7
+ lda r4, 1(r4)
+ srl r16, 1, r3
+ifdef(`BWX',`
+ addq r7, r2, r1
+ ldwu r0, -512(r1)
+',`
+ addq r1, r2, r1
+ ldq_u r0, -512(r1)
+ extwl r0, r7, r0
+')
+ addq r3, r5, r3
+ mull r0, r0, r1
+ sll r0, 11, r0
+ mulq r1, r4, r1
+ srl r1, 40, r1
+ subq r0, r1, r0
+ lda r0, -1(r0)
+ mulq r0, r0, r2
+ sll r0, 60, r1
+ sll r0, 13, r0
+ mulq r2, r4, r2
+ subq r1, r2, r1
+ srl r1, 47, r1
+ addq r0, r1, r0
+ mulq r0, r3, r3
+ srl r0, 1, r1
+ cmoveq r5, 0, r1
+ subq r1, r3, r1
+ umulh r1, r0, r3
+ sll r0, 31, r0
+ srl r3, 1, r1
+ addq r0, r1, r0
+ mulq r0, r16, r2
+ umulh r0, r16, r3
+ addq r2, r16, r1
+ addq r3, r16, r16
+ cmpult r1, r2, r1
+ addq r16, r1, r3
+ subq r0, r3, r0
+ ret r31, (r26), 1
+EPILOGUE()
+DATASTART(approx_tab)
+ .word 0x7fd,0x7f5,0x7ed,0x7e5,0x7dd,0x7d5,0x7ce,0x7c6
+ .word 0x7bf,0x7b7,0x7b0,0x7a8,0x7a1,0x79a,0x792,0x78b
+ .word 0x784,0x77d,0x776,0x76f,0x768,0x761,0x75b,0x754
+ .word 0x74d,0x747,0x740,0x739,0x733,0x72c,0x726,0x720
+ .word 0x719,0x713,0x70d,0x707,0x700,0x6fa,0x6f4,0x6ee
+ .word 0x6e8,0x6e2,0x6dc,0x6d6,0x6d1,0x6cb,0x6c5,0x6bf
+ .word 0x6ba,0x6b4,0x6ae,0x6a9,0x6a3,0x69e,0x698,0x693
+ .word 0x68d,0x688,0x683,0x67d,0x678,0x673,0x66e,0x669
+ .word 0x664,0x65e,0x659,0x654,0x64f,0x64a,0x645,0x640
+ .word 0x63c,0x637,0x632,0x62d,0x628,0x624,0x61f,0x61a
+ .word 0x616,0x611,0x60c,0x608,0x603,0x5ff,0x5fa,0x5f6
+ .word 0x5f1,0x5ed,0x5e9,0x5e4,0x5e0,0x5dc,0x5d7,0x5d3
+ .word 0x5cf,0x5cb,0x5c6,0x5c2,0x5be,0x5ba,0x5b6,0x5b2
+ .word 0x5ae,0x5aa,0x5a6,0x5a2,0x59e,0x59a,0x596,0x592
+ .word 0x58e,0x58a,0x586,0x583,0x57f,0x57b,0x577,0x574
+ .word 0x570,0x56c,0x568,0x565,0x561,0x55e,0x55a,0x556
+ .word 0x553,0x54f,0x54c,0x548,0x545,0x541,0x53e,0x53a
+ .word 0x537,0x534,0x530,0x52d,0x52a,0x526,0x523,0x520
+ .word 0x51c,0x519,0x516,0x513,0x50f,0x50c,0x509,0x506
+ .word 0x503,0x500,0x4fc,0x4f9,0x4f6,0x4f3,0x4f0,0x4ed
+ .word 0x4ea,0x4e7,0x4e4,0x4e1,0x4de,0x4db,0x4d8,0x4d5
+ .word 0x4d2,0x4cf,0x4cc,0x4ca,0x4c7,0x4c4,0x4c1,0x4be
+ .word 0x4bb,0x4b9,0x4b6,0x4b3,0x4b0,0x4ad,0x4ab,0x4a8
+ .word 0x4a5,0x4a3,0x4a0,0x49d,0x49b,0x498,0x495,0x493
+ .word 0x490,0x48d,0x48b,0x488,0x486,0x483,0x481,0x47e
+ .word 0x47c,0x479,0x477,0x474,0x472,0x46f,0x46d,0x46a
+ .word 0x468,0x465,0x463,0x461,0x45e,0x45c,0x459,0x457
+ .word 0x455,0x452,0x450,0x44e,0x44b,0x449,0x447,0x444
+ .word 0x442,0x440,0x43e,0x43b,0x439,0x437,0x435,0x432
+ .word 0x430,0x42e,0x42c,0x42a,0x428,0x425,0x423,0x421
+ .word 0x41f,0x41d,0x41b,0x419,0x417,0x414,0x412,0x410
+ .word 0x40e,0x40c,0x40a,0x408,0x406,0x404,0x402,0x400
DATAEND()
ASM_END()
dnl Alpha mpn_sub_n -- Subtract two limb vectors of the same length > 0
dnl and store difference in a third limb vector.
-dnl Copyright 1995, 1999, 2000, 2005 Free Software Foundation, Inc.
+dnl Copyright 1995, 1999, 2000, 2005, 2011 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
dnl size r19
ASM_START()
+C mpn_sub_nc -- entry point that starts with a caller-supplied carry.  It
+C copies r20 into r25, the register mpn_sub_n clears as "cy", and then joins
+C mpn_sub_n at L(com), just past that clearing instruction.
+C NOTE(review): r20 is presumably the carry-in argument following size (r19)
+C -- confirm against the prototype.
+PROLOGUE(mpn_sub_nc)
+ bis r31,r20,r25
+ br L(com)
+EPILOGUE()
PROLOGUE(mpn_sub_n)
bis r31,r31,r25 C clear cy
- subq r19,4,r19 C decr loop cnt
+L(com): subq r19,4,r19 C decr loop cnt
blt r19,$Lend2 C if less than 4 limbs, goto 2nd loop
C Start software pipeline for 1st loop
ldq r0,0(r18)
ldq r1,8(r18)
ldq r5,8(r17)
addq r17,32,r17 C update s1_ptr
+ subq r4,r0,r28 C 1st main subtract
ldq r2,16(r18)
- subq r4,r0,r20 C 1st main subtract
+ subq r28,r25,r20 C 1st carry subtract
ldq r3,24(r18)
- subq r19,4,r19 C decr loop cnt
+ cmpult r4,r0,r8 C compute cy from last subtract
ldq r6,-16(r17)
- cmpult r4,r0,r25 C compute cy from last subtract
+ cmpult r28,r25,r25 C compute cy from last subtract
ldq r7,-8(r17)
+ bis r8,r25,r25 C combine cy from the two subtracts
+ subq r19,4,r19 C decr loop cnt
subq r5,r1,r28 C 2nd main subtract
addq r18,32,r18 C update s2_ptr
subq r28,r25,r21 C 2nd carry subtract
$Lret: bis r25,r31,r0 C return cy
ret r31,(r26),1
-EPILOGUE(mpn_sub_n)
+EPILOGUE()
ASM_END()
-Copyright 2002 Free Software Foundation, Inc.
+Copyright 2002, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
-This directory contains mpn functions for ARM processors.
-It has been optimized for StrongARM.
-
-TODO
-
-Write mpn_addmul_2. The speed of mpn_addmul_1 is 9.75 c/l;
-mpn_addmul_2 could run at 8 c/l. mpn_addmul_N could
-approach 6 c/l, but register shortage will make this hard.
-
-Perhaps nails is the way to go even for an embedded processor like
-this, since the umlal accumulation could be used very effectively in
-that case. with just 2 nail bits, we should get close to 5 c/l for a
-mpn_addmul_N or mpn_mul_basecase.
+This directory contains mpn functions for ARM processors. It has been
+optimised for Cortex-A9, but the code in the top-level directory should run
+on all ARM processors at architecture level v4 or later.
+++ /dev/null
-dnl ARM mpn_add_n -- Add two limb vectors of the same length > 0 and store sum
-dnl in a third limb vector.
-dnl Contributed by Robert Harley.
-
-dnl Copyright 1997, 2000, 2001 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of the GNU Lesser General Public License as published
-dnl by the Free Software Foundation; either version 3 of the License, or (at
-dnl your option) any later version.
-
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-dnl License for more details.
-
-dnl You should have received a copy of the GNU Lesser General Public License
-dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C This code runs at 5 cycles/limb.
-
-define(`rp',`r0')
-define(`up',`r1')
-define(`vp',`r2')
-define(`n',`r3')
-
-
-ASM_START()
-PROLOGUE(mpn_add_n)
- stmfd sp!, { r8, r9, lr }
- movs n, n, lsr #1
- bcc L(skip1)
- ldr r12, [up], #4
- ldr lr, [vp], #4
- adds r12, r12, lr
- str r12, [rp], #4
-L(skip1):
- tst n, #1
- beq L(skip2)
- ldmia up!, { r8, r9 }
- ldmia vp!, { r12, lr }
- adcs r8, r8, r12
- adcs r9, r9, lr
- stmia rp!, { r8, r9 }
-L(skip2):
- bics n, n, #1
- beq L(return)
- stmfd sp!, { r4, r5, r6, r7 }
-L(add_n_loop):
- ldmia up!, { r4, r5, r6, r7 }
- ldmia vp!, { r8, r9, r12, lr }
- adcs r4, r4, r8
- ldr r8, [rp, #12] C cache allocate
- adcs r5, r5, r9
- adcs r6, r6, r12
- adcs r7, r7, lr
- stmia rp!, { r4, r5, r6, r7 }
- sub n, n, #2
- teq n, #0
- bne L(add_n_loop)
- ldmfd sp!, { r4, r5, r6, r7 }
-L(return):
- adc r0, n, #0
- ldmfd sp!, { r8, r9, pc }
-EPILOGUE(mpn_add_n)
+++ /dev/null
-dnl ARM mpn_addmul_1 -- Multiply a limb vector with a limb and add the result
-dnl to a second limb vector.
-
-dnl Copyright 1998, 2000, 2001, 2003 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of the GNU Lesser General Public License as published
-dnl by the Free Software Foundation; either version 3 of the License, or (at
-dnl your option) any later version.
-
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-dnl License for more details.
-
-dnl You should have received a copy of the GNU Lesser General Public License
-dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/limb
-C StrongARM: 7.75-9.75 (dependent on vl value)
-C XScale: 8-9 (dependent on vl value, estimated)
-
-define(`rp',`r0')
-define(`up',`r1')
-define(`n',`r2')
-define(`vl',`r3')
-define(`rl',`r12')
-define(`ul',`r6')
-define(`r',`lr')
-
-
-ASM_START()
-PROLOGUE(mpn_addmul_1)
- stmfd sp!, { r4-r6, lr }
- mov r4, #0 C clear r4
- adds r0, r0, #0 C clear cy
- tst n, #1
- beq L(skip1)
- ldr ul, [up], #4
- ldr rl, [rp, #0]
- umull r5, r4, ul, vl
- adds r, rl, r5
- str r, [rp], #4
-L(skip1):
- tst n, #2
- beq L(skip2)
- ldr ul, [up], #4
- ldr rl, [rp, #0]
- mov r5, #0
- umlal r4, r5, ul, vl
- ldr ul, [up], #4
- adcs r, rl, r4
- ldr rl, [rp, #4]
- mov r4, #0
- umlal r5, r4, ul, vl
- str r, [rp], #4
- adcs r, rl, r5
- str r, [rp], #4
-L(skip2):
- bics r, n, #3
- beq L(return)
-
- ldr ul, [up], #4
- ldr rl, [rp, #0]
- mov r5, #0
- umlal r4, r5, ul, vl
- b L(in)
-
-L(loop):
- ldr ul, [up], #4
- adcs r, rl, r5
- ldr rl, [rp, #4]
- mov r5, #0
- umlal r4, r5, ul, vl
- str r, [rp], #4
-L(in): ldr ul, [up], #4
- adcs r, rl, r4
- ldr rl, [rp, #4]
- mov r4, #0
- umlal r5, r4, ul, vl
- str r, [rp], #4
- ldr ul, [up], #4
- adcs r, rl, r5
- ldr rl, [rp, #4]
- mov r5, #0
- umlal r4, r5, ul, vl
- str r, [rp], #4
- ldr ul, [up], #4
- adcs r, rl, r4
- ldr rl, [rp, #4]
- mov r4, #0
- umlal r5, r4, ul, vl
- str r, [rp], #4
- sub n, n, #4
- bics r, n, #3
- bne L(loop)
-
- adcs r, rl, r5
- str r, [rp], #4
-L(return):
- adc r0, r4, #0
- ldmfd sp!, { r4-r6, pc }
-EPILOGUE(mpn_addmul_1)
--- /dev/null
+dnl ARM mpn_add_n and mpn_sub_n
+
+dnl Contributed to the GNU project by Robert Harley.
+
+dnl Copyright 1997, 2000, 2001, 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C StrongARM ?
+C XScale ?
+C Cortex-A8 ?
+C Cortex-A9 2.5 slightly fluctuating
+C Cortex-A15 2.25
+
+define(`rp', `r0')
+define(`up', `r1')
+define(`vp', `r2')
+define(`n', `r3')
+
+ifdef(`OPERATION_add_n', `
+ define(`ADDSUB', adds)
+ define(`ADDSUBC', adcs)
+ define(`CLRCY', `cmn r0, #0')
+ define(`SETCY', `cmp $1, #1')
+ define(`RETVAL', `adc r0, n, #0')
+ define(`func', mpn_add_n)
+ define(`func_nc', mpn_add_nc)')
+ifdef(`OPERATION_sub_n', `
+ define(`ADDSUB', subs)
+ define(`ADDSUBC', sbcs)
+ define(`CLRCY', `cmp r0, r0')
+ define(`SETCY', `rsbs $1, $1, #0')
+ define(`RETVAL', `sbc r0, r0, r0
+ and r0, r0, #1')
+ define(`func', mpn_sub_n)
+ define(`func_nc', mpn_sub_nc)')
+
+MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
+
+ASM_START()
+C Two entry points share one body.  func_nc starts with the carry flag set
+C from a caller-supplied value (SETCY); func starts with the initial carry
+C state given by CLRCY.  After that, n limbs of up and vp are combined with
+C ADDSUBC (adcs or sbcs, depending on the OPERATION_* macro) and stored to rp:
+C first one limb if bit 0 of n is set, then two limbs if bit 1 is set, then
+C groups of four.  RETVAL turns the final carry flag into the return value.
+PROLOGUE(func_nc)
+ ldr r12, [sp, #0] C word at top of stack on entry; presumably the carry-in argument
+ stmfd sp!, { r8, r9, lr }
+ SETCY( r12) C set carry flag from r12
+ b L(ent)
+EPILOGUE()
+PROLOGUE(func)
+ stmfd sp!, { r8, r9, lr }
+ CLRCY( r12) C CLRCY is defined without parameters, so the argument is ignored
+L(ent): tst n, #1 C odd limb count?
+ beq L(skip1)
+ ldr r12, [up], #4 C load 1 limb from each source
+ ldr lr, [vp], #4
+ ADDSUBC r12, r12, lr
+ str r12, [rp], #4
+L(skip1):
+ tst n, #2 C two more limbs to do?
+ beq L(skip2)
+ ldmia up!, { r8, r9 } C load 2 limbs from each source
+ ldmia vp!, { r12, lr }
+ ADDSUBC r8, r8, r12
+ ADDSUBC r9, r9, lr
+ stmia rp!, { r8, r9 }
+L(skip2):
+ bics n, n, #3 C n = remaining limbs, a multiple of 4
+ beq L(rtn) C nothing left
+ stmfd sp!, { r4, r5, r6, r7 } C extra registers needed only by the 4-limb loop
+
+L(top): ldmia up!, { r4, r5, r6, r7 }
+ ldmia vp!, { r8, r9, r12, lr }
+ ADDSUBC r4, r4, r8
+ sub n, n, #4 C plain sub, so the carry chain is not disturbed
+ ADDSUBC r5, r5, r9
+ ADDSUBC r6, r6, r12
+ ADDSUBC r7, r7, lr
+ stmia rp!, { r4, r5, r6, r7 }
+ teq n, #0 C loop test
+ bne L(top)
+
+ ldmfd sp!, { r4, r5, r6, r7 }
+
+L(rtn): RETVAL
+ ldmfd sp!, { r8, r9, pc } C restore r8, r9 and return (saved lr is popped into pc)
+EPILOGUE()
--- /dev/null
+dnl ARM mpn_addcnd_n, mpn_subcnd_n
+
+dnl Copyright 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C StrongARM ?
+C XScale ?
+C Cortex-A8 ?
+C Cortex-A9 2.5 slightly fluctuating
+C Cortex-A15 ?
+
+define(`rp', `r0')
+define(`up', `r1')
+define(`vp', `r2')
+define(`n', `r3')
+
+define(`cnd', `r12')
+
+ifdef(`OPERATION_addcnd_n', `
+ define(`ADDSUB', adds)
+ define(`ADDSUBC', adcs)
+ define(`INITCY', `cmn r0, #0')
+ define(`RETVAL', `adc r0, n, #0')
+ define(func, mpn_addcnd_n)')
+ifdef(`OPERATION_subcnd_n', `
+ define(`ADDSUB', subs)
+ define(`ADDSUBC', sbcs)
+ define(`INITCY', `cmp r0, #0')
+ define(`RETVAL', `adc r0, n, #0
+ rsb r0, r0, #1')
+ define(func, mpn_subcnd_n)')
+
+MULFUNC_PROLOGUE(mpn_addcnd_n mpn_subcnd_n)
+
+ASM_START()
+C Conditional add/subtract.  cnd is turned into a mask that is either zero
+C or all ones; every vp limb is ANDed with the mask before being combined
+C with the matching up limb, so the same instructions run whether or not the
+C condition holds.  The first n mod 4 limbs are handled by L(b1), L(b2) or
+C L(b3); the rest are processed four at a time in L(top).
+PROLOGUE(func)
+ push {r4-r11}
+ ldr cnd, [sp, #32] C 8 registers pushed = 32 bytes, so this is the word at [sp] on entry
+
+ INITCY C really only needed for n = 0 (mod 4)
+
+ teq cnd, #0 C could use this for clearing/setting cy
+ mvnne cnd, #0 C conditionally set to 0xffffffff
+
+ ands r4, n, #3 C r4 = n mod 4
+ beq L(top) C no leftover limbs, go straight to the 4-limb loop
+ cmp r4, #2
+ bcc L(b1) C n mod 4 = 1
+ beq L(b2) C n mod 4 = 2
+
+L(b3): ldm vp!, {r4,r5,r6}
+ ldm up!, {r8,r9,r10}
+ and r4, r4, cnd C mask the vp limbs
+ and r5, r5, cnd
+ and r6, r6, cnd
+ ADDSUB r8, r8, r4 C first limb starts a fresh carry chain
+ ADDSUBC r9, r9, r5
+ ADDSUBC r10, r10, r6
+ stm rp!, {r8,r9,r10}
+ sub n, n, #3
+ teq n, #0
+ bne L(top)
+ b L(end)
+
+L(b2): ldm vp!, {r4,r5}
+ ldm up!, {r8,r9}
+ and r4, r4, cnd
+ and r5, r5, cnd
+ ADDSUB r8, r8, r4
+ ADDSUBC r9, r9, r5
+ stm rp!, {r8,r9}
+ sub n, n, #2
+ teq n, #0
+ bne L(top)
+ b L(end)
+
+L(b1): ldr r4, [vp], #4
+ ldr r8, [up], #4
+ and r4, r4, cnd
+ ADDSUB r8, r8, r4
+ str r8, [rp], #4
+ sub n, n, #1
+ teq n, #0
+ beq L(end)
+
+L(top): ldm vp!, {r4,r5,r6,r7}
+ ldm up!, {r8,r9,r10,r11}
+ and r4, r4, cnd C mask the vp limbs
+ and r5, r5, cnd
+ and r6, r6, cnd
+ and r7, r7, cnd
+ ADDSUBC r8, r8, r4 C continue the carry chain from the previous group
+ ADDSUBC r9, r9, r5
+ ADDSUBC r10, r10, r6
+ ADDSUBC r11, r11, r7
+ sub n, n, #4
+ stm rp!, {r8,r9,r10,r11}
+ teq n, #0
+ bne L(top)
+
+L(end): RETVAL C n is zero here; return value is formed from the carry flag
+ pop {r4-r11}
+ bx r14
+EPILOGUE()
--- /dev/null
+dnl ARM mpn_addlsh1_n and mpn_sublsh1_n
+
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
+dnl Copyright 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C addlsh1_n sublsh1_n
+C cycles/limb cycles/limb
+C StrongARM ? ?
+C XScale ? ?
+C Cortex-A8 ? ?
+C Cortex-A9 3.12 3.7
+C Cortex-A15 ? ?
+
+C TODO
+C * The addlsh1_n code runs well, but is only barely faster than mpn_addmul_1.
+C The sublsh1_n code could surely be tweaked; its REVCY slows things down
+C considerably. If two insns are really needed, it might help to separate
+C them for better micro-parallelism.
+
+define(`rp', `r0')
+define(`up', `r1')
+define(`vp', `r2')
+define(`n', `r3')
+
+ifdef(`OPERATION_addlsh1_n', `
+ define(`ADDSUB', adds)
+ define(`ADDSUBC', adcs)
+ define(`SETCY', `cmp $1, #1')
+ define(`RETVAL', `adc r0, $1, #2')
+ define(`SAVECY', `sbc $1, $2, #0')
+ define(`RESTCY', `cmn $1, #1')
+ define(`REVCY', `')
+ define(`INICYR', `mov $1, #0')
+ define(`r10r11', `r11')
+ define(`func', mpn_addlsh1_n)
+ define(`func_nc', mpn_addlsh1_nc)')
+ifdef(`OPERATION_sublsh1_n', `
+ define(`ADDSUB', subs)
+ define(`ADDSUBC', sbcs)
+ define(`SETCY', `rsbs $1, $1, #0')
+ define(`RETVAL', `adc r0, $1, #1')
+ define(`SAVECY', `sbc $1, $1, $1')
+ define(`RESTCY', `cmn $1, #1')
+ define(`REVCY', `sbc $1, $1, $1
+ cmn $1, #1')
+ define(`INICYR', `mvn $1, #0')
+ define(`r10r11', `r10')
+ define(`func', mpn_sublsh1_n)
+ define(`func_nc', mpn_sublsh1_nc)')
+
+MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_sublsh1_n)
+
+ASM_START()
+C NOTE(review): the following is a reading of the code below and should be
+C confirmed.  Each vp limb is doubled with adcs x, x, x and then combined
+C with the matching up limb by ADDSUBC, three limbs per step.  Two carries
+C are in flight: one lives in the flags while the other is parked in r12 or
+C r14 by SAVECY and brought back by RESTCY.  The two registers alternate
+C between the two halves of the loop, which is why they are swapped before
+C L(exi) when the loop is left from its second half.  The last one or two
+C limbs are handled at L(e1) and L(e02).
+PROLOGUE(func)
+ push {r4-r10r11, r14}
+
+ifdef(`OPERATION_addlsh1_n', `
+ mvn r11, #0
+')
+ INICYR( r14) C initial value for the parked carry in r14
+ subs n, n, #3 C at least 3 limbs?
+ blt L(le2) C carry clear on branch path
+
+ cmn r0, #0 C clear carry
+ ldmia vp!, {r8, r9, r10} C first 3 vp limbs
+ b L(mid) C enter the loop at the doubling step
+
+L(top): RESTCY( r14)
+ ADDSUBC r4, r4, r8 C combine up limbs with doubled vp limbs
+ ADDSUBC r5, r5, r9
+ ADDSUBC r6, r6, r10
+ ldmia vp!, {r8, r9, r10}
+ stmia rp!, {r4, r5, r6}
+ REVCY(r14)
+ adcs r8, r8, r8 C double the next 3 vp limbs
+ adcs r9, r9, r9
+ adcs r10, r10, r10
+ ldmia up!, {r4, r5, r6}
+ SAVECY( r14, r11)
+ subs n, n, #3
+ blt L(exi)
+ RESTCY( r12)
+ ADDSUBC r4, r4, r8
+ ADDSUBC r5, r5, r9
+ ADDSUBC r6, r6, r10
+ ldmia vp!, {r8, r9, r10}
+ stmia rp!, {r4, r5, r6}
+ REVCY(r12)
+L(mid): adcs r8, r8, r8 C double 3 vp limbs
+ adcs r9, r9, r9
+ adcs r10, r10, r10
+ ldmia up!, {r4, r5, r6}
+ SAVECY( r12, r11)
+ subs n, n, #3
+ bge L(top)
+
+ mov r7, r12 C swap alternating...
+ mov r12, r14 C ...carry-save...
+ mov r14, r7 C ...registers
+
+L(exi): RESTCY( r12)
+ ADDSUBC r4, r4, r8 C finish the last full group of 3 limbs
+ ADDSUBC r5, r5, r9
+ ADDSUBC r6, r6, r10
+ stmia rp!, {r4, r5, r6}
+
+ REVCY(r12)
+L(le2): tst n, #1 C n = {-1,-2,-3} map to [2], [1], [0]
+ beq L(e1)
+
+L(e02): tst n, #2
+ beq L(rt0) C no limbs left
+ ldm vp, {r8, r9} C two limbs left
+ adcs r8, r8, r8
+ adcs r9, r9, r9
+ ldm up, {r4, r5}
+ SAVECY( r12, r11)
+ RESTCY( r14)
+ ADDSUBC r4, r4, r8
+ ADDSUBC r5, r5, r9
+ stm rp, {r4, r5}
+ b L(rt1)
+
+L(e1): ldr r8, [vp] C one limb left
+ adcs r8, r8, r8
+ ldr r4, [up]
+ SAVECY( r12, r11)
+ RESTCY( r14)
+ ADDSUBC r4, r4, r8
+ str r4, [rp]
+
+L(rt1): mov r14, r12
+ REVCY(r12)
+L(rt0): RETVAL( r14) C return value from the parked carry in r14 plus the carry flag
+ pop {r4-r10r11, r14}
+ bx r14
+EPILOGUE()
--- /dev/null
+dnl ARM mpn_addmul_1 and mpn_submul_1.
+
+dnl Copyright 1998, 2000, 2001, 2003, 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C StrongARM: ?
+C XScale ?
+C Cortex-A8 ?
+C Cortex-A9 5.25
+C Cortex-A15 4
+
+define(`rp', `r0')
+define(`up', `r1')
+define(`n', `r2')
+define(`vl', `r3')
+define(`rl', `r12')
+define(`ul', `r6')
+define(`r', `lr')
+
+ifdef(`OPERATION_addmul_1', `
+ define(`ADDSUB', adds)
+ define(`ADDSUBC', adcs)
+ define(`CLRRCY', `mov $1, #0
+ adds r0, r0, #0')
+ define(`RETVAL', `adc r0, r4, #0')
+ define(`func', mpn_addmul_1)')
+ifdef(`OPERATION_submul_1', `
+ define(`ADDSUB', subs)
+ define(`ADDSUBC', sbcs)
+ define(`CLRRCY', `subs $1, r0, r0')
+ define(`RETVAL', `sbc r0, r0, r0
+ sub r0, $1, r0')
+ define(`func', mpn_submul_1)')
+
+MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1)
+
+ASM_START()
+C For each of the n limbs: multiply the up limb by vl, add the high word
+C carried over from the previous product, and combine the low word with the
+C rp limb using ADDSUB/ADDSUBC (add or subtract, depending on the
+C OPERATION_* macro).  r4 and r5 swap roles from one limb to the next: umlal
+C accumulates the product onto the register holding the previous high word,
+C and the other register, zeroed just before, receives the new high word.
+C One limb is handled first if bit 0 of n is set, then two if bit 1 is set,
+C then groups of four in a software-pipelined loop.
+PROLOGUE(func)
+ stmfd sp!, { r4-r6, lr }
+ CLRRCY( r4) C zero r4 and initialise the carry flag
+ tst n, #1 C odd limb count?
+ beq L(skip1)
+ ldr ul, [up], #4
+ ldr rl, [rp, #0]
+ umull r5, r4, ul, vl C r4:r5 = ul * vl (low word in r5, high word in r4)
+ ADDSUB r, rl, r5
+ str r, [rp], #4
+L(skip1):
+ tst n, #2 C two more limbs to do?
+ beq L(skip2)
+ ldr ul, [up], #4
+ ldr rl, [rp, #0]
+ mov r5, #0
+ umlal r4, r5, ul, vl C r5:r4 = ul * vl + r4
+ ldr ul, [up], #4
+ ADDSUBC r, rl, r4
+ ldr rl, [rp, #4]
+ mov r4, #0
+ umlal r5, r4, ul, vl C r4:r5 = ul * vl + r5
+ str r, [rp], #4
+ ADDSUBC r, rl, r5
+ str r, [rp], #4
+L(skip2):
+ bics n, n, #3 C n = remaining limbs, a multiple of 4
+ beq L(rtn) C nothing left
+
+ ldr ul, [up], #4 C prime the pipeline with the first product of the group
+ ldr rl, [rp, #0]
+ mov r5, #0
+ umlal r4, r5, ul, vl
+ b L(in)
+
+L(top): ldr ul, [up], #4
+ ADDSUBC r, rl, r5
+ ldr rl, [rp, #4]
+ mov r5, #0
+ umlal r4, r5, ul, vl
+ str r, [rp], #4
+L(in): ldr ul, [up], #4
+ ADDSUBC r, rl, r4
+ ldr rl, [rp, #4]
+ mov r4, #0
+ umlal r5, r4, ul, vl
+ str r, [rp], #4
+ ldr ul, [up], #4
+ ADDSUBC r, rl, r5
+ ldr rl, [rp, #4]
+ mov r5, #0
+ umlal r4, r5, ul, vl
+ str r, [rp], #4
+ ldr ul, [up], #4
+ ADDSUBC r, rl, r4
+ ldr rl, [rp, #4]
+ mov r4, #0
+ umlal r5, r4, ul, vl
+ sub n, n, #4
+ tst n, n C loop test; sets Z without an arithmetic carry update
+ str r, [rp], #4
+ bne L(top)
+
+ ADDSUBC r, rl, r5 C last limb of the final group
+ str r, [rp]
+
+L(rtn): RETVAL( r4) C return value from r4 and the final carry flag
+ ldmfd sp!, { r4-r6, pc } C restore registers and return (saved lr is popped into pc)
+EPILOGUE()
dnl m4 macros for ARM assembler.
-dnl Copyright 2001 Free Software Foundation, Inc.
+dnl Copyright 2001, 2012, 2013 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
dnl Standard commenting is with @, the default m4 # is for constants and we
dnl don't want to disable macro expansions in or after them.
-changecom(@)
+changecom(@&*$)
dnl APCS register names.
deflit(lr,r14)
deflit(pc,r15)
+
+define(`lea_list', `')
+define(`lea_num',0)
+
+dnl LEA(reg,gmp_symbol)
+dnl
+dnl Load the address of gmp_symbol into a register. The gmp_symbol must be
+dnl either local or protected/hidden, since we assume it has a fixed distance
+dnl from the point of use.
+
+define(`LEA',`dnl
+ldr $1, L(ptr`'lea_num)
+ifdef(`PIC',dnl
+`dnl
+L(bas`'lea_num):dnl
+ add $1, $1, pc`'dnl
+ m4append(`lea_list',`
+L(ptr'lea_num`): .word GSYM_PREFIX`'$2-L(bas'lea_num`)-8')
+ define(`lea_num', eval(lea_num+1))dnl
+',`dnl
+ m4append(`lea_list',`
+L(ptr'lea_num`): .word GSYM_PREFIX`'$2')
+ define(`lea_num', eval(lea_num+1))dnl
+')dnl
+')
+
+define(`EPILOGUE_cpu',
+`lea_list
+ SIZE(`$1',.-`$1')')
+
divert
--- /dev/null
+dnl ARM mpn_bdiv_dbm1c.
+
+dnl Copyright 2008, 2011, 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C StrongARM ?
+C XScale ?
+C Cortex-A8 ?
+C Cortex-A9 4.25
+C Cortex-A15 2.5
+
+C TODO
+C * Try using umlal or umaal.
+C * Try using ldm/stm.
+
+define(`qp', `r0')
+define(`up', `r1')
+define(`n', `r2')
+define(`bd', `r3')
+define(`cy', `sp,#0')
+
+ASM_START()
+C r5 holds a running value h, loaded from the stack on entry.  For every limb
+C u read from up the code forms the two-word product u * bd and then does
+C   h -= low word;  store h to qp;  h -= high word + borrow
+C The final h is returned in r0.  The loop is unrolled four times, with the
+C products alternating between r7:r6 and r12:r8; L(fi0)..L(fi3) select the
+C loop entry point from n mod 4, and L(wd2)/L(wd1) finish the last products.
+ TEXT
+ ALIGN(16)
+PROLOGUE(mpn_bdiv_dbm1c)
+ push {r4, r5, r6, r7, r8}
+ ldr r4, [up], #4 C first source limb
+ ldr r5, [sp, #20] C 5 registers pushed = 20 bytes, so this is the word at [sp] on entry
+ ands r12, n, #3 C r12 = n mod 4
+ beq L(fi0)
+ cmp r12, #2
+ bcc L(fi1) C n mod 4 = 1
+ beq L(fi2) C n mod 4 = 2
+
+L(fi3): umull r8, r12, r4, bd
+ ldr r4, [up], #4
+ b L(lo3)
+
+L(fi0): umull r6, r7, r4, bd
+ ldr r4, [up], #4
+ b L(lo0)
+
+L(fi1): subs n, n, #1
+ umull r8, r12, r4, bd
+ bls L(wd1) C n was 1: only the wind-down step is needed
+ ldr r4, [up], #4
+ b L(lo1)
+
+L(fi2): umull r6, r7, r4, bd
+ ldr r4, [up], #4
+ b L(lo2)
+
+L(top): ldr r4, [up], #4
+ subs r5, r5, r6 C h -= low word
+ str r5, [qp], #4 C store h
+ sbc r5, r5, r7 C h -= high word + borrow
+L(lo1): umull r6, r7, r4, bd
+ ldr r4, [up], #4
+ subs r5, r5, r8
+ str r5, [qp], #4
+ sbc r5, r5, r12
+L(lo0): umull r8, r12, r4, bd
+ ldr r4, [up], #4
+ subs r5, r5, r6
+ str r5, [qp], #4
+ sbc r5, r5, r7
+L(lo3): umull r6, r7, r4, bd
+ ldr r4, [up], #4
+ subs r5, r5, r8
+ str r5, [qp], #4
+ sbc r5, r5, r12
+L(lo2): subs n, n, #4 C loop counter; flags are consumed by bhi below
+ umull r8, r12, r4, bd
+ bhi L(top)
+
+L(wd2): subs r5, r5, r6 C wind down: product in r7:r6
+ str r5, [qp], #4
+ sbc r5, r5, r7
+L(wd1): subs r5, r5, r8 C wind down: product in r12:r8
+ str r5, [qp]
+ sbc r0, r5, r12 C return the final h
+ pop {r4, r5, r6, r7, r8}
+ bx lr
+EPILOGUE()
--- /dev/null
+dnl ARM mpn_com.
+
+dnl Copyright 2003, 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C StrongARM ?
+C XScale ?
+C Cortex-A8 ?
+C Cortex-A9 2.0
+C Cortex-A15 ?
+
+define(`rp', `r0')
+define(`up', `r1')
+define(`n', `r2')
+
+ASM_START()
+C Store the bitwise complement (mvn) of each of the n limbs at up to rp.
+C One limb is handled first if bit 0 of n is set, then two limbs if bit 1 is
+C set, then the remaining limbs four at a time.
+PROLOGUE(mpn_com)
+ tst n, #1 C odd limb count?
+ beq L(skip1)
+ ldr r3, [up], #4 C load 1 limb
+ mvn r3, r3
+ str r3, [rp], #4 C store 1 limb
+L(skip1):
+ tst n, #2 C two more limbs to do?
+ beq L(skip2)
+ ldmia up!, { r3, r12 } C load 2 limbs
+ mvn r3, r3
+ mvn r12, r12
+ stmia rp!, { r3, r12 } C store 2 limbs
+L(skip2):
+ bics n, n, #3 C n = remaining limbs, a multiple of 4
+ beq L(rtn) C nothing left
+ stmfd sp!, { r7, r8, r9 } C save regs on stack (r7 is not used by the loop below)
+
+L(top): ldmia up!, { r3, r8, r9, r12 } C load 4 limbs
+ subs n, n, #4 C loop counter; the mvn insns below leave the flags alone
+ mvn r3, r3
+ mvn r8, r8
+ mvn r9, r9
+ mvn r12, r12
+ stmia rp!, { r3, r8, r9, r12 } C store 4 limbs
+ bne L(top)
+
+ ldmfd sp!, { r7, r8, r9 } C restore regs from stack
+L(rtn): bx lr
+EPILOGUE()
dnl ARM mpn_copyd.
-dnl Copyright 2003 Free Software Foundation, Inc.
+dnl Copyright 2003, 2012 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
include(`../config.m4')
-C This runs at 3 cycles/limb in the StrongARM.
-
-define(`rp',`r0')
-define(`up',`r1')
-define(`n',`r2')
+C cycles/limb
+C StrongARM ?
+C XScale ?
+C Cortex-A8 ?
+C Cortex-A9 1.5
+C Cortex-A15 ?
+define(`rp', `r0')
+define(`up', `r1')
+define(`n', `r2')
ASM_START()
PROLOGUE(mpn_copyd)
stmda rp!, { r3, r12 } C store 2 limbs
L(skip2):
bics n, n, #3
- beq L(return)
+ beq L(rtn)
stmfd sp!, { r7, r8, r9 } C save regs on stack
-L(loop):
- ldmda up!, { r3, r8, r9, r12 } C load 4 limbs
- ldr r7, [rp, #-12] C cache allocate
+
+L(top): ldmda up!, { r3, r8, r9, r12 } C load 4 limbs
subs n, n, #4
stmda rp!, { r3, r8, r9, r12 } C store 4 limbs
- bne L(loop)
+ bne L(top)
+
ldmfd sp!, { r7, r8, r9 } C restore regs from stack
-L(return):
- mov pc, lr
-EPILOGUE(mpn_copyd)
+L(rtn): bx lr
+EPILOGUE()
dnl ARM mpn_copyi.
-dnl Copyright 2003 Free Software Foundation, Inc.
+dnl Copyright 2003, 2012 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
include(`../config.m4')
-C This runs at 3 cycles/limb in the StrongARM.
-
-define(`rp',`r0')
-define(`up',`r1')
-define(`n',`r2')
+C cycles/limb
+C StrongARM ?
+C XScale ?
+C Cortex-A8 ?
+C Cortex-A9 1.5
+C Cortex-A15 ?
+define(`rp', `r0')
+define(`up', `r1')
+define(`n', `r2')
ASM_START()
PROLOGUE(mpn_copyi)
stmia rp!, { r3, r12 } C store 2 limbs
L(skip2):
bics n, n, #3
- beq L(return)
+ beq L(rtn)
stmfd sp!, { r7, r8, r9 } C save regs on stack
-L(loop):
- ldmia up!, { r3, r8, r9, r12 } C load 4 limbs
- ldr r7, [rp, #12] C cache allocate
+
+L(top): ldmia up!, { r3, r8, r9, r12 } C load 4 limbs
subs n, n, #4
stmia rp!, { r3, r8, r9, r12 } C store 4 limbs
- bne L(loop)
+ bne L(top)
+
ldmfd sp!, { r7, r8, r9 } C restore regs from stack
-L(return):
- mov pc, lr
-EPILOGUE(mpn_copyi)
+L(rtn): bx lr
+EPILOGUE()
#define GMP_LIMB_BITS 32
#define BYTES_PER_MP_LIMB 4
-/* 593MHz ARM (gcc50.fsffrance.org) */
+/* 1193MHz ARM (gcc55.fsffrance.org) */
#define DIVREM_1_NORM_THRESHOLD 0 /* preinv always */
#define DIVREM_1_UNNORM_THRESHOLD 0 /* always */
#define MOD_1_NORM_THRESHOLD 0 /* always */
#define MOD_1_UNNORM_THRESHOLD 0 /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD 17
-#define MOD_1U_TO_MOD_1_1_THRESHOLD 9
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD MP_SIZE_T_MAX
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 56
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 11
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 0 /* never mpn_mod_1_1p */
#define MOD_1_2_TO_MOD_1_4_THRESHOLD MP_SIZE_T_MAX
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 27
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 71
#define USE_PREINV_DIVREM_1 1 /* preinv always */
#define DIVREM_2_THRESHOLD 0 /* preinv always */
#define DIVEXACT_1_THRESHOLD 0 /* always */
-#define BMOD_1_TO_MOD_1_THRESHOLD 44
+#define BMOD_1_TO_MOD_1_THRESHOLD 41
-#define MUL_TOOM22_THRESHOLD 34
-#define MUL_TOOM33_THRESHOLD 121
-#define MUL_TOOM44_THRESHOLD 191
-#define MUL_TOOM6H_THRESHOLD 366
-#define MUL_TOOM8H_THRESHOLD 547
+#define MUL_TOOM22_THRESHOLD 36
+#define MUL_TOOM33_THRESHOLD 125
+#define MUL_TOOM44_THRESHOLD 193
+#define MUL_TOOM6H_THRESHOLD 303
+#define MUL_TOOM8H_THRESHOLD 418
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD 129
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD 191
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD 117
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD 137
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD 125
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 176
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 114
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD 129
-#define SQR_BASECASE_THRESHOLD 13
+#define SQR_BASECASE_THRESHOLD 12
#define SQR_TOOM2_THRESHOLD 78
-#define SQR_TOOM3_THRESHOLD 141
+#define SQR_TOOM3_THRESHOLD 137
#define SQR_TOOM4_THRESHOLD 212
-#define SQR_TOOM6_THRESHOLD 330
+#define SQR_TOOM6_THRESHOLD 306
#define SQR_TOOM8_THRESHOLD 422
-#define MULMOD_BNM1_THRESHOLD 21
-#define SQRMOD_BNM1_THRESHOLD 25
+#define MULMOD_BNM1_THRESHOLD 20
+#define SQRMOD_BNM1_THRESHOLD 26
-#define MUL_FFT_MODF_THRESHOLD 404 /* k = 5 */
+#define MUL_FFT_MODF_THRESHOLD 436 /* k = 5 */
#define MUL_FFT_TABLE3 \
- { { 404, 5}, { 21, 6}, { 11, 5}, { 25, 6}, \
- { 13, 5}, { 27, 6}, { 28, 7}, { 15, 6}, \
+ { { 436, 5}, { 27, 6}, { 28, 7}, { 15, 6}, \
{ 32, 7}, { 17, 6}, { 35, 7}, { 19, 6}, \
- { 39, 7}, { 21, 6}, { 43, 7}, { 29, 8}, \
- { 15, 7}, { 35, 8}, { 19, 7}, { 41, 8}, \
- { 23, 7}, { 49, 8}, { 27, 7}, { 55, 9}, \
- { 15, 8}, { 31, 7}, { 63, 8}, { 43, 9}, \
- { 23, 8}, { 55, 9}, { 31, 8}, { 71, 9}, \
- { 39, 8}, { 83, 9}, { 47, 8}, { 99, 9}, \
- { 55,10}, { 31, 9}, { 63, 8}, { 127, 9}, \
- { 79,10}, { 47, 9}, { 103,11}, { 31,10}, \
- { 63, 9}, { 135,10}, { 95, 9}, { 191,10}, \
- { 111,11}, { 63,10}, { 127, 9}, { 255,10}, \
- { 143, 9}, { 287,10}, { 159, 9}, { 319,11}, \
- { 95,10}, { 191, 9}, { 383,10}, { 207,12}, \
- { 63,11}, { 127,10}, { 287,11}, { 159,10}, \
- { 351,11}, { 191,10}, { 415,11}, { 223,12}, \
- { 127,11}, { 255,10}, { 511,11}, { 319,10}, \
- { 639,11}, { 351,12}, { 191,11}, { 415,13}, \
- { 8192,14}, { 16384,15}, { 32768,16} }
-#define MUL_FFT_TABLE3_SIZE 79
+ { 39, 7}, { 29, 8}, { 15, 7}, { 35, 8}, \
+ { 19, 7}, { 41, 8}, { 23, 7}, { 49, 8}, \
+ { 27, 9}, { 15, 8}, { 31, 7}, { 63, 8}, \
+ { 256, 9}, { 512,10}, { 1024,11}, { 2048,12}, \
+ { 4096,13}, { 8192,14}, { 16384,15}, { 32768,16} }
+#define MUL_FFT_TABLE3_SIZE 28
#define MUL_FFT_THRESHOLD 5760
-#define SQR_FFT_MODF_THRESHOLD 400 /* k = 5 */
+#define SQR_FFT_MODF_THRESHOLD 404 /* k = 5 */
#define SQR_FFT_TABLE3 \
- { { 400, 5}, { 25, 6}, { 13, 5}, { 27, 6}, \
- { 25, 7}, { 13, 6}, { 28, 7}, { 15, 6}, \
- { 32, 7}, { 19, 6}, { 39, 7}, { 29, 8}, \
- { 15, 7}, { 35, 8}, { 19, 7}, { 41, 8}, \
- { 23, 7}, { 47, 8}, { 27, 7}, { 55, 9}, \
- { 15, 8}, { 39, 9}, { 23, 8}, { 55,10}, \
- { 15, 9}, { 31, 8}, { 67, 9}, { 39, 8}, \
- { 79, 9}, { 47, 8}, { 95, 9}, { 55,10}, \
- { 31, 9}, { 79,10}, { 47, 9}, { 103,11}, \
- { 31,10}, { 63, 9}, { 135,10}, { 79, 9}, \
- { 159, 8}, { 319,10}, { 95, 9}, { 191,10}, \
- { 111,11}, { 63,10}, { 127, 9}, { 271,10}, \
- { 143, 9}, { 303,10}, { 159,11}, { 95,10}, \
- { 191, 9}, { 383,10}, { 207,12}, { 63,11}, \
- { 127,10}, { 303,11}, { 159,10}, { 367,11}, \
- { 191,10}, { 415,11}, { 223,10}, { 447,12}, \
- { 127,11}, { 255,10}, { 511,11}, { 287,10}, \
- { 607,11}, { 319,10}, { 639,11}, { 351,12}, \
- { 191,11}, { 447,13}, { 8192,14}, { 16384,15}, \
- { 32768,16} }
-#define SQR_FFT_TABLE3_SIZE 77
-#define SQR_FFT_THRESHOLD 3136
+ { { 404, 5}, { 13, 4}, { 27, 5}, { 27, 6}, \
+ { 28, 7}, { 15, 6}, { 32, 7}, { 17, 6}, \
+ { 35, 7}, { 29, 8}, { 15, 7}, { 35, 8}, \
+ { 19, 7}, { 41, 8}, { 23, 7}, { 47, 8}, \
+ { 27, 9}, { 15, 8}, { 39, 9}, { 512,10}, \
+ { 1024,11}, { 2048,12}, { 4096,13}, { 8192,14}, \
+ { 16384,15}, { 32768,16} }
+#define SQR_FFT_TABLE3_SIZE 26
+#define SQR_FFT_THRESHOLD 3776
#define MULLO_BASECASE_THRESHOLD 0 /* always */
-#define MULLO_DC_THRESHOLD 120
-#define MULLO_MUL_N_THRESHOLD 11317
-
-#define DC_DIV_QR_THRESHOLD 134
-#define DC_DIVAPPR_Q_THRESHOLD 442
-#define DC_BDIV_QR_THRESHOLD 127
-#define DC_BDIV_Q_THRESHOLD 296
-
-#define INV_MULMOD_BNM1_THRESHOLD 66
-#define INV_NEWTON_THRESHOLD 458
-#define INV_APPR_THRESHOLD 454
-
-#define BINV_NEWTON_THRESHOLD 494
-#define REDC_1_TO_REDC_N_THRESHOLD 116
-
-#define MU_DIV_QR_THRESHOLD 2914
-#define MU_DIVAPPR_Q_THRESHOLD 3091
-#define MUPI_DIV_QR_THRESHOLD 221
-#define MU_BDIV_QR_THRESHOLD 2259
-#define MU_BDIV_Q_THRESHOLD 2747
-
-#define MATRIX22_STRASSEN_THRESHOLD 17
-#define HGCD_THRESHOLD 109
-#define GCD_DC_THRESHOLD 697
-#define GCDEXT_DC_THRESHOLD 535
+#define MULLO_DC_THRESHOLD 137
+#define MULLO_MUL_N_THRESHOLD 11479
+
+#define DC_DIV_QR_THRESHOLD 150
+#define DC_DIVAPPR_Q_THRESHOLD 494
+#define DC_BDIV_QR_THRESHOLD 148
+#define DC_BDIV_Q_THRESHOLD 345
+
+#define INV_MULMOD_BNM1_THRESHOLD 70
+#define INV_NEWTON_THRESHOLD 474
+#define INV_APPR_THRESHOLD 478
+
+#define BINV_NEWTON_THRESHOLD 542
+#define REDC_1_TO_REDC_N_THRESHOLD 117
+
+#define MU_DIV_QR_THRESHOLD 2089
+#define MU_DIVAPPR_Q_THRESHOLD 2172
+#define MUPI_DIV_QR_THRESHOLD 225
+#define MU_BDIV_QR_THRESHOLD 1528
+#define MU_BDIV_Q_THRESHOLD 2089
+
+#define MATRIX22_STRASSEN_THRESHOLD 16
+#define HGCD_THRESHOLD 197
+#define GCD_DC_THRESHOLD 902
+#define GCDEXT_DC_THRESHOLD 650
#define JACOBI_BASE_METHOD 2
-#define GET_STR_DC_THRESHOLD 14
-#define GET_STR_PRECOMPUTE_THRESHOLD 29
-#define SET_STR_DC_THRESHOLD 321
-#define SET_STR_PRECOMPUTE_THRESHOLD 1037
+#define GET_STR_DC_THRESHOLD 20
+#define GET_STR_PRECOMPUTE_THRESHOLD 39
+#define SET_STR_DC_THRESHOLD 1045
+#define SET_STR_PRECOMPUTE_THRESHOLD 2147
dnl ARM mpn_invert_limb -- Invert a normalized limb.
-dnl Copyright 2001, 2009, 2011 Free Software Foundation, Inc.
+dnl Copyright 2001, 2009, 2011, 2012 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
ASM_START()
PROLOGUE(mpn_invert_limb)
- ldr r2, L(4)
-L(2): add r2, pc, r2
+ LEA( r2, approx_tab-512)
mov r3, r0, lsr #23
mov r3, r3, asl #1
ldrh r3, [r3, r2]
adc r3, r3, r0
rsb r0, r3, r2
bx lr
-
- ALIGN(4)
-L(4): .word approx_tab-8-512-L(2)
EPILOGUE()
.section .rodata
--- /dev/null
+dnl ARM mpn_and_n, mpn_andn_n, mpn_nand_n, etc.
+
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
+dnl Copyright 1997, 2000, 2001, 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb cycles/limb
+C and andn ior xor nand iorn nior xnor
+C StrongARM ? ?
+C XScale ? ?
+C Cortex-A8 ? ?
+C Cortex-A9 2.5-2.72 2.75-3
+C Cortex-A15 ? ?
+
+C TODO
+C * It seems that 2.25 c/l and 2.75 c/l are possible for A9.
+C * Debug popping issue, see comment below.
+
+define(`rp', `r0')
+define(`up', `r1')
+define(`vp', `r2')
+define(`n', `r3')
+
+C POSTOP is empty by default; the complementing variants below redefine it
+C as an mvn of the result register.
+define(`POSTOP')
+
+ifdef(`OPERATION_and_n',`
+ define(`func', `mpn_and_n')
+ define(`LOGOP', `and $1, $2, $3')')
+ifdef(`OPERATION_andn_n',`
+ define(`func', `mpn_andn_n')
+ define(`LOGOP', `bic $1, $2, $3')')
+ifdef(`OPERATION_nand_n',`
+ define(`func', `mpn_nand_n')
+ define(`POSTOP', `mvn $1, $1')
+ define(`LOGOP', `and $1, $2, $3')')
+ifdef(`OPERATION_ior_n',`
+ define(`func', `mpn_ior_n')
+ define(`LOGOP', `orr $1, $2, $3')')
+ifdef(`OPERATION_iorn_n',`
+ define(`func', `mpn_iorn_n')
+ define(`POSTOP', `mvn $1, $1')
+ define(`LOGOP', `bic $1, $3, $2')')
+ifdef(`OPERATION_nior_n',`
+ define(`func', `mpn_nior_n')
+ define(`POSTOP', `mvn $1, $1')
+ define(`LOGOP', `orr $1, $2, $3')')
+ifdef(`OPERATION_xor_n',`
+ define(`func', `mpn_xor_n')
+ define(`LOGOP', `eor $1, $2, $3')')
+ifdef(`OPERATION_xnor_n',`
+ define(`func', `mpn_xnor_n')
+ define(`POSTOP', `mvn $1, $1')
+ define(`LOGOP', `eor $1, $2, $3')')
+
+MULFUNC_PROLOGUE(mpn_and_n mpn_andn_n mpn_nand_n mpn_ior_n mpn_iorn_n mpn_nior_n mpn_xor_n mpn_xnor_n)
+
+C func (rp, up, vp, n)
+C
+C Combine the n limbs at up with the n limbs at vp using LOGOP followed by
+C POSTOP, and store the result limbs at rp.  The low two bits of n are
+C consumed first (one limb if bit 0 is set, then two limbs if bit 1 is set);
+C the remaining multiple of four limbs is handled by the main loop, four
+C limbs per iteration.  Nothing is written to r0 as a return value.
+
+ASM_START()
+PROLOGUE(func)
+ push { r8, r9, r10 } C popped again at L(rtn)
+ tst n, #1
+ beq L(skip1)
+C Bit 0 of n is set: handle one limb.
+ ldr r10, [vp], #4
+ ldr r12, [up], #4
+ LOGOP( r12, r12, r10)
+ POSTOP( r12)
+ str r12, [rp], #4
+L(skip1):
+ tst n, #2
+ beq L(skip2)
+C Bit 1 of n is set: handle two limbs.
+ ldmia vp!, { r10, r12 }
+ ldmia up!, { r8, r9 }
+ LOGOP( r8, r8, r10)
+ LOGOP( r9, r9, r12)
+ POSTOP( r8)
+ POSTOP( r9)
+ stmia rp!, { r8, r9 }
+L(skip2):
+ bics n, n, #3 C n = remaining limb count, a multiple of 4
+ beq L(rtn)
+ push { r4, r5, r6, r7 }
+
+C The loop is entered at L(mid) with the first four vp limbs already loaded.
+C From then on each pass through L(top) loads the next four vp limbs before
+C the previous four results are post-processed and stored.
+ ldmia vp!, { r8, r9, r10, r12 }
+ b L(mid)
+
+L(top): ldmia vp!, { r8, r9, r10, r12 }
+ POSTOP( r4)
+ POSTOP( r5)
+ POSTOP( r6)
+ POSTOP( r7)
+ stmia rp!, { r4, r5, r6, r7 }
+L(mid): sub n, n, #4
+ ldmia up!, { r4, r5, r6, r7 }
+ teq n, #0
+ LOGOP( r4, r4, r8)
+ LOGOP( r5, r5, r9)
+ LOGOP( r6, r6, r10)
+ LOGOP( r7, r7, r12)
+ bne L(top)
+
+C Wind down: finish and store the last four result limbs.
+ POSTOP( r4)
+ POSTOP( r5)
+ POSTOP( r6)
+ POSTOP( r7)
+ stmia rp!, { r4, r5, r6, r7 }
+
+ pop { r4, r5, r6, r7 } C popping r8-r10 here strangely fails
+
+L(rtn): pop { r8, r9, r10 }
+ bx r14
+EPILOGUE()
--- /dev/null
+dnl ARM mpn_lshift.
+
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
+dnl Copyright 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C StrongARM ?
+C XScale ?
+C Cortex-A8 ?
+C Cortex-A9 3.5
+C Cortex-A15 ?
+
+define(`rp', `r0')
+define(`up', `r1')
+define(`n', `r2')
+define(`cnt', `r3')
+define(`tnc', `r12')
+
+C mpn_lshift (rp, up, n, cnt)
+C
+C Shift the n limbs at up left by cnt bits and store the n result limbs at
+C rp.  Both pointers are first advanced to the end of their operands and the
+C limbs are then processed from the most significant one downwards.  The
+C value returned in r0 is the original top limb shifted right by 32-cnt,
+C i.e. the bits shifted out at the top.
+C
+C The loop handles two limbs per iteration; r6 and r8 alternate as the
+C most recently loaded source limb and r7 holds the partially built result
+C limb.
+
+ASM_START()
+PROLOGUE(mpn_lshift)
+ add up, up, n, lsl #2
+ push {r4, r6, r7, r8}
+ ldr r4, [up, #-4]! C r4 = top source limb, kept for the return value
+ add rp, rp, n, lsl #2
+ rsb tnc, cnt, #32 C tnc = 32 - cnt
+
+ lsl r7, r4, cnt
+ tst n, #1
+ beq L(evn) C n even
+
+L(odd): subs n, n, #2
+ bcc L(1) C n = 1
+ ldr r8, [up, #-4]!
+ b L(mid)
+
+L(evn): ldr r6, [up, #-4]!
+ subs n, n, #2
+ beq L(end)
+
+L(top): ldr r8, [up, #-4]!
+ orr r7, r7, r6, lsr tnc
+ str r7, [rp, #-4]!
+ lsl r7, r6, cnt
+L(mid): ldr r6, [up, #-4]!
+ orr r7, r7, r8, lsr tnc
+ str r7, [rp, #-4]!
+ lsl r7, r8, cnt
+ subs n, n, #2
+ bgt L(top)
+
+L(end): orr r7, r7, r6, lsr tnc
+ str r7, [rp, #-4]!
+ lsl r7, r6, cnt
+L(1): str r7, [rp, #-4] C lowest result limb
+ lsr r0, r4, tnc C return value
+ pop {r4, r6, r7, r8}
+ bx r14
+EPILOGUE()
--- /dev/null
+dnl ARM mpn_lshiftc.
+
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
+dnl Copyright 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C StrongARM ?
+C XScale ?
+C Cortex-A8 ?
+C Cortex-A9 4.0
+C Cortex-A15 ?
+
+define(`rp', `r0')
+define(`up', `r1')
+define(`n', `r2')
+define(`cnt', `r3')
+define(`tnc', `r12')
+
+C mpn_lshiftc (rp, up, n, cnt)
+C
+C Same structure as a plain left shift working from the most significant
+C limb downwards, except that every source limb is complemented with mvn
+C before it is shifted into the result.  The value returned in r0 is
+C computed from the uncomplemented top limb (r4) shifted right by 32-cnt.
+
+ASM_START()
+PROLOGUE(mpn_lshiftc)
+ add up, up, n, lsl #2
+ push {r4, r6, r7, r8}
+ ldr r4, [up, #-4]! C r4 = top source limb, kept for the return value
+ add rp, rp, n, lsl #2
+ rsb tnc, cnt, #32 C tnc = 32 - cnt
+ mvn r6, r4
+
+ lsl r7, r6, cnt
+ tst n, #1
+ beq L(evn) C n even
+
+L(odd): subs n, n, #2
+ bcc L(1) C n = 1
+ ldr r8, [up, #-4]!
+ mvn r8, r8
+ b L(mid)
+
+L(evn): ldr r6, [up, #-4]!
+ mvn r6, r6
+ subs n, n, #2
+ beq L(end)
+
+L(top): ldr r8, [up, #-4]!
+ orr r7, r7, r6, lsr tnc
+ str r7, [rp, #-4]!
+ mvn r8, r8
+ lsl r7, r6, cnt
+L(mid): ldr r6, [up, #-4]!
+ orr r7, r7, r8, lsr tnc
+ str r7, [rp, #-4]!
+ mvn r6, r6
+ lsl r7, r8, cnt
+ subs n, n, #2
+ bgt L(top)
+
+L(end): orr r7, r7, r6, lsr tnc
+ str r7, [rp, #-4]!
+ lsl r7, r6, cnt
+C The bits vacated at the bottom of the lowest limb are filled with ones
+C (all-ones shifted right by tnc) so that the result is a full complement.
+L(1): mvn r6, #0
+ orr r7, r7, r6, lsr tnc
+ str r7, [rp, #-4]
+ lsr r0, r4, tnc C return value, from the uncomplemented top limb
+ pop {r4, r6, r7, r8}
+ bx r14
+EPILOGUE()
--- /dev/null
+dnl ARM mpn_mod_34lsub1 -- remainder modulo 2^24-1.
+
+dnl Copyright 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C StrongARM ?
+C XScale ?
+C Cortex-A8 ?
+C Cortex-A9 1.33
+C Cortex-A15 ?
+
+define(`ap', r0)
+define(`n', r1)
+
+C mp_limb_t mpn_mod_34lsub1 (mp_srcptr up, mp_size_t n)
+
+C TODO
+C * Write cleverer summation code.
+C * Consider loading 6 64-bit aligned registers at a time, to approach 1 c/l.
+
+C Outline: the limbs are accumulated three at a time into the running sums
+C r2, r3 and r12 with an add-with-carry chain, and r7 counts carries out of
+C the chain.  At L(sum2) the three sums are folded into r0: each sum is
+C split at a 24-bit boundary, with r2 taken at bit offset 0, r3 at bit
+C offset 8 and r12 at bit offset 16, and the pieces plus r7 are added up.
+C The value returned is this folded sum, not a fully reduced remainder.
+
+ASM_START()
+ TEXT
+ ALIGN(32)
+PROLOGUE(mpn_mod_34lsub1)
+ push { r4, r5, r6, r7 }
+
+ subs n, n, #3
+ mov r7, #0
+ blt L(le2) C n <= 2
+
+ ldmia ap!, { r2, r3, r12 }
+ subs n, n, #3
+ blt L(sum) C n <= 5
+ adds r0, r0, #0 C clear carry
+ sub n, n, #3
+ b L(mid)
+
+L(top): adcs r2, r2, r4
+ adcs r3, r3, r5
+ adcs r12, r12, r6
+L(mid): ldmia ap!, { r4, r5, r6 }
+ tst n, n
+ sub n, n, #3
+ bpl L(top)
+
+ add n, n, #3
+
+ adcs r2, r2, r4
+ adcs r3, r3, r5
+ adcs r12, r12, r6
+ movcs r7, #1 C r7 <= 1
+
+C Zero, one or two limbs remain.  They are loaded conditionally into r4 and
+C r5, with zero substituted for a limb that is not present.
+L(sum): cmn n, #2
+ movlo r4, #0
+ ldrhs r4, [ap], #4
+ movls r5, #0
+ ldrhi r5, [ap], #4
+
+ adds r2, r2, r4
+ adcs r3, r3, r5
+ adcs r12, r12, #0
+ adc r7, r7, #0 C r7 <= 2
+
+L(sum2):
+C r2: low 24 bits plus the top 8 bits shifted down, plus the carry count.
+ bic r0, r2, #0xff000000
+ add r0, r0, r2, lsr #24
+ add r0, r0, r7
+
+C r3: low 16 bits moved up by 8, plus the top 16 bits shifted down.
+ lsl r7, r3, #8
+ bic r1, r7, #0xff000000
+ add r0, r0, r1
+ add r0, r0, r3, lsr #16
+
+C r12: low 8 bits moved up by 16, plus the top 24 bits shifted down.
+ lsl r7, r12, #16
+ bic r1, r7, #0xff000000
+ add r0, r0, r1
+ add r0, r0, r12, lsr #8
+
+ pop { r4, r5, r6, r7 }
+ bx lr
+
+C Short operands.  n holds the original count minus 3 here, so n = -1 means
+C two limbs; otherwise control goes to L(1), which reads a single limb.
+L(le2): cmn n, #1
+ bne L(1)
+ ldmia ap!, { r2, r3 }
+ mov r12, #0
+ b L(sum2)
+L(1): ldr r2, [ap]
+ bic r0, r2, #0xff000000
+ add r0, r0, r2, lsr #24
+ pop { r4, r5, r6, r7 }
+ bx lr
+EPILOGUE()
--- /dev/null
+dnl ARM mpn_modexact_1c_odd
+
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
+dnl Copyright 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C StrongARM ?
+C XScale ?
+C Cortex-A8 ?
+C Cortex-A9 10
+C Cortex-A15 ?
+
+define(`up', `r0')
+define(`n', `r1')
+define(`d', `r2')
+define(`cy', `r3')
+
+C mpn_modexact_1c_odd (up, n, d, cy)
+C
+C Outline: an inverse of d is built in r4, starting from a byte fetched from
+C binvert_limb_table at index (d & 254) >> 1 and refined twice with the step
+C x = 2*x - d*x*x.  The limbs at up are then processed from the lowest
+C upwards: each step subtracts the running carry (and the borrow flag) from
+C the limb, multiplies the difference by the inverse, and takes the high
+C half of d times that product as the next carry.  The final carry, plus
+C one if the last subtraction borrowed, is returned in r0.
+
+ .protected binvert_limb_table
+ASM_START()
+PROLOGUE(mpn_modexact_1c_odd)
+ stmfd sp!, {r4, r5}
+
+ LEA( r4, binvert_limb_table)
+
+ ldr r5, [up], #4 C up[0]
+
+ and r12, d, #254
+ ldrb r4, [r4, r12, lsr #1] C table lookup, initial value
+ mul r12, r4, r4
+ mul r12, d, r12
+ rsb r12, r12, r4, asl #1 C first refinement: 2*x - d*x*x
+ mul r4, r12, r12
+ mul r4, d, r4
+ rsb r4, r4, r12, asl #1 C r4 = inverse
+
+ subs n, n, #1 C set carry as side-effect
+ beq L(end)
+
+L(top): sbcs cy, r5, cy
+ ldr r5, [up], #4
+ sub n, n, #1
+ mul r12, r4, cy
+ tst n, n
+ umull r12, cy, d, r12 C cy = high half of d * r12
+ bne L(top)
+
+L(end): sbcs cy, r5, cy
+ mul r12, r4, cy
+ umull r12, r0, d, r12
+ addcc r0, r0, #1 C add one if the last sbcs borrowed
+
+ ldmfd sp!, {r4, r5}
+ bx r14
+EPILOGUE()
dnl in a second limb vector.
dnl Contributed by Robert Harley.
-dnl Copyright 1998, 2000, 2001, 2003 Free Software Foundation, Inc.
+dnl Copyright 1998, 2000, 2001, 2003, 2012 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
include(`../config.m4')
-C cycles/limb
-C StrongARM: 6-8 (dependent on vl value)
-C XScale: ?-?
+C cycles/limb
+C StrongARM 6-8
+C XScale ?
+C Cortex-A8 ?
+C Cortex-A9 4.75
+C Cortex-A15 ?
C We should rewrite this along the lines of addmul_1.asm. That should save a
C cycle on StrongARM, and several cycles on XScale.
stmia rp!, { r8, r9 }
L(skip2):
bics n, n, #3
- beq L(return)
+ beq L(rtn)
stmfd sp!, { r6, r7 }
-L(loop):
- mov r6, r12
+
+L(top): mov r6, r12
ldmia up!, { r8, r9, r12, lr }
ldr r7, [rp, #12] C cache allocate
mov r7, #0
umlal r9, r12, lr, vl
subs n, n, #4
stmia rp!, { r6, r7, r8, r9 }
- bne L(loop)
+ bne L(top)
+
ldmfd sp!, { r6, r7 }
-L(return):
- mov r0, r12
+
+L(rtn): mov r0, r12
ldmfd sp!, { r8, r9, pc }
-EPILOGUE(mpn_mul_1)
+EPILOGUE()
--- /dev/null
+dnl ARM mpn_rsh1add_n and mpn_rsh1sub_n.
+
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
+dnl Copyright 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C StrongARM ?
+C XScale ?
+C Cortex-A8 ?
+C Cortex-A9 3.64-3.7
+C Cortex-A15 ?
+
+C TODO
+C * Not optimised.
+
+define(`rp', `r0')
+define(`up', `r1')
+define(`vp', `r2')
+define(`n', `r3')
+
+ifdef(`OPERATION_rsh1add_n', `
+ define(`ADDSUB', adds)
+ define(`ADDSUBC', adcs)
+ define(`RSTCY', `cmn $1, $1')
+ define(`func', mpn_rsh1add_n)
+ define(`func_nc', mpn_rsh1add_nc)')
+ifdef(`OPERATION_rsh1sub_n', `
+ define(`ADDSUB', subs)
+ define(`ADDSUBC', sbcs)
+ define(`RSTCY',
+ `mvn $2, #0x80000000
+ cmp $2, $1')
+ define(`func', mpn_rsh1sub_n)
+ define(`func_nc', mpn_rsh1sub_nc)')
+
+MULFUNC_PROLOGUE(mpn_rsh1add_n mpn_rsh1sub_n)
+
+C func (rp, up, vp, n)
+C
+C Add (or subtract) the n-limb operands at up and vp and store the result
+C shifted right by one bit at rp.  The value returned in r0 is bit 0 of the
+C lowest sum/difference limb, i.e. the bit shifted out.
+C
+C The carry flag cannot survive the rrxs shift chain, so between groups it
+C is parked in bit 31 of r12 (rrxs r12, reg) and brought back with
+C cmn r12, r12.  Only bit 31 of r12 is significant.  The main loop handles
+C three limbs per iteration; r4 always holds the unshifted limb still
+C waiting for its top bit from the next limb.
+
+ASM_START()
+PROLOGUE(func)
+ push {r4-r11}
+ ldr r4, [up], #4
+ ldr r8, [vp], #4
+ ADDSUB r4, r4, r8
+ rrxs r12, r7 C park carry in bit 31 of r12
+ and r11, r4, #1 C return value
+ subs n, n, #4
+ blo L(end)
+
+L(top): ldmia up!, {r5,r6,r7}
+ ldmia vp!, {r8,r9,r10}
+ cmn r12, r12 C recover carry from r12
+ ADDSUBC r5, r5, r8
+ ADDSUBC r6, r6, r9
+ ADDSUBC r7, r7, r10
+ rrxs r12, r7 C park carry, bit 0 of r7 goes to the flag
+ rrxs r6, r6
+ rrxs r5, r5
+ rrxs r4, r4
+ subs n, n, #3
+ stmia rp!, {r4,r5,r6}
+ mov r4, r7
+ bhs L(top)
+
+C Tail: two, one or zero limbs remain besides the pending limb in r4.
+L(end): cmn n, #2
+ bls L(e2)
+ ldm up, {r5,r6}
+ ldm vp, {r8,r9}
+ cmn r12, r12
+ ADDSUBC r5, r5, r8
+ ADDSUBC r6, r6, r9
+ rrxs r12, r6
+ rrxs r5, r5
+ rrxs r4, r4
+ stmia rp!, {r4,r5}
+ mov r4, r6
+ b L(e1)
+
+L(e2): bne L(e1)
+ ldr r5, [up, #0]
+ ldr r8, [vp, #0]
+ cmn r12, r12
+ ADDSUBC r5, r5, r8
+ rrxs r12, r5
+ rrxs r4, r4
+ str r4, [rp], #4
+ mov r4, r5
+
+C RSTCY sets the carry flag from r12 so that the final rrxs shifts the
+C correct top bit into the most significant result limb.
+L(e1): RSTCY( r12, r1)
+ rrxs r4, r4
+ str r4, [rp, #0]
+ mov r0, r11
+ pop {r4-r11}
+ bx r14
+EPILOGUE()
--- /dev/null
+dnl ARM mpn_rshift.
+
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
+dnl Copyright 1997, 2000, 2001, 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C StrongARM ?
+C XScale ?
+C Cortex-A8 ?
+C Cortex-A9 3.5
+C Cortex-A15 ?
+
+define(`rp', `r0')
+define(`up', `r1')
+define(`n', `r2')
+define(`cnt', `r3')
+define(`tnc', `r12')
+
+C mpn_rshift (rp, up, n, cnt)
+C
+C Shift the n limbs at up right by cnt bits and store the n result limbs at
+C rp, processing the limbs from the least significant one upwards.  The
+C value returned in r0 is the original lowest limb shifted left by 32-cnt,
+C i.e. the bits shifted out at the bottom, placed at the top of the word.
+C
+C The loop handles two limbs per iteration; r6 and r8 alternate as the
+C most recently loaded source limb and r7 holds the partially built result
+C limb.
+
+ASM_START()
+PROLOGUE(mpn_rshift)
+ push {r4, r6, r7, r8}
+ ldr r4, [up] C r4 = lowest source limb, kept for the return value
+ rsb tnc, cnt, #32 C tnc = 32 - cnt
+
+ lsr r7, r4, cnt
+ tst n, #1
+ beq L(evn) C n even
+
+L(odd): subs n, n, #2
+ bcc L(1) C n = 1
+ ldr r8, [up, #4]!
+ b L(mid)
+
+L(evn): ldr r6, [up, #4]!
+ subs n, n, #2
+ beq L(end)
+
+L(top): ldr r8, [up, #4]!
+ orr r7, r7, r6, lsl tnc
+ str r7, [rp], #4
+ lsr r7, r6, cnt
+L(mid): ldr r6, [up, #4]!
+ orr r7, r7, r8, lsl tnc
+ str r7, [rp], #4
+ lsr r7, r8, cnt
+ subs n, n, #2
+ bgt L(top)
+
+L(end): orr r7, r7, r6, lsl tnc
+ str r7, [rp], #4
+ lsr r7, r6, cnt
+L(1): str r7, [rp], #4 C highest result limb
+ lsl r0, r4, tnc C return value
+ pop {r4, r6, r7, r8}
+ bx r14
+EPILOGUE()
+++ /dev/null
-dnl ARM mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
-dnl store difference in a third limb vector.
-dnl Contributed by Robert Harley.
-
-dnl Copyright 1997, 2000, 2001 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of the GNU Lesser General Public License as published
-dnl by the Free Software Foundation; either version 3 of the License, or (at
-dnl your option) any later version.
-
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-dnl License for more details.
-
-dnl You should have received a copy of the GNU Lesser General Public License
-dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C This code runs at 5 cycles/limb.
-
-define(`rp',`r0')
-define(`up',`r1')
-define(`vp',`r2')
-define(`n',`r3')
-
-
-ASM_START()
-PROLOGUE(mpn_sub_n)
- stmfd sp!, { r8, r9, lr }
- subs r12, r12, r12
- tst n, #1
- beq L(skip1)
- ldr r12, [up], #4
- ldr lr, [vp], #4
- subs r12, r12, lr
- str r12, [rp], #4
-L(skip1):
- tst n, #2
- beq L(skip2)
- ldmia up!, { r8, r9 }
- ldmia vp!, { r12, lr }
- sbcs r8, r8, r12
- sbcs r9, r9, lr
- stmia rp!, { r8, r9 }
-L(skip2):
- bics n, n, #3
- beq L(return)
- stmfd sp!, { r4, r5, r6, r7 }
-L(sub_n_loop):
- ldmia up!, { r4, r5, r6, r7 }
- ldmia vp!, { r8, r9, r12, lr }
- sbcs r4, r4, r8
- ldr r8, [rp, #12] C cache allocate
- sbcs r5, r5, r9
- sbcs r6, r6, r12
- sbcs r7, r7, lr
- stmia rp!, { r4, r5, r6, r7 }
- sub n, n, #4
- teq n, #0
- bne L(sub_n_loop)
- ldmfd sp!, { r4, r5, r6, r7 }
-L(return):
- sbc r0, r0, r0
- and r0, r0, #1
- ldmfd sp!, { r8, r9, pc }
-EPILOGUE(mpn_sub_n)
+++ /dev/null
-dnl ARM mpn_submul_1 -- Multiply a limb vector with a limb and subtract the
-dnl result from a second limb vector.
-
-dnl Copyright 1998, 2000, 2001, 2003 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of the GNU Lesser General Public License as published
-dnl by the Free Software Foundation; either version 3 of the License, or (at
-dnl your option) any later version.
-
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-dnl License for more details.
-
-dnl You should have received a copy of the GNU Lesser General Public License
-dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/limb
-C StrongARM: 7.75-9.75 (dependent on vl value)
-C XScale: 8-9 (dependent on vl value, estimated)
-
-define(`rp',`r0')
-define(`up',`r1')
-define(`n',`r2')
-define(`vl',`r3')
-define(`rl',`r12')
-define(`ul',`r6')
-define(`r',`lr')
-
-
-ASM_START()
-PROLOGUE(mpn_submul_1)
- stmfd sp!, { r4-r6, lr }
- subs r4, r0, r0 C clear r4, set cy
- tst n, #1
- beq L(skip1)
- ldr ul, [up], #4
- ldr rl, [rp, #0]
- umull r5, r4, ul, vl
- subs r, rl, r5
- str r, [rp], #4
-L(skip1):
- tst n, #2
- beq L(skip2)
- ldr ul, [up], #4
- ldr rl, [rp, #0]
- mov r5, #0
- umlal r4, r5, ul, vl
- ldr ul, [up], #4
- sbcs r, rl, r4
- ldr rl, [rp, #4]
- mov r4, #0
- umlal r5, r4, ul, vl
- str r, [rp], #4
- sbcs r, rl, r5
- str r, [rp], #4
-L(skip2):
- bics r, n, #3
- beq L(return)
-
- ldr ul, [up], #4
- ldr rl, [rp, #0]
- mov r5, #0
- umlal r4, r5, ul, vl
- b L(in)
-
-L(loop):
- ldr ul, [up], #4
- sbcs r, rl, r5
- ldr rl, [rp, #4]
- mov r5, #0
- umlal r4, r5, ul, vl
- str r, [rp], #4
-L(in): ldr ul, [up], #4
- sbcs r, rl, r4
- ldr rl, [rp, #4]
- mov r4, #0
- umlal r5, r4, ul, vl
- str r, [rp], #4
- ldr ul, [up], #4
- sbcs r, rl, r5
- ldr rl, [rp, #4]
- mov r5, #0
- umlal r4, r5, ul, vl
- str r, [rp], #4
- ldr ul, [up], #4
- sbcs r, rl, r4
- ldr rl, [rp, #4]
- mov r4, #0
- umlal r5, r4, ul, vl
- str r, [rp], #4
- sub n, n, #4
- bics r, n, #3
- bne L(loop)
-
- sbcs r, rl, r5
- str r, [rp], #4
-L(return):
- sbc r0, r0, r0
- sub r0, r4, r0
- ldmfd sp!, { r4-r6, pc }
-EPILOGUE(mpn_submul_1)
dnl ARM mpn_udiv_qrnnd -- divide a two limb dividend and a one limb divisor.
dnl Return quotient and store remainder through a supplied pointer.
-dnl Copyright 2001 Free Software Foundation, Inc.
+dnl Copyright 2001, 2012 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
teq r12, #0
bne L(oop)
- str n1, [ rem_ptr ] C store remainder
+ str n1, [rem_ptr] C store remainder
adc r0, n0, n0 C quotient: add last carry from divstep
- mov pc, lr
+ bx lr
L(_large_divisor):
stmfd sp!, { r8, lr }
addcs n0, n0, #1 C adjust quotient
L(_even_divisor):
- str n1, [ rem_ptr ] C store remainder
+ str n1, [rem_ptr] C store remainder
mov r0, n0 C quotient
ldmfd sp!, { r8, pc }
EPILOGUE(mpn_udiv_qrnnd)
--- /dev/null
+dnl ARM v5 mpn_gcd_1.
+
+dnl Based on the K7 gcd_1.asm, by Kevin Ryde. Rehacked for ARM by Torbjorn
+dnl Granlund.
+
+dnl Copyright 2000, 2001, 2002, 2005, 2009, 2011, 2012 Free Software
+dnl Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/bit (approx)
+C StrongARM ?
+C XScale ?
+C Cortex-A8 ?
+C Cortex-A9 5.9
+C Cortex-A15 ?
+C Numbers measured with: speed -CD -s8-32 -t24 mpn_gcd_1
+
+C TODO
+C * Optimise inner-loop better.
+
+C Threshold of when to call bmod when U is one limb. Should be about
+C (time_in_cycles(bmod_1,1) + call_overhead) / (cycles/bit).
+define(`BMOD_THRES_LOG2', 6)
+
+C INPUT PARAMETERS
+define(`up', `r0')
+define(`n', `r1')
+define(`v0', `r2')
+
+ifdef(`BMOD_1_TO_MOD_1_THRESHOLD',,
+ `define(`BMOD_1_TO_MOD_1_THRESHOLD',0xffffffff)')
+
+C mpn_gcd_1 (up, n, v0)
+C
+C Register use in the code below:
+C   r4   count of trailing zero bits common to the low limb of U and to v0
+C   r7   v0 with its trailing zero bits shifted out
+C   r3   the other operand, after U has been reduced to a single limb
+C   r12  scratch for trailing zero counts
+C Trailing zeros are counted by isolating the lowest set bit (x & -x),
+C taking clz of it and subtracting that from 31.
+C
+C U is reduced to one limb either by mpn_modexact_1c_odd or by mpn_mod_1,
+C unless U is a single limb that is not large compared with v0.  The loop
+C at L(top)/L(mid) then repeatedly strips trailing zeros and replaces the
+C pair by the smaller value and the absolute difference until the
+C difference is zero.  The result is r7 shifted left by r4.
+
+ASM_START()
+ TEXT
+ ALIGN(16)
+PROLOGUE(mpn_gcd_1)
+ push {r4, r7, lr}
+ ldr r3, [up] C U low limb
+
+ orr r3, r3, v0
+ rsb r4, r3, #0
+ and r4, r4, r3
+ clz r4, r4 C min(ctz(u0),ctz(v0))
+ rsb r4, r4, #31
+
+C Strip the trailing zero bits from v0.
+ rsb r12, v0, #0
+ and r12, r12, v0
+ clz r12, r12
+ rsb r12, r12, #31
+ lsr v0, v0, r12
+
+ mov r7, v0
+
+ cmp n, #1
+ bne L(nby1)
+
+C Both U and V are single limbs, reduce with bmod if u0 >> v0.
+ ldr r3, [up]
+ cmp v0, r3, lsr #BMOD_THRES_LOG2
+ bhi L(red1)
+
+L(bmod):mov r3, #0 C carry argument
+ bl mpn_modexact_1c_odd
+ b L(red0)
+
+L(nby1):cmp n, #BMOD_1_TO_MOD_1_THRESHOLD
+ blo L(bmod)
+
+ bl mpn_mod_1
+
+C The rsbs sets the Z flag when the reduced value is zero; the following
+C and/clz/rsb do not set flags, so bne still tests that result.
+L(red0):mov r3, r0
+L(red1):rsbs r12, r3, #0
+ and r12, r12, r3
+ clz r12, r12
+ rsb r12, r12, #31
+ bne L(mid)
+ b L(end)
+
+ ALIGN(8)
+L(top): rsb r12, r12, #31
+ movcc r3, r1 C if x-y < 0
+ movcc r7, r0 C use x,y-x
+L(mid): lsr r3, r3, r12 C
+ mov r0, r3 C
+ sub r1, r7, r3 C
+ rsbs r3, r7, r3 C
+ and r12, r1, r3 C
+ clz r12, r12 C
+ bne L(top) C
+
+L(end): lsl r0, r7, r4
+ pop {r4, r7, pc}
+EPILOGUE()
--- /dev/null
+dnl ARM mpn_mod_1_1p
+
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
+dnl Copyright 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C StrongARM ?
+C XScale ?
+C Cortex-A8 ?
+C Cortex-A9 7
+C Cortex-A15 6
+
+define(`ap', `r0')
+define(`n', `r1')
+define(`d', `r2')
+define(`cps',`r3')
+
+C mpn_mod_1_1p (ap, n, d, cps)
+C
+C The four words at cps are read at these offsets:
+C   cps[0] (offset 0)   multiplier used in the final reduction at L(8)
+C   cps[1] (offset 4)   shift count; zero selects the L(7) path
+C   cps[2] (offset 8)   multiplier used at L(4) when the count is nonzero
+C   cps[3] (offset 12)  multiplier used in the main loop
+C The limbs are consumed from the most significant end downwards.  The
+C running two-limb value is kept in r5/r12 on entry to L(4); the result is
+C shifted right by the count in r1 before being returned in r0.
+
+ASM_START()
+PROLOGUE(mpn_mod_1_1p)
+ push {r4-r10}
+ add r0, r0, r1, asl #2 C point past the top limb
+ ldr r5, [r0, #-4]! C top limb
+ ldr r12, [r0, #-4]! C next limb
+ subs r1, r1, #2
+ ble L(4)
+ ldr r8, [r3, #12] C cps[3]
+ mov r4, r12
+ mov r10, r5
+ umull r7, r5, r10, r8
+ sub r1, r1, #1
+ b L(mid)
+
+L(top): adds r12, r6, r7
+ adcs r10, r4, r5
+ sub r1, r1, #1
+ mov r6, #0
+ movcs r6, r8
+ umull r7, r5, r10, r8
+ adds r4, r12, r6
+ subcs r4, r4, r2
+L(mid): ldr r6, [r0, #-4]!
+ teq r1, #0
+ bne L(top)
+
+ adds r12, r6, r7
+ adcs r5, r4, r5
+ subcs r5, r5, r2
+L(4): ldr r1, [r3, #4] C cps[1], the shift count
+ cmp r1, #0
+ beq L(7)
+ ldr r4, [r3, #8] C cps[2]
+ umull r0, r6, r5, r4
+ adds r12, r0, r12
+ addcs r6, r6, #1
+ rsb r0, r1, #32
+ mov r0, r12, lsr r0
+ orr r5, r0, r6, asl r1
+ mov r12, r12, asl r1
+ b L(8)
+L(7): cmp r5, r2
+ subcs r5, r5, r2
+L(8): ldr r0, [r3, #0] C cps[0]
+ umull r4, r3, r5, r0
+ add r5, r5, #1
+ adds r0, r4, r12
+ adc r5, r3, r5
+ mul r5, r2, r5
+ sub r12, r12, r5
+ cmp r12, r0
+ addhi r12, r12, r2
+ cmp r2, r12
+ subls r12, r12, r2
+ mov r0, r12, lsr r1 C undo the shift by the count
+ pop {r4-r10}
+ bx r14
+EPILOGUE()
+
+C mpn_mod_1_1p_cps: fill in the table whose address is passed in r0 for
+C the value passed in r1.
+C   [0]   value returned by mpn_invert_limb for r1 shifted left by its
+C         leading zero count
+C   [1]   the leading zero count of r1
+C   [2]   stored only when the count is nonzero
+C   [12 bytes in]   the negated shifted value times the word stored at [0]
+PROLOGUE(mpn_mod_1_1p_cps)
+ stmfd sp!, {r4, r5, r6, r14}
+ mov r5, r0 C r5 = table pointer, preserved across the call
+ clz r4, r1 C r4 = leading zero count
+ mov r0, r1, asl r4 C shifted value, argument to mpn_invert_limb
+ rsb r6, r0, #0 C r6 = minus the shifted value
+ bl mpn_invert_limb
+ str r0, [r5, #0]
+ str r4, [r5, #4]
+ cmp r4, #0
+ beq L(2)
+ rsb r1, r4, #32
+ mov r3, #1
+ mov r3, r3, asl r4
+ orr r3, r3, r0, lsr r1
+ mul r3, r6, r3
+ mov r4, r3, lsr r4
+ str r4, [r5, #8]
+L(2): mul r0, r6, r0
+ str r0, [r5, #12]
+ ldmfd sp!, {r4, r5, r6, pc}
+EPILOGUE()
--- /dev/null
+dnl ARM mpn_mod_1s_2p
+
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
+dnl Copyright 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C StrongARM ?
+C XScale ?
+C Cortex-A8 ?
+C Cortex-A9 4.25
+C Cortex-A15 3
+
+define(`ap', `r0')
+define(`n', `r1')
+define(`d', `r2')
+define(`cps',`r3')
+
+C mpn_mod_1s_2p (ap, n, d, cps)
+C
+C Words read from cps: [0] is the multiplier for the final reduction at
+C L(x), [1] is the shift count cnt, and [2], [3], [4] are loaded into r7,
+C r8 and r12 as B1, B2 and B3.
+C
+C ap is first moved to the end of the operand and limbs are then fetched
+C with ldmdb, i.e. from the most significant end downwards.  The running
+C two-limb value alternates between the register pairs r4/r10 and r5/r9,
+C each step folding in two new limbs with three umlal instructions.  The
+C final value is shifted left by cnt, reduced, and shifted right by cnt
+C again before being returned in r0.
+
+ASM_START()
+PROLOGUE(mpn_mod_1s_2p)
+ push {r4-r10}
+ tst n, #1
+ add r7, r3, #8
+ ldmia r7, {r7, r8, r12} C load B1, B2, B3
+ add ap, ap, n, lsl #2 C put ap at operand end
+ beq L(evn)
+
+C Odd n: a single limb goes straight to L(1); otherwise the top three
+C limbs are combined first.
+L(odd): subs n, n, #1
+ beq L(1)
+ ldmdb ap!, {r4,r6,r9}
+ mov r10, #0
+ umlal r4, r10, r6, r7
+ umlal r4, r10, r9, r8
+ b L(com)
+
+L(evn): ldmdb ap!, {r4,r10}
+L(com): subs n, n, #2
+ ble L(end)
+ ldmdb ap!, {r5,r6}
+ b L(mid)
+
+L(top): mov r9, #0
+ umlal r5, r9, r6, r7 C B1
+ umlal r5, r9, r4, r8 C B2
+ ldmdb ap!, {r4,r6}
+ umlal r5, r9, r10, r12 C B3
+ ble L(xit)
+ mov r10, #0
+ umlal r4, r10, r6, r7 C B1
+ umlal r4, r10, r5, r8 C B2
+ ldmdb ap!, {r5,r6}
+ umlal r4, r10, r9, r12 C B3
+L(mid): subs n, n, #4
+ bge L(top)
+
+ mov r9, #0
+ umlal r5, r9, r6, r7 C B1
+ umlal r5, r9, r4, r8 C B2
+ umlal r5, r9, r10, r12 C B3
+ mov r4, r5
+
+L(end): movge r9, r10 C executed iff coming via xit
+ ldr r6, [r3, #4] C cps[1] = cnt
+ mov r5, #0
+ umlal r4, r5, r9, r7
+ mov r7, r5, lsl r6
+C Final reduction of the two-limb value, using cps[0] as multiplier.
+L(x): rsb r1, r6, #32
+ orr r8, r7, r4, lsr r1
+ mov r9, r4, lsl r6
+ ldr r5, [r3, #0]
+ add r0, r8, #1
+ umull r12, r1, r8, r5
+ adds r4, r12, r9
+ adc r1, r1, r0
+ mul r5, r2, r1
+ sub r9, r9, r5
+ cmp r9, r4
+ addhi r9, r9, r2
+ cmp r2, r9
+ subls r9, r9, r2
+ mov r0, r9, lsr r6 C undo the shift by cnt
+ pop {r4-r10}
+ bx r14
+
+L(xit): mov r10, #0
+ umlal r4, r10, r6, r7 C B1
+ umlal r4, r10, r5, r8 C B2
+ umlal r4, r10, r9, r12 C B3
+ b L(end)
+
+L(1): ldr r6, [r3, #4] C cps[1] = cnt
+ ldr r4, [ap, #-4] C ap[0]
+ mov r7, #0
+ b L(x)
+EPILOGUE()
+
+C mpn_mod_1s_2p_cps: fill in the five-word table whose address is passed
+C in r0 for the value b passed in r1.  The words stored by the final stmia
+C are, in order: the value returned by mpn_invert_limb for b shifted left
+C by its leading zero count, the leading zero count itself, and three
+C values computed below, each shifted right by that count.
+PROLOGUE(mpn_mod_1s_2p_cps)
+ push {r4-r8, r14}
+ clz r4, r1 C r4 = leading zero count of b
+ mov r5, r1, lsl r4 C b <<= cnt
+ mov r6, r0 C r6 = cps
+ mov r0, r5
+ bl mpn_invert_limb
+ rsb r3, r4, #32
+ mov r3, r0, lsr r3
+ mov r2, #1
+ orr r3, r3, r2, lsl r4
+ rsb r1, r5, #0
+ mul r2, r1, r3 C first value, before the final shift
+ umull r3, r12, r2, r0
+ add r12, r2, r12
+ mvn r12, r12
+ mul r1, r5, r12
+ cmp r1, r3
+ addhi r1, r1, r5 C second value, before the final shift
+ umull r12, r7, r1, r0
+ add r7, r1, r7
+ mvn r7, r7
+ mul r3, r5, r7
+ cmp r3, r12
+ addhi r3, r3, r5 C third value, before the final shift
+ mov r5, r2, lsr r4
+ mov r7, r1, lsr r4
+ mov r8, r3, lsr r4
+ stmia r6, {r0,r4,r5,r7,r8} C fill cps
+ pop {r4-r8, pc}
+EPILOGUE()
--- /dev/null
+dnl ARM mpn_addmul_1.
+
+dnl Copyright 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C StrongARM: -
+C XScale -
+C Cortex-A8 ?
+C Cortex-A9 3.25
+C Cortex-A15 4
+
+C TODO
+C * Micro-optimise feed-in code.
+C * Optimise for n=1,2 by delaying register saving.
+C * Try using ldm/stm.
+
+C Register roles: rp (r0) is the limb vector that is read, updated and
+C written back; up (r1) is the source vector; n (r2) is the limb count;
+C v0 (r3) is the single multiplier limb.
+define(`rp',`r0')
+define(`up',`r1')
+define(`n', `r2')
+define(`v0',`r3')
+
+ASM_START()
+C mpn_addmul_1: add up[] * v0 into rp[] over n limbs and return the carry
+C limb in r0. r12 carries between limbs. The loop is unrolled four ways and
+C entered at the point selected by n mod 4.
+C NOTE(review): n = 0 would take the fi0 entry and touch limbs, so callers
+C presumably guarantee n >= 1 - confirm.
+PROLOGUE(mpn_addmul_1)
+ stmfd sp!, { r4, r5, r6, r7 } C save scratch registers
+
+ ands r6, n, #3 C r6 = n mod 4
+ mov r12, #0 C carry limb = 0
+ beq L(fi0) C n mod 4 = 0
+ cmp r6, #2
+ bcc L(fi1) C n mod 4 = 1
+ beq L(fi2) C n mod 4 = 2
+
+L(fi3): ldr r4, [up], #4 C n mod 4 = 3: first source limb
+ ldr r6, [rp, #0] C first destination limb, rp not advanced here
+ ldr r5, [up], #4 C second source limb
+ b L(lo3)
+
+L(fi0): ldr r5, [up], #4
+ ldr r7, [rp], #4 C rp biased to match the offsets used from lo0 on
+ ldr r4, [up], #4
+ b L(lo0)
+
+L(fi1): ldr r4, [up], #4
+ ldr r6, [rp], #8 C rp biased to match the offsets used from lo1 on
+ subs n, n, #1
+ beq L(1) C single limb: go straight to the final multiply
+ ldr r5, [up], #4
+ b L(lo1)
+
+L(fi2): ldr r5, [up], #4
+ ldr r7, [rp], #12 C rp biased to match the offsets used from lo2 on
+ ldr r4, [up], #4
+ b L(lo2)
+
+ ALIGN(16)
+C Main loop. Each umaal computes source limb * v0 + destination limb + r12,
+C leaving the low word in the destination register and the high word in r12.
+C Loads for the next limb are issued before the previous result is stored.
+L(top): ldr r6, [rp, #-8]
+ ldr r5, [up], #4
+ str r7, [rp, #-12]
+L(lo1): umaal r6, r12, r4, v0
+ ldr r7, [rp, #-4]
+ ldr r4, [up], #4
+ str r6, [rp, #-8]
+L(lo0): umaal r7, r12, r5, v0
+ ldr r6, [rp, #0]
+ ldr r5, [up], #4
+ str r7, [rp, #-4]
+L(lo3): umaal r6, r12, r4, v0
+ ldr r7, [rp, #4]
+ ldr r4, [up], #4
+ str r6, [rp], #16 C store and advance rp by four limbs
+L(lo2): umaal r7, r12, r5, v0
+ subs n, n, #4
+ bhi L(top) C repeat unless the count reached zero or wrapped
+
+ ldr r6, [rp, #-8] C wind down: last destination limb
+ str r7, [rp, #-12]
+L(1): umaal r6, r12, r4, v0 C final limb
+ str r6, [rp, #-8]
+ mov r0, r12 C return the carry limb
+ ldmfd sp!, { r4, r5, r6, r7 }
+ bx lr
+EPILOGUE()
--- /dev/null
+dnl ARM mpn_addmul_2.
+
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
+dnl Copyright 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C StrongARM: -
+C XScale -
+C Cortex-A8 ?
+C Cortex-A9 2.38
+C Cortex-A15 2.5
+
+C TODO
+C * Consider using more registers for the r[] loads, allowing better load-use
+C scheduling for a 6% speedup (on A9). Free: r10, r11, r14
+
+C Argument registers: rp (r0), up (r1), n (r2) and vp (r3). vp is only
+C needed until both multiplier limbs have been loaded, so r3 is reused as u0.
+define(`rp',`r0')
+define(`up',`r1')
+define(`n', `r2')
+define(`vp',`r3')
+
+define(`v0',`r6')
+define(`v1',`r7')
+define(`u0',`r3')
+define(`u1',`r9')
+
+C Two independent carry limbs: cya follows the v0 products, cyb the v1 products.
+define(`cya',`r8')
+define(`cyb',`r12')
+
+
+ASM_START()
+C mpn_addmul_2: add up[] times the two limbs loaded from vp into rp[].
+C r4 and r5 alternate as the destination limb being accumulated. At the end
+C cya is stored as one further limb of rp and cyb is returned in r0.
+PROLOGUE(mpn_addmul_2)
+ push { r4, r5, r6, r7, r8, r9 } C save scratch registers
+
+ ldm vp, { v0, v1 } C load both multiplier limbs
+ mov cya, #0
+ mov cyb, #0
+
+ tst n, #1
+ beq L(evn) C even n
+L(odd): ldr r5, [rp, #0]
+ ldr u0, [up, #0]
+ ldr r4, [rp, #4]
+ tst n, #2
+ beq L(fi1) C n mod 4 = 1
+L(fi3): sub up, up, #12 C n mod 4 = 3: bias pointers for entry at lo3
+ sub rp, rp, #16
+ b L(lo3)
+L(fi1): sub n, n, #1
+ sub up, up, #4 C bias pointers for entry at lo1
+ sub rp, rp, #8
+ b L(lo1)
+L(evn): ldr r4, [rp, #0]
+ ldr u1, [up, #0]
+ ldr r5, [rp, #4]
+ tst n, #2
+ bne L(fi2) C n mod 4 = 2
+L(fi0): sub up, up, #8 C n mod 4 = 0: bias pointers for entry at lo0
+ sub rp, rp, #12
+ b L(lo0)
+L(fi2): subs n, n, #2
+ sub rp, rp, #4
+ bls L(end) C n = 2: nothing left for the loop
+
+ ALIGN(16)
+C Main loop, four source limbs per iteration. Each source limb is multiplied
+C by v0 into the current destination limb and by v1 into the next one.
+L(top): ldr u0, [up, #4]
+ umaal r4, cya, u1, v0
+ str r4, [rp, #4]
+ ldr r4, [rp, #12]
+ umaal r5, cyb, u1, v1
+L(lo1): ldr u1, [up, #8]
+ umaal r5, cya, u0, v0
+ str r5, [rp, #8]
+ ldr r5, [rp, #16]
+ umaal r4, cyb, u0, v1
+L(lo0): ldr u0, [up, #12]
+ umaal r4, cya, u1, v0
+ str r4, [rp, #12]
+ ldr r4, [rp, #20]
+ umaal r5, cyb, u1, v1
+L(lo3): ldr u1, [up, #16]! C load and advance up by four limbs
+ umaal r5, cya, u0, v0
+ str r5, [rp, #16]! C store and advance rp by four limbs
+ ldr r5, [rp, #8]
+ umaal r4, cyb, u0, v1
+ subs n, n, #4
+ bhi L(top) C repeat unless the count reached zero or wrapped
+
+C Wind down: the last two source limbs.
+L(end): umaal r4, cya, u1, v0
+ ldr u0, [up, #4]
+ umaal r5, cyb, u1, v1
+ str r4, [rp, #4]
+ umaal r5, cya, u0, v0
+ umaal cya, cyb, u0, v1 C fold both carry chains together
+ str r5, [rp, #8]
+ str cya, [rp, #12] C one limb beyond the updated range
+ mov r0, cyb C return the top carry limb
+
+ pop { r4, r5, r6, r7, r8, r9 }
+ bx r14
+EPILOGUE()
--- /dev/null
+/* gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2003, 2009, 2010, 2012 Free
+Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+/* Limb geometry used by every value below: 32-bit limbs, 4 bytes each. */
+#define GMP_LIMB_BITS 32
+#define BYTES_PER_MP_LIMB 4
+
+/* 700MHz ARM11 (raspberry pi) */
+/* NOTE(review): the values below look like machine-generated tuning output
+ for the CPU named above; presumably they should be regenerated rather than
+ edited by hand - confirm. */
+
+#define DIVREM_1_NORM_THRESHOLD 0 /* preinv always */
+#define DIVREM_1_UNNORM_THRESHOLD 0 /* always */
+#define MOD_1_1P_METHOD 2
+#define MOD_1_NORM_THRESHOLD 0 /* always */
+#define MOD_1_UNNORM_THRESHOLD 0 /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 3
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 7
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 0 /* never mpn_mod_1_1p */
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD MP_SIZE_T_MAX
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 29
+#define USE_PREINV_DIVREM_1 1 /* preinv always */
+#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
+#define DIVEXACT_1_THRESHOLD 0 /* always */
+#define BMOD_1_TO_MOD_1_THRESHOLD 33
+
+#define MUL_TOOM22_THRESHOLD 36
+#define MUL_TOOM33_THRESHOLD 117
+#define MUL_TOOM44_THRESHOLD 462
+#define MUL_TOOM6H_THRESHOLD 0 /* always */
+#define MUL_TOOM8H_THRESHOLD 620
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD 130
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 573
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 209
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD 209
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD 305
+
+#define SQR_BASECASE_THRESHOLD 0 /* always (native) */
+#define SQR_TOOM2_THRESHOLD 50
+#define SQR_TOOM3_THRESHOLD 181
+#define SQR_TOOM4_THRESHOLD 686
+#define SQR_TOOM6_THRESHOLD 0 /* always */
+#define SQR_TOOM8_THRESHOLD 915
+
+#define MULMID_TOOM42_THRESHOLD 72
+
+#define MULMOD_BNM1_THRESHOLD 25
+#define SQRMOD_BNM1_THRESHOLD 30
+
+#define MUL_FFT_MODF_THRESHOLD 476 /* k = 5 */
+#define MUL_FFT_TABLE3 \
+ { { 476, 5}, { 21, 6}, { 11, 5}, { 25, 6}, \
+ { 13, 5}, { 27, 6}, { 25, 7}, { 13, 6}, \
+ { 28, 7}, { 15, 6}, { 32, 7}, { 17, 6}, \
+ { 35, 7}, { 19, 6}, { 39, 7}, { 23, 6}, \
+ { 47, 7}, { 29, 8}, { 15, 7}, { 35, 8}, \
+ { 19, 7}, { 43, 8}, { 23, 7}, { 51, 8}, \
+ { 27, 7}, { 55, 8}, { 31, 7}, { 63, 8}, \
+ { 43, 9}, { 23, 8}, { 55, 9}, { 31, 8}, \
+ { 71, 9}, { 39, 8}, { 83, 9}, { 47, 8}, \
+ { 95, 9}, { 55,10}, { 31, 9}, { 79,10}, \
+ { 47, 9}, { 103,11}, { 31,10}, { 63, 9}, \
+ { 135,10}, { 79, 9}, { 159,10}, { 95, 9}, \
+ { 191,10}, { 111,11}, { 63,10}, { 127, 9}, \
+ { 255,10}, { 143, 9}, { 287,10}, { 159,11}, \
+ { 95,10}, { 191, 9}, { 383,12}, { 4096,13}, \
+ { 8192,14}, { 16384,15}, { 32768,16} }
+/* Matches the 63 entries listed in MUL_FFT_TABLE3 above. */
+#define MUL_FFT_TABLE3_SIZE 63
+#define MUL_FFT_THRESHOLD 4736
+
+#define SQR_FFT_MODF_THRESHOLD 464 /* k = 5 */
+#define SQR_FFT_TABLE3 \
+ { { 464, 5}, { 25, 6}, { 13, 5}, { 27, 6}, \
+ { 29, 7}, { 15, 6}, { 33, 7}, { 17, 6}, \
+ { 36, 7}, { 19, 6}, { 39, 7}, { 23, 6}, \
+ { 47, 7}, { 29, 8}, { 15, 7}, { 35, 8}, \
+ { 19, 7}, { 43, 8}, { 23, 7}, { 49, 8}, \
+ { 27, 7}, { 55, 8}, { 31, 7}, { 63, 8}, \
+ { 35, 7}, { 71, 8}, { 43, 9}, { 23, 8}, \
+ { 55, 9}, { 31, 8}, { 71, 9}, { 39, 8}, \
+ { 83, 9}, { 47, 8}, { 95, 9}, { 55,10}, \
+ { 31, 9}, { 79,10}, { 47, 9}, { 103,11}, \
+ { 31,10}, { 63, 9}, { 135,10}, { 79, 9}, \
+ { 159,10}, { 95, 9}, { 191,10}, { 111,11}, \
+ { 63,10}, { 127, 9}, { 255,10}, { 143, 9}, \
+ { 287,10}, { 159,11}, { 95,10}, { 191, 9}, \
+ { 383,12}, { 4096,13}, { 8192,14}, { 16384,15}, \
+ { 32768,16} }
+/* Matches the 61 entries listed in SQR_FFT_TABLE3 above. */
+#define SQR_FFT_TABLE3_SIZE 61
+#define SQR_FFT_THRESHOLD 3776
+
+#define MULLO_BASECASE_THRESHOLD 0 /* always */
+#define MULLO_DC_THRESHOLD 67
+#define MULLO_MUL_N_THRESHOLD 8907
+
+#define DC_DIV_QR_THRESHOLD 40
+#define DC_DIVAPPR_Q_THRESHOLD 156
+#define DC_BDIV_QR_THRESHOLD 71
+#define DC_BDIV_Q_THRESHOLD 208
+
+#define INV_MULMOD_BNM1_THRESHOLD 70
+#define INV_NEWTON_THRESHOLD 151
+#define INV_APPR_THRESHOLD 150
+
+#define BINV_NEWTON_THRESHOLD 375
+#define REDC_1_TO_REDC_2_THRESHOLD 5
+#define REDC_2_TO_REDC_N_THRESHOLD 134
+
+#define MU_DIV_QR_THRESHOLD 2130
+#define MU_DIVAPPR_Q_THRESHOLD 2130
+#define MUPI_DIV_QR_THRESHOLD 80
+#define MU_BDIV_QR_THRESHOLD 1787
+#define MU_BDIV_Q_THRESHOLD 2130
+
+#define POWM_SEC_TABLE 7,32,460,1705
+
+#define MATRIX22_STRASSEN_THRESHOLD 19
+#define HGCD_THRESHOLD 85
+#define HGCD_APPR_THRESHOLD 119
+#define HGCD_REDUCE_THRESHOLD 3389
+#define GCD_DC_THRESHOLD 333
+#define GCDEXT_DC_THRESHOLD 309
+#define JACOBI_BASE_METHOD 1
+
+#define GET_STR_DC_THRESHOLD 21
+#define GET_STR_PRECOMPUTE_THRESHOLD 41
+#define SET_STR_DC_THRESHOLD 527
+#define SET_STR_PRECOMPUTE_THRESHOLD 1323
+
+#define FAC_DSC_THRESHOLD 414
+#define FAC_ODD_THRESHOLD 154
--- /dev/null
+dnl ARM mpn_mul_1.
+
+dnl Copyright 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C StrongARM: -
+C XScale -
+C Cortex-A8 ?
+C Cortex-A9 3.25
+C Cortex-A15 ?
+
+C TODO
+C * Micro-optimise feed-in code.
+C * Optimise for n=1,2 by delaying register saving.
+C * Try using ldm/stm.
+
+C Register roles: rp (r0) is the destination vector, which is only written;
+C up (r1) is the source vector; n (r2) is the limb count; v0 (r3) is the
+C single multiplier limb.
+define(`rp',`r0')
+define(`up',`r1')
+define(`n', `r2')
+define(`v0',`r3')
+
+ASM_START()
+C mpn_mul_1: store up[] * v0 into rp[] over n limbs and return the carry limb
+C in r0. Same structure as a multiply-accumulate loop, except that the
+C accumulator register is cleared with mov instead of loaded from rp.
+C NOTE(review): n = 0 would take the fi0 entry and touch limbs, so callers
+C presumably guarantee n >= 1 - confirm.
+PROLOGUE(mpn_mul_1)
+ stmfd sp!, { r4, r5, r6, r7 } C save scratch registers
+
+ ands r6, n, #3 C r6 = n mod 4
+ mov r12, #0 C carry limb = 0
+ beq L(fi0) C n mod 4 = 0
+ cmp r6, #2
+ bcc L(fi1) C n mod 4 = 1
+ beq L(fi2) C n mod 4 = 2
+
+L(fi3): ldr r4, [up], #4 C n mod 4 = 3: first source limb
+ mov r6, #0
+ ldr r5, [up], #4 C second source limb
+ b L(lo3)
+
+L(fi0): ldr r5, [up], #4
+ add rp, rp, #4 C bias rp to match the offsets used from lo0 on
+ mov r7, #0
+ ldr r4, [up], #4
+ b L(lo0)
+
+L(fi1): ldr r4, [up], #4
+ mov r6, #0
+ add rp, rp, #8 C bias rp to match the offsets used from lo1 on
+ subs n, n, #1
+ beq L(1) C single limb: go straight to the final multiply
+ ldr r5, [up], #4
+ b L(lo1)
+
+L(fi2): ldr r5, [up], #4
+ add rp, rp, #12 C bias rp to match the offsets used from lo2 on
+ mov r7, #0
+ ldr r4, [up], #4
+ b L(lo2)
+
+ ALIGN(16)
+C Main loop. Each umaal computes source limb * v0 + 0 + r12, leaving the low
+C word in r6 or r7 and the high word in r12.
+L(top): mov r6, #0
+ ldr r5, [up], #4
+ str r7, [rp, #-12]
+L(lo1): umaal r6, r12, r4, v0
+ mov r7, #0
+ ldr r4, [up], #4
+ str r6, [rp, #-8]
+L(lo0): umaal r7, r12, r5, v0
+ mov r6, #0
+ ldr r5, [up], #4
+ str r7, [rp, #-4]
+L(lo3): umaal r6, r12, r4, v0
+ mov r7, #0
+ ldr r4, [up], #4
+ str r6, [rp], #16 C store and advance rp by four limbs
+L(lo2): umaal r7, r12, r5, v0
+ subs n, n, #4
+ bhi L(top) C repeat unless the count reached zero or wrapped
+
+ mov r6, #0 C wind down
+ str r7, [rp, #-12]
+L(1): umaal r6, r12, r4, v0 C final limb
+ str r6, [rp, #-8]
+ mov r0, r12 C return the carry limb
+ ldmfd sp!, { r4, r5, r6, r7 }
+ bx lr
+EPILOGUE()
--- /dev/null
+dnl ARM mpn_mul_2.
+
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
+dnl Copyright 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C StrongARM: -
+C XScale -
+C Cortex-A8 ?
+C Cortex-A9 2.25
+C Cortex-A15 ?
+
+C TODO
+C * This is a trivial edit of the addmul_2 code. Check for simplifications,
+C and possible speedups to 2.0 c/l.
+
+C Argument registers: rp (r0), up (r1), n (r2) and vp (r3). vp is only
+C needed until both multiplier limbs have been loaded, so r3 is reused as u0.
+define(`rp',`r0')
+define(`up',`r1')
+define(`n', `r2')
+define(`vp',`r3')
+
+define(`v0',`r6')
+define(`v1',`r7')
+define(`u0',`r3')
+define(`u1',`r9')
+
+C Two independent carry limbs: cya follows the v0 products, cyb the v1 products.
+define(`cya',`r8')
+define(`cyb',`r12')
+
+
+ASM_START()
+C mpn_mul_2: store up[] times the two limbs loaded from vp into rp[].
+C r4 and r5 alternate as the result limb being built and are cleared with
+C mov rather than loaded from rp. At the end cya is stored as one further
+C limb of rp and cyb is returned in r0.
+PROLOGUE(mpn_mul_2)
+ push { r4, r5, r6, r7, r8, r9 } C save scratch registers
+
+ ldm vp, { v0, v1 } C load both multiplier limbs
+ mov cya, #0
+ mov cyb, #0
+
+ tst n, #1
+ beq L(evn) C even n
+L(odd): mov r5, #0
+ ldr u0, [up, #0]
+ mov r4, #0
+ tst n, #2
+ beq L(fi1) C n mod 4 = 1
+L(fi3): sub up, up, #12 C n mod 4 = 3: bias pointers for entry at lo3
+ sub rp, rp, #16
+ b L(lo3)
+L(fi1): sub n, n, #1
+ sub up, up, #4 C bias pointers for entry at lo1
+ sub rp, rp, #8
+ b L(lo1)
+L(evn): mov r4, #0
+ ldr u1, [up, #0]
+ mov r5, #0
+ tst n, #2
+ bne L(fi2) C n mod 4 = 2
+L(fi0): sub up, up, #8 C n mod 4 = 0: bias pointers for entry at lo0
+ sub rp, rp, #12
+ b L(lo0)
+L(fi2): subs n, n, #2
+ sub rp, rp, #4
+ bls L(end) C n = 2: nothing left for the loop
+
+ ALIGN(16)
+C Main loop, four source limbs per iteration. Each source limb is multiplied
+C by v0 into the current result limb and by v1 into the next one.
+L(top): ldr u0, [up, #4]
+ umaal r4, cya, u1, v0
+ str r4, [rp, #4]
+ mov r4, #0
+ umaal r5, cyb, u1, v1
+L(lo1): ldr u1, [up, #8]
+ umaal r5, cya, u0, v0
+ str r5, [rp, #8]
+ mov r5, #0
+ umaal r4, cyb, u0, v1
+L(lo0): ldr u0, [up, #12]
+ umaal r4, cya, u1, v0
+ str r4, [rp, #12]
+ mov r4, #0
+ umaal r5, cyb, u1, v1
+L(lo3): ldr u1, [up, #16]! C load and advance up by four limbs
+ umaal r5, cya, u0, v0
+ str r5, [rp, #16]! C store and advance rp by four limbs
+ mov r5, #0
+ umaal r4, cyb, u0, v1
+ subs n, n, #4
+ bhi L(top) C repeat unless the count reached zero or wrapped
+
+C Wind down: the last two source limbs.
+L(end): umaal r4, cya, u1, v0
+ ldr u0, [up, #4]
+ umaal r5, cyb, u1, v1
+ str r4, [rp, #4]
+ umaal r5, cya, u0, v0
+ umaal cya, cyb, u0, v1 C fold both carry chains together
+ str r5, [rp, #8]
+ str cya, [rp, #12] C one limb beyond the n result limbs
+ mov r0, cyb C return the top limb
+
+ pop { r4, r5, r6, r7, r8, r9 }
+ bx r14
+EPILOGUE()
--- /dev/null
+dnl ARM v6 mpn_sqr_basecase.
+
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
+dnl Copyright 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C Code structure:
+C
+C
+C m_2(0m4) m_2(2m4) m_2(1m4) m_2(3m4)
+C | | | |
+C | | | |
+C | | | |
+C \|/ \|/ \|/ \|/
+C ____________ ____________
+C / \ / \
+C \|/ \ \|/ \
+C am_2(3m4) am_2(1m4) am_2(0m4) am_2(2m4)
+C \ /|\ \ /|\
+C \____________/ \____________/
+C \ /
+C \ /
+C \ /
+C tail(0m2) tail(1m2)
+C \ /
+C \ /
+C sqr_diag_addlsh1
+
+C TODO
+C * Further tweak counter and updates in outer loops. (This could save
+C perhaps 5n cycles).
+C * Try to use fewer registers. Perhaps coalesce r10 branch target and n_saved.
+C (This could save 2-3 cycles for n > 4.)
+C * Optimise sqr_diag_addlsh1 loop. (This could save O(n) cycles.)
+C * Implement larger final corners (xit/tix). Also stop loops earlier
+C suppressing writes of upper-most rp[] values. (This could save 10-20
+C cycles for n > 4.)
+C * Is the branch table really faster than discrete branches?
+
+C Argument registers: rp (r0) result, up (r1) source, n (r2) limb count.
+define(`rp', r0)
+define(`up', r1)
+define(`n', r2)
+
+C Working registers for the general case (n > 4): v0 and v1 are the current
+C pair of multiplier limbs, i is the inner loop counter, n_saved keeps the
+C original n for the final diagonal pass, cya and cyb are the two carry limbs,
+C u0 and u1 hold source limbs in flight.
+define(`v0', r3)
+define(`v1', r6)
+define(`i', r8)
+define(`n_saved', r14)
+define(`cya', r11)
+define(`cyb', r12)
+define(`u0', r7)
+define(`u1', r9)
+
+ASM_START()
+C mpn_sqr_basecase: square the n-limb number at up into rp.
+C For n <= 4 straight-line code is used. For larger n the cross products are
+C accumulated two multiplier limbs at a time (first pass stores, later passes
+C add), and a final pass doubles that sum and adds the squares of each limb.
+PROLOGUE(mpn_sqr_basecase)
+ and r12, n, #3 C table index = n mod 4
+ cmp n, #4
+ addgt r12, r12, #4 C n > 4 selects the second half of the table
+ add pc, pc, r12, lsl #2 C computed jump; pc reads as this insn + 8
+ nop C filler so that index 0 lands on the first branch
+ b L(4)
+ b L(1)
+ b L(2)
+ b L(3)
+ b L(0m4)
+ b L(1m4)
+ b L(2m4)
+ b L(3m4)
+
+
+C Feed-in code for the first pass, one variant per n mod 4. Each variant saves
+C registers, records n in n_saved, puts the continuation address for the end
+C of the first pass in r10, loads v0 and v1 plus one more limb, and stores the
+C low limb of v1 * v0 before entering the shared loop at the matching point.
+L(1m4): push {r4-r10,r11,r14}
+ mov n_saved, n
+ sub i, n, #4
+ sub n, n, #2
+ add r10, pc, #L(am2_2m4)-.-8 C r10 = address of L(am2_2m4)
+ ldm up, {v0,v1,u0}
+ sub up, up, #4
+ mov cyb, #0
+ mov r5, #0
+ umull r4, cya, v1, v0
+ str r4, [rp], #-12
+ mov r4, #0
+ b L(ko0)
+
+L(3m4): push {r4-r10,r11,r14}
+ mov n_saved, n
+ sub i, n, #4
+ sub n, n, #2
+ add r10, pc, #L(am2_0m4)-.-8 C r10 = address of L(am2_0m4)
+ ldm up, {v0,v1,u0}
+ add up, up, #4
+ mov cyb, #0
+ mov r5, #0
+ umull r4, cya, v1, v0
+ str r4, [rp], #-4
+ mov r4, #0
+ b L(ko2)
+
+L(2m4): push {r4-r10,r11,r14}
+ mov n_saved, n
+ sub i, n, #4
+ sub n, n, #2
+ add r10, pc, #L(am2_3m4)-.-8 C r10 = address of L(am2_3m4)
+ ldm up, {v0,v1,u1}
+ mov cyb, #0
+ mov r4, #0
+ umull r5, cya, v1, v0
+ str r5, [rp], #-8
+ mov r5, #0
+ b L(ko1)
+
+L(0m4): push {r4-r10,r11,r14}
+ mov n_saved, n
+ sub i, n, #4
+ sub n, n, #2
+ add r10, pc, #L(am2_1m4)-.-8 C r10 = address of L(am2_1m4)
+ ldm up, {v0,v1,u1}
+ mov cyb, #0
+ mov r4, #0
+ add up, up, #8
+ umull r5, cya, v1, v0
+ str r5, [rp, #0]
+ mov r5, #0
+
+C First pass loop: results are stored, not accumulated, so r4 and r5 are
+C cleared with mov after each store.
+L(top): ldr u0, [up, #4]
+ umaal r4, cya, u1, v0
+ str r4, [rp, #4]
+ mov r4, #0
+ umaal r5, cyb, u1, v1
+L(ko2): ldr u1, [up, #8]
+ umaal r5, cya, u0, v0
+ str r5, [rp, #8]
+ mov r5, #0
+ umaal r4, cyb, u0, v1
+L(ko1): ldr u0, [up, #12]
+ umaal r4, cya, u1, v0
+ str r4, [rp, #12]
+ mov r4, #0
+ umaal r5, cyb, u1, v1
+L(ko0): ldr u1, [up, #16]!
+ umaal r5, cya, u0, v0
+ str r5, [rp, #16]!
+ mov r5, #0
+ umaal r4, cyb, u0, v1
+ subs i, i, #4
+ bhi L(top)
+ bx r10 C continue at the wind-down chosen by the feed-in
+
+C Accumulating passes, even chain. Each pass reloads v0 and v1 from the
+C current up position, adds their products into rp, then rewinds up and rp
+C by n limbs and steps on for the next pass. n drops by 2 per pass.
+L(evnloop):
+ subs i, n, #4
+ sub n, n, #2
+ blt L(tix) C too few limbs left: final corner
+ ldm up, {v0,v1,u0}
+ add up, up, #4
+ mov cya, #0
+ mov cyb, #0
+ ldm rp, {r4,r5}
+ sub rp, rp, #4
+ umaal r4, cya, v1, v0
+ str r4, [rp, #4]
+ ldr r4, [rp, #12]
+ b L(lo2)
+L(ua2): ldr u0, [up, #4]
+ umaal r4, cya, u1, v0
+ str r4, [rp, #4]
+ ldr r4, [rp, #12]
+ umaal r5, cyb, u1, v1
+L(lo2): ldr u1, [up, #8]
+ umaal r5, cya, u0, v0
+ str r5, [rp, #8]
+ ldr r5, [rp, #16]
+ umaal r4, cyb, u0, v1
+ ldr u0, [up, #12]
+ umaal r4, cya, u1, v0
+ str r4, [rp, #12]
+ ldr r4, [rp, #20]
+ umaal r5, cyb, u1, v1
+ ldr u1, [up, #16]!
+ umaal r5, cya, u0, v0
+ str r5, [rp, #16]!
+ ldr r5, [rp, #8]
+ umaal r4, cyb, u0, v1
+ subs i, i, #4
+ bhi L(ua2)
+C Wind-down of a pass, also the continuation target of the 3m4 feed-in.
+L(am2_0m4):
+ umaal r4, cya, u1, v0
+ ldr u0, [up, #4]
+ umaal r5, cyb, u1, v1
+ str r4, [rp, #4]
+ umaal r5, cya, u0, v0
+ umaal cya, cyb, u0, v1
+ str r5, [rp, #8]
+ str cya, [rp, #12]
+ str cyb, [rp, #16]
+ sub up, up, n, lsl #2 C rewind up by n limbs
+ sub rp, rp, n, lsl #2 C rewind rp by n limbs
+ add up, up, #8 C step to the next pair of multiplier limbs
+ sub i, n, #4
+ sub n, n, #2
+ ldm up, {v0,v1,u0}
+ sub up, up, #4
+ mov cya, #0
+ mov cyb, #0
+ ldr r4, [rp, #24]
+ ldr r5, [rp, #28]
+ add rp, rp, #12
+ umaal r4, cya, v1, v0
+ str r4, [rp, #12]
+ ldr r4, [rp, #20]
+ b L(lo0)
+L(ua0): ldr u0, [up, #4]
+ umaal r4, cya, u1, v0
+ str r4, [rp, #4]
+ ldr r4, [rp, #12]
+ umaal r5, cyb, u1, v1
+ ldr u1, [up, #8]
+ umaal r5, cya, u0, v0
+ str r5, [rp, #8]
+ ldr r5, [rp, #16]
+ umaal r4, cyb, u0, v1
+ ldr u0, [up, #12]
+ umaal r4, cya, u1, v0
+ str r4, [rp, #12]
+ ldr r4, [rp, #20]
+ umaal r5, cyb, u1, v1
+L(lo0): ldr u1, [up, #16]!
+ umaal r5, cya, u0, v0
+ str r5, [rp, #16]!
+ ldr r5, [rp, #8]
+ umaal r4, cyb, u0, v1
+ subs i, i, #4
+ bhi L(ua0)
+C Wind-down of a pass, also the continuation target of the 1m4 feed-in.
+L(am2_2m4):
+ umaal r4, cya, u1, v0
+ ldr u0, [up, #4]
+ umaal r5, cyb, u1, v1
+ str r4, [rp, #4]
+ umaal r5, cya, u0, v0
+ umaal cya, cyb, u0, v1
+ str r5, [rp, #8]
+ str cya, [rp, #12]
+ str cyb, [rp, #16]
+ sub up, up, n, lsl #2 C rewind up by n limbs
+ sub rp, rp, n, lsl #2 C rewind rp by n limbs
+ add up, up, #8
+ add rp, rp, #24
+ b L(evnloop)
+
+
+C Accumulating passes, odd chain. Same scheme as the even chain above with
+C different loop entry points.
+L(oddloop):
+ subs i, n, #4
+ sub n, n, #2
+ blt L(xit) C too few limbs left: final corner
+ ldm up, {v0,v1,u1}
+ mov cya, #0
+ mov cyb, #0
+ sub rp, rp, #8
+ ldr r5, [rp, #8]
+ ldr r4, [rp, #12]
+ umaal r5, cya, v1, v0
+ str r5, [rp, #8]
+ ldr r5, [rp, #16]
+ b L(lo1)
+L(ua1): ldr u0, [up, #4]
+ umaal r4, cya, u1, v0
+ str r4, [rp, #4]
+ ldr r4, [rp, #12]
+ umaal r5, cyb, u1, v1
+ ldr u1, [up, #8]
+ umaal r5, cya, u0, v0
+ str r5, [rp, #8]
+ ldr r5, [rp, #16]
+ umaal r4, cyb, u0, v1
+L(lo1): ldr u0, [up, #12]
+ umaal r4, cya, u1, v0
+ str r4, [rp, #12]
+ ldr r4, [rp, #20]
+ umaal r5, cyb, u1, v1
+ ldr u1, [up, #16]!
+ umaal r5, cya, u0, v0
+ str r5, [rp, #16]!
+ ldr r5, [rp, #8]
+ umaal r4, cyb, u0, v1
+ subs i, i, #4
+ bhi L(ua1)
+C Wind-down of a pass, also the continuation target of the 2m4 feed-in.
+L(am2_3m4):
+ umaal r4, cya, u1, v0
+ ldr u0, [up, #4]
+ umaal r5, cyb, u1, v1
+ str r4, [rp, #4]
+ umaal r5, cya, u0, v0
+ umaal cya, cyb, u0, v1
+ str r5, [rp, #8]
+ str cya, [rp, #12]
+ str cyb, [rp, #16]
+ sub up, up, n, lsl #2 C rewind up by n limbs
+ sub rp, rp, n, lsl #2 C rewind rp by n limbs
+ add up, up, #8
+ add rp, rp, #24
+ subs i, n, #4 C flags from here are consumed by the bls below
+ sub n, n, #2
+ ldm up, {v0,v1,u1}
+ mov cya, #0
+ mov cyb, #0
+ ldr r5, [rp, #0]
+ ldr r4, [rp, #4]
+ add up, up, #8
+ umaal r5, cya, v1, v0
+ str r5, [rp, #0]
+ ldr r5, [rp, #8]
+ bls L(e3) C skip the loop when i is zero or wrapped
+L(ua3): ldr u0, [up, #4]
+ umaal r4, cya, u1, v0
+ str r4, [rp, #4]
+ ldr r4, [rp, #12]
+ umaal r5, cyb, u1, v1
+ ldr u1, [up, #8]
+ umaal r5, cya, u0, v0
+ str r5, [rp, #8]
+ ldr r5, [rp, #16]
+ umaal r4, cyb, u0, v1
+ ldr u0, [up, #12]
+ umaal r4, cya, u1, v0
+ str r4, [rp, #12]
+ ldr r4, [rp, #20]
+ umaal r5, cyb, u1, v1
+ ldr u1, [up, #16]!
+ umaal r5, cya, u0, v0
+ str r5, [rp, #16]!
+ ldr r5, [rp, #8]
+ umaal r4, cyb, u0, v1
+ subs i, i, #4
+ bhi L(ua3)
+L(e3):
+C Wind-down of a pass, also the continuation target of the 0m4 feed-in.
+L(am2_1m4):
+ umaal r4, cya, u1, v0
+ ldr u0, [up, #4]
+ umaal r5, cyb, u1, v1
+ str r4, [rp, #4]
+ umaal r5, cya, u0, v0
+ umaal cya, cyb, u0, v1
+ str r5, [rp, #8]
+ str cya, [rp, #12]
+ str cyb, [rp, #16]
+ sub up, up, n, lsl #2 C rewind up by n limbs
+ sub rp, rp, n, lsl #2 C rewind rp by n limbs
+ add up, up, #8
+ add rp, rp, #24
+ b L(oddloop)
+
+C Final corner of the odd chain: one remaining cross product u0 * v0.
+L(xit): ldm up!, {v0,u0}
+ ldr cya, [rp], #12
+ mov cyb, #0
+ umaal cya, cyb, u0, v0
+ b L(sqr_diag_addlsh1)
+
+C Final corner of the even chain: the three cross products of the last three
+C limbs. Falls through into the diagonal pass.
+L(tix): ldm up!, {v0,v1,u0}
+ ldm rp, {r4,r5}
+ mov cya, #0
+ mov cyb, #0
+ umaal r4, cya, v1, v0
+ umaal r5, cya, u0, v0
+ stm rp, {r4,r5}
+ umaal cya, cyb, u0, v1
+ add rp, rp, #20
+C b L(sqr_diag_addlsh1)
+
+
+C Register names for the diagonal pass. These reuse r6-r9, which held v1, u0,
+C i and u1 during the cross-product passes.
+define(`w0', r6)
+define(`w1', r7)
+define(`w2', r8)
+define(`rbx', r9)
+
+C Diagonal pass: store the last two cross-product limbs, rewind up and rp to
+C the start using n_saved, then for each source limb double two result limbs
+C (adcs of a register with itself) and add in the square of the limb.
+L(sqr_diag_addlsh1):
+ str cya, [rp, #-12]
+ str cyb, [rp, #-8]
+ sub n, n_saved, #1 C loop count = original n - 1
+ sub up, up, n_saved, lsl #2 C rewind up by the original n limbs
+ sub rp, rp, n_saved, lsl #3 C rewind rp by twice the original n limbs
+ ldr r3, [up], #4
+ umull w1, r5, r3, r3 C square of the lowest limb
+ mov w2, #0
+C cmn r0, #0 C clear cy (already clear by luck)
+ b L(lm)
+
+L(tsd): adds w0, w0, rbx C add high half of the previous square plus carry
+ adcs w1, w1, r4 C add low half of the current square
+ str w0, [rp, #0]
+L(lm): ldr w0, [rp, #4]
+ str w1, [rp, #4]
+ ldr w1, [rp, #8]!
+ add rbx, r5, w2 C rbx = high half of square + saved carry bit
+ adcs w0, w0, w0 C double, shifting in the carry
+ ldr r3, [up], #4
+ adcs w1, w1, w1 C double, shifting in the carry
+ mov w2, #0
+ adc w2, w2, w2 C w2 = carry bit out of the doubling
+ umull r4, r5, r3, r3 C square of the next limb
+ subs n, n, #1
+ bne L(tsd)
+
+ adds w0, w0, rbx
+ adcs w1, w1, r4
+ adc w2, r5, w2
+ stm rp, {w0,w1,w2} C top three result limbs
+
+ pop {r4-r10,r11,pc} C restore registers and return
+
+
+C Straight line code for n <= 4
+
+C n = 1: a single 32 x 32 -> 64 bit square.
+L(1): ldr r3, [up, #0]
+ umull r1, r2, r3, r3
+ stm rp, {r1,r2}
+ bx r14
+
+C n = 2: two squares plus twice the single cross product.
+L(2): push {r4-r5}
+ ldm up, {r5,r12}
+ umull r1, r2, r5, r5
+ umull r3, r4, r12, r12
+ umull r5, r12, r5, r12 C cross product
+ adds r5, r5, r5 C double the cross product
+ adcs r12, r12, r12
+ adc r4, r4, #0
+ adds r2, r2, r5 C add it at limb offset 1
+ adcs r3, r3, r12
+ adc r4, r4, #0
+ stm rp, {r1,r2,r3,r4}
+ pop {r4-r5}
+ bx r14
+
+C n = 3: three squares in r1-r6, cross products summed in r10,r11,r12,r7,
+C then doubled and added at limb offset 1.
+L(3): push {r4-r11}
+ ldm up, {r7,r8,r9}
+ umull r1, r2, r7, r7
+ umull r3, r4, r8, r8
+ umull r5, r6, r9, r9
+ umull r10, r11, r7, r8
+ mov r12, #0
+ umlal r11, r12, r7, r9
+ mov r7, #0
+ umlal r12, r7, r8, r9
+ adds r10, r10, r10 C double the cross-product sum
+ adcs r11, r11, r11
+ adcs r12, r12, r12
+ adcs r7, r7, r7
+ adc r6, r6, #0
+ adds r2, r2, r10 C add it to the squares
+ adcs r3, r3, r11
+ adcs r4, r4, r12
+ adcs r5, r5, r7
+ adc r6, r6, #0
+ stm rp, {r1,r2,r3,r4,r5,r6}
+ pop {r4-r11}
+ bx r14
+
+C n = 4: the squares are stored to rp first (top limb kept in r8), the cross
+C products are summed in r1-r6 and doubled, then the stored squares from limb
+C offset 1 upwards are reloaded and added.
+L(4): push {r4-r11, r14}
+ ldm up, {r9,r10,r11,r12}
+ umull r1, r2, r9, r9
+ umull r3, r4, r10, r10
+ umull r5, r6, r11, r11
+ umull r7, r8, r12, r12
+ stm rp, {r1,r2,r3,r4,r5,r6,r7}
+ umull r1, r2, r9, r10
+ mov r3, #0
+ umlal r2, r3, r9, r11
+ mov r4, #0
+ umlal r3, r4, r9, r12
+ mov r5, #0
+ umlal r3, r5, r10, r11
+ umaal r4, r5, r10, r12
+ mov r6, #0
+ umlal r5, r6, r11, r12
+ adds r1, r1, r1 C double the cross-product sum
+ adcs r2, r2, r2
+ adcs r3, r3, r3
+ adcs r4, r4, r4
+ adcs r5, r5, r5
+ adcs r6, r6, r6
+ adc r7, r8, #0 C top square limb plus carry
+ add rp, rp, #4
+ ldm rp, {r8,r9,r10,r11,r12,r14} C reload stored square limbs 1 to 6
+ adds r1, r1, r8
+ adcs r2, r2, r9
+ adcs r3, r3, r10
+ adcs r4, r4, r11
+ adcs r5, r5, r12
+ adcs r6, r6, r14
+ adc r7, r7, #0
+ stm rp, {r1,r2,r3,r4,r5,r6,r7}
+ pop {r4-r11, pc}
+EPILOGUE()
--- /dev/null
+dnl ARM v6t2 mpn_divrem_1 and mpn_preinv_divrem_1.
+
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
+dnl Copyright 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C norm unorm frac
+C StrongARM ?
+C XScale ?
+C Cortex-A8 ?
+C Cortex-A9 13 14 13
+C Cortex-A15 ?
+
+C TODO
+C * Optimise inner-loops better, they could likely run a cycle or two faster.
+C * Decrease register usage, streamline non-loop code.
+
+C Incoming register arguments.
+define(`qp_arg', `r0')
+define(`fn', `r1')
+define(`up_arg', `r2')
+define(`n_arg', `r3')
+C Byte offsets of the stack arguments, relative to sp at function entry. The
+C code adds 9*4 to them because nine registers are pushed first.
+define(`d_arg', `0')
+define(`dinv_arg',`4')
+define(`cnt_arg', `8')
+
+C Working registers after the prologue.
+define(`n', `r9')
+define(`qp', `r5')
+define(`up', `r6')
+define(`cnt', `r7')
+define(`tnc', `r10')
+define(`dinv', `r0')
+define(`d', `r4')
+
+ASM_START()
+C mpn_preinv_divrem_1: entry point for callers that pass the inverse and the
+C shift count on the stack. It performs the same setup as mpn_divrem_1 and
+C then jumps into that function at L(nent) or L(uent), skipping the call to
+C mpn_invert_limb.
+PROLOGUE(mpn_preinv_divrem_1)
+ stmfd sp!, {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+ ldr d, [sp, #9*4+d_arg]
+ ldr cnt, [sp, #9*4+cnt_arg]
+ str r1, [sp, #9*4+d_arg] C reuse d stack slot for fn
+ sub n, r3, #1 C n = limb count - 1
+ add r3, r1, n
+ cmp d, #0 C flags consumed by the blt below
+ add qp, qp_arg, r3, lsl #2 C put qp at Q[] end
+ add up, up_arg, n, lsl #2 C put up at U[] end
+ ldr dinv, [sp, #9*4+dinv_arg]
+ blt L(nent) C top bit of d set: normalised path
+ b L(uent) C otherwise: unnormalised path
+EPILOGUE()
+
+C mpn_divrem_1: divide the limb vector at up by the single limb d, writing
+C quotient limbs downwards from the end of the qp area, followed by fn
+C further fraction quotient limbs. The remainder is returned in r0.
+C The quotient steps multiply the running remainder by dinv to estimate a
+C quotient limb and then apply up to two corrections.
+C L(uent) and L(nent) are also entered from mpn_preinv_divrem_1.
+PROLOGUE(mpn_divrem_1)
+ stmfd sp!, {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+ sub n, r3, #1 C n = limb count - 1
+ ldr d, [sp, #9*4+d_arg] C d
+ str r1, [sp, #9*4+d_arg] C reuse d stack slot for fn
+ add r3, r1, n
+ cmp d, #0 C flags consumed by the blt below
+ add qp, qp_arg, r3, lsl #2 C put qp at Q[] end
+ add up, up_arg, n, lsl #2 C put up at U[] end
+ blt L(normalised) C top bit of d set
+
+L(unnorm):
+ clz cnt, d C cnt = leading zero count of d
+ mov r0, d, lsl cnt C pass d << cnt
+ bl mpn_invert_limb C result in r0, used as dinv below
+L(uent):
+ mov d, d, lsl cnt C d <<= cnt
+ cmp n, #0 C flags consumed by the blt and the beq below
+ mov r1, #0 C r
+ blt L(frac) C no integer limbs
+
+ ldr r11, [up, #0] C top source limb
+
+ rsb tnc, cnt, #32 C tnc = 32 - cnt
+ mov r1, r11, lsr tnc C r = bits shifted out of the top limb
+ mov r11, r11, lsl cnt
+ beq L(uend) C single source limb
+
+ ldr r3, [up, #-4]!
+ orr r2, r11, r3, lsr tnc C next shifted limb
+ b L(mid)
+
+L(utop):
+ mls r1, d, r8, r11 C r = limb - d * q
+ mov r11, r3, lsl cnt
+ ldr r3, [up, #-4]!
+ cmp r1, r2
+ addhi r1, r1, d C first correction: q was one too large
+ subhi r8, r8, #1
+ orr r2, r11, r3, lsr tnc C next shifted limb
+ cmp r1, d
+ bcs L(ufx) C second correction, handled out of line
+L(uok): str r8, [qp], #-4 C store quotient limb, moving downwards
+L(mid): add r8, r1, #1
+ mov r11, r2
+ umlal r2, r8, r1, dinv C r8:r2 = r * dinv + (r + 1):limb
+ subs n, n, #1
+ bne L(utop)
+
+ mls r1, d, r8, r11 C same step for the last full limb
+ mov r11, r3, lsl cnt
+ cmp r1, r2
+ addhi r1, r1, d
+ subhi r8, r8, #1
+ cmp r1, d
+ rsbcs r1, d, r1 C r -= d when r >= d
+ addcs r8, r8, #1
+ str r8, [qp], #-4
+
+C Last integer step: only the low bits of the final limb remain in r11.
+L(uend):add r8, r1, #1
+ mov r2, r11
+ umlal r2, r8, r1, dinv
+ mls r1, d, r8, r11
+ cmp r1, r2
+ addhi r1, r1, d
+ subhi r8, r8, #1
+ cmp r1, d
+ rsbcs r1, d, r1
+ addcs r8, r8, #1
+ str r8, [qp], #-4
+C Fraction part: fn more quotient limbs with zero as the incoming limb.
+L(frac):
+ ldr r2, [sp, #9*4+d_arg] C fn
+ cmp r2, #0
+ beq L(fend)
+
+L(ftop):mov r6, #0 C r6 is free here, up is no longer needed
+ add r3, r1, #1
+ umlal r6, r3, r1, dinv
+ mov r8, #0
+ mls r1, d, r3, r8 C r = 0 - d * q
+ cmp r1, r6
+ addhi r1, r1, d
+ subhi r3, r3, #1
+ subs r2, r2, #1
+ str r3, [qp], #-4
+ bne L(ftop)
+
+L(fend):mov r11, r1, lsr cnt C undo the normalisation shift
+L(rtn): mov r0, r11 C return remainder
+ ldmfd sp!, {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+
+L(normalised):
+ mov r0, d
+ bl mpn_invert_limb C result in r0, used as dinv below
+L(nent):
+ cmp n, #0
+ mov r11, #0 C r
+ blt L(nend) C no integer limbs
+
+ ldr r11, [up, #0] C top source limb
+ cmp r11, d
+ movlo r2, #0 C hi q limb
+ movhs r2, #1 C hi q limb
+ subhs r11, r11, d
+
+ str r2, [qp], #-4
+ cmp n, #0
+ beq L(nend) C single source limb
+
+L(ntop):ldr r1, [up, #-4]!
+ add r12, r11, #1
+ umlal r1, r12, r11, dinv C r12:r1 = r * dinv + (r + 1):limb
+ ldr r3, [up, #0]
+ mls r11, d, r12, r3 C r = limb - d * q
+ cmp r11, r1
+ addhi r11, r11, d C first correction: q was one too large
+ subhi r12, r12, #1
+ cmp d, r11
+ bls L(nfx) C second correction, handled out of line
+L(nok): str r12, [qp], #-4
+ subs n, n, #1
+ bne L(ntop)
+
+L(nend):mov r1, r11 C r
+ mov cnt, #0 C shift cnt
+ b L(frac)
+
+C Out-of-line corrections for the case r >= d: subtract d and bump q.
+L(nfx): add r12, r12, #1
+ rsb r11, d, r11
+ b L(nok)
+L(ufx): rsb r1, d, r1
+ add r8, r8, #1
+ b L(uok)
+EPILOGUE()
--- /dev/null
+dnl ARM v6t2 mpn_gcd_1.
+
+dnl Based on the K7 gcd_1.asm, by Kevin Ryde. Rehacked for ARM by Torbjorn
+dnl Granlund.
+
+dnl Copyright 2000, 2001, 2002, 2005, 2009, 2011, 2012 Free Software
+dnl Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/bit (approx)
+C StrongARM ?
+C XScale ?
+C Cortex-A8 ?
+C Cortex-A9 5.30
+C Cortex-A15 ?
+C Numbers measured with: speed -CD -s8-32 -t24 mpn_gcd_1
+
+C TODO
+C * Optimise inner-loop better.
+
+C Threshold of when to call bmod when U is one limb. Should be about
+C (time_in_cycles(bmod_1,1) + call_overhead) / (cycles/bit).
+define(`BMOD_THRES_LOG2', 7)
+
+C INPUT PARAMETERS
+define(`up', `r0')
+define(`n', `r1')
+define(`v0', `r2')
+
+C Fallback when no tuned threshold is defined: with 0xffffffff the blo at
+C L(nby1) is taken for every smaller n.
+ifdef(`BMOD_1_TO_MOD_1_THRESHOLD',,
+ `define(`BMOD_1_TO_MOD_1_THRESHOLD',0xffffffff)')
+
+ASM_START()
+ TEXT
+ ALIGN(16)
+C mpn_gcd_1: greatest common divisor of the n-limb number at up and the
+C single limb v0, returned in r0. The common power of two is remembered in
+C r4, U is first reduced to a single limb by one of two helper calls unless
+C it is already small enough, and the rest is a subtract-and-shift loop on
+C single limbs.
+PROLOGUE(mpn_gcd_1)
+ push {r4, r7, lr}
+ ldr r3, [up] C U low limb
+
+ orr r3, r3, v0
+ rbit r4, r3
+ clz r4, r4 C min(ctz(u0),ctz(v0))
+
+ rbit r12, v0
+ clz r12, r12 C ctz(v0)
+ lsr v0, v0, r12 C strip trailing zero bits from v0
+
+ mov r7, v0 C keep a copy in a register that survives the calls
+
+ cmp n, #1
+ bne L(nby1)
+
+C Both U and V are single limbs, reduce with bmod if u0 >> v0.
+ ldr r3, [up]
+ cmp v0, r3, lsr #BMOD_THRES_LOG2
+ bhi L(red1) C u0 is not much larger: skip the call
+
+L(bmod):mov r3, #0 C carry argument
+ bl mpn_modexact_1c_odd
+ b L(red0)
+
+L(nby1):cmp n, #BMOD_1_TO_MOD_1_THRESHOLD
+ blo L(bmod)
+
+ bl mpn_mod_1
+
+L(red0):mov r3, r0 C reduced U from the helper call
+L(red1):cmp r3, #0
+ rbit r12, r3
+ clz r12, r12 C ctz of the reduced U
+ bne L(mid)
+ b L(end) C reduced U is zero: the answer is v0
+
+ ALIGN(8)
+L(top): movcs r3, r1 C if x-y < 0
+ movcs r7, r0 C use x,y-x
+L(mid): lsr r3, r3, r12 C strip trailing zero bits
+ mov r0, r3 C remember x
+ subs r1, r7, r3 C y - x, sets flags for the loop
+ rsb r3, r7, r3 C x - y
+ rbit r12, r1
+ clz r12, r12 C ctz of the difference
+ bne L(top) C loop until the difference is zero
+
+L(end): lsl r0, r7, r4 C restore the common power of two
+ pop {r4, r7, pc}
+EPILOGUE()
--- /dev/null
+dnl ARM mpn_modexact_1c_odd
+
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
+dnl Copyright 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C StrongARM ?
+C XScale ?
+C Cortex-A8 ?
+C Cortex-A9 9
+C Cortex-A15 ?
+
+C Argument registers: up (r0) source vector, n (r1) limb count, d (r2)
+C divisor, cy (r3) incoming carry. cy is also the running carry in the loop.
+define(`up', `r0')
+define(`n', `r1')
+define(`d', `r2')
+define(`cy', `r3')
+
+ .protected binvert_limb_table
+ASM_START()
+C mpn_modexact_1c_odd: process the n limbs at up from the lowest upwards,
+C multiplying each by the inverse of d modulo 2^32 and carrying the high
+C half of quotient limb * d into the next step. The final carry is returned
+C in r0.
+C NOTE(review): as the name says, d is presumably required to be odd, since
+C the inverse only exists then - confirm against callers.
+PROLOGUE(mpn_modexact_1c_odd)
+ stmfd sp!, {r4, r5, r6, r7}
+
+ LEA( r4, binvert_limb_table)
+
+ ldr r6, [up], #4 C up[0]
+
+ ubfx r12, d, #1, #7 C table index = bits 1 to 7 of d
+ ldrb r4, [r4, r12] C starting inverse from the byte table
+ smulbb r12, r4, r4 C inv * inv
+ mul r12, d, r12
+ rsb r12, r12, r4, asl #1 C refinement: inv = 2 * inv - d * inv * inv
+ mul r4, r12, r12
+ mul r4, d, r4
+ rsb r4, r4, r12, asl #1 C r4 = inverse
+
+ subs n, n, #1
+ sub r6, r6, cy C subtract the incoming carry from up[0]
+ mul r6, r6, r4 C first quotient limb
+ beq L(end) C single limb
+
+ rsb r5, r4, #0 C r5 = -inverse
+
+L(top): ldr r7, [up], #4
+ mov r12, #0
+ umaal r12, cy, r6, d C cy = high half of q * d + cy
+ mul r6, r7, r4 C next limb * inverse
+ mla r6, cy, r5, r6 C minus cy * inverse
+ subs n, n, #1
+ bne L(top)
+
+L(end): mov r12, #0
+ umaal r12, cy, r6, d C final carry
+ mov r0, cy C return it
+
+ ldmfd sp!, {r4, r5, r6, r7}
+ bx r14
+EPILOGUE()
--- /dev/null
+/* gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2003, 2009, 2010, 2012 Free
+Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#define GMP_LIMB_BITS 32
+#define BYTES_PER_MP_LIMB 4
+
+/* 1700MHz Cortex-A15 */
+
+#define MOD_1_NORM_THRESHOLD 0 /* always */
+#define MOD_1_UNNORM_THRESHOLD 0 /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 3
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 3
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 9
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD MP_SIZE_T_MAX
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 8
+#define USE_PREINV_DIVREM_1 1 /* native */
+#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
+#define DIVEXACT_1_THRESHOLD 0 /* always */
+#define BMOD_1_TO_MOD_1_THRESHOLD 15
+
+#define MUL_TOOM22_THRESHOLD 31
+#define MUL_TOOM33_THRESHOLD 109
+#define MUL_TOOM44_THRESHOLD 288
+#define MUL_TOOM6H_THRESHOLD 632
+#define MUL_TOOM8H_THRESHOLD 0 /* always */
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD 113
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 199
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 189
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD 211
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD 287
+
+#define SQR_BASECASE_THRESHOLD 0 /* always (native) */
+#define SQR_TOOM2_THRESHOLD 51
+#define SQR_TOOM3_THRESHOLD 169
+#define SQR_TOOM4_THRESHOLD 662
+#define SQR_TOOM6_THRESHOLD 951
+#define SQR_TOOM8_THRESHOLD 1005
+
+#define MULMID_TOOM42_THRESHOLD 44
+
+#define MULMOD_BNM1_THRESHOLD 17
+#define SQRMOD_BNM1_THRESHOLD 30
+
+/* The first MUL_FFT_TABLE3 entry repeats MUL_FFT_MODF_THRESHOLD, in the same
+   way the SQR_FFT_TABLE3 / SQR_FFT_MODF_THRESHOLD pair in this file does.  */
+#define MUL_FFT_MODF_THRESHOLD 525 /* k = 5 */
+#define MUL_FFT_TABLE3 \
+ { { 525, 5}, { 27, 6}, { 28, 7}, { 15, 6}, \
+ { 33, 7}, { 17, 6}, { 35, 7}, { 19, 6}, \
+ { 41, 7}, { 21, 8}, { 11, 7}, { 23, 6}, \
+ { 47, 7}, { 27, 8}, { 15, 7}, { 35, 8}, \
+ { 19, 7}, { 41, 8}, { 23, 7}, { 51, 8}, \
+ { 27, 9}, { 15, 8}, { 31, 7}, { 63, 8}, \
+ { 43, 9}, { 23, 8}, { 55, 9}, { 31, 8}, \
+ { 71, 9}, { 39, 8}, { 79, 9}, { 47, 8}, \
+ { 99, 9}, { 55,10}, { 31, 9}, { 79,10}, \
+ { 47, 9}, { 103,11}, { 31,10}, { 63, 9}, \
+ { 135,10}, { 79, 9}, { 159,10}, { 95, 9}, \
+ { 191,10}, { 111,11}, { 63,10}, { 127, 9}, \
+ { 255,10}, { 143, 9}, { 287,10}, { 159,11}, \
+ { 95,10}, { 191, 9}, { 383,12}, { 63,11}, \
+ { 127,10}, { 255, 9}, { 511,10}, { 271, 9}, \
+ { 543,10}, { 287,11}, { 159,10}, { 319, 9}, \
+ { 639,10}, { 351,11}, { 191,10}, { 383, 9}, \
+ { 767,10}, { 415,12}, { 127,11}, { 255,10}, \
+ { 543,11}, { 287,10}, { 607,11}, { 319,10}, \
+ { 671,11}, { 351,12}, { 191,11}, { 383,10}, \
+ { 799,11}, { 415,13}, { 127,12}, { 255,11}, \
+ { 543,10}, { 1087,11}, { 607,10}, { 1215,12}, \
+ { 319,11}, { 735,12}, { 383,11}, { 799,10}, \
+ { 1599,11}, { 831,12}, { 447,11}, { 959,13}, \
+ { 255,12}, { 511,11}, { 1087,12}, { 575,11}, \
+ { 1215,12}, { 703,13}, { 383,12}, { 959,14}, \
+ { 255,13}, { 511,12}, { 1215,13}, { 8192,14}, \
+ { 16384,15}, { 32768,16} }
+#define MUL_FFT_TABLE3_SIZE 110
+#define MUL_FFT_THRESHOLD 5760
+
+#define SQR_FFT_MODF_THRESHOLD 535 /* k = 5 */
+#define SQR_FFT_TABLE3 \
+ { { 535, 5}, { 27, 6}, { 16, 5}, { 33, 6}, \
+ { 29, 7}, { 15, 6}, { 33, 7}, { 17, 6}, \
+ { 35, 7}, { 19, 6}, { 41, 7}, { 21, 6}, \
+ { 43, 8}, { 11, 6}, { 45, 7}, { 23, 6}, \
+ { 47, 7}, { 25, 6}, { 51, 7}, { 27, 6}, \
+ { 55, 7}, { 29, 8}, { 15, 7}, { 35, 8}, \
+ { 19, 7}, { 43, 8}, { 23, 7}, { 51, 8}, \
+ { 27, 9}, { 15, 8}, { 31, 7}, { 63, 8}, \
+ { 35, 7}, { 71, 8}, { 43, 9}, { 23, 8}, \
+ { 55, 9}, { 31, 8}, { 71, 9}, { 39, 8}, \
+ { 83, 9}, { 47, 8}, { 95, 9}, { 55,10}, \
+ { 31, 9}, { 79,10}, { 47, 9}, { 103,11}, \
+ { 31,10}, { 63, 9}, { 135,10}, { 79, 9}, \
+ { 159,10}, { 95, 9}, { 191,10}, { 111,11}, \
+ { 63,10}, { 159,11}, { 95,10}, { 191,12}, \
+ { 63,11}, { 127,10}, { 255, 9}, { 511,10}, \
+ { 271, 9}, { 543,11}, { 159,10}, { 319, 9}, \
+ { 639,10}, { 335,11}, { 191,10}, { 383, 9}, \
+ { 767,10}, { 415,12}, { 127,11}, { 255,10}, \
+ { 511,11}, { 287,10}, { 607,11}, { 319,10}, \
+ { 639,12}, { 191,11}, { 383,10}, { 767,11}, \
+ { 415,13}, { 127,12}, { 255,11}, { 543,10}, \
+ { 1087,11}, { 607,10}, { 1215,12}, { 319,11}, \
+ { 735,12}, { 383,11}, { 831,12}, { 447,11}, \
+ { 959,13}, { 255,12}, { 511,11}, { 1087,12}, \
+ { 575,11}, { 1215,12}, { 703,13}, { 383,12}, \
+ { 959,14}, { 255,13}, { 511,12}, { 1215,13}, \
+ { 8192,14}, { 16384,15}, { 32768,16} }
+#define SQR_FFT_TABLE3_SIZE 111
+#define SQR_FFT_THRESHOLD 4928
+
+#define MULLO_BASECASE_THRESHOLD 0 /* always */
+#define MULLO_DC_THRESHOLD 27
+#define MULLO_MUL_N_THRESHOLD 8907
+
+#define DC_DIV_QR_THRESHOLD 31
+#define DC_DIVAPPR_Q_THRESHOLD 45
+#define DC_BDIV_QR_THRESHOLD 29
+#define DC_BDIV_Q_THRESHOLD 50
+
+#define INV_MULMOD_BNM1_THRESHOLD 66
+#define INV_NEWTON_THRESHOLD 171
+#define INV_APPR_THRESHOLD 65
+
+#define BINV_NEWTON_THRESHOLD 300
+#define REDC_1_TO_REDC_2_THRESHOLD 12
+#define REDC_2_TO_REDC_N_THRESHOLD 99
+
+#define MU_DIV_QR_THRESHOLD 1895
+#define MU_DIVAPPR_Q_THRESHOLD 1895
+#define MUPI_DIV_QR_THRESHOLD 54
+#define MU_BDIV_QR_THRESHOLD 1470
+#define MU_BDIV_Q_THRESHOLD 1895
+
+#define POWM_SEC_TABLE 6,44,548,1604
+
+#define MATRIX22_STRASSEN_THRESHOLD 22
+#define HGCD_THRESHOLD 40
+#define HGCD_APPR_THRESHOLD 50
+#define HGCD_REDUCE_THRESHOLD 3389
+#define GCD_DC_THRESHOLD 278
+#define GCDEXT_DC_THRESHOLD 180
+#define JACOBI_BASE_METHOD 4
+
+#define GET_STR_DC_THRESHOLD 18
+#define GET_STR_PRECOMPUTE_THRESHOLD 34
+#define SET_STR_DC_THRESHOLD 198
+#define SET_STR_PRECOMPUTE_THRESHOLD 541
+
+#define FAC_DSC_THRESHOLD 303
+#define FAC_ODD_THRESHOLD 28
dnl
dnl m4 macros for gmp assembly code, shared by all CPUs.
-dnl Copyright 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006 Free Software
-dnl Foundation, Inc.
+dnl Copyright 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2011 Free
+dnl Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
dnl Called: deflit_emptyargcheck(macroname,$#,`$1')
define(deflit_emptyargcheck,
`ifelse(eval($2==1 && !m4_dollarhash_1_if_noparen_p && m4_length(`$3')==0),1,
-`m4_error(`dont use a deflit as $1() because it loses the brackets (see deflit in asm-incl.m4 for more information)
+`m4_error(`dont use a deflit as $1() because it loses the brackets (see deflit in asm-defs.m4 for more information)
')')')
m4_not_for_expansion(`OPERATION_add_n')
m4_not_for_expansion(`OPERATION_sub_n')
+dnl aors_err1_n
+m4_not_for_expansion(`OPERATION_add_err1_n')
+m4_not_for_expansion(`OPERATION_sub_err1_n')
+
+dnl aors_err2_n
+m4_not_for_expansion(`OPERATION_add_err2_n')
+m4_not_for_expansion(`OPERATION_sub_err2_n')
+
+dnl aors_err3_n
+m4_not_for_expansion(`OPERATION_add_err3_n')
+m4_not_for_expansion(`OPERATION_sub_err3_n')
+
dnl aorsmul_1
m4_not_for_expansion(`OPERATION_addmul_1')
m4_not_for_expansion(`OPERATION_submul_1')
define(define_mpn,
m4_assert_numargs(1)
-`define(`mpn_$1',`MPN(`$1')')')
+`deflit(`mpn_$1',`MPN(`$1')')')
define_mpn(add)
define_mpn(add_1)
+define_mpn(add_err1_n)
+define_mpn(add_err2_n)
+define_mpn(add_err3_n)
define_mpn(add_n)
define_mpn(add_nc)
+define_mpn(addcnd_n)
define_mpn(addlsh1_n)
+define_mpn(addlsh1_nc)
define_mpn(addlsh2_n)
+define_mpn(addlsh2_nc)
define_mpn(addlsh_n)
+define_mpn(addlsh_nc)
+define_mpn(addlsh1_n_ip1)
+define_mpn(addlsh1_nc_ip1)
+define_mpn(addlsh2_n_ip1)
+define_mpn(addlsh2_nc_ip1)
+define_mpn(addlsh_n_ip1)
+define_mpn(addlsh_nc_ip1)
+define_mpn(addlsh1_n_ip2)
+define_mpn(addlsh1_nc_ip2)
+define_mpn(addlsh2_n_ip2)
+define_mpn(addlsh2_nc_ip2)
+define_mpn(addlsh_n_ip2)
+define_mpn(addlsh_nc_ip2)
define_mpn(addmul_1)
define_mpn(addmul_1c)
define_mpn(addmul_2)
define_mpn(addmul_3)
define_mpn(addmul_4)
+define_mpn(addmul_5)
+define_mpn(addmul_6)
+define_mpn(addmul_7)
+define_mpn(addmul_8)
+define_mpn(addmul_2s)
define_mpn(add_n_sub_n)
define_mpn(add_n_sub_nc)
define_mpn(addaddmul_1msb0)
define_mpn(copyi)
define_mpn(count_leading_zeros)
define_mpn(count_trailing_zeros)
+define_mpn(div_qr_2)
+define_mpn(div_qr_2n_pi1)
+define_mpn(div_qr_2u_pi1)
+define_mpn(div_qr_2n_pi2)
+define_mpn(div_qr_2u_pi2)
define_mpn(divexact_1)
define_mpn(divexact_by3c)
define_mpn(divrem)
define_mpn(get_str)
define_mpn(hamdist)
define_mpn(invert_limb)
+define_mpn(invert_limb_table)
define_mpn(ior_n)
define_mpn(iorn_n)
define_mpn(lshift)
define_mpn(mul_2)
define_mpn(mul_3)
define_mpn(mul_4)
+define_mpn(mul_5)
+define_mpn(mul_6)
define_mpn(mul_basecase)
define_mpn(mul_n)
define_mpn(mullo_basecase)
+define_mpn(mulmid_basecase)
define_mpn(perfect_square_p)
define_mpn(popcount)
define_mpn(preinv_divrem_1)
define_mpn(redc_1)
define_mpn(redc_2)
define_mpn(rsblsh1_n)
+define_mpn(rsblsh1_nc)
define_mpn(rsblsh2_n)
+define_mpn(rsblsh2_nc)
define_mpn(rsblsh_n)
+define_mpn(rsblsh_nc)
define_mpn(rsh1add_n)
define_mpn(rsh1add_nc)
define_mpn(rsh1sub_n)
define_mpn(set_str)
define_mpn(sqr_basecase)
define_mpn(sqr_diagonal)
+define_mpn(sqr_diag_addlsh1)
define_mpn(sub_n)
+define_mpn(subcnd_n)
define_mpn(sublsh1_n)
+define_mpn(sublsh1_nc)
+define_mpn(sublsh1_n_ip1)
+define_mpn(sublsh1_nc_ip1)
define_mpn(sublsh2_n)
+define_mpn(sublsh2_nc)
+define_mpn(sublsh2_n_ip1)
+define_mpn(sublsh2_nc_ip1)
+define_mpn(sublsh_n)
+define_mpn(sublsh_nc)
+define_mpn(sublsh_n_ip1)
+define_mpn(sublsh_nc_ip1)
define_mpn(sqrtrem)
define_mpn(sub)
define_mpn(sub_1)
+define_mpn(sub_err1_n)
+define_mpn(sub_err2_n)
+define_mpn(sub_err3_n)
define_mpn(sub_n)
define_mpn(sub_nc)
define_mpn(submul_1)
define_mpn(submul_1c)
+define_mpn(tabselect)
define_mpn(umul_ppmm)
define_mpn(umul_ppmm_r)
define_mpn(udiv_qrnnd)
)
+dnl Usage: ABI_SUPPORT(abi)
+dnl
+dnl A dummy macro which is grepped for by ./configure to know what ABIs
+dnl are supported in an asm file.
+dnl
+dnl If multiple non-standard ABIs are supported, several ABI_SUPPORT
+dnl declarations should be used:
+dnl
+dnl ABI_SUPPORT(FOOABI)
+dnl ABI_SUPPORT(BARABI)
+
+define(ABI_SUPPORT,
+m4_assert_numargs(1)
+)
+
+
dnl Usage: GMP_NUMB_MASK
dnl
dnl A bit mask for the number part of a limb. Eg. with 6 bit nails in a
`m4_hex_lowmask(GMP_NUMB_BITS)')
+dnl Usage: m4append(`variable',`value-to-append')
+dnl
+dnl Redefine `variable' as its current definition, fetched with defn,
+dnl immediately followed by `value-to-append'.
+
+define(`m4append',
+`define(`$1', defn(`$1')`$2')
+'
+)
+
divert`'dnl
--- /dev/null
+/* mpn_add_err1_n -- add_n with one error term
+
+ Contributed by David Harvey.
+
+ THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
+ SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/*
+  mpn_add_err1_n: limb-vector addition that also accumulates one error term.
+
+  Sum:      {rp,n} = {up,n} + {vp,n} + cy, where cy is the incoming carry
+            (the same job as mpn_add_n).  The carry out is returned.
+
+  Error:    Write c[i+1] for the carry out of limb position i, with
+            c[0] = cy.  The value
+                c[1]*yp[n-1] + c[2]*yp[n-2] + ... + c[n]*yp[0]
+            is stored as two limbs, low limb in ep[0], high limb in ep[1].
+
+  n must be at least 1.
+
+  Overlap:  {rp,n} may coincide exactly with {up,n} or {vp,n}.  Apart from
+            that, no output may overlap another output or any input.
+*/
+mp_limb_t
+mpn_add_err1_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
+                mp_ptr ep, mp_srcptr yp,
+                mp_size_t n, mp_limb_t cy)
+{
+  mp_limb_t acc_lo = 0, acc_hi = 0;
+  mp_srcptr ytop;
+
+  ASSERT (n >= 1);
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, vp, n));
+  ASSERT (! MPN_OVERLAP_P (rp, n, yp, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 2, up, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 2, vp, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 2, yp, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 2, rp, n));
+
+  /* The y operand is consumed from its top limb downwards.  */
+  ytop = yp + (n - 1);
+
+  do
+    {
+      mp_limb_t u, v, y, partial, sum, term, c_uv, c_in;
+
+      y = *ytop--;
+      u = *up++;
+      v = *vp++;
+
+      /* u + v + cy; the carry out is the OR of the two partial carries.  */
+      ADDC_LIMB (c_uv, partial, u, v);
+      ADDC_LIMB (c_in, sum, partial, cy);
+      cy = c_uv | c_in;
+      *rp++ = sum;
+
+      /* -cy is used as a mask, so y is accumulated into (acc_hi:acc_lo)
+         without a branch.  */
+      term = (-cy) & y;
+      acc_lo += term;
+      acc_hi += acc_lo < term;
+    }
+  while (--n);
+
+#if GMP_NAIL_BITS != 0
+  /* Move the bits above the numb part of the low limb into the high limb.  */
+  acc_hi = (acc_hi << GMP_NAIL_BITS) + (acc_lo >> GMP_NUMB_BITS);
+  acc_lo &= GMP_NUMB_MASK;
+#endif
+
+  ep[0] = acc_lo;
+  ep[1] = acc_hi;
+
+  return cy;
+}
--- /dev/null
+/* mpn_add_err2_n -- add_n with two error terms
+
+ Contributed by David Harvey.
+
+ THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
+ SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/*
+  mpn_add_err2_n: limb-vector addition that also accumulates two error terms.
+
+  Sum:      {rp,n} = {up,n} + {vp,n} + cy, where cy is the incoming carry
+            (the same job as mpn_add_n).  The carry out is returned.
+
+  Errors:   Write c[i+1] for the carry out of limb position i, with
+            c[0] = cy.  The values
+                c[1]*yp1[n-1] + ... + c[n]*yp1[0]
+                c[1]*yp2[n-1] + ... + c[n]*yp2[0]
+            are stored as two limbs each, at {ep,2} and {ep+2,2}
+            respectively, low limb first.
+
+  n must be at least 1.
+
+  Overlap:  {rp,n} may coincide exactly with {up,n} or {vp,n}.  Apart from
+            that, no output may overlap another output or any input.
+*/
+mp_limb_t
+mpn_add_err2_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
+                mp_ptr ep, mp_srcptr yp1, mp_srcptr yp2,
+                mp_size_t n, mp_limb_t cy)
+{
+  mp_limb_t lo1 = 0, hi1 = 0;
+  mp_limb_t lo2 = 0, hi2 = 0;
+  mp_srcptr ytop1, ytop2;
+
+  ASSERT (n >= 1);
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, vp, n));
+  ASSERT (! MPN_OVERLAP_P (rp, n, yp1, n));
+  ASSERT (! MPN_OVERLAP_P (rp, n, yp2, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 4, up, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 4, vp, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 4, yp1, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 4, yp2, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 4, rp, n));
+
+  /* Both y operands are consumed from their top limbs downwards.  */
+  ytop1 = yp1 + (n - 1);
+  ytop2 = yp2 + (n - 1);
+
+  do
+    {
+      mp_limb_t u, v, y1, y2, partial, sum, term1, term2, c_uv, c_in;
+
+      y1 = *ytop1--;
+      y2 = *ytop2--;
+      u = *up++;
+      v = *vp++;
+
+      /* u + v + cy; the carry out is the OR of the two partial carries.  */
+      ADDC_LIMB (c_uv, partial, u, v);
+      ADDC_LIMB (c_in, sum, partial, cy);
+      cy = c_uv | c_in;
+      *rp++ = sum;
+
+      /* -cy is used as a mask, so each y is accumulated without a branch.  */
+      term1 = (-cy) & y1;
+      lo1 += term1;
+      hi1 += lo1 < term1;
+
+      term2 = (-cy) & y2;
+      lo2 += term2;
+      hi2 += lo2 < term2;
+    }
+  while (--n);
+
+#if GMP_NAIL_BITS != 0
+  /* Move the bits above the numb part of each low limb into its high limb.  */
+  hi1 = (hi1 << GMP_NAIL_BITS) + (lo1 >> GMP_NUMB_BITS);
+  lo1 &= GMP_NUMB_MASK;
+  hi2 = (hi2 << GMP_NAIL_BITS) + (lo2 >> GMP_NUMB_BITS);
+  lo2 &= GMP_NUMB_MASK;
+#endif
+
+  ep[0] = lo1;
+  ep[1] = hi1;
+  ep[2] = lo2;
+  ep[3] = hi2;
+
+  return cy;
+}
--- /dev/null
+/* mpn_add_err3_n -- add_n with three error terms
+
+ Contributed by David Harvey.
+
+ THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
+ SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/*
+  mpn_add_err3_n: limb-vector addition that also accumulates three error
+  terms.
+
+  Sum:      {rp,n} = {up,n} + {vp,n} + cy, where cy is the incoming carry
+            (the same job as mpn_add_n).  The carry out is returned.
+
+  Errors:   Write c[i+1] for the carry out of limb position i, with
+            c[0] = cy.  The values
+                c[1]*yp1[n-1] + ... + c[n]*yp1[0]
+                c[1]*yp2[n-1] + ... + c[n]*yp2[0]
+                c[1]*yp3[n-1] + ... + c[n]*yp3[0]
+            are stored as two limbs each, at {ep,2}, {ep+2,2} and {ep+4,2}
+            respectively, low limb first.
+
+  n must be at least 1.
+
+  Overlap:  {rp,n} may coincide exactly with {up,n} or {vp,n}.  Apart from
+            that, no output may overlap another output or any input.
+*/
+mp_limb_t
+mpn_add_err3_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
+                mp_ptr ep, mp_srcptr yp1, mp_srcptr yp2, mp_srcptr yp3,
+                mp_size_t n, mp_limb_t cy)
+{
+  mp_limb_t lo1 = 0, hi1 = 0;
+  mp_limb_t lo2 = 0, hi2 = 0;
+  mp_limb_t lo3 = 0, hi3 = 0;
+  mp_srcptr ytop1, ytop2, ytop3;
+
+  ASSERT (n >= 1);
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, vp, n));
+  ASSERT (! MPN_OVERLAP_P (rp, n, yp1, n));
+  ASSERT (! MPN_OVERLAP_P (rp, n, yp2, n));
+  ASSERT (! MPN_OVERLAP_P (rp, n, yp3, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 6, up, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 6, vp, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 6, yp1, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 6, yp2, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 6, yp3, n));
+  ASSERT (! MPN_OVERLAP_P (ep, 6, rp, n));
+
+  /* All three y operands are consumed from their top limbs downwards.  */
+  ytop1 = yp1 + (n - 1);
+  ytop2 = yp2 + (n - 1);
+  ytop3 = yp3 + (n - 1);
+
+  do
+    {
+      mp_limb_t u, v, y1, y2, y3, partial, sum, c_uv, c_in;
+      mp_limb_t term1, term2, term3;
+
+      y1 = *ytop1--;
+      y2 = *ytop2--;
+      y3 = *ytop3--;
+      u = *up++;
+      v = *vp++;
+
+      /* u + v + cy; the carry out is the OR of the two partial carries.  */
+      ADDC_LIMB (c_uv, partial, u, v);
+      ADDC_LIMB (c_in, sum, partial, cy);
+      cy = c_uv | c_in;
+      *rp++ = sum;
+
+      /* -cy is used as a mask, so each y is accumulated without a branch.  */
+      term1 = (-cy) & y1;
+      lo1 += term1;
+      hi1 += lo1 < term1;
+
+      term2 = (-cy) & y2;
+      lo2 += term2;
+      hi2 += lo2 < term2;
+
+      term3 = (-cy) & y3;
+      lo3 += term3;
+      hi3 += lo3 < term3;
+    }
+  while (--n);
+
+#if GMP_NAIL_BITS != 0
+  /* Move the bits above the numb part of each low limb into its high limb.  */
+  hi1 = (hi1 << GMP_NAIL_BITS) + (lo1 >> GMP_NUMB_BITS);
+  lo1 &= GMP_NUMB_MASK;
+  hi2 = (hi2 << GMP_NAIL_BITS) + (lo2 >> GMP_NUMB_BITS);
+  lo2 &= GMP_NUMB_MASK;
+  hi3 = (hi3 << GMP_NAIL_BITS) + (lo3 >> GMP_NUMB_BITS);
+  lo3 &= GMP_NUMB_MASK;
+#endif
+
+  ep[0] = lo1;
+  ep[1] = hi1;
+  ep[2] = lo2;
+  ep[3] = hi2;
+  ep[4] = lo3;
+  ep[5] = hi3;
+
+  return cy;
+}
--- /dev/null
+/* mpn_add_n_sub_n -- Add and Subtract two limb vectors of equal, non-zero length.
+
+ THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
+ SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 1999, 2000, 2001, 2006 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#ifndef L1_CACHE_SIZE
+#define L1_CACHE_SIZE 8192 /* only 68040 has less than this */
+#endif
+
+#define PART_SIZE (L1_CACHE_SIZE / BYTES_PER_MP_LIMB / 6)
+
+
+/* mpn_add_n_sub_n -- sum and difference of two n-limb operands in one call.
+     {r1p,n} = {s1p,n} + {s2p,n}
+     {r2p,n} = {s1p,n} - {s2p,n}
+   The results may be written over the sources.  The return value is
+   2 * (carry out of the addition) + (borrow out of the subtraction).  */
+mp_limb_t
+mpn_add_n_sub_n (mp_ptr r1p, mp_ptr r2p, mp_srcptr s1p, mp_srcptr s2p, mp_size_t n)
+{
+  mp_limb_t add_cy = 0;         /* carry chained between addition chunks */
+  mp_limb_t sub_cy = 0;         /* borrow chained between subtraction chunks */
+  mp_limb_t add_part, sub_part; /* per-chunk carry when no _nc routine exists */
+  mp_size_t pos;                /* start of the current chunk */
+  mp_size_t len;                /* limbs in the current chunk */
+
+  /* The operands are processed in chunks of at most PART_SIZE limbs, doing
+     the addition and the subtraction of one chunk before moving on to the
+     next, so that each chunk is still in the (L1) cache for the second
+     operation.  The chunks are large enough that the extra function calls
+     do not matter.  */
+
+  /* There are three variants of the chunk loop; which one runs depends on
+     whether r1 or r2 is the same operand as s1 or s2.  */
+
+  if (r1p != s1p && r1p != s2p)
+    {
+      /* r1 aliases neither source: store the sum first, then the
+         difference, with no temporary.  */
+      for (pos = 0; pos < n; pos += PART_SIZE)
+        {
+          len = MIN (n - pos, PART_SIZE);
+#if HAVE_NATIVE_mpn_add_nc
+          add_cy = mpn_add_nc (r1p + pos, s1p + pos, s2p + pos, len, add_cy);
+#else
+          add_part = mpn_add_n (r1p + pos, s1p + pos, s2p + pos, len);
+          add_cy = add_part + mpn_add_1 (r1p + pos, r1p + pos, len, add_cy);
+#endif
+#if HAVE_NATIVE_mpn_sub_nc
+          sub_cy = mpn_sub_nc (r2p + pos, s1p + pos, s2p + pos, len, sub_cy);
+#else
+          sub_part = mpn_sub_n (r2p + pos, s1p + pos, s2p + pos, len);
+          sub_cy = sub_part + mpn_sub_1 (r2p + pos, r2p + pos, len, sub_cy);
+#endif
+        }
+    }
+  else if (r2p != s1p && r2p != s2p)
+    {
+      /* r2 aliases neither source: store the difference first, then the
+         sum, with no temporary.  */
+      for (pos = 0; pos < n; pos += PART_SIZE)
+        {
+          len = MIN (n - pos, PART_SIZE);
+#if HAVE_NATIVE_mpn_sub_nc
+          sub_cy = mpn_sub_nc (r2p + pos, s1p + pos, s2p + pos, len, sub_cy);
+#else
+          sub_part = mpn_sub_n (r2p + pos, s1p + pos, s2p + pos, len);
+          sub_cy = sub_part + mpn_sub_1 (r2p + pos, r2p + pos, len, sub_cy);
+#endif
+#if HAVE_NATIVE_mpn_add_nc
+          add_cy = mpn_add_nc (r1p + pos, s1p + pos, s2p + pos, len, add_cy);
+#else
+          add_part = mpn_add_n (r1p + pos, s1p + pos, s2p + pos, len);
+          add_cy = add_part + mpn_add_1 (r1p + pos, r1p + pos, len, add_cy);
+#endif
+        }
+    }
+  else
+    {
+      /* Both destinations alias a source (r1==s1 and r2==s2, or the other
+         way round).  The sum of each chunk goes to a temporary and is
+         copied to r1 only after the difference has been stored.  */
+      mp_limb_t tp[PART_SIZE];
+      for (pos = 0; pos < n; pos += PART_SIZE)
+        {
+          len = MIN (n - pos, PART_SIZE);
+#if HAVE_NATIVE_mpn_add_nc
+          add_cy = mpn_add_nc (tp, s1p + pos, s2p + pos, len, add_cy);
+#else
+          add_part = mpn_add_n (tp, s1p + pos, s2p + pos, len);
+          add_cy = add_part + mpn_add_1 (tp, tp, len, add_cy);
+#endif
+#if HAVE_NATIVE_mpn_sub_nc
+          sub_cy = mpn_sub_nc (r2p + pos, s1p + pos, s2p + pos, len, sub_cy);
+#else
+          sub_part = mpn_sub_n (r2p + pos, s1p + pos, s2p + pos, len);
+          sub_cy = sub_part + mpn_sub_1 (r2p + pos, r2p + pos, len, sub_cy);
+#endif
+          MPN_COPY (r1p + pos, tp, len);
+        }
+    }
+
+  return 2 * add_cy + sub_cy;
+}
+
+#ifdef MAIN
+#include <stdlib.h>
+#include <stdio.h>
+#include "timing.h"
+
+long cputime ();
+
+/* Timing harness, built only when MAIN is defined.  Takes the operand size
+   in limbs as its single argument and prints the time for separate
+   mpn_add_n + mpn_sub_n calls and for mpn_add_n_sub_n with different
+   operand overlaps.  Returns 0 on success, 1 on bad usage or allocation
+   failure.  */
+int
+main (int argc, char **argv)
+{
+  mp_ptr r1p, r2p, s1p, s2p;
+  double t;
+  mp_size_t n, i;
+  char *end;
+
+  if (argc != 2)
+    {
+      fprintf (stderr, "usage: add_n_sub_n limb-count\n");
+      return 1;
+    }
+
+  n = strtol (argv[1], &end, 0);
+  if (end == argv[1] || *end != '\0' || n <= 0
+      || (size_t) n > (size_t) -1 / BYTES_PER_MP_LIMB)
+    {
+      fprintf (stderr, "invalid limb count: %s\n", argv[1]);
+      return 1;
+    }
+
+  r1p = malloc (n * BYTES_PER_MP_LIMB);
+  r2p = malloc (n * BYTES_PER_MP_LIMB);
+  s1p = malloc (n * BYTES_PER_MP_LIMB);
+  s2p = malloc (n * BYTES_PER_MP_LIMB);
+  if (r1p == NULL || r2p == NULL || s1p == NULL || s2p == NULL)
+    {
+      fprintf (stderr, "out of memory\n");
+      free (r1p);
+      free (r2p);
+      free (s1p);
+      free (s2p);
+      return 1;
+    }
+
+  /* Give every operand a defined value; r1p and r2p are also used as
+     sources in the overlap cases below.  */
+  for (i = 0; i < n; i++)
+    {
+      s1p[i] = (mp_limb_t) i & GMP_NUMB_MASK;
+      s2p[i] = ~(mp_limb_t) i & GMP_NUMB_MASK;
+      r1p[i] = 0;
+      r2p[i] = 0;
+    }
+
+  /* Baseline: the sum goes to r1p and the difference to r2p, matching what
+     mpn_add_n_sub_n stores.  */
+  TIME (t,(mpn_add_n(r1p,s1p,s2p,n),mpn_sub_n(r2p,s1p,s2p,n)));
+  printf (" separate add and sub: %.3f\n", t);
+  TIME (t,mpn_add_n_sub_n(r1p,r2p,s1p,s2p,n));
+  printf ("combined addsub separate variables: %.3f\n", t);
+  TIME (t,mpn_add_n_sub_n(r1p,r2p,r1p,s2p,n));
+  printf (" combined addsub r1 overlap: %.3f\n", t);
+  /* r2 is the destination that aliases a source here.  */
+  TIME (t,mpn_add_n_sub_n(r1p,r2p,r2p,s2p,n));
+  printf (" combined addsub r2 overlap: %.3f\n", t);
+  TIME (t,mpn_add_n_sub_n(r1p,r2p,r1p,r2p,n));
+  printf (" combined addsub in-place: %.3f\n", t);
+
+  free (r1p);
+  free (r2p);
+  free (s1p);
+  free (s2p);
+
+  return 0;
+}
+#endif
--- /dev/null
+/* mpn_addcnd_n -- Compute R = U + V if CND != 0 or R = U if CND == 0.
+ Both cases should take the same time and perform the exact same memory
+ accesses, since this function is intended to be used where side-channel
+ attack resilience is relevant.
+
+ THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE. IT IS ONLY
+ SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
+
+Copyright 1992, 1993, 1994, 1996, 2000, 2002, 2008, 2009, 2011 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Store {up,n} + {vp,n} at {rp,n} when cnd is non-zero, or {up,n} when cnd
+   is zero, and return the carry out.  The selection is done by masking the
+   v limbs, so the same loads, stores and arithmetic are executed in both
+   cases.  n must be at least 1; rp may coincide with up or vp.  */
+mp_limb_t
+mpn_addcnd_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t cnd)
+{
+  mp_limb_t select, carry;
+
+  ASSERT (n >= 1);
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
+  ASSERT (MPN_SAME_OR_SEPARATE_P (rp, vp, n));
+
+  /* All ones when cnd is non-zero, all zeros otherwise.  */
+  select = -(mp_limb_t) (cnd != 0);
+  carry = 0;
+
+  do
+    {
+      mp_limb_t u = *up++;
+      mp_limb_t v = *vp++ & select;
+#if GMP_NAIL_BITS == 0
+      mp_limb_t partial = u + v;
+      mp_limb_t c_uv = partial < u;
+      mp_limb_t sum = partial + carry;
+      mp_limb_t c_in = sum < partial;
+
+      carry = c_uv | c_in;
+      *rp++ = sum;
+#else
+      mp_limb_t sum = u + v + carry;
+
+      /* With nails the carry is the bit just above the numb part.  */
+      carry = sum >> GMP_NUMB_BITS;
+      *rp++ = sum & GMP_NUMB_MASK;
+#endif
+    }
+  while (--n != 0);
+
+  return carry;
+}
+++ /dev/null
-/* mpn_add_n_sub_n -- Add and Subtract two limb vectors of equal, non-zero length.
-
- THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
- SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
- GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 1999, 2000, 2001, 2006 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-#ifndef L1_CACHE_SIZE
-#define L1_CACHE_SIZE 8192 /* only 68040 has less than this */
-#endif
-
-#define PART_SIZE (L1_CACHE_SIZE / BYTES_PER_MP_LIMB / 6)
-
-
-/* mpn_add_n_sub_n.
- r1[] = s1[] + s2[]
- r2[] = s1[] - s2[]
- All operands have n limbs.
- In-place operations allowed. */
-mp_limb_t
-mpn_add_n_sub_n (mp_ptr r1p, mp_ptr r2p, mp_srcptr s1p, mp_srcptr s2p, mp_size_t n)
-{
- mp_limb_t acyn, acyo; /* carry for add */
- mp_limb_t scyn, scyo; /* carry for subtract */
- mp_size_t off; /* offset in operands */
- mp_size_t this_n; /* size of current chunk */
-
- /* We alternatingly add and subtract in chunks that fit into the (L1)
- cache. Since the chunks are several hundred limbs, the function call
- overhead is insignificant, but we get much better locality. */
-
- /* We have three variant of the inner loop, the proper loop is chosen
- depending on whether r1 or r2 are the same operand as s1 or s2. */
-
- if (r1p != s1p && r1p != s2p)
- {
- /* r1 is not identical to either input operand. We can therefore write
- to r1 directly, without using temporary storage. */
- acyo = 0;
- scyo = 0;
- for (off = 0; off < n; off += PART_SIZE)
- {
- this_n = MIN (n - off, PART_SIZE);
-#if HAVE_NATIVE_mpn_add_nc
- acyo = mpn_add_nc (r1p + off, s1p + off, s2p + off, this_n, acyo);
-#else
- acyn = mpn_add_n (r1p + off, s1p + off, s2p + off, this_n);
- acyo = acyn + mpn_add_1 (r1p + off, r1p + off, this_n, acyo);
-#endif
-#if HAVE_NATIVE_mpn_sub_nc
- scyo = mpn_sub_nc (r2p + off, s1p + off, s2p + off, this_n, scyo);
-#else
- scyn = mpn_sub_n (r2p + off, s1p + off, s2p + off, this_n);
- scyo = scyn + mpn_sub_1 (r2p + off, r2p + off, this_n, scyo);
-#endif
- }
- }
- else if (r2p != s1p && r2p != s2p)
- {
- /* r2 is not identical to either input operand. We can therefore write
- to r2 directly, without using temporary storage. */
- acyo = 0;
- scyo = 0;
- for (off = 0; off < n; off += PART_SIZE)
- {
- this_n = MIN (n - off, PART_SIZE);
-#if HAVE_NATIVE_mpn_sub_nc
- scyo = mpn_sub_nc (r2p + off, s1p + off, s2p + off, this_n, scyo);
-#else
- scyn = mpn_sub_n (r2p + off, s1p + off, s2p + off, this_n);
- scyo = scyn + mpn_sub_1 (r2p + off, r2p + off, this_n, scyo);
-#endif
-#if HAVE_NATIVE_mpn_add_nc
- acyo = mpn_add_nc (r1p + off, s1p + off, s2p + off, this_n, acyo);
-#else
- acyn = mpn_add_n (r1p + off, s1p + off, s2p + off, this_n);
- acyo = acyn + mpn_add_1 (r1p + off, r1p + off, this_n, acyo);
-#endif
- }
- }
- else
- {
- /* r1 and r2 are identical to s1 and s2 (r1==s1 and r2==s2 or vice versa)
- Need temporary storage. */
- mp_limb_t tp[PART_SIZE];
- acyo = 0;
- scyo = 0;
- for (off = 0; off < n; off += PART_SIZE)
- {
- this_n = MIN (n - off, PART_SIZE);
-#if HAVE_NATIVE_mpn_add_nc
- acyo = mpn_add_nc (tp, s1p + off, s2p + off, this_n, acyo);
-#else
- acyn = mpn_add_n (tp, s1p + off, s2p + off, this_n);
- acyo = acyn + mpn_add_1 (tp, tp, this_n, acyo);
-#endif
-#if HAVE_NATIVE_mpn_sub_nc
- scyo = mpn_sub_nc (r2p + off, s1p + off, s2p + off, this_n, scyo);
-#else
- scyn = mpn_sub_n (r2p + off, s1p + off, s2p + off, this_n);
- scyo = scyn + mpn_sub_1 (r2p + off, r2p + off, this_n, scyo);
-#endif
- MPN_COPY (r1p + off, tp, this_n);
- }
- }
-
- return 2 * acyo + scyo;
-}
-
-#ifdef MAIN
-#include <stdlib.h>
-#include <stdio.h>
-#include "timing.h"
-
-long cputime ();
-
-int
-main (int argc, char **argv)
-{
- mp_ptr r1p, r2p, s1p, s2p;
- double t;
- mp_size_t n;
-
- n = strtol (argv[1], 0, 0);
-
- r1p = malloc (n * BYTES_PER_MP_LIMB);
- r2p = malloc (n * BYTES_PER_MP_LIMB);
- s1p = malloc (n * BYTES_PER_MP_LIMB);
- s2p = malloc (n * BYTES_PER_MP_LIMB);
- TIME (t,(mpn_add_n(r1p,s1p,s2p,n),mpn_sub_n(r1p,s1p,s2p,n)));
- printf (" separate add and sub: %.3f\n", t);
- TIME (t,mpn_add_n_sub_n(r1p,r2p,s1p,s2p,n));
- printf ("combined addsub separate variables: %.3f\n", t);
- TIME (t,mpn_add_n_sub_n(r1p,r2p,r1p,s2p,n));
- printf (" combined addsub r1 overlap: %.3f\n", t);
- TIME (t,mpn_add_n_sub_n(r1p,r2p,r1p,s2p,n));
- printf (" combined addsub r2 overlap: %.3f\n", t);
- TIME (t,mpn_add_n_sub_n(r1p,r2p,r1p,r2p,n));
- printf (" combined addsub in-place: %.3f\n", t);
-
- return 0;
-}
-#endif
SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
-Copyright 2006, 2007, 2009 Free Software Foundation, Inc.
+Copyright 2006, 2007, 2009, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
/* Computes Q = N / D mod B^n,
- R = N - QD. */
+ R = N - QD. */
mp_limb_t
mpn_bdiv_qr (mp_ptr qp, mp_ptr rp,
mp_limb_t di;
mp_limb_t rh;
+ ASSERT (nn > dn);
if (BELOW_THRESHOLD (dn, DC_BDIV_QR_THRESHOLD) ||
BELOW_THRESHOLD (nn - dn, DC_BDIV_QR_THRESHOLD))
{
SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
-Copyright (C) 2004, 2005, 2006, 2007, 2009 Free Software Foundation, Inc.
+Copyright (C) 2004, 2005, 2006, 2007, 2009, 2012 Free Software
+Foundation, Inc.
This file is part of the GNU MP Library.
r[k+1] = r[k] + r[k] - r[k]*(u*r[k])
*/
-/* This is intended for constant THRESHOLDs only, where the compiler can
- completely fold the result. */
-#define LOG2C(n) \
- (((n) >= 0x1) + ((n) >= 0x2) + ((n) >= 0x4) + ((n) >= 0x8) + \
- ((n) >= 0x10) + ((n) >= 0x20) + ((n) >= 0x40) + ((n) >= 0x80) + \
- ((n) >= 0x100) + ((n) >= 0x200) + ((n) >= 0x400) + ((n) >= 0x800) + \
- ((n) >= 0x1000) + ((n) >= 0x2000) + ((n) >= 0x4000) + ((n) >= 0x8000))
-
#if TUNE_PROGRAM_BUILD
#define NPOWS \
((sizeof(mp_size_t) > 6 ? 48 : 8*sizeof(mp_size_t)))
--- /dev/null
+/* mpn_broot -- Compute Hensel k'th root, a^{1/k} (mod B^n)
+
+ Contributed to the GNU project by Niels Möller
+
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
+ SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
+
+Copyright 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Returns a^e (mod B).  Square-and-multiply, scanning e from its least
+   significant bit upwards; cheap when e is small, which is the typical
+   case.  */
+static mp_limb_t
+powlimb (mp_limb_t a, mp_limb_t e)
+{
+  mp_limb_t acc = 1;	/* product of the selected squarings */
+  mp_limb_t sq = a;	/* a^(2^j) at step j */
+
+  while (e > 0)
+    {
+      if (e & 1)
+	acc *= sq;
+      e >>= 1;
+      sq *= sq;
+    }
+
+  return acc;
+}
+
+/* Computes a^{1/k - 1} (mod B^n). Both a and k must be odd.
+
+ Iterates
+
+ r' <-- r - r * (a^{k-1} r^k - 1) / k
+
+ If
+
+ a^{k-1} r^k = 1 (mod 2^m),
+
+ then
+
+ a^{k-1} r'^k = 1 (mod 2^{2m}),
+
+ Compute the update term as
+
+ r' = r - (a^{k-1} r^{k+1} - r) / k
+
+ where we still have cancelation of low limbs.
+
+ */
+void
+mpn_broot_invm1 (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t k)
+{ /* Stores a^{1/k - 1} (mod B^n) at {rp,n}, with a = {ap,n}. The low limb
+ is computed with single-limb arithmetic; the loop at the end then
+ extends the result along the limb-count schedule kept in sizes[]. */
+ mp_size_t sizes[GMP_LIMB_BITS * 2]; /* limb counts n, (n+1)/2, ..., 2 */
+ mp_ptr akm1, tp, rnp, ep, scratch; /* scratch is not referenced below */
+ mp_limb_t a0, r0, km1, kp1h, kinv;
+ mp_size_t rn;
+ unsigned i;
+
+ TMP_DECL;
+
+ ASSERT (n > 0);
+ ASSERT (ap[0] & 1);
+ ASSERT (k & 1);
+ ASSERT (k >= 3);
+
+ TMP_MARK;
+
+ akm1 = TMP_ALLOC_LIMBS (4*n); /* n limbs for a^{k-1}, then 3n for tp */
+ tp = akm1 + n;
+
+ km1 = k-1;
+ /* FIXME: Could arrange the iteration so we don't need to compute
+ this up front, computing a^{k-1} * r^k as (a r)^{k-1} * r. Note
+ that we can use wraparound also for a*r, since the low half is
+ unchanged from the previous iteration. Or possibly mulmid. Also,
+ a r = a^{1/k}, so we get that value too, for free? */
+ mpn_powlo (akm1, ap, &km1, 1, n, tp); /* 3 n scratch space */
+
+ a0 = ap[0];
+ binvert_limb (kinv, k); /* kinv is used below wherever the formulas
+ divide by k; presumably k^{-1} modulo the limb base -- confirm
+ against binvert_limb's definition. */
+
+ /* 4 bits: a^{1/k - 1} (mod 16):
+
+ a % 8
+ 1 3 5 7
+ k%4 +-------
+ 1 |1 1 1 1
+ 3 |1 9 9 1
+
+ The first statement below evaluates this table. Each later statement
+ (and each pass of the #if loop) applies
+ r <-- r (k+1 - a^{k-1} r^k) / k
+ in single-limb arithmetic; the trailing comments give the number of
+ valid low bits after the step.
+ */
+ r0 = 1 + (((k << 2) & ((a0 << 1) ^ (a0 << 2))) & 8);
+ r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k & 0x7f)); /* 8 bits */
+ r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k & 0x7fff)); /* 16 bits */
+ r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k)); /* 32 bits */
+#if GMP_NUMB_BITS > 32
+ {
+ unsigned prec = 32;
+ do
+ {
+ r0 = kinv * r0 * (k+1 - akm1[0] * powlimb (r0, k));
+ prec *= 2;
+ }
+ while (prec < GMP_NUMB_BITS);
+ }
+#endif
+
+ rp[0] = r0;
+ if (n == 1)
+ {
+ TMP_FREE;
+ return;
+ }
+
+ /* For odd k, (k+1)/2 = k/2+1, and the latter avoids overflow. */
+ kp1h = k/2 + 1;
+
+ /* FIXME: Special case for two limb iteration. */
+ rnp = TMP_ALLOC_LIMBS (2*n + 1); /* rnp: n limbs; ep: the n+1 limbs above */
+ ep = rnp + n;
+
+ /* FIXME: Possible to this on the fly with some bit fiddling.
+ The loop stores n, (n+1)/2, ... for as long as the value exceeds 1,
+ so n is 1 once it finishes; the stored sizes are consumed below from
+ last to first. */
+ for (i = 0; n > 1; n = (n + 1)/2)
+ sizes[i++] = n;
+
+ rn = 1;
+
+ while (i-- > 0)
+ {
+ /* Compute x^{k+1}. Done as (x^2)^{(k+1)/2}: square {rp,rn} into ep,
+ then raise ep to the power kp1h. */
+ mpn_sqr (ep, rp, rn); /* For odd n, writes n+1 limbs in the
+ final iteration.*/
+ mpn_powlo (rnp, ep, &kp1h, 1, sizes[i], tp);
+
+ /* Multiply by a^{k-1}. Can use wraparound; low part equals
+ r. */
+
+ mpn_mullo_n (ep, rnp, akm1, sizes[i]);
+ ASSERT (mpn_cmp (ep, rp, rn) == 0);
+
+ ASSERT (sizes[i] <= 2*rn); /* The two calls below fill limbs rn and up
+ of rp with the negated quotient of limbs rn and up of ep by k,
+ following r' = r - (a^{k-1} r^{k+1} - r) / k; mpn_pi1_bdiv_q_1 is
+ given both k and kinv. */
+ mpn_pi1_bdiv_q_1 (rp + rn, ep + rn, sizes[i] - rn, k, kinv, 0);
+ mpn_neg (rp + rn, rp + rn, sizes[i] - rn);
+ rn = sizes[i];
+ }
+ TMP_FREE;
+}
+
+/* Stores a^{1/k} (mod B^n) at {rp,n}, where a = {ap,n}.  Both a and k must
+   be odd.  For k > 1 the value a^{1/k - 1} is obtained from
+   mpn_broot_invm1 and multiplied by a, keeping the low n limbs.  */
+void
+mpn_broot (mp_ptr rp, mp_srcptr ap, mp_size_t n, mp_limb_t k)
+{
+  mp_ptr invp;
+  TMP_DECL;
+
+  ASSERT (n > 0);
+  ASSERT (ap[0] & 1);
+  ASSERT (k & 1);
+
+  if (k != 1)
+    {
+      TMP_MARK;
+      invp = TMP_ALLOC_LIMBS (n);
+
+      mpn_broot_invm1 (invp, ap, n, k);
+      mpn_mullo_n (rp, invp, ap, n);
+
+      TMP_FREE;
+    }
+  else
+    {
+      /* The first root of a is a itself.  */
+      MPN_COPY (rp, ap, n);
+    }
+}
--- /dev/null
+/* mpn_brootinv, compute r such that r^k * y = 1 (mod 2^b).
+
+ Contributed to the GNU project by Martin Boij (as part of perfpow.c).
+
+Copyright 2009, 2010, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Returns a^e (mod B).  Square-and-multiply, scanning e from its least
+   significant bit upwards; cheap when e is small, which is the typical
+   case.  */
+static mp_limb_t
+powlimb (mp_limb_t a, mp_limb_t e)
+{
+  mp_limb_t acc = 1;	/* product of the selected squarings */
+  mp_limb_t sq = a;	/* a^(2^j) at step j */
+
+  while (e > 0)
+    {
+      if (e & 1)
+	acc *= sq;
+      e >>= 1;
+      sq *= sq;
+    }
+
+  return acc;
+}
+
+/* Compute r such that r^k * y = 1 (mod B^n).
+
+ Iterates
+ r' <-- k^{-1} ((k+1) r - r^{k+1} y) (mod 2^b)
+ using Hensel lifting, each time doubling the number of known bits in r.
+
+ Works just for odd k. Else the Hensel lifting degenerates.
+
+ FIXME:
+
+ (1) Make it work for k == GMP_LIMB_MAX (k+1 below overflows).
+
+ (2) Rewrite iteration as
+ r' <-- r - k^{-1} r (r^k y - 1)
+ and take advantage of the zero low part of r^k y - 1.
+
+ (3) Use wrap-around trick.
+
+ (4) Use a small table to get starting value.
+
+ Scratch need: 5*bn, where bn = ceil (bnb / GMP_NUMB_BITS).
+*/
+
+void
+mpn_brootinv (mp_ptr rp, mp_srcptr yp, mp_size_t bn, mp_limb_t k, mp_ptr tp)
+{ /* Stores at {rp,bn} a value r with r^k * y = 1 for y = {yp,bn}; k must
+ be odd. The low limb is computed with single-limb arithmetic, then the
+ limb count grows along the schedule kept in order[]. tp is caller
+ supplied scratch. */
+ mp_ptr tp2, tp3;
+ mp_limb_t kinv, k2, r0, y0;
+ mp_size_t order[GMP_LIMB_BITS + 1]; /* limb counts bn, (bn+1)/2, ..., 2 */
+ int i, d;
+
+ ASSERT (bn > 0);
+ ASSERT ((k & 1) != 0);
+
+ tp2 = tp + bn; /* second bn-limb area of the scratch */
+ tp3 = tp + 2 * bn; /* remainder of the scratch, handed to mpn_powlo */
+ k2 = k + 1; /* wraps around when k is the largest limb value */
+
+ binvert_limb (kinv, k);
+
+ /* 4-bit initial approximation:
+
+ y%16 | 1 3 5 7 9 11 13 15,
+ k%4 +-----------------------------
+ 1 | 1 11 13 7 9 3 5 15
+ 3 | 1 3 5 7 9 11 13 15
+
+ */
+ y0 = yp[0];
+
+ r0 = y0 ^ (((y0 << 1) ^ (y0 << 2)) & ~(k << 2) & 8); /* 4 bits */
+ r0 = kinv * (k2 * r0 - y0 * powlimb(r0, k2 & 0x7f)); /* 8 bits */
+ r0 = kinv * (k2 * r0 - y0 * powlimb(r0, k2 & 0xffff)); /* 16 bits */
+ r0 = kinv * (k2 * r0 - y0 * powlimb(r0, k2)); /* 32 bits */
+#if GMP_NUMB_BITS > 32
+ {
+ unsigned prec = 32;
+ do
+ {
+ r0 = kinv * (k2 * r0 - y0 * powlimb(r0, k2));
+ prec *= 2;
+ }
+ while (prec < GMP_NUMB_BITS);
+ }
+#endif
+
+ rp[0] = r0;
+ if (bn == 1)
+ return;
+
+ /* This initialization doesn't matter for the result (any garbage is
+ cancelled in the iteration), but proper initialization makes
+ valgrind happier. */
+ MPN_ZERO (rp+1, bn-1);
+
+ d = 0;
+ for (; bn > 1; bn = (bn + 1) >> 1)
+ order[d++] = bn;
+
+ for (i = d - 1; i >= 0; i--)
+ {
+ bn = order[i]; /* sizes are replayed from smallest to largest */
+
+ mpn_mul_1 (tp, rp, bn, k2); /* tp = (k+1) r */
+
+ mpn_powlo (tp2, rp, &k2, 1, bn, tp3); /* tp2 = r^{k+1}, tp3 scratch */
+ mpn_mullo_n (rp, yp, tp2, bn); /* rp = y r^{k+1} */
+
+ mpn_sub_n (tp2, tp, rp, bn); /* tp2 = (k+1) r - y r^{k+1} */
+ mpn_pi1_bdiv_q_1 (rp, tp2, bn, k, kinv, 0); /* rp = tp2 / k */
+ }
+}
--- /dev/null
+/* mpn_bsqrt, a^{1/2} (mod 2^n).
+
+Copyright 2009, 2010, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+void
+mpn_bsqrt (mp_ptr rp, mp_srcptr ap, mp_bitcnt_t nb, mp_ptr tp)
+{ /* Has mpn_bsqrtinv store its result at sp, then multiplies that by
+ {ap,n} and keeps the low n limbs in rp.
+ NOTE(review): n is nb / GMP_NUMB_BITS rounded down, so n is 0 whenever
+ nb < GMP_NUMB_BITS -- confirm callers never pass such nb.
+ NOTE(review): mpn_bsqrtinv's return value (non-zero if the inverse
+ root exists) is ignored here.
+ NOTE(review): sp = tp + n lies inside the area starting at tp, and tp
+ is what mpn_bsqrtinv receives as scratch (it uses tp, tp + bn and
+ tp + 2*bn with bn = 1 + nb / GMP_LIMB_BITS) -- confirm that this
+ overlap between its result and scratch operands is intended. */
+ mp_ptr sp;
+ mp_size_t n;
+
+ ASSERT (nb > 0);
+
+ n = nb / GMP_NUMB_BITS;
+ sp = tp + n;
+
+ mpn_bsqrtinv (sp, ap, nb, tp);
+ mpn_mullo_n (rp, sp, ap, n);
+}
--- /dev/null
+/* mpn_bsqrtinv, compute r such that r^2 * y = 1 (mod 2^{b+1}).
+
+ Contributed to the GNU project by Martin Boij (as part of perfpow.c).
+
+Copyright 2009, 2010, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Compute r such that r^2 * y = 1 (mod 2^{b+1}).
+ Return non-zero if such an integer r exists.
+
+ Iterates
+ r' <-- (3r - r^3 y) / 2
+ using Hensel lifting. Since we divide by two, the Hensel lifting is
+ somewhat degenerate. Therefore, we lift from 2^b to 2^{b+1}-1.
+
+ FIXME:
+ (1) Simplify to do precision book-keeping in limbs rather than bits.
+
+ (2) Rewrite iteration as
+ r' <-- r - r (r^2 y - 1) / 2
+ and take advantage of zero low part of r^2 y - 1.
+
+ (3) Use wrap-around trick.
+
+ (4) Use a small table to get starting value.
+*/
+/* Inverse square root of y = {yp,...} modulo a power of two selected by
+   bnb; see the iteration described in the comment before this function.
+   Returns 1 when the inverse root exists (the result is then at rp) and 0
+   when the low bits of y rule it out.  tp is scratch space.  */
+int
+mpn_bsqrtinv (mp_ptr rp, mp_srcptr yp, mp_bitcnt_t bnb, mp_ptr tp)
+{
+  mp_size_t sched[GMP_LIMB_BITS + 1];	/* bit precisions, largest first */
+  mp_size_t limbs;
+  mp_ptr pw, scratch;
+  mp_limb_t three = 3;
+  int depth;
+
+  ASSERT (bnb > 0);
+
+  /* Scratch layout is fixed by the full-size limb count.  */
+  limbs = 1 + bnb / GMP_LIMB_BITS;
+  pw = tp + limbs;
+  scratch = tp + 2 * limbs;
+
+  rp[0] = 1;
+
+  if (bnb == 1)
+    return (yp[0] & 3) == 1;
+
+  if ((yp[0] & 7) != 1)
+    return 0;
+
+  /* Record the precisions needed on the way down to 2 ...  */
+  depth = 0;
+  while (bnb != 2)
+    {
+      sched[depth++] = bnb;
+      bnb = (bnb + 2) >> 1;
+    }
+
+  /* ... and replay them from the smallest up.  */
+  while (--depth >= 0)
+    {
+      bnb = sched[depth];
+      limbs = 1 + bnb / GMP_LIMB_BITS;
+
+      /* tp = 3 r */
+      mpn_mul_1 (tp, rp, limbs, three);
+
+      /* rp = r^3 y */
+      mpn_powlo (pw, rp, &three, 1, limbs, scratch);
+      mpn_mullo_n (rp, yp, pw, limbs);
+
+      /* rp = (3 r - r^3 y) / 2 */
+#if HAVE_NATIVE_mpn_rsh1sub_n
+      mpn_rsh1sub_n (rp, tp, rp, limbs);
+#else
+      mpn_sub_n (pw, tp, rp, limbs);
+      mpn_rshift (rp, pw, limbs, 1);
+#endif
+    }
+
+  return 1;
+}
--- /dev/null
+/* Const tables shared among combinatoric functions.
+
+ THE CONTENTS OF THIS FILE ARE FOR INTERNAL USE AND ARE ALMOST CERTAIN TO
+ BE SUBJECT TO INCOMPATIBLE CHANGES IN FUTURE GNU MP RELEASES.
+
+Copyright 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Entry i contains (i!/2^t) where t is chosen such that the parenthesis
+ is an odd integer. */
+const mp_limb_t __gmp_oddfac_table[] = { ONE_LIMB_ODD_FACTORIAL_TABLE, ONE_LIMB_ODD_FACTORIAL_EXTTABLE };
+
+/* Entry i contains ((2i+1)!!/2^t) where t is chosen such that the parenthesis
+ is an odd integer. */
+const mp_limb_t __gmp_odd2fac_table[] = { ONE_LIMB_ODD_DOUBLEFACTORIAL_TABLE };
+
+/* Entry i contains 2i-popc(2i). */
+const unsigned char __gmp_fac2cnt_table[] = { TABLE_2N_MINUS_POPC_2N };
+
+const mp_limb_t __gmp_limbroots_table[] = { NTH_ROOT_NUMB_MASK_TABLE }; /* NOTE(review): no description is given for this table; its entries come from NTH_ROOT_NUMB_MASK_TABLE, which is not defined in this file -- document the entry meaning once confirmed. */
--- /dev/null
+/* mpn_div_qr_2 -- Divide natural numbers, producing both remainder and
+ quotient. The divisor is two limbs.
+
+ Contributed to the GNU project by Torbjorn Granlund and Niels Möller
+
+ THIS FILE CONTAINS INTERNAL FUNCTIONS WITH MUTABLE INTERFACES. IT IS
+ ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS
+ ALMOST GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP
+ RELEASE.
+
+
+Copyright 1993, 1994, 1995, 1996, 1999, 2000, 2001, 2002, 2011 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#ifndef DIV_QR_2_PI2_THRESHOLD
+/* Disabled unless explicitly tuned. */
+#define DIV_QR_2_PI2_THRESHOLD MP_LIMB_T_MAX
+#endif
+
+#ifndef SANITY_CHECK
+#define SANITY_CHECK 0
+#endif
+
+/* Define some longlong.h-style macros, but for wider operations.
+ * add_sssaaaa is like longlong.h's add_ssaaaa, but with the carry-out
+ propagating into an additional sum operand.
+ * add_csaac accepts two addends and a carry in, and generates a sum
+ and a carry out. A little like a "full adder".
+*/
+#if defined (__GNUC__) && ! defined (__INTEL_COMPILER)
+
+#if (defined (__i386__) || defined (__i486__)) && W_TYPE_SIZE == 32
+#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0) \
+ __asm__ ("add\t%7, %k2\n\tadc\t%5, %k1\n\tadc\t$0, %k0" \
+ : "=r" (s2), "=&r" (s1), "=&r" (s0) \
+ : "0" ((USItype)(s2)), \
+ "1" ((USItype)(a1)), "g" ((USItype)(b1)), \
+ "%2" ((USItype)(a0)), "g" ((USItype)(b0)))
+#define add_csaac(co, s, a, b, ci) \
+ __asm__ ("bt\t$0, %2\n\tadc\t%5, %k1\n\tadc\t%k0, %k0" \
+ : "=r" (co), "=r" (s) \
+ : "rm" ((USItype)(ci)), "0" (CNST_LIMB(0)), \
+ "%1" ((USItype)(a)), "g" ((USItype)(b)))
+#endif
+
+#if defined (__amd64__) && W_TYPE_SIZE == 64
+#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0) \
+ __asm__ ("add\t%7, %q2\n\tadc\t%5, %q1\n\tadc\t$0, %q0" \
+ : "=r" (s2), "=&r" (s1), "=&r" (s0) \
+ : "0" ((UDItype)(s2)), \
+ "1" ((UDItype)(a1)), "rme" ((UDItype)(b1)), \
+ "%2" ((UDItype)(a0)), "rme" ((UDItype)(b0)))
+#define add_csaac(co, s, a, b, ci) \
+ __asm__ ("bt\t$0, %2\n\tadc\t%5, %q1\n\tadc\t%q0, %q0" \
+ : "=r" (co), "=r" (s) \
+ : "rm" ((UDItype)(ci)), "0" (CNST_LIMB(0)), \
+ "%1" ((UDItype)(a)), "g" ((UDItype)(b)))
+#endif
+
+#if HAVE_HOST_CPU_FAMILY_powerpc && !defined (_LONG_LONG_LIMB)
+/* This works fine for 32-bit and 64-bit limbs, except for 64-bit limbs with a
+ processor running in 32-bit mode, since the carry flag then gets the 32-bit
+ carry. */
+#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0) \
+ __asm__ ("add%I7c\t%2,%6,%7\n\tadde\t%1,%4,%5\n\taddze\t%0,%0" \
+ : "=r" (s2), "=&r" (s1), "=&r" (s0) \
+ : "r" (s2), "r" (a1), "r" (b1), "%r" (a0), "rI" (b0))
+#endif
+
+#endif /* __GNUC__ */
+
+#ifndef add_sssaaaa /* Plain C version, used when no asm variant above was
+ selected. Sets (s1,s0) = (a1,a0) + (b1,b0) and adds the carry out of
+ that two-limb sum to s2. Note that a0 and a1 are each expanded
+ twice, so arguments should be free of side effects. */
+#define add_sssaaaa(s2, s1, s0, a1, a0, b1, b0) \
+ do { \
+ UWtype __s0, __s1, __c0, __c1; \
+ __s0 = (a0) + (b0); \
+ __s1 = (a1) + (b1); \
+ __c0 = __s0 < (a0); \
+ __c1 = __s1 < (a1); \
+ (s0) = __s0; \
+ __s1 = __s1 + __c0; \
+ (s1) = __s1; \
+ (s2) += __c1 + (__s1 < __c0); \
+ } while (0)
+#endif
+
+#ifndef add_csaac /* Plain C version. Sets s = a + b + ci and co to the
+ carry out of that sum. a and ci are each expanded twice, so
+ arguments should be free of side effects. */
+#define add_csaac(co, s, a, b, ci) \
+ do { \
+ UWtype __s, __c; \
+ __s = (a) + (b); \
+ __c = __s < (a); \
+ __s = __s + (ci); \
+ (s) = __s; \
+ (co) = __c + (__s < (ci)); \
+ } while (0)
+#endif
+
+/* Divides the four limbs <n3,n2,n1,n0> by the two limbs <d1,d0>, storing
+ two quotient limbs in (q1,q0) and two remainder limbs in (r1,r0).
+ (di1,di0) is the precomputed inverse of the divisor; the caller in this
+ file passes the two limbs produced by invert_4by2.
+
+ Outline: a quotient candidate is formed from the products of <n3,n2>
+ with <di1,di0> plus a numerator term, a candidate remainder is computed
+ from the low limbs only, and then one branch-free adjustment (via _mask)
+ and one unlikely extra adjustment are applied.
+
+ NOTE(review): the inline comment "[q3,q2,q1,q0] += [n3,n3,n1,n0]" does
+ not match the statements under it literally; they add n0, n1 and then
+ (r1,r0), which at that point holds <n3,n2> + 1.
+
+ Typically used with r1, r0 same as n3, n2. Other types of overlap
+ between inputs and outputs not supported. */
+#define udiv_qr_4by2(q1,q0, r1,r0, n3,n2,n1,n0, d1,d0, di1,di0) \
+ do { \
+ mp_limb_t _q3, _q2a, _q2, _q1, _q2c, _q1c, _q1d, _q0; \
+ mp_limb_t _t1, _t0; \
+ mp_limb_t _c, _mask; \
+ \
+ umul_ppmm (_q3,_q2a, n3, di1); \
+ umul_ppmm (_q2,_q1, n2, di1); \
+ umul_ppmm (_q2c,_q1c, n3, di0); \
+ add_sssaaaa (_q3,_q2,_q1, _q2,_q1, _q2c,_q1c); \
+ umul_ppmm (_q1d,_q0, n2, di0); \
+ add_sssaaaa (_q3,_q2,_q1, _q2,_q1, _q2a,_q1d); \
+ \
+ add_ssaaaa (r1, r0, n3, n2, 0, 1); /* FIXME: combine as in x86_64 asm */ \
+ \
+ /* [q3,q2,q1,q0] += [n3,n3,n1,n0] */ \
+ add_csaac (_c, _q0, _q0, n0, 0); \
+ add_csaac (_c, _q1, _q1, n1, _c); \
+ add_csaac (_c, _q2, _q2, r0, _c); \
+ _q3 = _q3 + r1 + _c; \
+ \
+ umul_ppmm (_t1,_t0, _q2, d0); \
+ _t1 += _q2 * d1 + _q3 * d0; \
+ \
+ sub_ddmmss (r1, r0, n1, n0, _t1, _t0); \
+ \
+ _mask = -(mp_limb_t) (r1 >= _q1 & (r1 > _q1 | r0 >= _q0)); /* (r1,r0) >= (q1,q0) */ \
+ add_ssaaaa (r1, r0, r1, r0, d1 & _mask, d0 & _mask); \
+ sub_ddmmss (_q3, _q2, _q3, _q2, 0, -_mask); \
+ \
+ if (UNLIKELY (r1 >= d1)) \
+ { \
+ if (r1 > d1 || r0 >= d0) \
+ { \
+ sub_ddmmss (r1, r0, r1, r0, d1, d0); \
+ add_ssaaaa (_q3, _q2, _q3, _q2, 0, 1); \
+ } \
+ } \
+ (q1) = _q3; \
+ (q0) = _q2; \
+ } while (0)
+
+static void
+invert_4by2 (mp_ptr di, mp_limb_t d1, mp_limb_t d0)
+{ /* Stores in di[1], di[0] the limbs v1, v0 of the 4/2 inverse
+ <1, v1, v0> = (B^4 - 1) / <d1, d0>. v1 starts as invert_limb of d1
+ and is corrected downwards to take d0 into account (the 3/2 inverse);
+ v0 then comes from one 3-by-2 division step using v1. */
+ mp_limb_t v1, v0, p1, t1, t0, p0, mask;
+ invert_limb (v1, d1);
+ p1 = d1 * v1;
+ /* <1, v1> * d1 = <B-1, p1> */
+ p1 += d0;
+ if (p1 < d0)
+ {
+ v1--;
+ mask = -(mp_limb_t) (p1 >= d1);
+ p1 -= d1;
+ v1 += mask;
+ p1 -= mask & d1;
+ }
+ /* <1, v1> * d1 + d0 = <B-1, p1> */
+ umul_ppmm (t1, p0, d0, v1);
+ p1 += t1;
+ if (p1 < t1)
+ {
+ if (UNLIKELY (p1 >= d1))
+ {
+ if (p1 > d1 || p0 >= d0)
+ {
+ sub_ddmmss (p1, p0, p1, p0, d1, d0);
+ v1--;
+ }
+ }
+ sub_ddmmss (p1, p0, p1, p0, d1, d0);
+ v1--;
+ }
+ /* Now v1 is the 3/2 inverse, <1, v1> * <d1, d0> = <B-1, p1, p0>,
+ * with <p1, p0> + <d1, d0> >= B^2.
+ *
+ * The 4/2 inverse is (B^4 - 1) / <d1, d0> = <1, v1, v0>. The
+ * partial remainder after <1, v1> is
+ *
+ * B^4 - 1 - B <1, v1> <d1, d0> = <B-1, B-1, B-1, B-1> - <B-1, p1, p0, 0>
+ * = <~p1, ~p0, B-1>
+ *
+ * The remainder limbs t1, t0 of the division below are not used.
+ */
+ udiv_qr_3by2 (v0, t1, t0, ~p1, ~p0, MP_LIMB_T_MAX, d1, d0, v1);
+ di[0] = v0;
+ di[1] = v1;
+
+#if SANITY_CHECK
+ { /* Checks <v1,v0> * <d1,d0> + B^2 * <d1,d0> has both high limbs equal to
+ B-1, and that adding <d1,d0> once more to the low half carries. */
+ mp_limb_t tp[4];
+ mp_limb_t dp[2];
+ dp[0] = d0;
+ dp[1] = d1;
+ mpn_mul_n (tp, dp, di, 2);
+ ASSERT_ALWAYS (mpn_add_n (tp+2, tp+2, dp, 2) == 0);
+ ASSERT_ALWAYS (tp[2] == MP_LIMB_T_MAX);
+ ASSERT_ALWAYS (tp[3] == MP_LIMB_T_MAX);
+ ASSERT_ALWAYS (mpn_add_n (tp, tp, dp, 2) == 1);
+ }
+#endif
+}
+
+static mp_limb_t
+mpn_div_qr_2n_pi2 (mp_ptr qp, mp_ptr rp, mp_srcptr np, mp_size_t nn,
+ mp_limb_t d1, mp_limb_t d0, mp_limb_t di1, mp_limb_t di0)
+{ /* Divides {np,nn} by the normalized two-limb divisor <d1,d0> (high bit
+ of d1 set), using the two-limb inverse <di1,di0>. Writes nn-2 quotient
+ limbs to qp and the two remainder limbs to rp, and returns the high
+ quotient limb qh, which is 0 or 1. */
+ mp_limb_t qh;
+ mp_size_t i;
+ mp_limb_t r1, r0;
+
+ ASSERT (nn >= 2);
+ ASSERT (d1 & GMP_NUMB_HIGHBIT);
+
+ r1 = np[nn-1];
+ r0 = np[nn-2];
+
+ qh = 0;
+ if (r1 >= d1 && (r1 > d1 || r0 >= d0)) /* top two limbs >= divisor */
+ {
+#if GMP_NAIL_BITS == 0
+ sub_ddmmss (r1, r0, r1, r0, d1, d0);
+#else
+ r0 = r0 - d0;
+ r1 = r1 - d1 - (r0 >> GMP_LIMB_BITS - 1);
+ r0 &= GMP_NUMB_MASK;
+#endif
+ qh = 1;
+ }
+
+ for (i = nn - 2; i >= 2; i -= 2) /* two numerator limbs per round */
+ {
+ mp_limb_t n1, n0, q1, q0;
+ n1 = np[i-1];
+ n0 = np[i-2];
+ udiv_qr_4by2 (q1, q0, r1, r0, r1, r0, n1, n0, d1, d0, di1, di0);
+ qp[i-1] = q1;
+ qp[i-2] = q0;
+ }
+
+ if (i > 0) /* i is 1 here: one limb is left, handled by a 3/2 step that
+ uses only the high inverse limb di1 */
+ {
+ mp_limb_t q;
+ udiv_qr_3by2 (q, r1, r0, r1, r0, np[0], d1, d0, di1);
+ qp[0] = q;
+ }
+ rp[1] = r1;
+ rp[0] = r0;
+
+ return qh;
+}
+
+
+/* Divide num {np,nn} by den {dp,2} and write the nn-2 least
+   significant quotient limbs at qp and the 2 long remainder at rp.
+   Return the most significant limb of the quotient.
+
+   The divisor does not have to be normalized; dp[1] must be non-zero.
+   A divisor with its high bit set is handled by mpn_div_qr_2n_pi1, or by
+   mpn_div_qr_2n_pi2 once nn reaches DIV_QR_2_PI2_THRESHOLD.  Any other
+   divisor is shifted left until its high bit is set and handled by
+   mpn_div_qr_2u_pi1.
+
+   Preconditions:
+   1. qp must either not overlap with the input operands at all, or
+      qp >= np + 2 must hold true.  (This means that it's possible to put
+      the quotient in the high part of {np,nn}, right above the remainder.)
+   2. nn >= 2.  */
+
+mp_limb_t
+mpn_div_qr_2 (mp_ptr qp, mp_ptr rp, mp_srcptr np, mp_size_t nn,
+	      mp_srcptr dp)
+{
+  mp_limb_t d1;
+  mp_limb_t d0;
+
+  ASSERT (nn >= 2);
+  ASSERT (! MPN_OVERLAP_P (qp, nn-2, np, nn) || qp >= np + 2);
+  ASSERT_MPN (np, nn);
+  ASSERT_MPN (dp, 2);
+
+  d1 = dp[1];
+  d0 = dp[0];
+
+  ASSERT (d1 > 0);
+
+  if (UNLIKELY (d1 & GMP_NUMB_HIGHBIT))
+    {
+      /* Already normalized.  */
+      if (BELOW_THRESHOLD (nn, DIV_QR_2_PI2_THRESHOLD))
+	{
+	  gmp_pi1_t dinv;
+	  invert_pi1 (dinv, d1, d0);
+	  return mpn_div_qr_2n_pi1 (qp, rp, np, nn, d1, d0, dinv.inv32);
+	}
+      else
+	{
+	  mp_limb_t di[2];
+	  invert_4by2 (di, d1, d0);
+	  return mpn_div_qr_2n_pi2 (qp, rp, np, nn, d1, d0, di[1], di[0]);
+	}
+    }
+  else
+    {
+      /* d1 is non-zero with a clear high bit, so 0 < shift < GMP_LIMB_BITS
+	 and both shift counts below are in range.  */
+      gmp_pi1_t dinv;
+      int shift;
+      count_leading_zeros (shift, d1);
+      d1 = (d1 << shift) | (d0 >> (GMP_LIMB_BITS - shift));
+      d0 <<= shift;
+      invert_pi1 (dinv, d1, d0);
+      return mpn_div_qr_2u_pi1 (qp, rp, np, nn, d1, d0, shift, dinv.inv32);
+    }
+}
--- /dev/null
+/* mpn_div_qr_2n_pi1
+
+ Contributed to the GNU project by Torbjorn Granlund and Niels Möller
+
+ THIS FILE CONTAINS INTERNAL FUNCTIONS WITH MUTABLE INTERFACES. IT IS
+ ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS
+ ALMOST GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP
+ RELEASE.
+
+
+Copyright 1993, 1994, 1995, 1996, 1999, 2000, 2001, 2002, 2011 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* Division of {np,nn} by the normalized two-limb divisor <d1,d0>, one
+   3/2 step per quotient limb, with di as the precomputed inverse.  Writes
+   nn-2 quotient limbs to qp and the remainder to rp[1], rp[0]; returns
+   the high quotient limb (0 or 1).  */
+mp_limb_t
+mpn_div_qr_2n_pi1 (mp_ptr qp, mp_ptr rp, mp_srcptr np, mp_size_t nn,
+		   mp_limb_t d1, mp_limb_t d0, mp_limb_t di)
+{
+  mp_limb_t hi, lo;		/* running two-limb remainder */
+  mp_limb_t top_q = 0;
+  mp_size_t pos;
+
+  ASSERT (nn >= 2);
+  ASSERT (d1 & GMP_NUMB_HIGHBIT);
+
+  hi = np[nn - 1];
+  lo = np[nn - 2];
+
+  /* Bring the two top limbs below the divisor first.  */
+  if (hi > d1 || (hi == d1 && lo >= d0))
+    {
+#if GMP_NAIL_BITS == 0
+      sub_ddmmss (hi, lo, hi, lo, d1, d0);
+#else
+      lo = lo - d0;
+      hi = hi - d1 - (lo >> (GMP_LIMB_BITS - 1));
+      lo &= GMP_NUMB_MASK;
+#endif
+      top_q = 1;
+    }
+
+  /* Feed in the remaining limbs from the most significant down; the
+     numerator limb is loaded before the quotient limb is stored.  */
+  for (pos = nn - 3; pos >= 0; pos--)
+    {
+      mp_limb_t next, q;
+      next = np[pos];
+      udiv_qr_3by2 (q, hi, lo, hi, lo, next, d1, d0, di);
+      qp[pos] = q;
+    }
+
+  rp[1] = hi;
+  rp[0] = lo;
+
+  return top_q;
+}
--- /dev/null
+/* mpn_div_qr_2u_pi1
+
+ Contributed to the GNU project by Niels Möller
+
+ THIS FILE CONTAINS INTERNAL FUNCTIONS WITH MUTABLE INTERFACES. IT IS
+ ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS
+ ALMOST GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP
+ RELEASE.
+
+
+Copyright 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* 3/2 loop, for unnormalized divisor. Caller must pass shifted d1 and
+ d0, while {np,nn} is shifted on the fly.
+
+ shift is the left shift count that was applied to the divisor; it must
+ be positive (asserted below) and, since GMP_LIMB_BITS - shift is used
+ as a shift count, less than GMP_LIMB_BITS. di is the precomputed
+ inverse handed to udiv_qr_3by2. Writes nn-2 quotient limbs to qp and
+ the remainder, shifted back down, to rp[1], rp[0]; returns the high
+ quotient limb. */
+mp_limb_t
+mpn_div_qr_2u_pi1 (mp_ptr qp, mp_ptr rp, mp_srcptr np, mp_size_t nn,
+ mp_limb_t d1, mp_limb_t d0, int shift, mp_limb_t di)
+{
+ mp_limb_t qh;
+ mp_limb_t r2, r1, r0;
+ mp_size_t i;
+
+ ASSERT (nn >= 2);
+ ASSERT (d1 & GMP_NUMB_HIGHBIT);
+ ASSERT (shift > 0);
+
+ r2 = np[nn-1] >> (GMP_LIMB_BITS - shift); /* <r2,r1,r0>: the top two */
+ r1 = (np[nn-1] << shift) | (np[nn-2] >> (GMP_LIMB_BITS - shift)); /* limbs */
+ r0 = np[nn-2] << shift; /* of the numerator, shifted left by shift */
+
+ udiv_qr_3by2 (qh, r2, r1, r2, r1, r0, d1, d0, di);
+
+ for (i = nn - 2 - 1; i >= 0; i--)
+ {
+ mp_limb_t q;
+ r0 = np[i];
+ r1 |= r0 >> (GMP_LIMB_BITS - shift); /* merges the high bits of the next
+ limb into r1; this relies on the low shift bits of the
+ remainder limb r1 being zero -- presumably guaranteed because
+ the shifted numerator and divisor are both multiples of
+ 2^shift; confirm */
+ r0 <<= shift;
+ udiv_qr_3by2 (q, r2, r1, r2, r1, r0, d1, d0, di);
+ qp[i] = q;
+ }
+
+ rp[0] = (r1 >> shift) | (r2 << (GMP_LIMB_BITS - shift)); /* undo the shift */
+ rp[1] = r2 >> shift;
+
+ return qh;
+}
{
unsigned shift;
mp_size_t qn;
- mp_ptr tp, wp;
+ mp_ptr tp;
TMP_DECL;
ASSERT (dn > 0);
if (shift > 0)
{
- mp_size_t ss = (dn > qn) ? qn + 1 : dn;
+ mp_ptr wp;
+ mp_size_t ss;
+ ss = (dn > qn) ? qn + 1 : dn;
tp = TMP_ALLOC_LIMBS (ss);
mpn_rshift (tp, dp, ss, shift);
to shift one limb beyond qn. */
wp = TMP_ALLOC_LIMBS (qn + 1);
mpn_rshift (wp, np, qn + 1, shift);
- }
- else
- {
- wp = TMP_ALLOC_LIMBS (qn);
- MPN_COPY (wp, np, qn);
+ np = wp;
}
if (dn > qn)
dn = qn;
tp = TMP_ALLOC_LIMBS (mpn_bdiv_q_itch (qn, dn));
- mpn_bdiv_q (qp, wp, qn, dp, dn, tp);
+ mpn_bdiv_q (qp, np, qn, dp, dn, tp);
TMP_FREE;
}
#include "longlong.h"
-/* Determine whether {ap,an} is divisible by {dp,dn}. Must have both
+/* Determine whether A={ap,an} is divisible by D={dp,dn}. Must have both
operands normalized, meaning high limbs non-zero, except that an==0 is
allowed.
- There usually won't be many low zero bits on d, but the checks for this
+ There usually won't be many low zero bits on D, but the checks for this
are fast and might pick up a few operand combinations, in particular they
- might reduce d to fit the single-limb mod_1/modexact_1 code.
+ might reduce D to fit the single-limb mod_1/modexact_1 code.
Future:
there's no addback, but it would need a multi-precision inverse and so
might be slower than the plain method (on small sizes at least).
- When d must be normalized (shifted to high bit set), it's possible to
- just append a low zero limb to "a" rather than bit-shifting as
- mpn_tdiv_qr does internally, so long as it's already been checked that a
- has at least as many trailing zeros bits as d. Or equivalently, pass
- qxn==1 to mpn_tdiv_qr, if/when it accepts that. */
+ When D must be normalized (shifted to low bit set), it's possible to suppress
+ the bit-shifting of A down, as long as it's already been checked that A has
+ at least as many trailing zero bits as D. */
int
mpn_divisible_p (mp_srcptr ap, mp_size_t an,
TMP_MARK;
rp = TMP_ALLOC_LIMBS (an + 1);
- qp = TMP_ALLOC_LIMBS (an - dn + 1); /* FIXME: Could we avoid this */
+ qp = TMP_ALLOC_LIMBS (an - dn + 1); /* FIXME: Could we avoid this? */
count_trailing_zeros (twos, dp[0]);
/* mpn_divrem -- Divide natural numbers, producing both remainder and
- quotient. This is now just a middle layer for calling the new
- internal mpn_tdiv_qr.
+ quotient. This is now just a middle layer calling mpn_tdiv_qr.
Copyright 1993, 1994, 1995, 1996, 1997, 1999, 2000, 2001, 2002, 2005 Free
Software Foundation, Inc.
else
{
/* Most significant bit of divisor == 0. */
- int norm;
+ int cnt;
/* Skip a division if high < divisor (high quotient 0). Testing here
before normalizing will still skip as often as possible. */
&& BELOW_THRESHOLD (n, DIVREM_1_UNNORM_THRESHOLD))
goto plain;
- count_leading_zeros (norm, d);
- d <<= norm;
- r <<= norm;
+ count_leading_zeros (cnt, d);
+ d <<= cnt;
+ r <<= cnt;
if (UDIV_NEEDS_NORMALIZATION
&& BELOW_THRESHOLD (n, DIVREM_1_UNNORM_THRESHOLD))
{
+ mp_limb_t nshift;
if (un != 0)
{
n1 = up[un - 1] << GMP_NAIL_BITS;
- r |= (n1 >> (GMP_LIMB_BITS - norm));
+ r |= (n1 >> (GMP_LIMB_BITS - cnt));
for (i = un - 2; i >= 0; i--)
{
n0 = up[i] << GMP_NAIL_BITS;
- udiv_qrnnd (*qp, r, r,
- (n1 << norm) | (n0 >> (GMP_NUMB_BITS - norm)),
- d);
+ nshift = (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt));
+ udiv_qrnnd (*qp, r, r, nshift, d);
r >>= GMP_NAIL_BITS;
qp--;
n1 = n0;
}
- udiv_qrnnd (*qp, r, r, n1 << norm, d);
+ udiv_qrnnd (*qp, r, r, n1 << cnt, d);
r >>= GMP_NAIL_BITS;
qp--;
}
r >>= GMP_NAIL_BITS;
qp--;
}
- return r >> norm;
+ return r >> cnt;
}
else
{
- mp_limb_t dinv;
+ mp_limb_t dinv, nshift;
invert_limb (dinv, d);
if (un != 0)
{
n1 = up[un - 1] << GMP_NAIL_BITS;
- r |= (n1 >> (GMP_LIMB_BITS - norm));
+ r |= (n1 >> (GMP_LIMB_BITS - cnt));
for (i = un - 2; i >= 0; i--)
{
n0 = up[i] << GMP_NAIL_BITS;
- udiv_qrnnd_preinv (*qp, r, r,
- ((n1 << norm) | (n0 >> (GMP_NUMB_BITS - norm))),
- d, dinv);
+ nshift = (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt));
+ udiv_qrnnd_preinv (*qp, r, r, nshift, d, dinv);
r >>= GMP_NAIL_BITS;
qp--;
n1 = n0;
}
- udiv_qrnnd_preinv (*qp, r, r, n1 << norm, d, dinv);
+ udiv_qrnnd_preinv (*qp, r, r, n1 << cnt, d, dinv);
r >>= GMP_NAIL_BITS;
qp--;
}
r >>= GMP_NAIL_BITS;
qp--;
}
- return r >> norm;
+ return r >> cnt;
}
}
}
#include "longlong.h"
-/* The size where udiv_qrnnd_preinv should be used rather than udiv_qrnnd,
- meaning the quotient size where that should happen, the quotient size
- being how many udiv divisions will be done.
-
- The default is to use preinv always, CPUs where this doesn't suit have
- tuned thresholds. Note in particular that preinv should certainly be
- used if that's the only division available (USE_PREINV_ALWAYS). */
-
-#ifndef DIVREM_2_THRESHOLD
-#define DIVREM_2_THRESHOLD 0
-#endif
-
-
-/* Divide num (NP/NSIZE) by den (DP/2) and write
- the NSIZE-2 least significant quotient limbs at QP
- and the 2 long remainder at NP. If QEXTRA_LIMBS is
- non-zero, generate that many fraction bits and append them after the
- other quotient limbs.
- Return the most significant limb of the quotient, this is always 0 or 1.
+/* Divide num {np,nn} by den {dp,2} and write the nn-2 least significant
+ quotient limbs at qp and the 2 long remainder at np. If qxn is non-zero,
+ generate that many fraction bits and append them after the other quotient
+ limbs. Return the most significant limb of the quotient, this is always 0
+ or 1.
Preconditions:
- 0. NSIZE >= 2.
1. The most significant bit of the divisor must be set.
- 2. QP must either not overlap with the input operands at all, or
- QP + 2 >= NP must hold true. (This means that it's
- possible to put the quotient in the high part of NUM, right after the
- remainder in NUM.
- 3. NSIZE >= 2, even if QEXTRA_LIMBS is non-zero. */
+ 2. qp must either not overlap with the input operands at all, or
+ qp >= np + 2 must hold true. (This means that it's possible to put
+ the quotient in the high part of {np,nn}, right above the remainder.
+ 3. nn >= 2, even if qxn is non-zero. */
mp_limb_t
mpn_divrem_2 (mp_ptr qp, mp_size_t qxn,
mp_ptr np, mp_size_t nn,
mp_srcptr dp)
{
- mp_limb_t most_significant_q_limb = 0;
+ mp_limb_t most_significant_q_limb;
mp_size_t i;
- mp_limb_t n1, n0, n2;
- mp_limb_t d1, d0;
- mp_limb_t d1inv;
- int use_preinv;
+ mp_limb_t r1, r0, d1, d0;
+ gmp_pi1_t di;
ASSERT (nn >= 2);
ASSERT (qxn >= 0);
ASSERT (dp[1] & GMP_NUMB_HIGHBIT);
- ASSERT (! MPN_OVERLAP_P (qp, nn-2+qxn, np, nn) || qp+2 >= np);
+ ASSERT (! MPN_OVERLAP_P (qp, nn-2+qxn, np, nn) || qp >= np+2);
ASSERT_MPN (np, nn);
ASSERT_MPN (dp, 2);
np += nn - 2;
d1 = dp[1];
d0 = dp[0];
- n1 = np[1];
- n0 = np[0];
+ r1 = np[1];
+ r0 = np[0];
- if (n1 >= d1 && (n1 > d1 || n0 >= d0))
+ most_significant_q_limb = 0;
+ if (r1 >= d1 && (r1 > d1 || r0 >= d0))
{
#if GMP_NAIL_BITS == 0
- sub_ddmmss (n1, n0, n1, n0, d1, d0);
+ sub_ddmmss (r1, r0, r1, r0, d1, d0);
#else
- n0 = n0 - d0;
- n1 = n1 - d1 - (n0 >> GMP_LIMB_BITS - 1);
- n0 &= GMP_NUMB_MASK;
+ r0 = r0 - d0;
+ r1 = r1 - d1 - (r0 >> GMP_LIMB_BITS - 1);
+ r0 &= GMP_NUMB_MASK;
#endif
most_significant_q_limb = 1;
}
- use_preinv = ABOVE_THRESHOLD (qxn + nn - 2, DIVREM_2_THRESHOLD);
- if (use_preinv)
- invert_limb (d1inv, d1);
-
- for (i = qxn + nn - 2 - 1; i >= 0; i--)
- {
- mp_limb_t q;
- mp_limb_t r;
-
- if (i >= qxn)
- np--;
- else
- np[0] = 0;
+ invert_pi1 (di, d1, d0);
- if (n1 == d1)
- {
- /* Q should be either 111..111 or 111..110. Need special handling
- of this rare case as normal division would give overflow. */
- q = GMP_NUMB_MASK;
-
- r = (n0 + d1) & GMP_NUMB_MASK;
- if (r < d1) /* Carry in the addition? */
- {
-#if GMP_NAIL_BITS == 0
- add_ssaaaa (n1, n0, r - d0, np[0], 0, d0);
-#else
- n0 = np[0] + d0;
- n1 = (r - d0 + (n0 >> GMP_NUMB_BITS)) & GMP_NUMB_MASK;
- n0 &= GMP_NUMB_MASK;
-#endif
- qp[i] = q;
- continue;
- }
- n1 = d0 - (d0 != 0);
- n0 = -d0 & GMP_NUMB_MASK;
- }
- else
- {
- if (use_preinv)
- udiv_qrnnd_preinv (q, r, n1, n0, d1, d1inv);
- else
- udiv_qrnnd (q, r, n1, n0 << GMP_NAIL_BITS, d1 << GMP_NAIL_BITS);
- r >>= GMP_NAIL_BITS;
- umul_ppmm (n1, n0, d0, q << GMP_NAIL_BITS);
- n0 >>= GMP_NAIL_BITS;
- }
+ qp += qxn;
- n2 = np[0];
+ for (i = nn - 2 - 1; i >= 0; i--)
+ {
+ mp_limb_t n0, q;
+ n0 = np[-1];
+ udiv_qr_3by2 (q, r1, r0, r1, r0, n0, d1, d0, di.inv32);
+ np--;
+ qp[i] = q;
+ }
- q_test:
- if (n1 > r || (n1 == r && n0 > n2))
+ if (UNLIKELY (qxn != 0))
+ {
+ qp -= qxn;
+ for (i = qxn - 1; i >= 0; i--)
{
- /* The estimated Q was too large. */
- q--;
-
-#if GMP_NAIL_BITS == 0
- sub_ddmmss (n1, n0, n1, n0, 0, d0);
-#else
- n0 = n0 - d0;
- n1 = n1 - (n0 >> GMP_LIMB_BITS - 1);
- n0 &= GMP_NUMB_MASK;
-#endif
- r += d1;
- if (r >= d1) /* If not carry, test Q again. */
- goto q_test;
+ mp_limb_t q;
+ udiv_qr_3by2 (q, r1, r0, r1, r0, 0, d1, d0, di.inv32);
+ qp[i] = q;
}
-
- qp[i] = q;
-#if GMP_NAIL_BITS == 0
- sub_ddmmss (n1, n0, r, n2, n1, n0);
-#else
- n0 = n2 - n0;
- n1 = r - n1 - (n0 >> GMP_LIMB_BITS - 1);
- n0 &= GMP_NUMB_MASK;
-#endif
}
- np[1] = n1;
- np[0] = n0;
+
+ np[1] = r1;
+ np[0] = r0;
return most_significant_q_limb;
}
/* mpn/gcd.c: mpn_gcd for gcd of two odd integers.
Copyright 1991, 1993, 1994, 1995, 1996, 1997, 1998, 2000, 2001, 2002, 2003,
-2004, 2005, 2008 Free Software Foundation, Inc.
+2004, 2005, 2008, 2010, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
#define CHOOSE_P(n) (2*(n) / 3)
#endif
+/* Context handed to gcd_hook through its void pointer argument: gp is
+   where the gcd limbs are copied, gn receives the size of that gcd.  */
+struct gcd_ctx
+{
+ mp_ptr gp;
+ mp_size_t gn;
+};
+
+/* Hook passed to mpn_gcd_subdiv_step by mpn_gcd.  p points to a struct
+   gcd_ctx; {gp,gn} is copied to ctx->gp and gn is stored in ctx->gn.
+   The quotient {qp,qn} and d are not used here.  */
+static void
+gcd_hook (void *p, mp_srcptr gp, mp_size_t gn,
+ mp_srcptr qp, mp_size_t qn, int d)
+{
+ struct gcd_ctx *ctx = (struct gcd_ctx *) p;
+ MPN_COPY (ctx->gp, gp, gn);
+ ctx->gn = gn;
+}
+
+#if GMP_NAIL_BITS > 0
+/* Nail support should be easy to add, by replacing the sub_ddmmss calls
+ * with nails-aware logic. */
+#error Nails not supported.
+#endif
+
+/* Use binary algorithm to compute G <-- GCD (U, V) for usize, vsize == 2.
+ Both U and V must be odd.
+
+   The two limbs at up and at vp are read, low limb first.  Both gp[0] and
+   gp[1] are always written, even when the returned size is 1.  Returns
+   the size of G in limbs (1 or 2).
+
+   The loop runs only while both the high limbs and the low limbs differ.
+   If U and V are equal afterwards, U is the gcd.  Otherwise exactly one
+   pair of limbs is equal, and mpn_gcd_1 is called on the remaining U and
+   the absolute difference of the limbs that still differ, which gives a
+   one-limb result.  */
+static inline mp_size_t
+gcd_2 (mp_ptr gp, mp_srcptr up, mp_srcptr vp)
+{
+ mp_limb_t u0, u1, v0, v1;
+ mp_size_t gn;
+
+ u0 = up[0];
+ u1 = up[1];
+ v0 = vp[0];
+ v1 = vp[1];
+
+ ASSERT (u0 & 1);
+ ASSERT (v0 & 1);
+
+ /* Check for u0 != v0 needed to ensure that argument to
+ * count_trailing_zeros is non-zero. */
+ while (u1 != v1 && u0 != v0)
+ {
+ unsigned long int r;
+ if (u1 > v1)
+ {
+ sub_ddmmss (u1, u0, u1, u0, v1, v0);
+ count_trailing_zeros (r, u0);
+ u0 = ((u1 << (GMP_NUMB_BITS - r)) & GMP_NUMB_MASK) | (u0 >> r);
+ u1 >>= r;
+ }
+ else /* u1 < v1. */
+ {
+ sub_ddmmss (v1, v0, v1, v0, u1, u0);
+ count_trailing_zeros (r, v0);
+ v0 = ((v1 << (GMP_NUMB_BITS - r)) & GMP_NUMB_MASK) | (v0 >> r);
+ v1 >>= r;
+ }
+ }
+
+ gp[0] = u0, gp[1] = u1, gn = 1 + (u1 != 0);
+
+ /* If U == V == GCD, done. Otherwise, compute GCD (V, |U - V|). */
+ if (u1 == v1 && u0 == v0)
+ return gn;
+
+ v0 = (u0 == v0) ? ((u1 > v1) ? u1-v1 : v1-u1) : ((u0 > v0) ? u0-v0 : v0-u0);
+ gp[0] = mpn_gcd_1 (gp, gn, v0);
+
+ return 1;
+}
+
mp_size_t
mpn_gcd (mp_ptr gp, mp_ptr up, mp_size_t usize, mp_ptr vp, mp_size_t n)
{
mp_size_t scratch;
mp_size_t matrix_scratch;
- mp_size_t gn;
+ struct gcd_ctx ctx;
mp_ptr tp;
TMP_DECL;
+ ASSERT (usize >= n);
+ ASSERT (n > 0);
+ ASSERT (vp[n-1] > 0);
+
/* FIXME: Check for small sizes first, before setting up temporary
storage etc. */
- talloc = MPN_GCD_LEHMER_N_ITCH(n);
+ talloc = MPN_GCD_SUBDIV_STEP_ITCH(n);
/* For initial division */
scratch = usize - n + 1;
if (mpn_zero_p (up, n))
{
MPN_COPY (gp, vp, n);
- TMP_FREE;
- return n;
+ ctx.gn = n;
+ goto done;
}
}
+ ctx.gp = gp;
+
#if TUNE_GCD_P
while (CHOOSE_P (n) > 0)
#else
else
{
/* Temporary storage n */
- n = mpn_gcd_subdiv_step (gp, &gn, up, vp, n, tp);
+ n = mpn_gcd_subdiv_step (up, vp, n, 0, gcd_hook, &ctx, tp);
if (n == 0)
- {
- TMP_FREE;
- return gn;
- }
+ goto done;
}
}
- gn = mpn_gcd_lehmer_n (gp, up, vp, n, tp);
- TMP_FREE;
- return gn;
-}
-
-#ifdef TUNE_GCD_P
-#include <stdio.h>
-#include <string.h>
-#include <time.h>
-#include "speed.h"
-
-static int
-compare_double(const void *ap, const void *bp)
-{
- double a = * (const double *) ap;
- double b = * (const double *) bp;
-
- if (a < b)
- return -1;
- else if (a > b)
- return 1;
- else
- return 0;
-}
-
-static double
-median (double *v, size_t n)
-{
- qsort(v, n, sizeof(*v), compare_double);
-
- return v[n/2];
-}
-
-#define TIME(res, code) do { \
- double time_measurement[5]; \
- unsigned time_i; \
- \
- for (time_i = 0; time_i < 5; time_i++) \
- { \
- speed_starttime(); \
- code; \
- time_measurement[time_i] = speed_endtime(); \
- } \
- res = median(time_measurement, 5); \
-} while (0)
-
-int
-main(int argc, char *argv)
-{
- gmp_randstate_t rands;
- mp_size_t n;
- mp_ptr ap;
- mp_ptr bp;
- mp_ptr up;
- mp_ptr vp;
- mp_ptr gp;
- mp_ptr tp;
- TMP_DECL;
+ while (n > 2)
+ {
+ struct hgcd_matrix1 M;
+ mp_limb_t uh, ul, vh, vl;
+ mp_limb_t mask;
- /* Unbuffered so if output is redirected to a file it isn't lost if the
- program is killed part way through. */
- setbuf (stdout, NULL);
- setbuf (stderr, NULL);
+ mask = up[n-1] | vp[n-1];
+ ASSERT (mask > 0);
- gmp_randinit_default (rands);
+ if (mask & GMP_NUMB_HIGHBIT)
+ {
+ uh = up[n-1]; ul = up[n-2];
+ vh = vp[n-1]; vl = vp[n-2];
+ }
+ else
+ {
+ int shift;
- TMP_MARK;
+ count_leading_zeros (shift, mask);
+ uh = MPN_EXTRACT_NUMB (shift, up[n-1], up[n-2]);
+ ul = MPN_EXTRACT_NUMB (shift, up[n-2], up[n-3]);
+ vh = MPN_EXTRACT_NUMB (shift, vp[n-1], vp[n-2]);
+ vl = MPN_EXTRACT_NUMB (shift, vp[n-2], vp[n-3]);
+ }
- ap = TMP_ALLOC_LIMBS (P_TABLE_SIZE);
- bp = TMP_ALLOC_LIMBS (P_TABLE_SIZE);
- up = TMP_ALLOC_LIMBS (P_TABLE_SIZE);
- vp = TMP_ALLOC_LIMBS (P_TABLE_SIZE);
- gp = TMP_ALLOC_LIMBS (P_TABLE_SIZE);
- tp = TMP_ALLOC_LIMBS (MPN_GCD_LEHMER_N_ITCH (P_TABLE_SIZE));
+ /* Try an mpn_hgcd2 step */
+ if (mpn_hgcd2 (uh, ul, vh, vl, &M))
+ {
+ n = mpn_matrix22_mul1_inverse_vector (&M, tp, up, vp, n);
+ MP_PTR_SWAP (up, tp);
+ }
+ else
+ {
+ /* mpn_hgcd2 has failed. Then either one of u or v is very
+ small, or the difference is very small. Perform one
+ subtraction followed by one division. */
- mpn_random (ap, P_TABLE_SIZE);
- mpn_random (bp, P_TABLE_SIZE);
+ /* Temporary storage n */
+ n = mpn_gcd_subdiv_step (up, vp, n, 0, &gcd_hook, &ctx, tp);
+ if (n == 0)
+ goto done;
+ }
+ }
- memset (p_table, 0, sizeof(p_table));
+ ASSERT(up[n-1] | vp[n-1]);
- for (n = 100; n++; n < P_TABLE_SIZE)
+ if (n == 1)
{
- mp_size_t p;
- mp_size_t best_p;
- double best_time;
- double lehmer_time;
-
- if (ap[n-1] == 0)
- ap[n-1] = 1;
-
- if (bp[n-1] == 0)
- bp[n-1] = 1;
-
- p_table[n] = 0;
- TIME(lehmer_time, {
- MPN_COPY (up, ap, n);
- MPN_COPY (vp, bp, n);
- mpn_gcd_lehmer_n (gp, up, vp, n, tp);
- });
+ *gp = mpn_gcd_1(up, 1, vp[0]);
+ ctx.gn = 1;
+ goto done;
+ }
- best_time = lehmer_time;
- best_p = 0;
+ /* Due to the calling convention for mpn_gcd, at most one of u and v
+ can be even. */
- for (p = n * 0.48; p < n * 0.77; p++)
- {
- double t;
+ if (! (up[0] & 1))
+ MP_PTR_SWAP (up, vp);
- p_table[n] = p;
+ ASSERT (up[0] & 1);
- TIME(t, {
- MPN_COPY (up, ap, n);
- MPN_COPY (vp, bp, n);
- mpn_gcd (gp, up, n, vp, n);
- });
+ if (vp[0] == 0)
+ {
+ *gp = mpn_gcd_1 (up, 2, vp[1]);
+ ctx.gn = 1;
+ goto done;
+ }
+ else if (! (vp[0] & 1))
+ {
+ int r;
+ count_trailing_zeros (r, vp[0]);
+ vp[0] = ((vp[1] << (GMP_NUMB_BITS - r)) & GMP_NUMB_MASK) | (vp[0] >> r);
+ vp[1] >>= r;
+ }
- if (t < best_time)
- {
- best_time = t;
- best_p = p;
- }
- }
- printf("%6d %6d %5.3g", n, best_p, (double) best_p / n);
- if (best_p > 0)
- {
- double speedup = 100 * (lehmer_time - best_time) / lehmer_time;
- printf(" %5.3g%%", speedup);
- if (speedup < 1.0)
- {
- printf(" (ignored)");
- best_p = 0;
- }
- }
- printf("\n");
+ ctx.gn = gcd_2(gp, up, vp);
- p_table[n] = best_p;
- }
+done:
TMP_FREE;
- gmp_randclear(rands);
- return 0;
+ return ctx.gn;
}
-#endif /* TUNE_GCD_P */
/* mpn_gcd_1 -- mpn and limb greatest common divisor.
-Copyright 1994, 1996, 2000, 2001 Free Software Foundation, Inc.
+Copyright 1994, 1996, 2000, 2001, 2009, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
#define USE_ZEROTAB 0
#if USE_ZEROTAB
-static const unsigned char zerotab[16] = {
+#define MAXSHIFT 4
+#define MASK ((1 << MAXSHIFT) - 1)
+static const unsigned char zerotab[1 << MAXSHIFT] =
+{
+#if MAXSHIFT > 4
+ 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+#endif
4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
};
#endif
while (ulimb != vlimb)
{
int c;
- mp_limb_t t = ulimb - vlimb;
- mp_limb_t vgtu = LIMB_HIGHBIT_TO_MASK (t);
+ mp_limb_t t;
+ mp_limb_t vgtu;
+
+ t = ulimb - vlimb;
+ vgtu = LIMB_HIGHBIT_TO_MASK (t);
/* v <-- min (u, v) */
vlimb += (vgtu & t);
#if USE_ZEROTAB
/* Number of trailing zeros is the same no matter if we look at
* t or ulimb, but using t gives more parallelism. */
- c = zerotab[t & 15];
+ c = zerotab[t & MASK];
- while (UNLIKELY (c == 4))
+ while (UNLIKELY (c == MAXSHIFT))
{
- ulimb >>= 4;
+ ulimb >>= MAXSHIFT;
if (0)
strip_u_maybe:
vlimb >>= 1;
- c = zerotab[ulimb & 15];
+ c = zerotab[ulimb & MASK];
}
#else
if (0)
+++ /dev/null
-/* gcd_lehmer.c.
-
- THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
- SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
- GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2003, 2004, 2005, 2008 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "longlong.h"
-
-/* Use binary algorithm to compute G <-- GCD (U, V) for usize, vsize == 2.
- Both U and V must be odd. */
-static inline mp_size_t
-gcd_2 (mp_ptr gp, mp_srcptr up, mp_srcptr vp)
-{
- mp_limb_t u0, u1, v0, v1;
- mp_size_t gn;
-
- u0 = up[0];
- u1 = up[1];
- v0 = vp[0];
- v1 = vp[1];
-
- ASSERT (u0 & 1);
- ASSERT (v0 & 1);
-
- /* Check for u0 != v0 needed to ensure that argument to
- * count_trailing_zeros is non-zero. */
- while (u1 != v1 && u0 != v0)
- {
- unsigned long int r;
- if (u1 > v1)
- {
- u1 -= v1 + (u0 < v0);
- u0 = (u0 - v0) & GMP_NUMB_MASK;
- count_trailing_zeros (r, u0);
- u0 = ((u1 << (GMP_NUMB_BITS - r)) & GMP_NUMB_MASK) | (u0 >> r);
- u1 >>= r;
- }
- else /* u1 < v1. */
- {
- v1 -= u1 + (v0 < u0);
- v0 = (v0 - u0) & GMP_NUMB_MASK;
- count_trailing_zeros (r, v0);
- v0 = ((v1 << (GMP_NUMB_BITS - r)) & GMP_NUMB_MASK) | (v0 >> r);
- v1 >>= r;
- }
- }
-
- gp[0] = u0, gp[1] = u1, gn = 1 + (u1 != 0);
-
- /* If U == V == GCD, done. Otherwise, compute GCD (V, |U - V|). */
- if (u1 == v1 && u0 == v0)
- return gn;
-
- v0 = (u0 == v0) ? ((u1 > v1) ? u1-v1 : v1-u1) : ((u0 > v0) ? u0-v0 : v0-u0);
- gp[0] = mpn_gcd_1 (gp, gn, v0);
-
- return 1;
-}
-
-/* Temporary storage: n */
-mp_size_t
-mpn_gcd_lehmer_n (mp_ptr gp, mp_ptr ap, mp_ptr bp, mp_size_t n, mp_ptr tp)
-{
- /* Relax this requirement, and normalize at the start? Must disallow
- A = B = 0, though. */
- ASSERT(ap[n-1] > 0 || bp[n-1] > 0);
-
- while (n > 2)
- {
- struct hgcd_matrix1 M;
- mp_limb_t ah, al, bh, bl;
- mp_limb_t mask;
-
- mask = ap[n-1] | bp[n-1];
- ASSERT (mask > 0);
-
- if (mask & GMP_NUMB_HIGHBIT)
- {
- ah = ap[n-1]; al = ap[n-2];
- bh = bp[n-1]; bl = bp[n-2];
- }
- else
- {
- int shift;
-
- count_leading_zeros (shift, mask);
- ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]);
- al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]);
- bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]);
- bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]);
- }
-
- /* Try an mpn_nhgcd2 step */
- if (mpn_hgcd2 (ah, al, bh, bl, &M))
- {
- n = mpn_hgcd_mul_matrix1_inverse_vector (&M, tp, ap, bp, n);
- MP_PTR_SWAP (ap, tp);
- }
- else
- {
- /* mpn_hgcd2 has failed. Then either one of a or b is very
- small, or the difference is very small. Perform one
- subtraction followed by one division. */
- mp_size_t gn;
-
- /* Temporary storage n */
- n = mpn_gcd_subdiv_step (gp, &gn, ap, bp, n, tp);
- if (n == 0)
- return gn;
- }
- }
-
- if (n == 1)
- {
- *gp = mpn_gcd_1(ap, 1, bp[0]);
- return 1;
- }
-
- /* Due to the calling convention for mpn_gcd, at most one can be
- even. */
-
- if (! (ap[0] & 1))
- MP_PTR_SWAP (ap, bp);
-
- ASSERT (ap[0] & 1);
-
- if (bp[0] == 0)
- {
- *gp = mpn_gcd_1 (ap, 2, bp[1]);
- return 1;
- }
- else if (! (bp[0] & 1))
- {
- int r;
- count_trailing_zeros (r, bp[0]);
- bp[0] = ((bp[1] << (GMP_NUMB_BITS - r)) & GMP_NUMB_MASK) | (bp[0] >> r);
- bp[1] >>= r;
- }
-
- return gcd_2(gp, ap, bp);
-}
SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-Copyright 2003, 2004, 2005, 2008 Free Software Foundation, Inc.
+Copyright 2003, 2004, 2005, 2008, 2010, 2011 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+#include <stdlib.h> /* for NULL */
+
#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"
/* Used when mpn_hgcd or mpn_hgcd2 has failed. Then either one of a or
b is small, or the difference is small. Perform one subtraction
- followed by one division. If the gcd is found, stores it in gp and
- *gn, and returns zero. Otherwise, compute the reduced a and b, and
- return the new size. */
+ followed by one division. The normal case is to compute the reduced
+ a and b, and return the new size.
+
+ If s == 0 (used for gcd and gcdext), returns zero if the gcd is
+ found.
+
+ If s > 0, don't reduce to size <= s, and return zero if no
+ reduction is possible (if either a, b or |a-b| is of size <= s). */
+
+/* The hook function is called as
+
+ hook(ctx, gp, gn, qp, qn, d)
+
+ in the following cases:
+
+ + If A = B at the start, G is the gcd, Q is NULL, d = -1.
+
+ + If one input is zero at the start, G is the gcd, Q is NULL,
+ d = 0 if A = G and d = 1 if B = G.
+
+ Otherwise, if d = 0 we have just subtracted a multiple of A from B,
+ and if d = 1 we have subtracted a multiple of B from A.
+
+ + If A = B after subtraction, G is the gcd, Q is NULL.
+
+ + If we get a zero remainder after division, G is the gcd, Q is the
+ quotient.
+
+ + Otherwise, G is NULL, Q is the quotient (often 1).
+
+ */
-/* FIXME: Check when the smaller number is a single limb, and invoke
- * mpn_gcd_1. */
mp_size_t
-mpn_gcd_subdiv_step (mp_ptr gp, mp_size_t *gn,
- mp_ptr ap, mp_ptr bp, mp_size_t n, mp_ptr tp)
+mpn_gcd_subdiv_step (mp_ptr ap, mp_ptr bp, mp_size_t n, mp_size_t s,
+ gcd_subdiv_step_hook *hook, void *ctx,
+ mp_ptr tp)
{
- mp_size_t an, bn;
+ static const mp_limb_t one = CNST_LIMB(1);
+ mp_size_t an, bn, qn;
+
+ int swapped;
ASSERT (n > 0);
ASSERT (ap[n-1] > 0 || bp[n-1] > 0);
MPN_NORMALIZE (ap, an);
MPN_NORMALIZE (bp, bn);
- if (UNLIKELY (an == 0))
+ swapped = 0;
+
+ /* Arrange so that a < b, subtract b -= a, and maintain
+ normalization. */
+ if (an == bn)
{
- return_b:
- MPN_COPY (gp, bp, bn);
- *gn = bn;
- return 0;
+ int c;
+ MPN_CMP (c, ap, bp, an);
+ if (UNLIKELY (c == 0))
+ {
+ /* For gcdext, return the smallest of the two cofactors, so
+ pass d = -1. */
+ if (s == 0)
+ hook (ctx, ap, an, NULL, 0, -1);
+ return 0;
+ }
+ else if (c > 0)
+ {
+ MP_PTR_SWAP (ap, bp);
+ swapped ^= 1;
+ }
}
- else if (UNLIKELY (bn == 0))
+ else
{
- return_a:
- MPN_COPY (gp, ap, an);
- *gn = an;
+ if (an > bn)
+ {
+ MPN_PTR_SWAP (ap, an, bp, bn);
+ swapped ^= 1;
+ }
+ }
+ if (an <= s)
+ {
+ if (s == 0)
+ hook (ctx, bp, bn, NULL, 0, swapped ^ 1);
return 0;
}
- /* Arrange so that a > b, subtract an -= bn, and maintain
- normalization. */
- if (an < bn)
- MPN_PTR_SWAP (ap, an, bp, bn);
- else if (an == bn)
+ ASSERT_NOCARRY (mpn_sub (bp, bp, bn, ap, an));
+ MPN_NORMALIZE (bp, bn);
+ ASSERT (bn > 0);
+
+ if (bn <= s)
{
- int c;
- MPN_CMP (c, ap, bp, an);
- if (UNLIKELY (c == 0))
- goto return_a;
- else if (c < 0)
- MP_PTR_SWAP (ap, bp);
+ /* Undo subtraction. */
+ mp_limb_t cy = mpn_add (bp, ap, an, bp, bn);
+ if (cy > 0)
+ bp[an] = cy;
+ return 0;
}
- ASSERT_NOCARRY (mpn_sub (ap, ap, an, bp, bn));
- MPN_NORMALIZE (ap, an);
- ASSERT (an > 0);
-
- /* Arrange so that a > b, and divide a = q b + r */
- /* FIXME: an < bn happens when we have cancellation. If that is the
- common case, then we could reverse the roles of a and b to avoid
- the swap. */
- if (an < bn)
- MPN_PTR_SWAP (ap, an, bp, bn);
- else if (an == bn)
+ /* Arrange so that a < b */
+ if (an == bn)
{
int c;
MPN_CMP (c, ap, bp, an);
if (UNLIKELY (c == 0))
- goto return_a;
- else if (c < 0)
- MP_PTR_SWAP (ap, bp);
+ {
+ if (s > 0)
+ /* Just record subtraction and return */
+ hook (ctx, NULL, 0, &one, 1, swapped);
+ else
+ /* Found gcd. */
+ hook (ctx, bp, bn, NULL, 0, swapped);
+ return 0;
+ }
+
+ hook (ctx, NULL, 0, &one, 1, swapped);
+
+ if (c > 0)
+ {
+ MP_PTR_SWAP (ap, bp);
+ swapped ^= 1;
+ }
}
+ else
+ {
+ hook (ctx, NULL, 0, &one, 1, swapped);
- mpn_tdiv_qr (tp, ap, 0, ap, an, bp, bn);
+ if (an > bn)
+ {
+ MPN_PTR_SWAP (ap, an, bp, bn);
+ swapped ^= 1;
+ }
+ }
- if (mpn_zero_p (ap, bn))
- goto return_b;
+ mpn_tdiv_qr (tp, bp, 0, bp, bn, ap, an);
+ qn = bn - an + 1;
+ bn = an;
+ MPN_NORMALIZE (bp, bn);
+
+ if (UNLIKELY (bn <= s))
+ {
+ if (s == 0)
+ {
+ hook (ctx, ap, an, tp, qn, swapped);
+ return 0;
+ }
+
+ /* Quotient is one too large, so decrement it and add back A. */
+ if (bn > 0)
+ {
+ mp_limb_t cy = mpn_add (bp, ap, an, bp, bn);
+ if (cy)
+ bp[an++] = cy;
+ }
+ else
+ MPN_COPY (bp, ap, an);
+
+ MPN_DECR_U (tp, qn, 1);
+ }
- return bn;
+ hook (ctx, NULL, 0, tp, qn, swapped);
+ return an;
}
/* mpn_gcdext -- Extended Greatest Common Divisor.
-Copyright 1996, 1998, 2000, 2001, 2002, 2003, 2004, 2005, 2008, 2009 Free Software
-Foundation, Inc.
+Copyright 1996, 1998, 2000, 2001, 2002, 2003, 2004, 2005, 2008, 2009, 2012 Free
+Software Foundation, Inc.
This file is part of the GNU MP Library.
return n;
}
-#define COMPUTE_V_ITCH(n) (2*(n) + 1)
+#define COMPUTE_V_ITCH(n) (2*(n))
/* Computes |v| = |(g - u a)| / b, where u may be positive or
- negative, and v is of the opposite sign. a, b are of size n, u and
+ negative, and v is of the opposite sign. max(a, b) is of size n, u and
v at most size n, and v must have space for n+1 limbs. */
static mp_size_t
compute_v (mp_ptr vp,
size = ABS (usize);
ASSERT (size <= n);
+ ASSERT (up[size-1] > 0);
an = n;
MPN_NORMALIZE (ap, an);
+ ASSERT (gn <= an);
if (an >= size)
mpn_mul (tp, ap, an, up, size);
mpn_mul (tp, up, size, ap, an);
size += an;
- size -= tp[size - 1] == 0;
-
- ASSERT (gn <= size);
if (usize > 0)
{
return 0;
}
else
- { /* usize < 0 */
- /* |v| = v = (c - u a) / b = (c + |u| a) / b */
- mp_limb_t cy = mpn_add (tp, tp, size, gp, gn);
- if (cy)
- tp[size++] = cy;
+ { /* |v| = v = (g - u a) / b = (g + |u| a) / b. Since g <= a,
+ (g + |u| a) always fits in (|usize| + an) limbs. */
+
+ ASSERT_NOCARRY (mpn_add (tp, tp, size, gp, gn));
+ size -= (tp[size - 1] == 0);
}
/* Now divide t / b. There must be no remainder */
For the lehmer call after the loop, Let T denote
GCDEXT_DC_THRESHOLD. For the gcdext_lehmer call, we need T each for
u, a and b, and 4T+3 scratch space. Next, for compute_v, we need T
- for u, T+1 for v and 2T + 1 scratch space. In all, 7T + 3 is
+ for u, T+1 for v and 2T scratch space. In all, 7T + 3 is
sufficient for both operations.
*/
mp_size_t matrix_scratch;
mp_size_t ualloc = n + 1;
+ struct gcdext_ctx ctx;
mp_size_t un;
mp_ptr u0;
mp_ptr u1;
ASSERT (an >= n);
ASSERT (n > 0);
+ ASSERT (bp[n-1] > 0);
TMP_MARK;
u0 = tp; tp += ualloc;
u1 = tp; tp += ualloc;
+ ctx.gp = gp;
+ ctx.up = up;
+ ctx.usize = usizep;
+
{
/* For the first hgcd call, there are no u updates, and it makes
some sense to use a different choice for p. */
/* mpn_hgcd has failed. Then either one of a or b is very
small, or the difference is very small. Perform one
subtraction followed by one division. */
- mp_size_t gn;
- mp_size_t updated_un = 1;
-
u1[0] = 1;
- /* Temporary storage 2n + 1 */
- n = mpn_gcdext_subdiv_step (gp, &gn, up, usizep, ap, bp, n,
- u0, u1, &updated_un, tp, tp + n);
+ ctx.u0 = u0;
+ ctx.u1 = u1;
+ ctx.tp = tp + n; /* ualloc */
+ ctx.un = 1;
+
+ /* Temporary storage n */
+ n = mpn_gcd_subdiv_step (ap, bp, n, 0, mpn_gcdext_hook, &ctx, tp);
if (n == 0)
{
TMP_FREE;
- return gn;
+ return ctx.gn;
}
- un = updated_un;
+ un = ctx.un;
ASSERT (un < ualloc);
}
}
/* mpn_hgcd has failed. Then either one of a or b is very
small, or the difference is very small. Perform one
subtraction followed by one division. */
- mp_size_t gn;
- mp_size_t updated_un = un;
+ ctx.u0 = u0;
+ ctx.u1 = u1;
+ ctx.tp = tp + n; /* ualloc */
+ ctx.un = un;
- /* Temporary storage 2n + 1 */
- n = mpn_gcdext_subdiv_step (gp, &gn, up, usizep, ap, bp, n,
- u0, u1, &updated_un, tp, tp + n);
+ /* Temporary storage n */
+ n = mpn_gcd_subdiv_step (ap, bp, n, 0, mpn_gcdext_hook, &ctx, tp);
if (n == 0)
{
TMP_FREE;
- return gn;
+ return ctx.gn;
}
- un = updated_un;
+ un = ctx.un;
ASSERT (un < ualloc);
}
}
+ /* We have A = ... a + ... b
+ B = u0 a + u1 b
+
+ a = u1 A + ... B
+ b = -u0 A + ... B
+
+ with bounds
+
+ |u0|, |u1| <= B / min(a, b)
+
+ We always have u1 > 0, and u0 == 0 is possible only if u1 == 1,
+ in which case the only reduction done so far is a = A - k B for
+ some k.
+
+ Compute g = u a + v b = (u u1 - v u0) A + (...) B
+ Here, u, v are bounded by
+
+ |u| <= b,
+ |v| <= a
+ */
+
+ ASSERT ( (ap[n-1] | bp[n-1]) > 0);
if (UNLIKELY (mpn_cmp (ap, bp, n) == 0))
{
TMP_FREE;
return n;
}
- else if (mpn_zero_p (u0, un))
+ else if (UNLIKELY (u0[0] == 0) && un == 1)
{
mp_size_t gn;
- ASSERT (un == 1);
ASSERT (u1[0] == 1);
/* g = u a + v b = (u u1 - v u0) A + (...) B = u A + (...) B */
}
else
{
- /* We have A = ... a + ... b
- B = u0 a + u1 b
-
- a = u1 A + ... B
- b = -u0 A + ... B
-
- with bounds
-
- |u0|, |u1| <= B / min(a, b)
-
- Compute g = u a + v b = (u u1 - v u0) A + (...) B
- Here, u, v are bounded by
-
- |u| <= b,
- |v| <= a
- */
-
mp_size_t u0n;
mp_size_t u1n;
mp_size_t lehmer_un;
u0n = un;
MPN_NORMALIZE (u0, u0n);
+ ASSERT (u0n > 0);
+
if (lehmer_un == 0)
{
/* u == 0 ==> v = g / b == 1 ==> g = - u0 A + (...) B */
u1n = un;
MPN_NORMALIZE (u1, u1n);
-
- /* It's possible that u0 = 1, u1 = 0 */
- if (u1n == 0)
- {
- ASSERT (un == 1);
- ASSERT (u0[0] == 1);
-
- /* u1 == 0 ==> u u1 + v u0 = v */
- MPN_COPY (up, lehmer_vp, lehmer_vn);
- *usizep = negate ? lehmer_vn : - lehmer_vn;
-
- TMP_FREE;
- return gn;
- }
+ ASSERT (u1n > 0);
ASSERT (lehmer_un + u1n <= ualloc);
ASSERT (lehmer_vn + u0n <= ualloc);
- /* Now u0, u1, u are non-zero. We may still have v == 0 */
+ /* We may still have v == 0 */
/* Compute u u0 */
if (lehmer_un <= u1n)
/* mpn_gcdext -- Extended Greatest Common Divisor.
-Copyright 1996, 1998, 2000, 2001, 2002, 2003, 2004, 2005, 2008, 2009 Free Software
-Foundation, Inc.
+Copyright 1996, 1998, 2000, 2001, 2002, 2003, 2004, 2005, 2008, 2009, 2012 Free
+Software Foundation, Inc.
This file is part of the GNU MP Library.
#include "gmp-impl.h"
#include "longlong.h"
-/* Temporary storage: 3*(n+1) for u. n+1 for the matrix-vector
- multiplications (if hgcd2 succeeds). If hgcd fails, n+1 limbs are
- needed for the division, with most n for the quotient, and n+1 for
- the product q u0. In all, 4n + 3. */
+/* Here, d is the index of the cofactor to update. FIXME: Could use qn
+ = 0 for the common case q = 1.
+
+   Hook passed to mpn_gcd_subdiv_step; p points to a struct gcdext_ctx.
+
+   When gp is non-NULL the gcd has been found: {gp,gn} is copied to
+   ctx->gp, gn is stored in ctx->gn, and one cofactor is normalized and
+   copied to ctx->up with its signed size stored through ctx->usize (u0
+   with a negative size if d is non-zero, u1 with a positive size
+   otherwise).  For d < 0 the two cofactors are compared first and the
+   smaller one is selected, u1 being used when they are equal.
+
+   When gp is NULL, {qp,qn} is a quotient and the cofactors are updated
+   as u0 += q * u1, with the roles of u0 and u1 exchanged when d is
+   non-zero.  The carry limb is stored at index un of the updated
+   cofactor, and ctx->un is set to un plus one if that carry is non-zero.
+   ctx->tp is used as scratch for the product when q has more than one
+   limb; in that case nothing is updated if u1 is zero.  */
+void
+mpn_gcdext_hook (void *p, mp_srcptr gp, mp_size_t gn,
+ mp_srcptr qp, mp_size_t qn, int d)
+{
+ struct gcdext_ctx *ctx = (struct gcdext_ctx *) p;
+ mp_size_t un = ctx->un;
+
+ if (gp)
+ {
+ mp_srcptr up;
+
+ ASSERT (gn > 0);
+ ASSERT (gp[gn-1] > 0);
+
+ MPN_COPY (ctx->gp, gp, gn);
+ ctx->gn = gn;
+
+ if (d < 0)
+ {
+ int c;
+
+ /* Must return the smallest cofactor, +u1 or -u0 */
+ MPN_CMP (c, ctx->u0, ctx->u1, un);
+ ASSERT (c != 0 || (un == 1 && ctx->u0[0] == 1 && ctx->u1[0] == 1));
+
+ d = c < 0;
+ }
+
+ up = d ? ctx->u0 : ctx->u1;
+
+ MPN_NORMALIZE (up, un);
+ MPN_COPY (ctx->up, up, un);
+
+ *ctx->usize = d ? -un : un;
+ }
+ else
+ {
+ mp_limb_t cy;
+ mp_ptr u0 = ctx->u0;
+ mp_ptr u1 = ctx->u1;
+
+ ASSERT (d >= 0);
+
+ if (d)
+ MP_PTR_SWAP (u0, u1);
+
+ qn -= (qp[qn-1] == 0);
+
+ /* Update u0 += q * u1 */
+ if (qn == 1)
+ {
+ mp_limb_t q = qp[0];
+
+ if (q == 1)
+ /* A common case. */
+ cy = mpn_add_n (u0, u0, u1, un);
+ else
+ cy = mpn_addmul_1 (u0, u1, un, q);
+ }
+ else
+ {
+ mp_size_t u1n;
+ mp_ptr tp;
+
+ u1n = un;
+ MPN_NORMALIZE (u1, u1n);
+
+ if (u1n == 0)
+ return;
+
+ /* Should always have u1n == un here, and u1 >= u0. The
+ reason is that we alternate adding u0 to u1 and u1 to u0
+ (corresponding to subtractions a - b and b - a), and we
+ can get a large quotient only just after a switch, which
+ means that we'll add (a multiple of) the larger u to the
+ smaller. */
+
+ tp = ctx->tp;
+
+ if (qn > u1n)
+ mpn_mul (tp, qp, qn, u1, u1n);
+ else
+ mpn_mul (tp, u1, u1n, qp, qn);
+
+ u1n += qn;
+ u1n -= tp[u1n-1] == 0;
+
+ if (u1n >= un)
+ {
+ cy = mpn_add (u0, tp, u1n, u0, un);
+ un = u1n;
+ }
+ else
+ /* Note: Unlikely case, maybe never happens? */
+ cy = mpn_add (u0, u0, un, tp, u1n);
+
+ }
+ u0[un] = cy;
+ ctx->un = un + (cy > 0);
+ }
+}
+
+/* Temporary storage: 3*(n+1) for u. If hgcd2 succeeds, we need n for
+ the matrix-vector multiplication adjusting a, b. If hgcd2 fails, we
+ need at most n for the quotient and n+1 for the u update (reusing
+ the extra u). In all, 4n + 3. */
mp_size_t
mpn_gcdext_lehmer_n (mp_ptr gp, mp_ptr up, mp_size_t *usize,
* which correspond to the first column of the inverse
*
* M^{-1} = (u1, -v1; -u0, v0)
+ *
+ * This implies that
+ *
+ * a = u1 A (mod B)
+ * b = -u0 A (mod B)
+ *
+ * where A, B denotes the input values.
*/
+ struct gcdext_ctx ctx;
mp_size_t un;
mp_ptr u0;
mp_ptr u1;
u1[0] = 1; un = 1;
+ ctx.gp = gp;
+ ctx.up = up;
+ ctx.usize = usize;
+
/* FIXME: Handle n == 2 differently, after the loop? */
while (n >= 2)
{
/* Try an mpn_nhgcd2 step */
if (mpn_hgcd2 (ah, al, bh, bl, &M))
{
- n = mpn_hgcd_mul_matrix1_inverse_vector (&M, tp, ap, bp, n);
+ n = mpn_matrix22_mul1_inverse_vector (&M, tp, ap, bp, n);
MP_PTR_SWAP (ap, tp);
un = mpn_hgcd_mul_matrix1_vector(&M, u2, u0, u1, un);
MP_PTR_SWAP (u0, u2);
/* mpn_hgcd2 has failed. Then either one of a or b is very
small, or the difference is very small. Perform one
subtraction followed by one division. */
- mp_size_t gn;
- mp_size_t updated_un = un;
+ ctx.u0 = u0;
+ ctx.u1 = u1;
+ ctx.tp = u2;
+ ctx.un = un;
/* Temporary storage n for the quotient and ualloc for the
new cofactor. */
- n = mpn_gcdext_subdiv_step (gp, &gn, up, usize, ap, bp, n,
- u0, u1, &updated_un, tp, u2);
+ n = mpn_gcd_subdiv_step (ap, bp, n, 0, mpn_gcdext_hook, &ctx, tp);
if (n == 0)
- return gn;
+ return ctx.gn;
- un = updated_un;
+ un = ctx.un;
}
}
ASSERT_ALWAYS (ap[0] > 0);
+++ /dev/null
-/* gcdext_subdiv_step.c.
-
- THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
- SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
- GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-
-Copyright 2003, 2004, 2005, 2008, 2009 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "longlong.h"
-
-/* Used when mpn_hgcd or mpn_hgcd2 has failed. Then either one of a or
- b is small, or the difference is small. Perform one subtraction
- followed by one division. If the gcd is found, stores it in gp and
- *gn, and returns zero. Otherwise, compute the reduced a and b,
- return the new size, and cofactors. */
-
-/* Temporary storage: Needs n limbs for the quotient, at qp. tp must
- point to an area large enough for the resulting cofactor, plus one
- limb extra. All in all, 2N + 1 if N is a bound for both inputs and
- outputs. */
-mp_size_t
-mpn_gcdext_subdiv_step (mp_ptr gp, mp_size_t *gn, mp_ptr up, mp_size_t *usizep,
- mp_ptr ap, mp_ptr bp, mp_size_t n,
- mp_ptr u0, mp_ptr u1, mp_size_t *unp,
- mp_ptr qp, mp_ptr tp)
-{
- mp_size_t an, bn, un;
- mp_size_t qn;
- mp_size_t u0n;
-
- int swapped;
-
- an = bn = n;
-
- ASSERT (an > 0);
- ASSERT (ap[an-1] > 0 || bp[an-1] > 0);
-
- MPN_NORMALIZE (ap, an);
- MPN_NORMALIZE (bp, bn);
-
- un = *unp;
-
- swapped = 0;
-
- if (UNLIKELY (an == 0))
- {
- return_b:
- MPN_COPY (gp, bp, bn);
- *gn = bn;
-
- MPN_NORMALIZE (u0, un);
- MPN_COPY (up, u0, un);
-
- *usizep = swapped ? un : -un;
-
- return 0;
- }
- else if (UNLIKELY (bn == 0))
- {
- MPN_COPY (gp, ap, an);
- *gn = an;
-
- MPN_NORMALIZE (u1, un);
- MPN_COPY (up, u1, un);
-
- *usizep = swapped ? -un : un;
-
- return 0;
- }
-
- /* Arrange so that a > b, subtract an -= bn, and maintain
- normalization. */
- if (an < bn)
- {
- MPN_PTR_SWAP (ap, an, bp, bn);
- MP_PTR_SWAP (u0, u1);
- swapped ^= 1;
- }
- else if (an == bn)
- {
- int c;
- MPN_CMP (c, ap, bp, an);
- if (UNLIKELY (c == 0))
- {
- MPN_COPY (gp, ap, an);
- *gn = an;
-
- /* Must return the smallest cofactor, +u1 or -u0 */
- MPN_CMP (c, u0, u1, un);
- ASSERT (c != 0 || (un == 1 && u0[0] == 1 && u1[0] == 1));
-
- if (c < 0)
- {
- MPN_NORMALIZE (u0, un);
- MPN_COPY (up, u0, un);
- swapped ^= 1;
- }
- else
- {
- MPN_NORMALIZE_NOT_ZERO (u1, un);
- MPN_COPY (up, u1, un);
- }
-
- *usizep = swapped ? -un : un;
- return 0;
- }
- else if (c < 0)
- {
- MP_PTR_SWAP (ap, bp);
- MP_PTR_SWAP (u0, u1);
- swapped ^= 1;
- }
- }
- /* Reduce a -= b, u1 += u0 */
- ASSERT_NOCARRY (mpn_sub (ap, ap, an, bp, bn));
- MPN_NORMALIZE (ap, an);
- ASSERT (an > 0);
-
- u1[un] = mpn_add_n (u1, u1, u0, un);
- un += (u1[un] > 0);
-
- /* Arrange so that a > b, and divide a = q b + r */
- if (an < bn)
- {
- MPN_PTR_SWAP (ap, an, bp, bn);
- MP_PTR_SWAP (u0, u1);
- swapped ^= 1;
- }
- else if (an == bn)
- {
- int c;
- MPN_CMP (c, ap, bp, an);
- if (UNLIKELY (c == 0))
- goto return_b;
- else if (c < 0)
- {
- MP_PTR_SWAP (ap, bp);
- MP_PTR_SWAP (u0, u1);
- swapped ^= 1;
- }
- }
-
- /* Reduce a -= q b, u1 += q u0 */
- qn = an - bn + 1;
- mpn_tdiv_qr (qp, ap, 0, ap, an, bp, bn);
-
- if (mpn_zero_p (ap, bn))
- goto return_b;
-
- n = bn;
-
- /* Update u1 += q u0 */
- u0n = un;
- MPN_NORMALIZE (u0, u0n);
-
- if (u0n > 0)
- {
- qn -= (qp[qn - 1] == 0);
-
- if (qn > u0n)
- mpn_mul (tp, qp, qn, u0, u0n);
- else
- mpn_mul (tp, u0, u0n, qp, qn);
-
- if (qn + u0n > un)
- {
- mp_size_t u1n = un;
- un = qn + u0n;
- un -= (tp[un-1] == 0);
- u1[un] = mpn_add (u1, tp, un, u1, u1n);
- }
- else
- {
- u1[un] = mpn_add (u1, u1, un, tp, qn + u0n);
- }
-
- un += (u1[un] > 0);
- }
-
- *unp = un;
- return n;
-}
CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
FUTURE GNU MP RELEASES.
-Copyright 2003, 2004, 2007, 2009 Free Software Foundation, Inc.
+Copyright 2003, 2004, 2007, 2009, 2010, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
#define _GMP_IEEE_FLOATS 0
#endif
-#if ! _GMP_IEEE_FLOATS
-/* dummy definition, just to let dead code compile */
-union ieee_double_extract {
- struct {
- int manh, manl, sig, exp;
- } s;
- double d;
-};
-#endif
-
/* To force use of the generic C code for testing, put
"#define _GMP_IEEE_FLOATS 0" at this point. */
-
/* In alpha gcc prior to 3.4, signed DI comparisons involving constants are
rearranged from "x < n" to "x+(-n) < 0", which is of course hopelessly
wrong if that addition overflows.
- The workaround here avoids this bug by ensuring n is not a literal
- constant. Note that this is alpha specific. The offending transformation
- is/was in alpha.c alpha_emit_conditional_branch() under "We want to use
- cmpcc/bcc".
+ The workaround here avoids this bug by ensuring n is not a literal constant.
+ Note that this is alpha specific. The offending transformation is/was in
+ alpha.c alpha_emit_conditional_branch() under "We want to use cmpcc/bcc".
- Bizarrely, it turns out this happens also with Cray cc on
- alphaev5-cray-unicosmk2.0.6.X, and has the same solution. Don't know why
- or how. */
+ Bizarrely, this happens also with Cray cc on alphaev5-cray-unicosmk2.0.6.X,
+ and has the same solution. Don't know why or how. */
#if HAVE_HOST_CPU_FAMILY_alpha \
&& ((defined (__GNUC__) && ! __GMP_GNUC_PREREQ(3,4)) \
#endif
+/* Return the value {ptr,size}*2^exp, and negative if sign<0. Must have
+ size>=1, and a non-zero high limb ptr[size-1].
-/* Return the value {ptr,size}*2^exp, and negative if sign<0.
- Must have size>=1, and a non-zero high limb ptr[size-1].
-
- {ptr,size} is truncated towards zero. This is consistent with other gmp
- conversions, like mpz_set_f or mpz_set_q, and is easy to implement and
- test.
+ When we know the fp format, the result is truncated towards zero. This is
+ consistent with other gmp conversions, like mpz_set_f or mpz_set_q, and is
+ easy to implement and test.
- In the past conversions had attempted (imperfectly) to let the hardware
- float rounding mode take effect, but that gets tricky since multiple
- roundings need to be avoided, or taken into account, and denorms mean the
- effective precision of the mantissa is not constant. (For reference,
- mpz_get_d on IEEE systems was ok, except it operated on the absolute
- value. mpf_get_d and mpq_get_d suffered from multiple roundings and from
- not always using enough bits to get the rounding right.)
+ When we do not know the format, such truncation seems much harder. One
+ would need to defeat any rounding mode, including round-up.
It's felt that GMP is not primarily concerned with hardware floats, and
really isn't enhanced by getting involved with hardware rounding modes
- (which could even be some weird unknown style), so something unambiguous
- and straightforward is best.
+ (which could even be some weird unknown style), so something unambiguous and
+ straightforward is best.
The IEEE code below is the usual case, it knows either a 32-bit or 64-bit
limb and is done with shifts and masks. The 64-bit case in particular
should come out nice and compact.
- The generic code works one bit at a time, which will be quite slow, but
- should support any binary-based "double" and be safe against any rounding
- mode. Note in particular it works on IEEE systems too.
+ The generic code used to work one bit at a time, which was not only slow,
+ but implicitly relied upon denorms for intermediates, since the lowest bits'
+ weight of a perfectly valid fp number underflows in non-denorm. Therefore,
+ the generic code now works limb-per-limb, initially creating a number x such
+ that 1 <= x <= BASE. (BASE is reached only as a result of rounding.) Then
+ x's exponent is scaled with explicit code (not ldexp to avoid libm
+ dependency). It is a tap-dance to avoid underflow or overflow, beware!
Traps:
- Hardware traps for overflow to infinity, underflow to zero, or
- unsupported denorms may or may not be taken. The IEEE code works bitwise
- and so probably won't trigger them, the generic code works by float
- operations and so probably will. This difference might be thought less
- than ideal, but again its felt straightforward code is better than trying
- to get intimate with hardware exceptions (of perhaps unknown nature).
+ Hardware traps for overflow to infinity, underflow to zero, or unsupported
+ denorms may or may not be taken. The IEEE code works bitwise and so
+ probably won't trigger them, the generic code works by float operations and
+ so probably will. This difference might be thought less than ideal, but
+ again it's felt straightforward code is better than trying to get intimate
+ with hardware exceptions (of perhaps unknown nature).
Not done:
- mpz_get_d in the past handled size==1 with a cast limb->double. This
- might still be worthwhile there (for up to the mantissa many bits), but
- for mpn_get_d here, the cost of applying "exp" to the resulting exponent
- would probably use up any benefit a cast may have over bit twiddling.
- Also, if the exponent is pushed into denorm range then bit twiddling is
- the only option, to ensure the desired truncation is obtained.
+ mpz_get_d in the past handled size==1 with a cast limb->double. This might
+ still be worthwhile there (for up to the mantissa many bits), but for
+ mpn_get_d here, the cost of applying "exp" to the resulting exponent would
+ probably use up any benefit a cast may have over bit twiddling. Also, if
+ the exponent is pushed into denorm range then bit twiddling is the only
+ option, to ensure the desired truncation is obtained.
Other:
cast, neither in the IEEE or generic code. */
+
+#undef FORMAT_RECOGNIZED
+
double
mpn_get_d (mp_srcptr up, mp_size_t size, mp_size_t sign, long exp)
{
+ int lshift, nbits;
+ mp_limb_t x, mhi, mlo;
+
ASSERT (size >= 0);
ASSERT_MPN (up, size);
ASSERT (size == 0 || up[size-1] != 0);
overflow. After this exp can of course be reduced to anywhere within
the {up,size} region without underflow. */
if (UNLIKELY ((unsigned long) (GMP_NUMB_BITS * size)
- > (unsigned long) (LONG_MAX - exp)))
+ > ((unsigned long) LONG_MAX - exp)))
{
- if (_GMP_IEEE_FLOATS)
- goto ieee_infinity;
+#if _GMP_IEEE_FLOATS
+ goto ieee_infinity;
+#endif
/* generic */
exp = LONG_MAX;
exp += GMP_NUMB_BITS * size;
}
+#if _GMP_IEEE_FLOATS
+ {
+ union ieee_double_extract u;
-#if 1
-{
- int lshift, nbits;
- union ieee_double_extract u;
- mp_limb_t x, mhi, mlo;
-#if GMP_LIMB_BITS == 64
- mp_limb_t m;
- up += size;
- m = *--up;
- count_leading_zeros (lshift, m);
+ up += size;
- exp -= (lshift - GMP_NAIL_BITS) + 1;
- m <<= lshift;
+#if GMP_LIMB_BITS == 64
+ mlo = up[-1];
+ count_leading_zeros (lshift, mlo);
- nbits = GMP_LIMB_BITS - lshift;
+ exp -= (lshift - GMP_NAIL_BITS) + 1;
+ mlo <<= lshift;
- if (nbits < 53 && size > 1)
- {
- x = *--up;
- x <<= GMP_NAIL_BITS;
- x >>= nbits;
- m |= x;
- nbits += GMP_NUMB_BITS;
+ nbits = GMP_LIMB_BITS - lshift;
- if (LIMBS_PER_DOUBLE >= 3 && nbits < 53 && size > 2)
+ if (nbits < 53 && size > 1)
{
- x = *--up;
+ x = up[-2];
x <<= GMP_NAIL_BITS;
x >>= nbits;
- m |= x;
+ mlo |= x;
nbits += GMP_NUMB_BITS;
+
+ if (LIMBS_PER_DOUBLE >= 3 && nbits < 53 && size > 2)
+ {
+ x = up[-3];
+ x <<= GMP_NAIL_BITS;
+ x >>= nbits;
+ mlo |= x;
+ nbits += GMP_NUMB_BITS;
+ }
}
- }
- mhi = m >> (32 + 11);
- mlo = m >> 11;
+ mhi = mlo >> (32 + 11);
+ mlo = mlo >> 11; /* later implicitly truncated to 32 bits */
#endif
#if GMP_LIMB_BITS == 32
- up += size;
- x = *--up, size--;
- count_leading_zeros (lshift, x);
+ x = *--up;
+ count_leading_zeros (lshift, x);
- exp -= (lshift - GMP_NAIL_BITS) + 1;
- x <<= lshift;
- mhi = x >> 11;
+ exp -= (lshift - GMP_NAIL_BITS) + 1;
+ x <<= lshift;
+ mhi = x >> 11;
- if (lshift < 11) /* FIXME: never true if NUMB < 20 bits */
- {
- /* All 20 bits in mhi */
- mlo = x << 21;
- /* >= 1 bit in mlo */
- nbits = GMP_LIMB_BITS - lshift - 21;
- }
- else
- {
- if (size != 0)
+ if (lshift < 11) /* FIXME: never true if NUMB < 20 bits */
{
- nbits = GMP_LIMB_BITS - lshift;
-
- x = *--up, size--;
- x <<= GMP_NAIL_BITS;
- mhi |= x >> nbits >> 11;
-
- mlo = x << GMP_LIMB_BITS - nbits - 11;
- nbits = nbits + 11 - GMP_NAIL_BITS;
+ /* All 20 bits in mhi */
+ mlo = x << 21;
+ /* >= 1 bit in mlo */
+ nbits = GMP_LIMB_BITS - lshift - 21;
}
else
{
- mlo = 0;
- goto done;
+ if (size > 1)
+ {
+ nbits = GMP_LIMB_BITS - lshift;
+
+ x = *--up, size--;
+ x <<= GMP_NAIL_BITS;
+ mhi |= x >> nbits >> 11;
+
+ mlo = x << GMP_LIMB_BITS - nbits - 11;
+ nbits = nbits + 11 - GMP_NAIL_BITS;
+ }
+ else
+ {
+ mlo = 0;
+ goto done;
+ }
}
- }
- if (LIMBS_PER_DOUBLE >= 2 && nbits < 32 && size != 0)
- {
- x = *--up, size--;
- x <<= GMP_NAIL_BITS;
- x >>= nbits;
- mlo |= x;
- nbits += GMP_NUMB_BITS;
+ /* Now all needed bits in mhi have been accumulated. Add bits to mlo. */
- if (LIMBS_PER_DOUBLE >= 3 && nbits < 32 && size != 0)
+ if (LIMBS_PER_DOUBLE >= 2 && nbits < 32 && size > 1)
{
- x = *--up, size--;
+ x = up[-1];
x <<= GMP_NAIL_BITS;
x >>= nbits;
mlo |= x;
nbits += GMP_NUMB_BITS;
- if (LIMBS_PER_DOUBLE >= 4 && nbits < 32 && size != 0)
+ if (LIMBS_PER_DOUBLE >= 3 && nbits < 32 && size > 2)
{
- x = *--up;
+ x = up[-2];
x <<= GMP_NAIL_BITS;
x >>= nbits;
mlo |= x;
nbits += GMP_NUMB_BITS;
+
+ if (LIMBS_PER_DOUBLE >= 4 && nbits < 32 && size > 3)
+ {
+ x = up[-3];
+ x <<= GMP_NAIL_BITS;
+ x >>= nbits;
+ mlo |= x;
+ nbits += GMP_NUMB_BITS;
+ }
}
}
- }
- done:;
+ done:;
#endif
- {
- if (UNLIKELY (exp >= CONST_1024))
- {
- /* overflow, return infinity */
- ieee_infinity:
- mhi = 0;
- mlo = 0;
- exp = 1024;
- }
- else if (UNLIKELY (exp <= CONST_NEG_1023))
- {
- int rshift;
-
- if (LIKELY (exp <= CONST_NEG_1022_SUB_53))
- return 0.0; /* denorm underflows to zero */
-
- rshift = -1022 - exp;
- ASSERT (rshift > 0 && rshift < 53);
-#if GMP_LIMB_BITS > 53
- mlo >>= rshift;
- mhi = mlo >> 32;
-#else
- if (rshift >= 32)
- {
- mlo = mhi;
- mhi = 0;
- rshift -= 32;
- }
- lshift = GMP_LIMB_BITS - rshift;
- mlo = (mlo >> rshift) | (rshift == 0 ? 0 : mhi << lshift);
- mhi >>= rshift;
-#endif
- exp = -1023;
- }
- }
- u.s.manh = mhi;
- u.s.manl = mlo;
- u.s.exp = exp + 1023;
- u.s.sig = (sign < 0);
- return u.d;
-}
-#else
-
-
-#define ONE_LIMB (GMP_LIMB_BITS == 64 && 2*GMP_NUMB_BITS >= 53)
-#define TWO_LIMBS (GMP_LIMB_BITS == 32 && 3*GMP_NUMB_BITS >= 53)
-
- if (_GMP_IEEE_FLOATS && (ONE_LIMB || TWO_LIMBS))
- {
- union ieee_double_extract u;
- mp_limb_t m0, m1, m2, rmask;
- int lshift, rshift;
-
- m0 = up[size-1]; /* high limb */
- m1 = (size >= 2 ? up[size-2] : 0); /* second highest limb */
- count_leading_zeros (lshift, m0);
-
- /* relative to just under high non-zero bit */
- exp -= (lshift - GMP_NAIL_BITS) + 1;
-
- if (ONE_LIMB)
- {
- /* lshift to have high of m0 non-zero, and collapse nails */
- rshift = GMP_LIMB_BITS - lshift;
- m1 <<= GMP_NAIL_BITS;
- rmask = GMP_NAIL_BITS == 0 && lshift == 0 ? 0 : MP_LIMB_T_MAX;
- m0 = (m0 << lshift) | ((m1 >> rshift) & rmask);
-
- /* rshift back to have bit 53 of m0 the high non-zero */
- m0 >>= 11;
- }
- else /* TWO_LIMBS */
- {
- m2 = (size >= 3 ? up[size-3] : 0); /* third highest limb */
-
- /* collapse nails from m1 and m2 */
-#if GMP_NAIL_BITS != 0
- m1 = (m1 << GMP_NAIL_BITS) | (m2 >> (GMP_NUMB_BITS-GMP_NAIL_BITS));
- m2 <<= 2*GMP_NAIL_BITS;
-#endif
-
- /* lshift to have high of m0:m1 non-zero, collapse nails from m0 */
- rshift = GMP_LIMB_BITS - lshift;
- rmask = (GMP_NAIL_BITS == 0 && lshift == 0 ? 0 : MP_LIMB_T_MAX);
- m0 = (m0 << lshift) | ((m1 >> rshift) & rmask);
- m1 = (m1 << lshift) | ((m2 >> rshift) & rmask);
-
- /* rshift back to have bit 53 of m0:m1 the high non-zero */
- m1 = (m1 >> 11) | (m0 << (GMP_LIMB_BITS-11));
- m0 >>= 11;
- }
-
if (UNLIKELY (exp >= CONST_1024))
{
/* overflow, return infinity */
ieee_infinity:
- m0 = 0;
- m1 = 0;
+ mhi = 0;
+ mlo = 0;
exp = 1024;
}
else if (UNLIKELY (exp <= CONST_NEG_1023))
{
+ int rshift;
+
if (LIKELY (exp <= CONST_NEG_1022_SUB_53))
return 0.0; /* denorm underflows to zero */
rshift = -1022 - exp;
ASSERT (rshift > 0 && rshift < 53);
- if (ONE_LIMB)
- {
- m0 >>= rshift;
- }
- else /* TWO_LIMBS */
+#if GMP_LIMB_BITS > 53
+ mlo >>= rshift;
+ mhi = mlo >> 32;
+#else
+ if (rshift >= 32)
{
- if (rshift >= 32)
- {
- m1 = m0;
- m0 = 0;
- rshift -= 32;
- }
- lshift = GMP_LIMB_BITS - rshift;
- m1 = (m1 >> rshift) | (rshift == 0 ? 0 : m0 << lshift);
- m0 >>= rshift;
+ mlo = mhi;
+ mhi = 0;
+ rshift -= 32;
}
- exp = -1023;
- }
-
- if (ONE_LIMB)
- {
-#if GMP_LIMB_BITS > 32 /* avoid compiler warning about big shift */
- u.s.manh = m0 >> 32;
+ lshift = GMP_LIMB_BITS - rshift;
+ mlo = (mlo >> rshift) | (rshift == 0 ? 0 : mhi << lshift);
+ mhi >>= rshift;
#endif
- u.s.manl = m0;
- }
- else /* TWO_LIMBS */
- {
- u.s.manh = m0;
- u.s.manl = m1;
+ exp = -1023;
}
-
+ u.s.manh = mhi;
+ u.s.manl = mlo;
u.s.exp = exp + 1023;
u.s.sig = (sign < 0);
return u.d;
}
- else
+#define FORMAT_RECOGNIZED 1
+#endif
+
+#if HAVE_DOUBLE_VAX_D
{
- /* Non-IEEE or strange limb size, do something generic. */
-
- mp_size_t i;
- mp_limb_t limb, bit;
- int shift;
- double base, factor, prev_factor, d, new_d, diff;
-
- /* "limb" is "up[i]" the limb being examined, "bit" is a mask for the
- bit being examined, initially the highest non-zero bit. */
- i = size-1;
- limb = up[i];
- count_leading_zeros (shift, limb);
- bit = GMP_LIMB_HIGHBIT >> shift;
-
- /* relative to just under high non-zero bit */
- exp -= (shift - GMP_NAIL_BITS) + 1;
-
- /* Power up "factor" to 2^exp, being the value of the "bit" in "limb"
- being examined. */
- base = (exp >= 0 ? 2.0 : 0.5);
- exp = ABS (exp);
- factor = 1.0;
- for (;;)
+ union double_extract u;
+
+ up += size;
+
+ mhi = up[-1];
+
+ count_leading_zeros (lshift, mhi);
+ exp -= lshift;
+ mhi <<= lshift;
+
+ mlo = 0;
+ if (size > 1)
{
- if (exp & 1)
+ mlo = up[-2];
+ if (lshift != 0)
+ mhi += mlo >> (GMP_LIMB_BITS - lshift);
+ mlo <<= lshift;
+
+ if (size > 2 && lshift > 8)
{
- prev_factor = factor;
- factor *= base;
- FORCE_DOUBLE (factor);
- if (factor == 0.0)
- return 0.0; /* underflow */
- if (factor == prev_factor)
- {
- d = factor; /* overflow, apparent infinity */
- goto generic_done;
- }
+ x = up[-3];
+ mlo += x >> (GMP_LIMB_BITS - lshift);
}
- exp >>= 1;
- if (exp == 0)
- break;
- base *= base;
}
- /* Add a "factor" for each non-zero bit, working from high to low.
- Stop if any rounding occurs, hence implementing a truncation.
+ if (UNLIKELY (exp >= 128))
+ {
+ /* overflow, return maximum number */
+ mhi = 0xffffffff;
+ mlo = 0xffffffff;
+ exp = 127;
+ }
+ else if (UNLIKELY (exp < -128))
+ {
+ return 0.0; /* underflows to zero */
+ }
- Note no attention is paid to DBL_MANT_DIG, since the effective
- number of bits in the mantissa isn't constant when in denorm range.
- We also encountered an ARM system with apparently somewhat doubtful
- software floats where DBL_MANT_DIG claimed 53 bits but only 32
- actually worked. */
+ u.s.man3 = mhi >> 24; /* drop msb, since implicit */
+ u.s.man2 = mhi >> 8;
+ u.s.man1 = (mhi << 8) + (mlo >> 24);
+ u.s.man0 = mlo >> 8;
+ u.s.exp = exp + 128;
+ u.s.sig = sign < 0;
+ return u.d;
+ }
+#define FORMAT_RECOGNIZED 1
+#endif
- d = factor; /* high bit */
- for (;;)
+#if ! FORMAT_RECOGNIZED
+ { /* Non-IEEE or strange limb size, do something generic. */
+ mp_size_t i;
+ double d, weight;
+ unsigned long uexp;
+
+ /* First generate an fp number disregarding exp, instead keeping things
+ within the numb base factor from 1, which should prevent overflow and
+ underflow even for the most exponent limited fp formats. The
+ termination criteria should be refined, since we now include too many
+ limbs. */
+ weight = 1/MP_BASE_AS_DOUBLE;
+ d = up[size - 1];
+ for (i = size - 2; i >= 0; i--)
{
- factor *= 0.5; /* next bit */
- bit >>= 1;
- if (bit == 0)
- {
- /* next limb, if any */
- i--;
- if (i < 0)
- break;
- limb = up[i];
- bit = GMP_NUMB_HIGHBIT;
- }
+ d += up[i] * weight;
+ weight /= MP_BASE_AS_DOUBLE;
+ if (weight == 0)
+ break;
+ }
- if (bit & limb)
- {
- new_d = d + factor;
- FORCE_DOUBLE (new_d);
- diff = new_d - d;
- if (diff != factor)
- break; /* rounding occured, stop now */
- d = new_d;
- }
+ /* Now apply exp. */
+ exp -= GMP_NUMB_BITS;
+ if (exp > 0)
+ {
+ weight = 2.0;
+ uexp = exp;
+ }
+ else
+ {
+ weight = 0.5;
+ uexp = 1 - (unsigned long) (exp + 1);
}
+#if 1
+ /* Square-and-multiply exponentiation. */
+ if (uexp & 1)
+ d *= weight;
+ while (uexp >>= 1)
+ {
+ weight *= weight;
+ if (uexp & 1)
+ d *= weight;
+ }
+#else
+ /* Plain exponentiation. */
+ while (uexp > 0)
+ {
+ d *= weight;
+ uexp--;
+ }
+#endif
- generic_done:
- return (sign >= 0 ? d : -d);
+ return sign >= 0 ? d : -d;
}
#endif
}
GNU MP RELEASE.
Copyright 1991, 1992, 1993, 1994, 1996, 2000, 2001, 2002, 2004, 2006, 2007,
-2008 Free Software Foundation, Inc.
+2008, 2011, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
\f
/* There are no leading zeros on the digits generated at str, but that's not
- currently a documented feature. */
+ currently a documented feature. The current mpz_out_str and mpz_get_str
+ rely on it. */
size_t
mpn_get_str (unsigned char *str, int base, mp_ptr up, mp_size_t un)
mp_size_t n_pows, xn, pn, exptab[GMP_LIMB_BITS], bexp;
mp_limb_t cy;
mp_size_t shift;
+ size_t ndig;
+
+ DIGITS_IN_BASE_PER_LIMB (ndig, un, base);
+ xn = 1 + ndig / mp_bases[base].chars_per_limb; /* FIXME: scalar integer division */
n_pows = 0;
- xn = 1 + un*(mp_bases[base].chars_per_bit_exactly*GMP_NUMB_BITS)/mp_bases[base].chars_per_limb;
for (pn = xn; pn != 1; pn = (pn + 1) >> 1)
{
exptab[n_pows] = pn;
SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-Copyright 2003, 2004, 2005, 2008 Free Software Foundation, Inc.
+Copyright 2003, 2004, 2005, 2008, 2011, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
#include "gmp-impl.h"
#include "longlong.h"
-/* For input of size n, matrix elements are of size at most ceil(n/2)
- - 1, but we need two limbs extra. */
-void
-mpn_hgcd_matrix_init (struct hgcd_matrix *M, mp_size_t n, mp_ptr p)
-{
- mp_size_t s = (n+1)/2 + 1;
- M->alloc = s;
- M->n = 1;
- MPN_ZERO (p, 4 * s);
- M->p[0][0] = p;
- M->p[0][1] = p + s;
- M->p[1][0] = p + 2 * s;
- M->p[1][1] = p + 3 * s;
-
- M->p[0][0][0] = M->p[1][1][0] = 1;
-}
-
-/* Updated column COL, adding in column (1-COL). */
-static void
-hgcd_matrix_update_1 (struct hgcd_matrix *M, unsigned col)
-{
- mp_limb_t c0, c1;
- ASSERT (col < 2);
-
- c0 = mpn_add_n (M->p[0][col], M->p[0][0], M->p[0][1], M->n);
- c1 = mpn_add_n (M->p[1][col], M->p[1][0], M->p[1][1], M->n);
-
- M->p[0][col][M->n] = c0;
- M->p[1][col][M->n] = c1;
-
- M->n += (c0 | c1) != 0;
- ASSERT (M->n < M->alloc);
-}
-
-/* Updated column COL, adding in column Q * (1-COL). Temporary
- * storage: qn + n <= M->alloc, where n is the size of the largest
- * element in column 1 - COL. */
-static void
-hgcd_matrix_update_q (struct hgcd_matrix *M, mp_srcptr qp, mp_size_t qn,
- unsigned col, mp_ptr tp)
-{
- ASSERT (col < 2);
-
- if (qn == 1)
- {
- mp_limb_t q = qp[0];
- mp_limb_t c0, c1;
-
- c0 = mpn_addmul_1 (M->p[0][col], M->p[0][1-col], M->n, q);
- c1 = mpn_addmul_1 (M->p[1][col], M->p[1][1-col], M->n, q);
-
- M->p[0][col][M->n] = c0;
- M->p[1][col][M->n] = c1;
-
- M->n += (c0 | c1) != 0;
- }
- else
- {
- unsigned row;
-
- /* Carries for the unlikely case that we get both high words
- from the multiplication and carries from the addition. */
- mp_limb_t c[2];
- mp_size_t n;
-
- /* The matrix will not necessarily grow in size by qn, so we
- need normalization in order not to overflow M. */
-
- for (n = M->n; n + qn > M->n; n--)
- {
- ASSERT (n > 0);
- if (M->p[0][1-col][n-1] > 0 || M->p[1][1-col][n-1] > 0)
- break;
- }
-
- ASSERT (qn + n <= M->alloc);
-
- for (row = 0; row < 2; row++)
- {
- if (qn <= n)
- mpn_mul (tp, M->p[row][1-col], n, qp, qn);
- else
- mpn_mul (tp, qp, qn, M->p[row][1-col], n);
-
- ASSERT (n + qn >= M->n);
- c[row] = mpn_add (M->p[row][col], tp, n + qn, M->p[row][col], M->n);
- }
- if (c[0] | c[1])
- {
- M->n = n + qn + 1;
- M->p[0][col][M->n - 1] = c[0];
- M->p[1][col][M->n - 1] = c[1];
- }
- else
- {
- n += qn;
- n -= (M->p[0][col][n-1] | M->p[1][col][n-1]) == 0;
- if (n > M->n)
- M->n = n;
- }
- }
-
- ASSERT (M->n < M->alloc);
-}
-
-/* Multiply M by M1 from the right. Since the M1 elements fit in
- GMP_NUMB_BITS - 1 bits, M grows by at most one limb. Needs
- temporary space M->n */
-static void
-hgcd_matrix_mul_1 (struct hgcd_matrix *M, const struct hgcd_matrix1 *M1,
- mp_ptr tp)
-{
- mp_size_t n0, n1;
-
- /* Could avoid copy by some swapping of pointers. */
- MPN_COPY (tp, M->p[0][0], M->n);
- n0 = mpn_hgcd_mul_matrix1_vector (M1, M->p[0][0], tp, M->p[0][1], M->n);
- MPN_COPY (tp, M->p[1][0], M->n);
- n1 = mpn_hgcd_mul_matrix1_vector (M1, M->p[1][0], tp, M->p[1][1], M->n);
-
- /* Depends on zero initialization */
- M->n = MAX(n0, n1);
- ASSERT (M->n < M->alloc);
-}
-
-/* Perform a few steps, using some of mpn_hgcd2, subtraction and
- division. Reduces the size by almost one limb or more, but never
- below the given size s. Return new size for a and b, or 0 if no
- more steps are possible.
-
- If hgcd2 succeds, needs temporary space for hgcd_matrix_mul_1, M->n
- limbs, and hgcd_mul_matrix1_inverse_vector, n limbs. If hgcd2
- fails, needs space for the quotient, qn <= n - s + 1 limbs, for and
- hgcd_matrix_update_q, qn + (size of the appropriate column of M) <=
- resulting size of $.
-
- If N is the input size to the calling hgcd, then s = floor(N/2) +
- 1, M->n < N, qn + matrix size <= n - s + 1 + n - s = 2 (n - s) + 1
- < N, so N is sufficient.
-*/
-
-static mp_size_t
-hgcd_step (mp_size_t n, mp_ptr ap, mp_ptr bp, mp_size_t s,
- struct hgcd_matrix *M, mp_ptr tp)
-{
- struct hgcd_matrix1 M1;
- mp_limb_t mask;
- mp_limb_t ah, al, bh, bl;
- mp_size_t an, bn, qn;
- int col;
-
- ASSERT (n > s);
-
- mask = ap[n-1] | bp[n-1];
- ASSERT (mask > 0);
-
- if (n == s + 1)
- {
- if (mask < 4)
- goto subtract;
-
- ah = ap[n-1]; al = ap[n-2];
- bh = bp[n-1]; bl = bp[n-2];
- }
- else if (mask & GMP_NUMB_HIGHBIT)
- {
- ah = ap[n-1]; al = ap[n-2];
- bh = bp[n-1]; bl = bp[n-2];
- }
- else
- {
- int shift;
-
- count_leading_zeros (shift, mask);
- ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]);
- al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]);
- bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]);
- bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]);
- }
-
- /* Try an mpn_hgcd2 step */
- if (mpn_hgcd2 (ah, al, bh, bl, &M1))
- {
- /* Multiply M <- M * M1 */
- hgcd_matrix_mul_1 (M, &M1, tp);
-
- /* Can't swap inputs, so we need to copy. */
- MPN_COPY (tp, ap, n);
- /* Multiply M1^{-1} (a;b) */
- return mpn_hgcd_mul_matrix1_inverse_vector (&M1, ap, tp, bp, n);
- }
-
- subtract:
- /* There are two ways in which mpn_hgcd2 can fail. Either one of ah and
- bh was too small, or ah, bh were (almost) equal. Perform one
- subtraction step (for possible cancellation of high limbs),
- followed by one division. */
-
- /* Since we must ensure that #(a-b) > s, we handle cancellation of
- high limbs explicitly up front. (FIXME: Or is it better to just
- subtract, normalize, and use an addition to undo if it turns out
- the the difference is too small?) */
- for (an = n; an > s; an--)
- if (ap[an-1] != bp[an-1])
- break;
-
- if (an == s)
- return 0;
-
- /* Maintain a > b. When needed, swap a and b, and let col keep track
- of how to update M. */
- if (ap[an-1] > bp[an-1])
- {
- /* a is largest. In the subtraction step, we need to update
- column 1 of M */
- col = 1;
- }
- else
- {
- MP_PTR_SWAP (ap, bp);
- col = 0;
- }
-
- bn = n;
- MPN_NORMALIZE (bp, bn);
- if (bn <= s)
- return 0;
-
- /* We have #a, #b > s. When is it possible that #(a-b) < s? For
- cancellation to happen, the numbers must be of the form
-
- a = x + 1, 0, ..., 0, al
- b = x , GMP_NUMB_MAX, ..., GMP_NUMB_MAX, bl
-
- where al, bl denotes the least significant k limbs. If al < bl,
- then #(a-b) < k, and if also high(al) != 0, high(bl) != GMP_NUMB_MAX,
- then #(a-b) = k. If al >= bl, then #(a-b) = k + 1. */
-
- if (ap[an-1] == bp[an-1] + 1)
- {
- mp_size_t k;
- int c;
- for (k = an-1; k > s; k--)
- if (ap[k-1] != 0 || bp[k-1] != GMP_NUMB_MAX)
- break;
-
- MPN_CMP (c, ap, bp, k);
- if (c < 0)
- {
- mp_limb_t cy;
-
- /* The limbs from k and up are cancelled. */
- if (k == s)
- return 0;
- cy = mpn_sub_n (ap, ap, bp, k);
- ASSERT (cy == 1);
- an = k;
- }
- else
- {
- ASSERT_NOCARRY (mpn_sub_n (ap, ap, bp, k));
- ap[k] = 1;
- an = k + 1;
- }
- }
- else
- ASSERT_NOCARRY (mpn_sub_n (ap, ap, bp, an));
-
- ASSERT (an > s);
- ASSERT (ap[an-1] > 0);
- ASSERT (bn > s);
- ASSERT (bp[bn-1] > 0);
-
- hgcd_matrix_update_1 (M, col);
-
- if (an < bn)
- {
- MPN_PTR_SWAP (ap, an, bp, bn);
- col ^= 1;
- }
- else if (an == bn)
- {
- int c;
- MPN_CMP (c, ap, bp, an);
- if (c < 0)
- {
- MP_PTR_SWAP (ap, bp);
- col ^= 1;
- }
- }
-
- /* Divide a / b. */
- qn = an + 1 - bn;
-
- /* FIXME: We could use an approximate division, that may return a
- too small quotient, and only guarantee that the size of r is
- almost the size of b. FIXME: Let ap and remainder overlap. */
- mpn_tdiv_qr (tp, ap, 0, ap, an, bp, bn);
- qn -= (tp[qn -1] == 0);
-
- /* Normalize remainder */
- an = bn;
- for ( ; an > s; an--)
- if (ap[an-1] > 0)
- break;
-
- if (an <= s)
- {
- /* Quotient is too large */
- mp_limb_t cy;
-
- cy = mpn_add (ap, bp, bn, ap, an);
-
- if (cy > 0)
- {
- ASSERT (bn < n);
- ap[bn] = cy;
- bp[bn] = 0;
- bn++;
- }
-
- MPN_DECR_U (tp, qn, 1);
- qn -= (tp[qn-1] == 0);
- }
-
- if (qn > 0)
- hgcd_matrix_update_q (M, tp, qn, col, tp + qn);
-
- return bn;
-}
-
-/* Reduces a,b until |a-b| fits in n/2 + 1 limbs. Constructs matrix M
- with elements of size at most (n+1)/2 - 1. Returns new size of a,
- b, or zero if no reduction is possible. */
-mp_size_t
-mpn_hgcd_lehmer (mp_ptr ap, mp_ptr bp, mp_size_t n,
- struct hgcd_matrix *M, mp_ptr tp)
-{
- mp_size_t s = n/2 + 1;
- mp_size_t nn;
-
- ASSERT (n > s);
- ASSERT (ap[n-1] > 0 || bp[n-1] > 0);
-
- nn = hgcd_step (n, ap, bp, s, M, tp);
- if (!nn)
- return 0;
-
- for (;;)
- {
- n = nn;
- ASSERT (n > s);
- nn = hgcd_step (n, ap, bp, s, M, tp);
- if (!nn )
- return n;
- }
-}
-
-/* Multiply M by M1 from the right. Needs 3*(M->n + M1->n) + 5 limbs
- of temporary storage (see mpn_matrix22_mul_itch). */
-void
-mpn_hgcd_matrix_mul (struct hgcd_matrix *M, const struct hgcd_matrix *M1,
- mp_ptr tp)
-{
- mp_size_t n;
-
- /* About the new size of M:s elements. Since M1's diagonal elements
- are > 0, no element can decrease. The new elements are of size
- M->n + M1->n, one limb more or less. The computation of the
- matrix product produces elements of size M->n + M1->n + 1. But
- the true size, after normalization, may be three limbs smaller.
-
- The reason that the product has normalized size >= M->n + M1->n -
- 2 is subtle. It depends on the fact that M and M1 can be factored
- as products of (1,1; 0,1) and (1,0; 1,1), and that we can't have
- M ending with a large power and M1 starting with a large power of
- the same matrix. */
-
- /* FIXME: Strassen multiplication gives only a small speedup. In FFT
- multiplication range, this function could be sped up quite a lot
- using invariance. */
- ASSERT (M->n + M1->n < M->alloc);
-
- ASSERT ((M->p[0][0][M->n-1] | M->p[0][1][M->n-1]
- | M->p[1][0][M->n-1] | M->p[1][1][M->n-1]) > 0);
-
- ASSERT ((M1->p[0][0][M1->n-1] | M1->p[0][1][M1->n-1]
- | M1->p[1][0][M1->n-1] | M1->p[1][1][M1->n-1]) > 0);
-
- mpn_matrix22_mul (M->p[0][0], M->p[0][1],
- M->p[1][0], M->p[1][1], M->n,
- M1->p[0][0], M1->p[0][1],
- M1->p[1][0], M1->p[1][1], M1->n, tp);
-
- /* Index of last potentially non-zero limb, size is one greater. */
- n = M->n + M1->n;
-
- n -= ((M->p[0][0][n] | M->p[0][1][n] | M->p[1][0][n] | M->p[1][1][n]) == 0);
- n -= ((M->p[0][0][n] | M->p[0][1][n] | M->p[1][0][n] | M->p[1][1][n]) == 0);
- n -= ((M->p[0][0][n] | M->p[0][1][n] | M->p[1][0][n] | M->p[1][1][n]) == 0);
-
- ASSERT ((M->p[0][0][n] | M->p[0][1][n] | M->p[1][0][n] | M->p[1][1][n]) > 0);
-
- M->n = n + 1;
-}
-
-/* Multiplies the least significant p limbs of (a;b) by M^-1.
- Temporary space needed: 2 * (p + M->n)*/
-mp_size_t
-mpn_hgcd_matrix_adjust (struct hgcd_matrix *M,
- mp_size_t n, mp_ptr ap, mp_ptr bp,
- mp_size_t p, mp_ptr tp)
-{
- /* M^-1 (a;b) = (r11, -r01; -r10, r00) (a ; b)
- = (r11 a - r01 b; - r10 a + r00 b */
-
- mp_ptr t0 = tp;
- mp_ptr t1 = tp + p + M->n;
- mp_limb_t ah, bh;
- mp_limb_t cy;
-
- ASSERT (p + M->n < n);
-
- /* First compute the two values depending on a, before overwriting a */
-
- if (M->n >= p)
- {
- mpn_mul (t0, M->p[1][1], M->n, ap, p);
- mpn_mul (t1, M->p[1][0], M->n, ap, p);
- }
- else
- {
- mpn_mul (t0, ap, p, M->p[1][1], M->n);
- mpn_mul (t1, ap, p, M->p[1][0], M->n);
- }
-
- /* Update a */
- MPN_COPY (ap, t0, p);
- ah = mpn_add (ap + p, ap + p, n - p, t0 + p, M->n);
-
- if (M->n >= p)
- mpn_mul (t0, M->p[0][1], M->n, bp, p);
- else
- mpn_mul (t0, bp, p, M->p[0][1], M->n);
-
- cy = mpn_sub (ap, ap, n, t0, p + M->n);
- ASSERT (cy <= ah);
- ah -= cy;
-
- /* Update b */
- if (M->n >= p)
- mpn_mul (t0, M->p[0][0], M->n, bp, p);
- else
- mpn_mul (t0, bp, p, M->p[0][0], M->n);
-
- MPN_COPY (bp, t0, p);
- bh = mpn_add (bp + p, bp + p, n - p, t0 + p, M->n);
- cy = mpn_sub (bp, bp, n, t1, p + M->n);
- ASSERT (cy <= bh);
- bh -= cy;
-
- if (ah > 0 || bh > 0)
- {
- ap[n] = ah;
- bp[n] = bh;
- n++;
- }
- else
- {
- /* The subtraction can reduce the size by at most one limb. */
- if (ap[n-1] == 0 && bp[n-1] == 0)
- n--;
- }
- ASSERT (ap[n-1] > 0 || bp[n-1] > 0);
- return n;
-}
/* Size analysis for hgcd:
mp_size_t nscaled;
if (BELOW_THRESHOLD (n, HGCD_THRESHOLD))
- return MPN_HGCD_LEHMER_ITCH (n);
+ return n;
/* Get the recursion depth. */
nscaled = (n - 1) / (HGCD_THRESHOLD - 1);
count_leading_zeros (count, nscaled);
k = GMP_LIMB_BITS - count;
- return 20 * ((n+3) / 4) + 22 * k
- + MPN_HGCD_LEHMER_ITCH (HGCD_THRESHOLD);
+ return 20 * ((n+3) / 4) + 22 * k + HGCD_THRESHOLD;
}
/* Reduces a,b until |a-b| fits in n/2 + 1 limbs. Constructs matrix M
struct hgcd_matrix *M, mp_ptr tp)
{
mp_size_t s = n/2 + 1;
- mp_size_t n2 = (3*n)/4 + 1;
- mp_size_t p, nn;
+ mp_size_t nn;
int success = 0;
if (n <= s)
ASSERT ((n+1)/2 - 1 < M->alloc);
- if (BELOW_THRESHOLD (n, HGCD_THRESHOLD))
- return mpn_hgcd_lehmer (ap, bp, n, M, tp);
-
- p = n/2;
- nn = mpn_hgcd (ap + p, bp + p, n - p, M, tp);
- if (nn > 0)
- {
- /* Needs 2*(p + M->n) <= 2*(floor(n/2) + ceil(n/2) - 1)
- = 2 (n - 1) */
- n = mpn_hgcd_matrix_adjust (M, p + nn, ap, bp, p, tp);
- success = 1;
- }
- while (n > n2)
+ if (ABOVE_THRESHOLD (n, HGCD_THRESHOLD))
{
- /* Needs n + 1 storage */
- nn = hgcd_step (n, ap, bp, s, M, tp);
- if (!nn)
- return success ? n : 0;
- n = nn;
- success = 1;
- }
+ mp_size_t n2 = (3*n)/4 + 1;
+ mp_size_t p = n/2;
- if (n > s + 2)
- {
- struct hgcd_matrix M1;
- mp_size_t scratch;
+ nn = mpn_hgcd_reduce (M, ap, bp, n, p, tp);
+ if (nn)
+ {
+ n = nn;
+ success = 1;
+ }
+
+ /* NOTE: It appears this loop never runs more than once (at
+ least when not recursing to hgcd_appr). */
+ while (n > n2)
+ {
+ /* Needs n + 1 storage */
+ nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
+ if (!nn)
+ return success ? n : 0;
- p = 2*s - n + 1;
- scratch = MPN_HGCD_MATRIX_INIT_ITCH (n-p);
+ n = nn;
+ success = 1;
+ }
- mpn_hgcd_matrix_init(&M1, n - p, tp);
- nn = mpn_hgcd (ap + p, bp + p, n - p, &M1, tp + scratch);
- if (nn > 0)
+ if (n > s + 2)
{
- /* We always have max(M) > 2^{-(GMP_NUMB_BITS + 1)} max(M1) */
- ASSERT (M->n + 2 >= M1.n);
+ struct hgcd_matrix M1;
+ mp_size_t scratch;
- /* Furthermore, assume M ends with a quotient (1, q; 0, 1),
- then either q or q + 1 is a correct quotient, and M1 will
- start with either (1, 0; 1, 1) or (2, 1; 1, 1). This
- rules out the case that the size of M * M1 is much
- smaller than the expected M->n + M1->n. */
+ p = 2*s - n + 1;
+ scratch = MPN_HGCD_MATRIX_INIT_ITCH (n-p);
- ASSERT (M->n + M1.n < M->alloc);
+ mpn_hgcd_matrix_init(&M1, n - p, tp);
- /* Needs 2 (p + M->n) <= 2 (2*s - n2 + 1 + n2 - s - 1)
- = 2*s <= 2*(floor(n/2) + 1) <= n + 2. */
- n = mpn_hgcd_matrix_adjust (&M1, p + nn, ap, bp, p, tp + scratch);
+ /* FIXME: Should use hgcd_reduce, but that may require more
+ scratch space, which requires review. */
- /* We need a bound for of M->n + M1.n. Let n be the original
- input size. Then
+ nn = mpn_hgcd (ap + p, bp + p, n - p, &M1, tp + scratch);
+ if (nn > 0)
+ {
+ /* We always have max(M) > 2^{-(GMP_NUMB_BITS + 1)} max(M1) */
+ ASSERT (M->n + 2 >= M1.n);
- ceil(n/2) - 1 >= size of product >= M.n + M1.n - 2
+ /* Furthermore, assume M ends with a quotient (1, q; 0, 1),
+ then either q or q + 1 is a correct quotient, and M1 will
+ start with either (1, 0; 1, 1) or (2, 1; 1, 1). This
+ rules out the case that the size of M * M1 is much
+ smaller than the expected M->n + M1->n. */
- and it follows that
+ ASSERT (M->n + M1.n < M->alloc);
- M.n + M1.n <= ceil(n/2) + 1
+ /* Needs 2 (p + M->n) <= 2 (2*s - n2 + 1 + n2 - s - 1)
+ = 2*s <= 2*(floor(n/2) + 1) <= n + 2. */
+ n = mpn_hgcd_matrix_adjust (&M1, p + nn, ap, bp, p, tp + scratch);
- Then 3*(M.n + M1.n) + 5 <= 3 * ceil(n/2) + 8 is the
- amount of needed scratch space. */
- mpn_hgcd_matrix_mul (M, &M1, tp + scratch);
- success = 1;
+ /* We need a bound for M->n + M1.n. Let n be the original
+ input size. Then
+
+ ceil(n/2) - 1 >= size of product >= M.n + M1.n - 2
+
+ and it follows that
+
+ M.n + M1.n <= ceil(n/2) + 1
+
+ Then 3*(M.n + M1.n) + 5 <= 3 * ceil(n/2) + 8 is the
+ amount of needed scratch space. */
+ mpn_hgcd_matrix_mul (M, &M1, tp + scratch);
+ success = 1;
+ }
}
}
- /* This really is the base case */
for (;;)
{
/* Needs s+3 < n */
- nn = hgcd_step (n, ap, bp, s, M, tp);
+ nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
if (!nn)
return success ? n : 0;
SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-Copyright 1996, 1998, 2000, 2001, 2002, 2003, 2004, 2008 Free Software
+Copyright 1996, 1998, 2000, 2001, 2002, 2003, 2004, 2008, 2012 Free Software
Foundation, Inc.
This file is part of the GNU MP Library.
/* Reduces a,b until |a-b| (almost) fits in one limb + 1 bit. Constructs
matrix M. Returns 1 if we make progress, i.e. can perform at least
- one subtraction. Otherwise returns zero.. */
+ one subtraction. Otherwise returns zero. */
/* FIXME: Possible optimizations:
for (;;)
{
ASSERT (ah >= bh);
- if (ah == bh)
- break;
ah -= bh;
if (ah < (CNST_LIMB (1) << (GMP_LIMB_BITS / 2 + 1)))
}
subtract_a1:
ASSERT (bh >= ah);
- if (ah == bh)
- break;
bh -= ah;
if (bh < (CNST_LIMB (1) << (GMP_LIMB_BITS / 2 + 1)))
n += (ah | bh) > 0;
return n;
}
-
-/* Sets (r;b) = M^{-1}(a;b), with M^{-1} = (u11, -u01; -u10, u00) from
- the left. Uses three buffers, to avoid a copy. */
-mp_size_t
-mpn_hgcd_mul_matrix1_inverse_vector (const struct hgcd_matrix1 *M,
- mp_ptr rp, mp_srcptr ap, mp_ptr bp, mp_size_t n)
-{
- mp_limb_t h0, h1;
-
- /* Compute (r;b) <-- (u11 a - u01 b; -u10 a + u00 b) as
-
- r = u11 * a
- r -= u01 * b
- b *= u00
- b -= u10 * a
- */
-
- h0 = mpn_mul_1 (rp, ap, n, M->u[1][1]);
- h1 = mpn_submul_1 (rp, bp, n, M->u[0][1]);
- ASSERT (h0 == h1);
-
- h0 = mpn_mul_1 (bp, bp, n, M->u[0][0]);
- h1 = mpn_submul_1 (bp, ap, n, M->u[1][0]);
- ASSERT (h0 == h1);
-
- n -= (rp[n-1] | bp[n-1]) == 0;
- return n;
-}
--- /dev/null
+/* hgcd2_jacobi.c
+
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
+ SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 1996, 1998, 2000, 2001, 2002, 2003, 2004, 2008, 2011 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#if GMP_NAIL_BITS > 0
+#error Nails not supported.
+#endif
+
+/* Single-limb division by shift-and-subtract: returns the quotient of
+ n0 / d0 and stores the remainder in *rp. d0 is first shifted left
+ until it is aligned with n0, then one compare/subtract is done per
+ shift, so the work is proportional to the bit size of the quotient.
+ NOTE(review): with d0 == 0 the alignment loops would never
+ terminate; presumably callers guarantee d0 > 0 -- confirm.
+
+ FIXME: Duplicated in hgcd2.c. Should move to gmp-impl.h, and
+ possibly be renamed. */
+static inline mp_limb_t
+div1 (mp_ptr rp,
+ mp_limb_t n0,
+ mp_limb_t d0)
+{
+ mp_limb_t q = 0;
+
+ if ((mp_limb_signed_t) n0 < 0)
+ {
+ int cnt;
+ for (cnt = 1; (mp_limb_signed_t) d0 >= 0; cnt++)
+ {
+ d0 = d0 << 1;
+ }
+
+ q = 0;
+ while (cnt)
+ {
+ q <<= 1;
+ if (n0 >= d0)
+ {
+ n0 = n0 - d0;
+ q |= 1;
+ }
+ d0 = d0 >> 1;
+ cnt--;
+ }
+ }
+ else
+ {
+ int cnt;
+ for (cnt = 0; n0 >= d0; cnt++)
+ {
+ d0 = d0 << 1;
+ }
+
+ q = 0;
+ while (cnt)
+ {
+ d0 = d0 >> 1;
+ q <<= 1;
+ if (n0 >= d0)
+ {
+ n0 = n0 - d0;
+ q |= 1;
+ }
+ cnt--;
+ }
+ }
+ *rp = n0;
+ return q;
+}
+
+/* Two-limb division optimized for small quotients. Divides the
+ two-limb value (nh, nl) by (dh, dl) using shift-and-subtract and
+ returns the quotient. The remainder is stored with its low limb in
+ rp[0] and its high limb in rp[1].
+ NOTE(review): the quotient is accumulated in a single limb, so it
+ presumably must fit in one limb -- confirm against the callers. */
+static inline mp_limb_t
+div2 (mp_ptr rp,
+ mp_limb_t nh, mp_limb_t nl,
+ mp_limb_t dh, mp_limb_t dl)
+{
+ mp_limb_t q = 0;
+
+ if ((mp_limb_signed_t) nh < 0)
+ {
+ int cnt;
+ for (cnt = 1; (mp_limb_signed_t) dh >= 0; cnt++)
+ {
+ dh = (dh << 1) | (dl >> (GMP_LIMB_BITS - 1));
+ dl = dl << 1;
+ }
+
+ while (cnt)
+ {
+ q <<= 1;
+ if (nh > dh || (nh == dh && nl >= dl))
+ {
+ sub_ddmmss (nh, nl, nh, nl, dh, dl);
+ q |= 1;
+ }
+ dl = (dh << (GMP_LIMB_BITS - 1)) | (dl >> 1);
+ dh = dh >> 1;
+ cnt--;
+ }
+ }
+ else
+ {
+ int cnt;
+ for (cnt = 0; nh > dh || (nh == dh && nl >= dl); cnt++)
+ {
+ dh = (dh << 1) | (dl >> (GMP_LIMB_BITS - 1));
+ dl = dl << 1;
+ }
+
+ while (cnt)
+ {
+ dl = (dh << (GMP_LIMB_BITS - 1)) | (dl >> 1);
+ dh = dh >> 1;
+ q <<= 1;
+ if (nh > dh || (nh == dh && nl >= dl))
+ {
+ sub_ddmmss (nh, nl, nh, nl, dh, dl);
+ q |= 1;
+ }
+ cnt--;
+ }
+ }
+
+ rp[0] = nl;
+ rp[1] = nh;
+
+ return q;
+}
+
+int
+mpn_hgcd2_jacobi (mp_limb_t ah, mp_limb_t al, mp_limb_t bh, mp_limb_t bl,
+ struct hgcd_matrix1 *M, unsigned *bitsp)
+{
+ mp_limb_t u00, u01, u10, u11;
+ unsigned bits = *bitsp;
+
+ if (ah < 2 || bh < 2)
+ return 0;
+
+ if (ah > bh || (ah == bh && al > bl))
+ {
+ sub_ddmmss (ah, al, ah, al, bh, bl);
+ if (ah < 2)
+ return 0;
+
+ u00 = u01 = u11 = 1;
+ u10 = 0;
+ bits = mpn_jacobi_update (bits, 1, 1);
+ }
+ else
+ {
+ sub_ddmmss (bh, bl, bh, bl, ah, al);
+ if (bh < 2)
+ return 0;
+
+ u00 = u10 = u11 = 1;
+ u01 = 0;
+ bits = mpn_jacobi_update (bits, 0, 1);
+ }
+
+ if (ah < bh)
+ goto subtract_a;
+
+ for (;;)
+ {
+ ASSERT (ah >= bh);
+ if (ah == bh)
+ goto done;
+
+ if (ah < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2)))
+ {
+ ah = (ah << (GMP_LIMB_BITS / 2) ) + (al >> (GMP_LIMB_BITS / 2));
+ bh = (bh << (GMP_LIMB_BITS / 2) ) + (bl >> (GMP_LIMB_BITS / 2));
+
+ break;
+ }
+
+ /* Subtract a -= q b, and multiply M from the right by (1 q ; 0
+ 1), affecting the second column of M. */
+ ASSERT (ah > bh);
+ sub_ddmmss (ah, al, ah, al, bh, bl);
+
+ if (ah < 2)
+ goto done;
+
+ if (ah <= bh)
+ {
+ /* Use q = 1 */
+ u01 += u00;
+ u11 += u10;
+ bits = mpn_jacobi_update (bits, 1, 1);
+ }
+ else
+ {
+ mp_limb_t r[2];
+ mp_limb_t q = div2 (r, ah, al, bh, bl);
+ al = r[0]; ah = r[1];
+ if (ah < 2)
+ {
+ /* A is too small, but q is correct. */
+ u01 += q * u00;
+ u11 += q * u10;
+ bits = mpn_jacobi_update (bits, 1, q & 3);
+ goto done;
+ }
+ q++;
+ u01 += q * u00;
+ u11 += q * u10;
+ bits = mpn_jacobi_update (bits, 1, q & 3);
+ }
+ subtract_a:
+ ASSERT (bh >= ah);
+ if (ah == bh)
+ goto done;
+
+ if (bh < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2)))
+ {
+ ah = (ah << (GMP_LIMB_BITS / 2) ) + (al >> (GMP_LIMB_BITS / 2));
+ bh = (bh << (GMP_LIMB_BITS / 2) ) + (bl >> (GMP_LIMB_BITS / 2));
+
+ goto subtract_a1;
+ }
+
+ /* Subtract b -= q a, and multiply M from the right by (1 0 ; q
+ 1), affecting the first column of M. */
+ sub_ddmmss (bh, bl, bh, bl, ah, al);
+
+ if (bh < 2)
+ goto done;
+
+ if (bh <= ah)
+ {
+ /* Use q = 1 */
+ u00 += u01;
+ u10 += u11;
+ bits = mpn_jacobi_update (bits, 0, 1);
+ }
+ else
+ {
+ mp_limb_t r[2];
+ mp_limb_t q = div2 (r, bh, bl, ah, al);
+ bl = r[0]; bh = r[1];
+ if (bh < 2)
+ {
+ /* B is too small, but q is correct. */
+ u00 += q * u01;
+ u10 += q * u11;
+ bits = mpn_jacobi_update (bits, 0, q & 3);
+ goto done;
+ }
+ q++;
+ u00 += q * u01;
+ u10 += q * u11;
+ bits = mpn_jacobi_update (bits, 0, q & 3);
+ }
+ }
+
+ /* NOTE: Since we discard the least significant half limb, we don't
+ get a truly maximal M (corresponding to |a - b| <
+ 2^{GMP_LIMB_BITS +1}). */
+ /* Single precision loop */
+ for (;;)
+ {
+ ASSERT (ah >= bh);
+ if (ah == bh)
+ break;
+
+ ah -= bh;
+ if (ah < (CNST_LIMB (1) << (GMP_LIMB_BITS / 2 + 1)))
+ break;
+
+ if (ah <= bh)
+ {
+ /* Use q = 1 */
+ u01 += u00;
+ u11 += u10;
+ bits = mpn_jacobi_update (bits, 1, 1);
+ }
+ else
+ {
+ mp_limb_t r;
+ mp_limb_t q = div1 (&r, ah, bh);
+ ah = r;
+ if (ah < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2 + 1)))
+ {
+ /* A is too small, but q is correct. */
+ u01 += q * u00;
+ u11 += q * u10;
+ bits = mpn_jacobi_update (bits, 1, q & 3);
+ break;
+ }
+ q++;
+ u01 += q * u00;
+ u11 += q * u10;
+ bits = mpn_jacobi_update (bits, 1, q & 3);
+ }
+ subtract_a1:
+ ASSERT (bh >= ah);
+ if (ah == bh)
+ break;
+
+ bh -= ah;
+ if (bh < (CNST_LIMB (1) << (GMP_LIMB_BITS / 2 + 1)))
+ break;
+
+ if (bh <= ah)
+ {
+ /* Use q = 1 */
+ u00 += u01;
+ u10 += u11;
+ bits = mpn_jacobi_update (bits, 0, 1);
+ }
+ else
+ {
+ mp_limb_t r;
+ mp_limb_t q = div1 (&r, bh, ah);
+ bh = r;
+ if (bh < (CNST_LIMB(1) << (GMP_LIMB_BITS / 2 + 1)))
+ {
+ /* B is too small, but q is correct. */
+ u00 += q * u01;
+ u10 += q * u11;
+ bits = mpn_jacobi_update (bits, 0, q & 3);
+ break;
+ }
+ q++;
+ u00 += q * u01;
+ u10 += q * u11;
+ bits = mpn_jacobi_update (bits, 0, q & 3);
+ }
+ }
+
+ done:
+ M->u[0][0] = u00; M->u[0][1] = u01;
+ M->u[1][0] = u10; M->u[1][1] = u11;
+ *bitsp = bits;
+
+ return 1;
+}
--- /dev/null
+/* hgcd_appr.c.
+
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
+ SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2011, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Scratch-space estimate for mpn_hgcd_appr with inputs of size n.
+ Same formula as mpn_hgcd_itch, except that HGCD_APPR_THRESHOLD is
+ used as the cutoff and as the divisor when computing the recursion
+ depth; the trailing term still adds HGCD_THRESHOLD. FIXME: Do we
+ really need to add HGCD_THRESHOLD at the end? */
+mp_size_t
+mpn_hgcd_appr_itch (mp_size_t n)
+{
+ if (BELOW_THRESHOLD (n, HGCD_APPR_THRESHOLD))
+ return n;
+ else
+ {
+ unsigned k;
+ int count;
+ mp_size_t nscaled;
+
+ /* Get the recursion depth, taken as the bit length of nscaled. */
+ nscaled = (n - 1) / (HGCD_APPR_THRESHOLD - 1);
+ count_leading_zeros (count, nscaled);
+ k = GMP_LIMB_BITS - count;
+
+ return 20 * ((n+3) / 4) + 22 * k + HGCD_THRESHOLD;
+ }
+}
+
+/* Destroys inputs. */
+int
+mpn_hgcd_appr (mp_ptr ap, mp_ptr bp, mp_size_t n,
+ struct hgcd_matrix *M, mp_ptr tp)
+{
+ mp_size_t s;
+ int success = 0;
+
+ ASSERT (n > 0);
+
+ ASSERT ((ap[n-1] | bp[n-1]) != 0);
+
+ if (n <= 2)
+ /* Implies s = n. A fairly uninteresting case but exercised by the
+ random inputs of the testsuite. */
+ return 0;
+
+ ASSERT ((n+1)/2 - 1 < M->alloc);
+
+ /* We aim for reduction to GMP_NUMB_BITS * s bits. But each time
+ we discard some of the least significant limbs, we must keep one
+ additional bit to account for the truncation error. We maintain
+ GMP_NUMB_BITS * s - extra_bits as the current target size. */
+
+ s = n/2 + 1;
+ if (BELOW_THRESHOLD (n, HGCD_APPR_THRESHOLD))
+ {
+ unsigned extra_bits = 0;
+
+ while (n > 2)
+ {
+ mp_size_t nn;
+
+ ASSERT (n > s);
+ ASSERT (n <= 2*s);
+
+ nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
+ if (!nn)
+ break;
+
+ n = nn;
+ success = 1;
+
+ /* We can truncate and discard the lower p bits whenever nbits <=
+ 2*sbits - p. To account for the truncation error, we must
+ adjust
+
+ sbits <-- sbits + 1 - p,
+
+ rather than just sbits <-- sbits - p. This adjustment makes
+ the produced matrix slightly smaller than it could be. */
+
+ if (GMP_NUMB_BITS * (n + 1) + 2 * extra_bits <= 2*GMP_NUMB_BITS * s)
+ {
+ mp_size_t p = (GMP_NUMB_BITS * (2*s - n) - 2*extra_bits) / GMP_NUMB_BITS;
+
+ if (extra_bits == 0)
+ {
+ /* We cross a limb boundary and bump s. We can't do that
+ if the result is that it makes min(U, V)
+ smaller than 2^{GMP_NUMB_BITS} s. */
+ if (s + 1 == n
+ || mpn_zero_p (ap + s + 1, n - s - 1)
+ || mpn_zero_p (bp + s + 1, n - s - 1))
+ continue;
+
+ extra_bits = GMP_NUMB_BITS - 1;
+ s++;
+ }
+ else
+ {
+ extra_bits--;
+ }
+
+ /* Drop the p least significant limbs */
+ ap += p; bp += p; n -= p; s -= p;
+ }
+ }
+
+ ASSERT (s > 0);
+
+ if (extra_bits > 0)
+ {
+ /* We can get here only if we have dropped at least one of the
+ least significant bits, so we can decrement ap and bp. We can
+ then shift left extra bits using mpn_rshift. */
+ /* NOTE: In the unlikely case that n is large, it would be
+ preferable to do an initial subdiv step to reduce the size
+ before shifting, but that would mean duplicating
+ mpn_gcd_subdiv_step with a bit count rather than a limb
+ count. */
+ ap--; bp--;
+ ap[0] = mpn_rshift (ap+1, ap+1, n, GMP_NUMB_BITS - extra_bits);
+ bp[0] = mpn_rshift (bp+1, bp+1, n, GMP_NUMB_BITS - extra_bits);
+ n += (ap[n] | bp[n]) > 0;
+
+ ASSERT (success);
+
+ while (n > 2)
+ {
+ mp_size_t nn;
+
+ ASSERT (n > s);
+ ASSERT (n <= 2*s);
+
+ nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
+
+ if (!nn)
+ return 1;
+
+ n = nn;
+ }
+ }
+
+ if (n == 2)
+ {
+ struct hgcd_matrix1 M1;
+ ASSERT (s == 1);
+
+ if (mpn_hgcd2 (ap[1], ap[0], bp[1], bp[0], &M1))
+ {
+ /* Multiply M <- M * M1 */
+ mpn_hgcd_matrix_mul_1 (M, &M1, tp);
+ success = 1;
+ }
+ }
+ return success;
+ }
+ else
+ {
+ mp_size_t n2 = (3*n)/4 + 1;
+ mp_size_t p = n/2;
+ mp_size_t nn;
+
+ nn = mpn_hgcd_reduce (M, ap, bp, n, p, tp);
+ if (nn)
+ {
+ n = nn;
+ /* FIXME: Discard some of the low limbs immediately? */
+ success = 1;
+ }
+
+ while (n > n2)
+ {
+ mp_size_t nn;
+
+ /* Needs n + 1 storage */
+ nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
+ if (!nn)
+ return success;
+
+ n = nn;
+ success = 1;
+ }
+ if (n > s + 2)
+ {
+ struct hgcd_matrix M1;
+ mp_size_t scratch;
+
+ p = 2*s - n + 1;
+ scratch = MPN_HGCD_MATRIX_INIT_ITCH (n-p);
+
+ mpn_hgcd_matrix_init(&M1, n - p, tp);
+ if (mpn_hgcd_appr (ap + p, bp + p, n - p, &M1, tp + scratch))
+ {
+ /* We always have max(M) > 2^{-(GMP_NUMB_BITS + 1)} max(M1) */
+ ASSERT (M->n + 2 >= M1.n);
+
+ /* Furthermore, assume M ends with a quotient (1, q; 0, 1),
+ then either q or q + 1 is a correct quotient, and M1 will
+ start with either (1, 0; 1, 1) or (2, 1; 1, 1). This
+ rules out the case that the size of M * M1 is much
+ smaller than the expected M->n + M1->n. */
+
+ ASSERT (M->n + M1.n < M->alloc);
+
+ /* We need a bound for M->n + M1.n. Let n be the original
+ input size. Then
+
+ ceil(n/2) - 1 >= size of product >= M.n + M1.n - 2
+
+ and it follows that
+
+ M.n + M1.n <= ceil(n/2) + 1
+
+ Then 3*(M.n + M1.n) + 5 <= 3 * ceil(n/2) + 8 is the
+ amount of needed scratch space. */
+ mpn_hgcd_matrix_mul (M, &M1, tp + scratch);
+ return 1;
+ }
+ }
+
+ for(;;)
+ {
+ mp_size_t nn;
+
+ ASSERT (n > s);
+ ASSERT (n <= 2*s);
+
+ nn = mpn_hgcd_step (n, ap, bp, s, M, tp);
+
+ if (!nn)
+ return success;
+
+ n = nn;
+ success = 1;
+ }
+ }
+}
--- /dev/null
+/* hgcd_jacobi.c.
+
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
+ SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2003, 2004, 2005, 2008, 2011, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* This file is almost a copy of hgcd.c, with some added calls to
+ mpn_jacobi_update */
+
+struct hgcd_jacobi_ctx
+{
+ struct hgcd_matrix *M;
+ unsigned *bitsp;
+};
+
+static void
+hgcd_jacobi_hook (void *p, mp_srcptr gp, mp_size_t gn,
+ mp_srcptr qp, mp_size_t qn, int d)
+{
+ ASSERT (!gp);
+ ASSERT (d >= 0);
+
+ MPN_NORMALIZE (qp, qn);
+ if (qn > 0)
+ {
+ struct hgcd_jacobi_ctx *ctx = (struct hgcd_jacobi_ctx *) p;
+ /* NOTES: This is a bit ugly. A tp area is passed to
+ gcd_subdiv_step, which stores q at the start of that area. We
+ now use the rest, which is why the const qualifier of qp is
+ cast away below. The quotient is applied to column d of the
+ matrix, and only its two low bits are passed on to
+ mpn_jacobi_update. */
+ mp_ptr tp = (mp_ptr) qp + qn;
+
+ mpn_hgcd_matrix_update_q (ctx->M, qp, qn, d, tp);
+ *ctx->bitsp = mpn_jacobi_update (*ctx->bitsp, d, qp[0] & 3);
+ }
+}
+
+/* Perform a few steps, using some of mpn_hgcd2_jacobi, subtraction
+ and division. Reduces the size by almost one limb or more, but
+ never below the given size s. Return new size for a and b, or 0 if
+ no more steps are possible. The Jacobi state in *bitsp is updated
+ along the way, either by mpn_hgcd2_jacobi or by hgcd_jacobi_hook.
+
+ If hgcd2 succeeds, needs temporary space for hgcd_matrix_mul_1,
+ M->n limbs, and for the copy of a that is passed to
+ mpn_matrix22_mul1_inverse_vector, n limbs. If hgcd2 fails, needs
+ space for the quotient, qn <= n - s + 1 limbs, and for
+ hgcd_matrix_update_q, qn + (size of the appropriate column of M) <=
+ resulting size of M.
+
+ If N is the input size to the calling hgcd, then s = floor(N/2) +
+ 1, M->n < N, qn + matrix size <= n - s + 1 + n - s = 2 (n - s) + 1
+ < N, so N is sufficient.
+*/
+
+static mp_size_t
+hgcd_jacobi_step (mp_size_t n, mp_ptr ap, mp_ptr bp, mp_size_t s,
+ struct hgcd_matrix *M, unsigned *bitsp, mp_ptr tp)
+{
+ struct hgcd_matrix1 M1;
+ mp_limb_t mask;
+ mp_limb_t ah, al, bh, bl;
+
+ ASSERT (n > s);
+
+ mask = ap[n-1] | bp[n-1];
+ ASSERT (mask > 0);
+
+ if (n == s + 1)
+ {
+ if (mask < 4)
+ goto subtract;
+
+ ah = ap[n-1]; al = ap[n-2];
+ bh = bp[n-1]; bl = bp[n-2];
+ }
+ else if (mask & GMP_NUMB_HIGHBIT)
+ {
+ ah = ap[n-1]; al = ap[n-2];
+ bh = bp[n-1]; bl = bp[n-2];
+ }
+ else
+ {
+ int shift;
+
+ count_leading_zeros (shift, mask);
+ ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]);
+ al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]);
+ bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]);
+ bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]);
+ }
+
+ /* Try an mpn_hgcd2_jacobi step on the two most significant limbs */
+ if (mpn_hgcd2_jacobi (ah, al, bh, bl, &M1, bitsp))
+ {
+ /* Multiply M <- M * M1 */
+ mpn_hgcd_matrix_mul_1 (M, &M1, tp);
+
+ /* Can't swap inputs, so we need to copy. */
+ MPN_COPY (tp, ap, n);
+ /* Multiply M1^{-1} (a;b) */
+ return mpn_matrix22_mul1_inverse_vector (&M1, ap, tp, bp, n);
+ }
+
+ subtract:
+ {
+ struct hgcd_jacobi_ctx ctx;
+ ctx.M = M;
+ ctx.bitsp = bitsp;
+
+ return mpn_gcd_subdiv_step (ap, bp, n, s, hgcd_jacobi_hook, &ctx, tp);
+ }
+}
+
+/* Reduces a,b until |a-b| fits in n/2 + 1 limbs. Constructs matrix M
+ with elements of size at most (n+1)/2 - 1. Returns new size of a,
+ b, or zero if no reduction is possible. */
+
+/* Same scratch requirements as for mpn_hgcd. */
+mp_size_t
+mpn_hgcd_jacobi (mp_ptr ap, mp_ptr bp, mp_size_t n,
+ struct hgcd_matrix *M, unsigned *bitsp, mp_ptr tp)
+{
+ mp_size_t s = n/2 + 1;
+
+ mp_size_t nn;
+ int success = 0;
+
+ if (n <= s)
+ /* Happens when n <= 2, a fairly uninteresting case but exercised
+ by the random inputs of the testsuite. */
+ return 0;
+
+ ASSERT ((ap[n-1] | bp[n-1]) > 0);
+
+ ASSERT ((n+1)/2 - 1 < M->alloc);
+
+ if (ABOVE_THRESHOLD (n, HGCD_THRESHOLD))
+ {
+ mp_size_t n2 = (3*n)/4 + 1;
+ mp_size_t p = n/2;
+
+ nn = mpn_hgcd_jacobi (ap + p, bp + p, n - p, M, bitsp, tp);
+ if (nn > 0)
+ {
+ /* Needs 2*(p + M->n) <= 2*(floor(n/2) + ceil(n/2) - 1)
+ = 2 (n - 1) */
+ n = mpn_hgcd_matrix_adjust (M, p + nn, ap, bp, p, tp);
+ success = 1;
+ }
+ while (n > n2)
+ {
+ /* Needs n + 1 storage */
+ nn = hgcd_jacobi_step (n, ap, bp, s, M, bitsp, tp);
+ if (!nn)
+ return success ? n : 0;
+ n = nn;
+ success = 1;
+ }
+
+ if (n > s + 2)
+ {
+ struct hgcd_matrix M1;
+ mp_size_t scratch;
+
+ p = 2*s - n + 1;
+ scratch = MPN_HGCD_MATRIX_INIT_ITCH (n-p);
+
+ mpn_hgcd_matrix_init(&M1, n - p, tp);
+ nn = mpn_hgcd_jacobi (ap + p, bp + p, n - p, &M1, bitsp, tp + scratch);
+ if (nn > 0)
+ {
+ /* We always have max(M) > 2^{-(GMP_NUMB_BITS + 1)} max(M1) */
+ ASSERT (M->n + 2 >= M1.n);
+
+ /* Furthermore, assume M ends with a quotient (1, q; 0, 1),
+ then either q or q + 1 is a correct quotient, and M1 will
+ start with either (1, 0; 1, 1) or (2, 1; 1, 1). This
+ rules out the case that the size of M * M1 is much
+ smaller than the expected M->n + M1->n. */
+
+ ASSERT (M->n + M1.n < M->alloc);
+
+ /* Needs 2 (p + M->n) <= 2 (2*s - n2 + 1 + n2 - s - 1)
+ = 2*s <= 2*(floor(n/2) + 1) <= n + 2. */
+ n = mpn_hgcd_matrix_adjust (&M1, p + nn, ap, bp, p, tp + scratch);
+
+ /* We need a bound for M->n + M1.n. Let n be the original
+ input size. Then
+
+ ceil(n/2) - 1 >= size of product >= M.n + M1.n - 2
+
+ and it follows that
+
+ M.n + M1.n <= ceil(n/2) + 1
+
+ Then 3*(M.n + M1.n) + 5 <= 3 * ceil(n/2) + 8 is the
+ amount of needed scratch space. */
+ mpn_hgcd_matrix_mul (M, &M1, tp + scratch);
+ success = 1;
+ }
+ }
+ }
+
+ for (;;)
+ {
+ /* Needs s+3 < n */
+ nn = hgcd_jacobi_step (n, ap, bp, s, M, bitsp, tp);
+ if (!nn)
+ return success ? n : 0;
+
+ n = nn;
+ success = 1;
+ }
+}
--- /dev/null
+/* hgcd_matrix.c.
+
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
+ SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2003, 2004, 2005, 2008, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Initializes M to the identity matrix, with the four elements
+ stored back to back in the area at p. For input of size n, matrix
+ elements are of size at most ceil(n/2) - 1, but we need two limbs
+ extra, so each element gets s = (n+1)/2 + 1 limbs. The area at p
+ must hold 4 * s limbs, all of which are cleared here. */
+void
+mpn_hgcd_matrix_init (struct hgcd_matrix *M, mp_size_t n, mp_ptr p)
+{
+ mp_size_t s = (n+1)/2 + 1;
+ M->alloc = s;
+ M->n = 1;
+ MPN_ZERO (p, 4 * s);
+ M->p[0][0] = p;
+ M->p[0][1] = p + s;
+ M->p[1][0] = p + 2 * s;
+ M->p[1][1] = p + 3 * s;
+
+ M->p[0][0][0] = M->p[1][1][0] = 1;
+}
+
+/* Update column COL, adding in Q * column (1-COL). Temporary storage:
+ * qn + n <= M->alloc, where n is the size of the largest element in
+ * column 1 - COL. For qn == 1 the update is done in place with
+ * mpn_addmul_1 and tp is not used. M->n is updated to the new size,
+ * which must stay below M->alloc. */
+void
+mpn_hgcd_matrix_update_q (struct hgcd_matrix *M, mp_srcptr qp, mp_size_t qn,
+ unsigned col, mp_ptr tp)
+{
+ ASSERT (col < 2);
+
+ if (qn == 1)
+ {
+ mp_limb_t q = qp[0];
+ mp_limb_t c0, c1;
+
+ c0 = mpn_addmul_1 (M->p[0][col], M->p[0][1-col], M->n, q);
+ c1 = mpn_addmul_1 (M->p[1][col], M->p[1][1-col], M->n, q);
+
+ M->p[0][col][M->n] = c0;
+ M->p[1][col][M->n] = c1;
+
+ M->n += (c0 | c1) != 0;
+ }
+ else
+ {
+ unsigned row;
+
+ /* Carries for the unlikely case that we get both high words
+ from the multiplication and carries from the addition. */
+ mp_limb_t c[2];
+ mp_size_t n;
+
+ /* The matrix will not necessarily grow in size by qn, so we
+ need normalization in order not to overflow M. */
+
+ for (n = M->n; n + qn > M->n; n--)
+ {
+ ASSERT (n > 0);
+ if (M->p[0][1-col][n-1] > 0 || M->p[1][1-col][n-1] > 0)
+ break;
+ }
+
+ ASSERT (qn + n <= M->alloc);
+
+ for (row = 0; row < 2; row++)
+ {
+ if (qn <= n)
+ mpn_mul (tp, M->p[row][1-col], n, qp, qn);
+ else
+ mpn_mul (tp, qp, qn, M->p[row][1-col], n);
+
+ ASSERT (n + qn >= M->n);
+ c[row] = mpn_add (M->p[row][col], tp, n + qn, M->p[row][col], M->n);
+ }
+
+ n += qn;
+
+ if (c[0] | c[1])
+ {
+ M->p[0][col][n] = c[0];
+ M->p[1][col][n] = c[1];
+ n++;
+ }
+ else
+ {
+ n -= (M->p[0][col][n-1] | M->p[1][col][n-1]) == 0;
+ ASSERT (n >= M->n);
+ }
+ M->n = n;
+ }
+
+ ASSERT (M->n < M->alloc);
+}
+
+/* Multiply M by M1 from the right. Since the M1 elements fit in
+ GMP_NUMB_BITS - 1 bits, M grows by at most one limb. Needs
+ temporary space M->n. Each row of M is updated by one call to
+ mpn_hgcd_mul_matrix1_vector, with tp holding a copy of the row's
+ first element; M->n becomes the larger of the two returned sizes. */
+void
+mpn_hgcd_matrix_mul_1 (struct hgcd_matrix *M, const struct hgcd_matrix1 *M1,
+ mp_ptr tp)
+{
+ mp_size_t n0, n1;
+
+ /* Could avoid copy by some swapping of pointers. */
+ MPN_COPY (tp, M->p[0][0], M->n);
+ n0 = mpn_hgcd_mul_matrix1_vector (M1, M->p[0][0], tp, M->p[0][1], M->n);
+ MPN_COPY (tp, M->p[1][0], M->n);
+ n1 = mpn_hgcd_mul_matrix1_vector (M1, M->p[1][0], tp, M->p[1][1], M->n);
+
+ /* Depends on zero initialization */
+ M->n = MAX(n0, n1);
+ ASSERT (M->n < M->alloc);
+}
+
+/* Multiply M by M1 from the right. Needs 3*(M->n + M1->n) + 5 limbs
+ of temporary storage (see mpn_matrix22_mul_itch). On return, M->n
+ is the normalized size of the product. */
+void
+mpn_hgcd_matrix_mul (struct hgcd_matrix *M, const struct hgcd_matrix *M1,
+ mp_ptr tp)
+{
+ mp_size_t n;
+
+ /* About the new size of M's elements. Since M1's diagonal elements
+ are > 0, no element can decrease. The new elements are of size
+ M->n + M1->n, one limb more or less. The computation of the
+ matrix product produces elements of size M->n + M1->n + 1. But
+ the true size, after normalization, may be three limbs smaller.
+
+ The reason that the product has normalized size >= M->n + M1->n -
+ 2 is subtle. It depends on the fact that M and M1 can be factored
+ as products of (1,1; 0,1) and (1,0; 1,1), and that we can't have
+ M ending with a large power and M1 starting with a large power of
+ the same matrix. */
+
+ /* FIXME: Strassen multiplication gives only a small speedup. In FFT
+ multiplication range, this function could be sped up quite a lot
+ using invariance. */
+ ASSERT (M->n + M1->n < M->alloc);
+
+ ASSERT ((M->p[0][0][M->n-1] | M->p[0][1][M->n-1]
+ | M->p[1][0][M->n-1] | M->p[1][1][M->n-1]) > 0);
+
+ ASSERT ((M1->p[0][0][M1->n-1] | M1->p[0][1][M1->n-1]
+ | M1->p[1][0][M1->n-1] | M1->p[1][1][M1->n-1]) > 0);
+
+ mpn_matrix22_mul (M->p[0][0], M->p[0][1],
+ M->p[1][0], M->p[1][1], M->n,
+ M1->p[0][0], M1->p[0][1],
+ M1->p[1][0], M1->p[1][1], M1->n, tp);
+
+ /* Index of last potentially non-zero limb, size is one greater.
+ The three steps below strip up to three high limbs that are zero
+ in all four elements. */
+ n = M->n + M1->n;
+
+ n -= ((M->p[0][0][n] | M->p[0][1][n] | M->p[1][0][n] | M->p[1][1][n]) == 0);
+ n -= ((M->p[0][0][n] | M->p[0][1][n] | M->p[1][0][n] | M->p[1][1][n]) == 0);
+ n -= ((M->p[0][0][n] | M->p[0][1][n] | M->p[1][0][n] | M->p[1][1][n]) == 0);
+
+ ASSERT ((M->p[0][0][n] | M->p[0][1][n] | M->p[1][0][n] | M->p[1][1][n]) > 0);
+
+ M->n = n + 1;
+}
+
+/* Multiplies the least significant p limbs of (a;b) by M^-1, updating
+ a and b (n limbs each) in place. Returns the new size, which is
+ n - 1, n or n + 1; since a limb may be stored at index n, ap and bp
+ need room for n + 1 limbs.
+ Temporary space needed: 2 * (p + M->n) */
+mp_size_t
+mpn_hgcd_matrix_adjust (const struct hgcd_matrix *M,
+ mp_size_t n, mp_ptr ap, mp_ptr bp,
+ mp_size_t p, mp_ptr tp)
+{
+ /* M^-1 (a;b) = (r11, -r01; -r10, r00) (a ; b)
+ = (r11 a - r01 b; - r10 a + r00 b) */
+
+ mp_ptr t0 = tp;
+ mp_ptr t1 = tp + p + M->n;
+ mp_limb_t ah, bh;
+ mp_limb_t cy;
+
+ ASSERT (p + M->n < n);
+
+ /* First compute the two values depending on a, before overwriting a */
+
+ if (M->n >= p)
+ {
+ mpn_mul (t0, M->p[1][1], M->n, ap, p);
+ mpn_mul (t1, M->p[1][0], M->n, ap, p);
+ }
+ else
+ {
+ mpn_mul (t0, ap, p, M->p[1][1], M->n);
+ mpn_mul (t1, ap, p, M->p[1][0], M->n);
+ }
+
+ /* Update a */
+ MPN_COPY (ap, t0, p);
+ ah = mpn_add (ap + p, ap + p, n - p, t0 + p, M->n);
+
+ if (M->n >= p)
+ mpn_mul (t0, M->p[0][1], M->n, bp, p);
+ else
+ mpn_mul (t0, bp, p, M->p[0][1], M->n);
+
+ cy = mpn_sub (ap, ap, n, t0, p + M->n);
+ ASSERT (cy <= ah);
+ ah -= cy;
+
+ /* Update b */
+ if (M->n >= p)
+ mpn_mul (t0, M->p[0][0], M->n, bp, p);
+ else
+ mpn_mul (t0, bp, p, M->p[0][0], M->n);
+
+ MPN_COPY (bp, t0, p);
+ bh = mpn_add (bp + p, bp + p, n - p, t0 + p, M->n);
+ cy = mpn_sub (bp, bp, n, t1, p + M->n);
+ ASSERT (cy <= bh);
+ bh -= cy;
+
+ if (ah > 0 || bh > 0)
+ {
+ ap[n] = ah;
+ bp[n] = bh;
+ n++;
+ }
+ else
+ {
+ /* The subtraction can reduce the size by at most one limb. */
+ if (ap[n-1] == 0 && bp[n-1] == 0)
+ n--;
+ }
+ ASSERT (ap[n-1] > 0 || bp[n-1] > 0);
+ return n;
+}
--- /dev/null
+/* hgcd_reduce.c.
+
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
+ SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2011, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Subtracts the product A * B from R in place, where R = {rp, rn},
+   A = {ap, an} and B = {bp, bn}.  The caller guarantees that the
+   difference is non-negative.  High zero limbs of the result are
+   stripped, but never below an limbs; the stripped size is returned. */
+static mp_size_t
+submul (mp_ptr rp, mp_size_t rn,
+        mp_srcptr ap, mp_size_t an, mp_srcptr bp, mp_size_t bn)
+{
+  mp_ptr prod;
+  mp_size_t pn;
+  TMP_DECL;
+
+  ASSERT (bn > 0);
+  ASSERT (an >= bn);
+  ASSERT (rn >= an);
+  ASSERT (an + bn <= rn + 1);
+
+  pn = an + bn;
+
+  TMP_MARK;
+  prod = TMP_ALLOC_LIMBS (pn);
+
+  mpn_mul (prod, ap, an, bp, bn);
+
+  /* The product area may be one limb longer than R.  That extra top
+     limb is then required to be zero and is left out of the
+     subtraction. */
+  if (pn > rn)
+    {
+      ASSERT (prod[rn] == 0);
+      pn--;
+    }
+  ASSERT_NOCARRY (mpn_sub (rp, rp, rn, prod, pn));
+  TMP_FREE;
+
+  /* Strip high zero limbs, stopping at an limbs. */
+  for (; rn > an; rn--)
+    if (rp[rn-1] != 0)
+      break;
+
+  return rn;
+}
+
+/* Computes (a, b) <-- M^{-1} (a; b), in place, for the n-limb operands
+ {ap, n} and {bp, n}. Returns the size nn of the reduced pair; in the
+ general case nn is trimmed so that at least one of ap[nn-1], bp[nn-1]
+ is non-zero. Temporary storage is obtained with TMP_ALLOC_LIMBS. */
+/* FIXME:
+ x Take scratch parameter, and figure out scratch need.
+
+ x Use some fallback for small M->n?
+*/
+static mp_size_t
+hgcd_matrix_apply (const struct hgcd_matrix *M,
+ mp_ptr ap, mp_ptr bp,
+ mp_size_t n)
+{
+ mp_size_t an, bn, un, vn, nn;
+ mp_size_t mn[2][2];
+ mp_size_t modn;
+ mp_ptr tp, sp, scratch;
+ mp_limb_t cy;
+ unsigned i, j;
+
+ TMP_DECL;
+
+ ASSERT ( (ap[n-1] | bp[n-1]) > 0);
+
+ /* an and bn are the normalized sizes of a and b. */
+ an = n;
+ MPN_NORMALIZE (ap, an);
+ bn = n;
+ MPN_NORMALIZE (bp, bn);
+
+ /* mn[i][j] is the normalized size of the matrix element M->p[i][j]. */
+ for (i = 0; i < 2; i++)
+ for (j = 0; j < 2; j++)
+ {
+ mp_size_t k;
+ k = M->n;
+ MPN_NORMALIZE (M->p[i][j], k);
+ mn[i][j] = k;
+ }
+
+ ASSERT (mn[0][0] > 0);
+ ASSERT (mn[1][1] > 0);
+ ASSERT ( (mn[0][1] | mn[1][0]) > 0);
+
+ TMP_MARK;
+
+ /* When one off-diagonal element is zero, the matrix represents a
+ single quotient step and one submul call is enough. */
+ if (mn[0][1] == 0)
+ {
+ /* A unchanged, M = (1, 0; q, 1) */
+ ASSERT (mn[0][0] == 1);
+ ASSERT (M->p[0][0][0] == 1);
+ ASSERT (mn[1][1] == 1);
+ ASSERT (M->p[1][1][0] == 1);
+
+ /* Put B <-- B - q A */
+ nn = submul (bp, bn, ap, an, M->p[1][0], mn[1][0]);
+ }
+ else if (mn[1][0] == 0)
+ {
+ /* B unchanged, M = (1, q; 0, 1) */
+ ASSERT (mn[0][0] == 1);
+ ASSERT (M->p[0][0][0] == 1);
+ ASSERT (mn[1][1] == 1);
+ ASSERT (M->p[1][1][0] == 1);
+
+ /* Put A <-- A - q * B */
+ nn = submul (ap, an, bp, bn, M->p[0][1], mn[0][1]);
+ }
+ else
+ {
+ /* A = m00 a + m01 b ==> a <= A / m00, b <= A / m01.
+ B = m10 a + m11 b ==> a <= B / m10, b <= B / m11. */
+ /* Hence un and vn bound the sizes of the new a and the new b. */
+ un = MIN (an - mn[0][0], bn - mn[1][0]) + 1;
+ vn = MIN (an - mn[0][1], bn - mn[1][1]) + 1;
+
+ nn = MAX (un, vn);
+ /* In the range of interest, mulmod_bnm1 should always beat mullo. */
+ /* The results fit in nn limbs, so the products are computed with
+ mpn_mulmod_bnm1 using a size modn chosen from nn + 1. */
+ modn = mpn_mulmod_bnm1_next_size (nn + 1);
+
+ scratch = TMP_ALLOC_LIMBS (mpn_mulmod_bnm1_itch (modn, modn, M->n));
+ tp = TMP_ALLOC_LIMBS (modn);
+ sp = TMP_ALLOC_LIMBS (modn);
+
+ ASSERT (n <= 2*modn);
+
+ if (n > modn)
+ {
+ /* Fold the inputs to modn limbs: the limbs above modn are added
+ onto the low modn limbs, and a carry out is added back in at
+ the bottom. */
+ cy = mpn_add (ap, ap, modn, ap + modn, n - modn);
+ MPN_INCR_U (ap, modn, cy);
+
+ cy = mpn_add (bp, bp, modn, bp + modn, n - modn);
+ MPN_INCR_U (bp, modn, cy);
+
+ n = modn;
+ }
+
+ /* New a = m11 a - m01 b: tp gets m11 a, sp gets m01 b. */
+ mpn_mulmod_bnm1 (tp, modn, ap, n, M->p[1][1], mn[1][1], scratch);
+ mpn_mulmod_bnm1 (sp, modn, bp, n, M->p[0][1], mn[0][1], scratch);
+
+ /* FIXME: Handle the small n case in some better way. */
+ /* NOTE(review): presumably mpn_mulmod_bnm1 stores only the plain
+ product limbs when the operand sizes sum to less than modn, so
+ the remaining high limbs are cleared here -- confirm. */
+ if (n + mn[1][1] < modn)
+ MPN_ZERO (tp + n + mn[1][1], modn - n - mn[1][1]);
+ if (n + mn[0][1] < modn)
+ MPN_ZERO (sp + n + mn[0][1], modn - n - mn[0][1]);
+
+ /* Subtract with wraparound: a borrow out of the top is taken from
+ the bottom again. */
+ cy = mpn_sub_n (tp, tp, sp, modn);
+ MPN_DECR_U (tp, modn, cy);
+
+ ASSERT (mpn_zero_p (tp + nn, modn - nn));
+
+ /* New b = m00 b - m10 a. The product m10 a has to be formed before
+ ap is overwritten with the new a; the last product reads only
+ bp. */
+ mpn_mulmod_bnm1 (sp, modn, ap, n, M->p[1][0], mn[1][0], scratch);
+ MPN_COPY (ap, tp, nn);
+ mpn_mulmod_bnm1 (tp, modn, bp, n, M->p[0][0], mn[0][0], scratch);
+
+ if (n + mn[1][0] < modn)
+ MPN_ZERO (sp + n + mn[1][0], modn - n - mn[1][0]);
+ if (n + mn[0][0] < modn)
+ MPN_ZERO (tp + n + mn[0][0], modn - n - mn[0][0]);
+
+ cy = mpn_sub_n (tp, tp, sp, modn);
+ MPN_DECR_U (tp, modn, cy);
+
+ ASSERT (mpn_zero_p (tp + nn, modn - nn));
+ MPN_COPY (bp, tp, nn);
+
+ /* Trim nn until the top limb of a or of b is non-zero. */
+ while ( (ap[nn-1] | bp[nn-1]) == 0)
+ {
+ nn--;
+ ASSERT (nn > 0);
+ }
+ }
+ TMP_FREE;
+
+ return nn;
+}
+
+/* Returns the number of scratch limbs to pass as tp to mpn_hgcd_reduce
+   for n-limb operands and split point p. */
+mp_size_t
+mpn_hgcd_reduce_itch (mp_size_t n, mp_size_t p)
+{
+  mp_size_t hgcd_itch = mpn_hgcd_itch (n-p);
+  mp_size_t adjust_itch;
+
+  if (! BELOW_THRESHOLD (n, HGCD_REDUCE_THRESHOLD))
+    /* Two areas of n-p limbs each, followed by the hgcd scratch.
+       Currently, hgcd_matrix_apply allocates its own storage. */
+    return 2*(n-p) + hgcd_itch;
+
+  /* For arbitrary p, the storage for _adjust is 2*(p + M->n) =
+     2*(p + ceil((n-p)/2) - 1) <= n + p - 1. */
+  adjust_itch = n + p - 1;
+
+  return hgcd_itch < adjust_itch ? adjust_itch : hgcd_itch;
+}
+
+/* Runs an hgcd on the n-p high limbs of {ap, n} and {bp, n}, storing
+   the matrix in M, and applies the inverse of M to the full operands.
+   Returns the size of the reduced operands, or 0 if the hgcd step
+   reported failure (ap and bp are then not updated by the final
+   matrix step).
+
+   FIXME: Document storage need. */
+mp_size_t
+mpn_hgcd_reduce (struct hgcd_matrix *M,
+                 mp_ptr ap, mp_ptr bp, mp_size_t n, mp_size_t p,
+                 mp_ptr tp)
+{
+  mp_size_t hn = n - p;
+  mp_ptr ahp, bhp;
+
+  if (BELOW_THRESHOLD (n, HGCD_REDUCE_THRESHOLD))
+    {
+      /* Plain hgcd, working directly on the high limbs. */
+      mp_size_t nn = mpn_hgcd (ap + p, bp + p, hn, M, tp);
+      if (nn <= 0)
+        return 0;
+
+      /* Needs 2*(p + M->n) <= 2*(floor(n/2) + ceil(n/2) - 1)
+         = 2 (n - 1) */
+      return mpn_hgcd_matrix_adjust (M, p + nn, ap, bp, p, tp);
+    }
+
+  /* mpn_hgcd_appr works on copies of the high limbs placed at the
+     start of tp; the limbs after the two copies are its scratch. */
+  ahp = tp;
+  bhp = tp + hn;
+  MPN_COPY (ahp, ap + p, hn);
+  MPN_COPY (bhp, bp + p, hn);
+
+  if (! mpn_hgcd_appr (ahp, bhp, hn, M, tp + 2*hn))
+    return 0;
+
+  return hgcd_matrix_apply (M, ap, bp, n);
+}
--- /dev/null
+/* hgcd_step.c.
+
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
+ SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2003, 2004, 2005, 2008, 2011, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* Hook passed to mpn_gcd_subdiv_step by mpn_hgcd_step.  p points to the
+   struct hgcd_matrix being built.  A non-zero quotient {qp, qn} is
+   folded into the matrix with mpn_hgcd_matrix_update_q, with d (0 or 1)
+   passed through.  No gcd is expected in this context, so gp must be
+   a null pointer. */
+static void
+hgcd_hook (void *p, mp_srcptr gp, mp_size_t gn,
+           mp_srcptr qp, mp_size_t qn, int d)
+{
+  struct hgcd_matrix *M;
+  mp_ptr scratch;
+
+  ASSERT (!gp);
+  ASSERT (d >= 0);
+  ASSERT (d <= 1);
+
+  MPN_NORMALIZE (qp, qn);
+  if (qn <= 0)
+    return;
+
+  M = (struct hgcd_matrix *) p;
+
+  /* NOTES: This is a bit ugly.  gcd_subdiv_step is handed a tp area and
+     stores q at the start of it; the limbs following q are used as
+     scratch for the matrix update. */
+  scratch = (mp_ptr) qp + qn;
+  mpn_hgcd_matrix_update_q (M, qp, qn, d, scratch);
+}
+
+/* Perform a few steps, using some of mpn_hgcd2, subtraction and
+   division.  Reduces the size by almost one limb or more, but never
+   below the given size s.  Returns the new size for a and b, or 0 if
+   no more steps are possible.
+
+   If hgcd2 succeeds, temporary space is needed for hgcd_matrix_mul_1,
+   M->n limbs, and for the copy of a that is passed to
+   mpn_matrix22_mul1_inverse_vector, n limbs.  If hgcd2 fails, space is
+   needed for the quotient, qn <= n - s limbs, and for
+   hgcd_matrix_update_q, qn + (size of the appropriate column of M) <=
+   (resulting size of M) + 1.
+
+   If N is the input size to the calling hgcd, then s = floor(N/2) +
+   1, M->n < N, qn + product size <= n - s + n - s + 1 = 2 (n - s) + 1
+   <= N.
+*/
+
+mp_size_t
+mpn_hgcd_step (mp_size_t n, mp_ptr ap, mp_ptr bp, mp_size_t s,
+               struct hgcd_matrix *M, mp_ptr tp)
+{
+  struct hgcd_matrix1 M1;
+  mp_limb_t mask;
+  mp_limb_t ah, al, bh, bl;
+
+  ASSERT (n > s);
+
+  mask = ap[n-1] | bp[n-1];
+  ASSERT (mask > 0);
+
+  /* Only one limb above s and a very small top limb: go straight to
+     the subtract/divide step. */
+  if (n == s + 1 && mask < 4)
+    goto subtract;
+
+  if (n == s + 1 || (mask & GMP_NUMB_HIGHBIT) != 0)
+    {
+      /* Use the two most significant limbs as they are. */
+      ah = ap[n-1]; al = ap[n-2];
+      bh = bp[n-1]; bl = bp[n-2];
+    }
+  else
+    {
+      /* Shift so that the larger top limb gets its high bit set, taking
+         bits from the third limb as needed. */
+      int shift;
+
+      count_leading_zeros (shift, mask);
+      ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]);
+      al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]);
+      bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]);
+      bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]);
+    }
+
+  /* Try an mpn_hgcd2 step; fall back to subtraction/division if it
+     makes no progress. */
+  if (! mpn_hgcd2 (ah, al, bh, bl, &M1))
+    goto subtract;
+
+  /* Multiply M <- M * M1 */
+  mpn_hgcd_matrix_mul_1 (M, &M1, tp);
+
+  /* Can't swap inputs, so a is copied to tp and the result is written
+     back to ap.  Multiply M1^{-1} (a;b). */
+  MPN_COPY (tp, ap, n);
+  return mpn_matrix22_mul1_inverse_vector (&M1, ap, tp, bp, n);
+
+ subtract:
+
+  return mpn_gcd_subdiv_step (ap, bp, n, s, hgcd_hook, M, tp);
+}
SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
-Copyright (C) 2007, 2009, 2010 Free Software Foundation, Inc.
+Copyright (C) 2007, 2009, 2010, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
-/* FIXME: Remove NULL and TMP_*, as soon as all the callers properly
- allocate and pass the scratch to the function. */
-#include <stdlib.h> /* for NULL */
-
#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"
TMP_DECL;
TMP_MARK;
- if (scratch == NULL)
- scratch = TMP_ALLOC_LIMBS (mpn_invert_itch (n));
-
if (BELOW_THRESHOLD (n, INV_APPR_THRESHOLD))
{
/* Maximum scratch needed by this branch: 2*n */
ASSERT ( mpn_invert_itch (n) >= mpn_invertappr_itch (n) );
e = mpn_ni_invertappr (ip, dp, n, scratch);
- if (e) { /* Assume the error can only be "0" (no error) or "1". */
+ if (UNLIKELY (e)) { /* Assume the error can only be "0" (no error) or "1". */
/* Code to detect and correct the "off by one" approximation. */
mpn_mul_n (scratch, ip, dp, n);
ASSERT_NOCARRY (mpn_add_n (scratch + n, scratch + n, dp, n));
SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
-Copyright (C) 2007, 2009, 2010 Free Software Foundation, Inc.
+Copyright (C) 2007, 2009, 2010, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
parts, the use of log_2 (or counting the bits) underestimate the maximum
number of iterations. */
-/* This is intended for constant THRESHOLDs only, where the compiler
- can completely fold the result. */
-#define LOG2C(n) \
- (((n) >= 0x1) + ((n) >= 0x2) + ((n) >= 0x4) + ((n) >= 0x8) + \
- ((n) >= 0x10) + ((n) >= 0x20) + ((n) >= 0x40) + ((n) >= 0x80) + \
- ((n) >= 0x100) + ((n) >= 0x200) + ((n) >= 0x400) + ((n) >= 0x800) + \
- ((n) >= 0x1000) + ((n) >= 0x2000) + ((n) >= 0x4000) + ((n) >= 0x8000))
-
#if TUNE_PROGRAM_BUILD
#define NPOWS \
((sizeof(mp_size_t) > 6 ? 48 : 8*sizeof(mp_size_t)))
THIS INTERFACE IS PRELIMINARY AND MIGHT DISAPPEAR OR BE SUBJECT TO
INCOMPATIBLE CHANGES IN A FUTURE RELEASE OF GMP.
-Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
+Copyright 1999, 2000, 2001, 2002, 2010 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
#define PROCESS_TWOS_EVEN \
{ \
int two, mask, shift; \
- \
+ \
two = JACOBI_TWO_U_BIT1 (b); \
mask = (~a & 2); \
a >>= 1; \
- \
+ \
shift = (~a & 1); \
a >>= shift; \
result_bit1 ^= two ^ (two & mask); \
- \
+ \
while ((a & 1) == 0) \
{ \
a >>= 1; \
#define PROCESS_TWOS_ANY \
{ \
int two, mask, shift; \
- \
+ \
two = JACOBI_TWO_U_BIT1 (b); \
shift = (~a & 1); \
a >>= shift; \
- \
+ \
mask = shift << 1; \
result_bit1 ^= (two & mask); \
- \
+ \
while ((a & 1) == 0) \
{ \
a >>= 1; \
}
#endif
-
+#if JACOBI_BASE_METHOD < 4
/* Calculate the value of the Jacobi symbol (a/b) of two mp_limb_t's, but
- with a restricted range of inputs accepted, namely b>1, b odd, and a<=b.
+ with a restricted range of inputs accepted, namely b>1, b odd.
The initial result_bit1 is taken as a parameter for the convenience of
mpz_kronecker_ui() et al. The sign changes both here and in those
Duplicating the loop body to avoid the MP_LIMB_T_SWAP(a,b) would be
possible, but a couple of tests suggest it's not a significant speedup,
- and may even be a slowdown, so what's here is good enough for now.
-
- Future: The code doesn't demand a<=b actually, so maybe this could be
- relaxed. All the places this is used currently call with a<=b though. */
+ and may even be a slowdown, so what's here is good enough for now. */
int
mpn_jacobi_base (mp_limb_t a, mp_limb_t b, int result_bit1)
{
ASSERT (b & 1); /* b odd */
ASSERT (b != 1);
- ASSERT (a <= b);
if (a == 0)
return 0;
if (a == 1)
goto done;
+ if (a >= b)
+ goto a_gt_b;
+
for (;;)
{
result_bit1 ^= JACOBI_RECIP_UU_BIT1 (a, b);
MP_LIMB_T_SWAP (a, b);
+ a_gt_b:
do
{
/* working on (a/b), a,b odd, a>=b */
done:
return JACOBI_BIT1_TO_PN (result_bit1);
}
+#endif
+
+#if JACOBI_BASE_METHOD == 4
+/* Computes (a/b) for odd b > 1 and any a. The initial bit is taken as a
+ * parameter. We have no need for the convention that the sign is in
+ * bit 1, internally we use bit 0.
+ *
+ * Returns 0 when a is zero or when the reduction reaches a == b, and
+ * otherwise 1 or -1 according to the accumulated sign bit. */
+
+/* FIXME: Could try table-based count_trailing_zeros. */
+int
+mpn_jacobi_base (mp_limb_t a, mp_limb_t b, int bit)
+{
+ int c;
+
+ ASSERT (b & 1);
+ ASSERT (b > 1);
+
+ if (a == 0)
+ /* This is the only line which depends on b > 1 */
+ return 0;
+
+ /* Move the incoming sign from bit 1 down to bit 0. */
+ bit >>= 1;
+
+ /* Below, we represent a and b shifted right so that the least
+ significant one bit is implicit. */
+
+ b >>= 1;
+
+ /* Remove the factors of two from a. With b shifted, bit 0 of
+ b ^ (b >> 1) is set exactly when the original b is 3 or 5 mod 8,
+ so the sign flips when c is odd and (2/b) = -1. */
+ count_trailing_zeros (c, a);
+ bit ^= c & (b ^ (b >> 1));
+
+ /* We may have c==GMP_LIMB_BITS-1, so we can't use a>>c+1. */
+ a >>= c;
+ a >>= 1;
+
+ do
+ {
+ /* bgta is an all-ones mask when b > a (t negative), else zero. */
+ mp_limb_t t = a - b;
+ mp_limb_t bgta = LIMB_HIGHBIT_TO_MASK (t);
+
+ /* a == b here means the two values are equal and greater than 1. */
+ if (t == 0)
+ return 0;
+
+ /* If b > a, invoke reciprocity */
+ /* Bit 0 of a & b is set when both original values are 3 mod 4. */
+ bit ^= (bgta & a & b);
+
+ /* b <-- min (a, b) */
+ b += (bgta & t);
+
+ /* a <-- |a - b| */
+ a = (t ^ bgta) - bgta;
+
+ /* Number of trailing zeros is the same no matter if we look at
+ * t or a, but using t gives more parallelism. */
+ count_trailing_zeros (c, t);
+ /* One more shift than the zero count: the difference of the
+ shifted values is half the difference of the odd values, and the
+ low one bit of the result is dropped again. */
+ c ++;
+ /* (2/b) = -1 if b = 3 or 5 mod 8 */
+ bit ^= c & (b ^ (b >> 1));
+ a >>= c;
+ }
+ /* The shifted b is zero when the value it represents is 1. */
+ while (b > 0);
+
+ return 1-2*(bit & 1);
+}
+#endif /* JACOBI_BASE_METHOD == 4 */
--- /dev/null
+/* jacobi.c
+
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
+ SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 1996, 1998, 2000, 2001, 2002, 2003, 2004, 2008, 2010, 2011 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#ifndef JACOBI_DC_THRESHOLD
+#define JACOBI_DC_THRESHOLD GCD_DC_THRESHOLD
+#endif
+
+/* Schönhage's rules:
+ *
+ * Assume r0 = r1 q1 + r2, with r0 odd, and r1 = q2 r2 + r3
+ *
+ * If r1 is odd, then
+ *
+ * (r1 | r0) = s(r1, r0) (r0 | r1) = s(r1, r0) (r2 | r1)
+ *
+ * where s(x,y) = (-1)^{(x-1)(y-1)/4} = (-1)^[x = y = 3 (mod 4)].
+ *
+ * If r1 is even, r2 must be odd. We have
+ *
+ * (r1 | r0) = (r1 - r0 | r0) = (-1)^(r0-1)/2 (r0 - r1 | r0)
+ * = (-1)^(r0-1)/2 s(r0, r0 - r1) (r0 | r0 - r1)
+ * = (-1)^(r0-1)/2 s(r0, r0 - r1) (r1 | r0 - r1)
+ *
+ * Now, if r1 = 0 (mod 4), then the sign factor is +1, and repeating
+ * q1 times gives
+ *
+ * (r1 | r0) = (r1 | r2) = (r3 | r2)
+ *
+ * On the other hand, if r1 = 2 (mod 4), the sign factor is
+ * (-1)^{(r0-1)/2}, and repeating q1 times gives the exponent
+ *
+ * (r0-1)/2 + (r0-r1-1)/2 + ... + (r0 - (q1-1) r1)/2
+ * = q1 (r0-1)/2 + q1 (q1-1)/2
+ *
+ * and we can summarize the even case as
+ *
+ * (r1 | r0) = t(r1, r0, q1) (r3 | r2)
+ *
+ * where t(x,y,q) = (-1)^{[x = 2 (mod 4)] (q(y-1)/2 + y(q-1)/2)}
+ *
+ * What about termination? The remainder sequence ends with (0|1) = 1
+ * (or (0 | r) = 0 if r != 1). What are the possible cases? If r1 is
+ * odd, r2 may be zero. If r1 is even, then r2 = r0 - q1 r1 is odd and
+ * hence non-zero. We may have r3 = r1 - q2 r2 = 0.
+ *
+ * Examples: (11|15) = - (15|11) = - (4|11)
+ * (4|11) = (4| 3) = (1| 3)
+ * (1| 3) = (3|1) = (0|1) = 1
+ *
+ * (2|7) = (2|1) = (0|1) = 1
+ *
+ * Detail: (2|7) = (2-7|7) = (-1|7)(5|7) = -(7|5) = -(2|5)
+ * (2|5) = (2-5|5) = (-1|5)(3|5) = (5|3) = (2|3)
+ * (2|3) = (2-3|3) = (-1|3)(1|3) = -(3|1) = -(2|1)
+ *
+ */
+
+/* In principle, the state consists of four variables: e (one bit), a,
+ b (two bits each), d (one bit). Collected factors are (-1)^e. a and
+ b are the least significant bits of the current remainders. d
+ (denominator) is 0 if we're currently subtracting multiples of a
+ from b, and 1 if we're subtracting b from a.
+
+ e is stored in the least significant bit, while a, b and d are
+ coded as only 13 distinct values in bits 1-4, according to the
+ following table. For rows not mentioning d, the value is either
+ implied, or it doesn't matter. */
+
+#if WANT_ASSERT
+/* Compiled only when WANT_ASSERT is set. Entry k gives the a and b
+ values (two bits each) for state index k; JACOBI_A and JACOBI_B look
+ an entry up from a bits word by dropping its least significant bit
+ (bits >> 1). */
+static const struct
+{
+ unsigned char a;
+ unsigned char b;
+} decode_table[13] = {
+ /* 0 */ { 0, 1 },
+ /* 1 */ { 0, 3 },
+ /* 2 */ { 1, 1 },
+ /* 3 */ { 1, 3 },
+ /* 4 */ { 2, 1 },
+ /* 5 */ { 2, 3 },
+ /* 6 */ { 3, 1 },
+ /* 7 */ { 3, 3 }, /* d = 1 */
+ /* 8 */ { 1, 0 },
+ /* 9 */ { 1, 2 },
+ /* 10 */ { 3, 0 },
+ /* 11 */ { 3, 2 },
+ /* 12 */ { 3, 3 }, /* d = 0 */
+};
+#define JACOBI_A(bits) (decode_table[(bits)>>1].a)
+#define JACOBI_B(bits) (decode_table[(bits)>>1].b)
+#endif /* WANT_ASSERT */
+
+/* 208-entry byte table whose contents are supplied by the included
+ jacobitab.h.
+ NOTE(review): presumably the state transition table used by
+ mpn_jacobi_update, with jacobitab.h generated by gen-jacobitab.c --
+ confirm. */
+const unsigned char jacobi_table[208] = {
+#include "jacobitab.h"
+};
+
+#define BITS_FAIL 31
+
+/* Hook passed to mpn_gcd_subdiv_step by mpn_jacobi_n.  p points to the
+   unsigned state word.  When a gcd {gp, gn} is reported and it is not
+   1, the state is set to BITS_FAIL and nothing else is done.  When a
+   quotient {qp, qn} is reported, the state is advanced with
+   mpn_jacobi_update using d and the two low bits of the quotient. */
+static void
+jacobi_hook (void *p, mp_srcptr gp, mp_size_t gn,
+             mp_srcptr qp, mp_size_t qn, int d)
+{
+  unsigned *statep = (unsigned *) p;
+
+  if (gp)
+    {
+      int gcd_is_one;
+
+      ASSERT (gn > 0);
+      gcd_is_one = (gn == 1 && gp[0] == 1);
+      if (!gcd_is_one)
+        {
+          *statep = BITS_FAIL;
+          return;
+        }
+    }
+
+  if (!qp)
+    return;
+
+  ASSERT (qn > 0);
+  ASSERT (d >= 0);
+  *statep = mpn_jacobi_update (*statep, d, qp[0] & 3);
+}
+
+#define CHOOSE_P(n) (2*(n) / 3)
+
+/* Computes the Jacobi symbol for the n-limb operands {ap, n} and
+   {bp, n}.  At least one operand must be odd and at least one must have
+   a non-zero top limb.  bits is the initial state word (sign in the
+   least significant bit); it is advanced by mpn_hgcd_jacobi,
+   mpn_hgcd2_jacobi and, through jacobi_hook, mpn_jacobi_update while
+   the operands are reduced.  Both operand areas are used as working
+   storage and are clobbered.
+
+   Returns 0 when the reduction reports a gcd other than 1; otherwise
+   the value given by mpn_jacobi_finish, mpn_jacobi_base, mpn_jacobi_2,
+   or 1 - 2*(bits & 1). */
+int
+mpn_jacobi_n (mp_ptr ap, mp_ptr bp, mp_size_t n, unsigned bits)
+{
+  mp_size_t scratch;
+  mp_ptr tp;
+
+  TMP_DECL;
+
+  ASSERT (n > 0);
+  ASSERT ( (ap[n-1] | bp[n-1]) > 0);
+  ASSERT ( (bp[0] | ap[0]) & 1);
+
+  /* FIXME: Check for small sizes first, before setting up temporary
+     storage etc. */
+  scratch = MPN_GCD_SUBDIV_STEP_ITCH(n);
+
+  /* The extra scratch is needed exactly when the divide-and-conquer
+     loop below runs, so it must be sized under the same threshold as
+     that loop (JACOBI_DC_THRESHOLD), not GCD_DC_THRESHOLD; the two can
+     differ when JACOBI_DC_THRESHOLD is defined separately. */
+  if (ABOVE_THRESHOLD (n, JACOBI_DC_THRESHOLD))
+    {
+      mp_size_t matrix_scratch;
+      mp_size_t hgcd_scratch;
+      mp_size_t update_scratch;
+      mp_size_t p = CHOOSE_P (n);
+      mp_size_t dc_scratch;
+
+      matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p);
+      hgcd_scratch = mpn_hgcd_itch (n - p);
+      update_scratch = p + n - 1;
+
+      dc_scratch = matrix_scratch + MAX(hgcd_scratch, update_scratch);
+      if (dc_scratch > scratch)
+        scratch = dc_scratch;
+    }
+
+  TMP_MARK;
+  tp = TMP_ALLOC_LIMBS(scratch);
+
+  /* Divide-and-conquer phase: hgcd on the high n - p limbs, then apply
+     the resulting matrix to the full operands. */
+  while (ABOVE_THRESHOLD (n, JACOBI_DC_THRESHOLD))
+    {
+      struct hgcd_matrix M;
+      mp_size_t p = CHOOSE_P (n);
+      mp_size_t matrix_scratch = MPN_HGCD_MATRIX_INIT_ITCH (n - p);
+      mp_size_t nn;
+      mpn_hgcd_matrix_init (&M, n - p, tp);
+
+      nn = mpn_hgcd_jacobi (ap + p, bp + p, n - p, &M, &bits,
+                            tp + matrix_scratch);
+      if (nn > 0)
+        {
+          ASSERT (M.n <= (n - p - 1)/2);
+          ASSERT (M.n + p <= (p + n - 1) / 2);
+          /* Temporary storage 2 (p + M->n) <= p + n - 1. */
+          n = mpn_hgcd_matrix_adjust (&M, p + nn, ap, bp, p, tp + matrix_scratch);
+        }
+      else
+        {
+          /* Temporary storage n */
+          n = mpn_gcd_subdiv_step (ap, bp, n, 0, jacobi_hook, &bits, tp);
+          if (!n)
+            {
+              TMP_FREE;
+              return bits == BITS_FAIL ? 0 : mpn_jacobi_finish (bits);
+            }
+        }
+    }
+
+  /* Lehmer phase: repeated two-limb hgcd2 steps until at most two limbs
+     remain. */
+  while (n > 2)
+    {
+      struct hgcd_matrix1 M;
+      mp_limb_t ah, al, bh, bl;
+      mp_limb_t mask;
+
+      mask = ap[n-1] | bp[n-1];
+      ASSERT (mask > 0);
+
+      if (mask & GMP_NUMB_HIGHBIT)
+        {
+          ah = ap[n-1]; al = ap[n-2];
+          bh = bp[n-1]; bl = bp[n-2];
+        }
+      else
+        {
+          int shift;
+
+          count_leading_zeros (shift, mask);
+          ah = MPN_EXTRACT_NUMB (shift, ap[n-1], ap[n-2]);
+          al = MPN_EXTRACT_NUMB (shift, ap[n-2], ap[n-3]);
+          bh = MPN_EXTRACT_NUMB (shift, bp[n-1], bp[n-2]);
+          bl = MPN_EXTRACT_NUMB (shift, bp[n-2], bp[n-3]);
+        }
+
+      /* Try an mpn_hgcd2_jacobi step */
+      if (mpn_hgcd2_jacobi (ah, al, bh, bl, &M, &bits))
+        {
+          /* The new a is written to tp, which then trades places with
+             ap so that no copy is needed. */
+          n = mpn_matrix22_mul1_inverse_vector (&M, tp, ap, bp, n);
+          MP_PTR_SWAP (ap, tp);
+        }
+      else
+        {
+          /* mpn_hgcd2_jacobi has failed. Then either one of a or b is
+             very small, or the difference is very small. Perform one
+             subtraction followed by one division. */
+          n = mpn_gcd_subdiv_step (ap, bp, n, 0, &jacobi_hook, &bits, tp);
+          if (!n)
+            {
+              TMP_FREE;
+              return bits == BITS_FAIL ? 0 : mpn_jacobi_finish (bits);
+            }
+        }
+    }
+
+  /* NOTE(review): states 16 and up appear to be the ones where a is
+     the denominator; after the swap b must be odd, as asserted. */
+  if (bits >= 16)
+    MP_PTR_SWAP (ap, bp);
+
+  ASSERT (bp[0] & 1);
+
+  if (n == 1)
+    {
+      mp_limb_t al, bl;
+      al = ap[0];
+      bl = bp[0];
+
+      TMP_FREE;
+      if (bl == 1)
+        return 1 - 2*(bits & 1);
+      else
+        /* mpn_jacobi_base takes the sign in bit 1. */
+        return mpn_jacobi_base (al, bl, bits << 1);
+    }
+
+  else
+    {
+      int res = mpn_jacobi_2 (ap, bp, bits & 1);
+      TMP_FREE;
+      return res;
+    }
+}
--- /dev/null
+/* jacobi_2.c
+
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
+ SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 1996, 1998, 2000, 2001, 2002, 2003, 2004, 2008, 2010 Free Software
+Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#ifndef JACOBI_2_METHOD
+#define JACOBI_2_METHOD 2
+#endif
+
+/* Computes (a / b) where b is odd, and a and b are otherwise arbitrary
+ two-limb numbers. */
+#if JACOBI_2_METHOD == 1
+/* Method 1: binary algorithm on two-limb values {ap, 2} and {bp, 2},
+ with bp[0] odd. The incoming sign is bit 0 of bit. Both values are
+ kept shifted right so that their least significant one bit is
+ implicit. Returns 0, 1 or -1, directly or via mpn_jacobi_base. */
+int
+mpn_jacobi_2 (mp_srcptr ap, mp_srcptr bp, unsigned bit)
+{
+ mp_limb_t ah, al, bh, bl;
+ int c;
+
+ al = ap[0];
+ ah = ap[1];
+ bl = bp[0];
+ bh = bp[1];
+
+ ASSERT (bl & 1);
+
+ /* Drop the low one bit of b. */
+ bl = ((bh << (GMP_NUMB_BITS - 1)) & GMP_NUMB_MASK) | (bl >> 1);
+ bh >>= 1;
+
+ /* b == 1: only the incoming sign matters. */
+ if ( (bh | bl) == 0)
+ return 1 - 2*(bit & 1);
+
+ /* a == 0 with b > 1. */
+ if ( (ah | al) == 0)
+ return 0;
+
+ /* A zero low limb is a whole limb of factors of two. */
+ if (al == 0)
+ {
+ al = ah;
+ ah = 0;
+ bit ^= GMP_NUMB_BITS & (bl ^ (bl >> 1));
+ }
+ /* Account for the remaining factors of two in a; bit 0 of
+ bl ^ (bl >> 1) is set when the unshifted b is 3 or 5 mod 8. */
+ count_trailing_zeros (c, al);
+ bit ^= c & (bl ^ (bl >> 1));
+
+ /* Shift out the zeros and the low one bit of a. A shift by
+ GMP_NUMB_BITS is done as a limb move instead. */
+ c++;
+ if (UNLIKELY (c == GMP_NUMB_BITS))
+ {
+ al = ah;
+ ah = 0;
+ }
+ else
+ {
+ al = ((ah << (GMP_NUMB_BITS - c)) & GMP_NUMB_MASK) | (al >> c);
+ ah >>= c;
+ }
+ /* Two-limb steps, as long as either value has a non-zero high limb. */
+ while ( (ah | bh) > 0)
+ {
+ mp_limb_t th, tl;
+ mp_limb_t bgta;
+
+ sub_ddmmss (th, tl, ah, al, bh, bl);
+ /* a == b. */
+ if ( (tl | th) == 0)
+ return 0;
+
+ /* All ones when the difference is negative, i.e. b > a. */
+ bgta = LIMB_HIGHBIT_TO_MASK (th);
+
+ /* If b > a, invoke reciprocity */
+ bit ^= (bgta & al & bl);
+
+ /* b <-- min (a, b) */
+ add_ssaaaa (bh, bl, bh, bl, th & bgta, tl & bgta);
+
+ /* The smaller value is 1. */
+ if ( (bh | bl) == 0)
+ return 1 - 2*(bit & 1);
+
+ /* a <-- |a - b| */
+ al = (bgta ^ tl) - bgta;
+ ah = (bgta ^ th);
+
+ if (UNLIKELY (al == 0))
+ {
+ /* If b > a, al == 0 implies that we have a carry to
+ propagate. */
+ al = ah - bgta;
+ ah = 0;
+ bit ^= GMP_NUMB_BITS & (bl ^ (bl >> 1));
+ }
+ count_trailing_zeros (c, al);
+ c++;
+ bit ^= c & (bl ^ (bl >> 1));
+
+ if (UNLIKELY (c == GMP_NUMB_BITS))
+ {
+ al = ah;
+ ah = 0;
+ }
+ else
+ {
+ al = ((ah << (GMP_NUMB_BITS - c)) & GMP_NUMB_MASK) | (al >> c);
+ ah >>= c;
+ }
+ }
+
+ ASSERT (bl > 0);
+
+ /* Single-limb steps, as long as either value has its top bit set. */
+ while ( (al | bl) & GMP_LIMB_HIGHBIT)
+ {
+ /* Need an extra comparison to get the mask. */
+ mp_limb_t t = al - bl;
+ mp_limb_t bgta = - (bl > al);
+
+ if (t == 0)
+ return 0;
+
+ /* If b > a, invoke reciprocity */
+ bit ^= (bgta & al & bl);
+
+ /* b <-- min (a, b) */
+ bl += (bgta & t);
+
+ /* a <-- |a - b| */
+ al = (t ^ bgta) - bgta;
+
+ /* Number of trailing zeros is the same no matter if we look at
+ * t or a, but using t gives more parallelism. */
+ count_trailing_zeros (c, t);
+ c ++;
+ /* (2/b) = -1 if b = 3 or 5 mod 8 */
+ bit ^= c & (bl ^ (bl >> 1));
+
+ /* Only the top bit of t was set, so the difference is a power of
+ two; return the sign rather than shift by the full limb width. */
+ if (UNLIKELY (c == GMP_NUMB_BITS))
+ return 1 - 2*(bit & 1);
+
+ al >>= c;
+ }
+
+ /* Here we have a little impedance mismatch. Better to inline it? */
+ /* Restore the implicit low bits and move the sign to bit 1 for
+ mpn_jacobi_base. */
+ return mpn_jacobi_base (2*al+1, 2*bl+1, bit << 1);
+}
+#elif JACOBI_2_METHOD == 2
+/* Method 2: subtraction based algorithm on two-limb values {ap, 2} and
+ {bp, 2}, with bp[0] odd. The incoming sign is bit 0 of bit; it is
+ moved to bit 1 and only that bit is meaningful afterwards. The
+ values are reduced until both fit in one limb and the rest is left
+ to mpn_jacobi_base. Returns 0, 1 or -1. */
+int
+mpn_jacobi_2 (mp_srcptr ap, mp_srcptr bp, unsigned bit)
+{
+ mp_limb_t ah, al, bh, bl;
+ int c;
+
+ al = ap[0];
+ ah = ap[1];
+ bl = bp[0];
+ bh = bp[1];
+
+ ASSERT (bl & 1);
+
+ /* Use bit 1. */
+ /* With the sign in bit 1, al & bl can be xored in directly for
+ reciprocity (bit 1 is set when both are 3 mod 4), and so can
+ (c << 1) & (bl ^ (bl >> 1)) for c factors of two (bit 1 is set when
+ c is odd and bl is 3 or 5 mod 8). */
+ bit <<= 1;
+
+ if (bh == 0 && bl == 1)
+ /* (a|1) = 1 */
+ return 1 - (bit & 2);
+
+ if (al == 0)
+ {
+ if (ah == 0)
+ /* (0|b) = 0, b > 1 */
+ return 0;
+
+ /* a = ah * 2^GMP_NUMB_BITS: remove GMP_NUMB_BITS + c factors of
+ two, which leaves a single limb. */
+ count_trailing_zeros (c, ah);
+ bit ^= ((GMP_NUMB_BITS + c) << 1) & (bl ^ (bl >> 1));
+
+ /* Swap roles: the single-limb odd part of a goes to bl, the old
+ two-limb b goes to (ah, al). */
+ al = bl;
+ bl = ah >> c;
+
+ if (bl == 1)
+ /* (1|b) = 1 */
+ return 1 - (bit & 2);
+
+ ah = bh;
+
+ /* Reciprocity for the swap. */
+ bit ^= al & bl;
+
+ goto b_reduced;
+ }
+ if ( (al & 1) == 0)
+ {
+ /* Make a odd; c >= 1 here, so GMP_NUMB_BITS - c is a valid shift
+ count. */
+ count_trailing_zeros (c, al);
+
+ al = ((ah << (GMP_NUMB_BITS - c)) & GMP_NUMB_MASK) | (al >> c);
+ ah >>= c;
+ bit ^= (c << 1) & (bl ^ (bl >> 1));
+ }
+ if (ah == 0)
+ {
+ if (bh > 0)
+ {
+ /* a fits in one limb but b does not: swap, so that the
+ single-limb value ends up in bl. */
+ bit ^= al & bl;
+ MP_LIMB_T_SWAP (al, bl);
+ ah = bh;
+ goto b_reduced;
+ }
+ goto ab_reduced;
+ }
+
+ /* Both values have two limbs. Alternate between reducing a by b and
+ b by a until one high limb vanishes or the high limbs are equal. */
+ while (bh > 0)
+ {
+ /* Compute (a|b) */
+ while (ah > bh)
+ {
+ sub_ddmmss (ah, al, ah, al, bh, bl);
+ if (al == 0)
+ {
+ count_trailing_zeros (c, ah);
+ bit ^= ((GMP_NUMB_BITS + c) << 1) & (bl ^ (bl >> 1));
+
+ al = bl;
+ bl = ah >> c;
+ ah = bh;
+
+ bit ^= al & bl;
+ goto b_reduced;
+ }
+ count_trailing_zeros (c, al);
+ bit ^= (c << 1) & (bl ^ (bl >> 1));
+ al = ((ah << (GMP_NUMB_BITS - c)) & GMP_NUMB_MASK) | (al >> c);
+ ah >>= c;
+ }
+ if (ah == bh)
+ goto cancel_hi;
+
+ if (ah == 0)
+ {
+ bit ^= al & bl;
+ MP_LIMB_T_SWAP (al, bl);
+ ah = bh;
+ break;
+ }
+
+ /* Reciprocity, before switching to reducing b by a. */
+ bit ^= al & bl;
+
+ /* Compute (b|a) */
+ while (bh > ah)
+ {
+ sub_ddmmss (bh, bl, bh, bl, ah, al);
+ if (bl == 0)
+ {
+ count_trailing_zeros (c, bh);
+ bit ^= ((GMP_NUMB_BITS + c) << 1) & (al ^ (al >> 1));
+
+ bl = bh >> c;
+ bit ^= al & bl;
+ goto b_reduced;
+ }
+ count_trailing_zeros (c, bl);
+ bit ^= (c << 1) & (al ^ (al >> 1));
+ bl = ((bh << (GMP_NUMB_BITS - c)) & GMP_NUMB_MASK) | (bl >> c);
+ bh >>= c;
+ }
+ /* Reciprocity again, back to (a|b). */
+ bit ^= al & bl;
+
+ /* Compute (a|b) */
+ if (ah == bh)
+ {
+ cancel_hi:
+ /* Equal high limbs: the difference fits in one limb. */
+ if (al < bl)
+ {
+ MP_LIMB_T_SWAP (al, bl);
+ bit ^= al & bl;
+ }
+ al -= bl;
+ if (al == 0)
+ return 0;
+
+ count_trailing_zeros (c, al);
+ bit ^= (c << 1) & (bl ^ (bl >> 1));
+ al >>= c;
+
+ if (al == 1)
+ return 1 - (bit & 2);
+
+ MP_LIMB_T_SWAP (al, bl);
+ bit ^= al & bl;
+ break;
+ }
+ }
+
+ b_reduced:
+ /* Compute (a|b), with b a single limb. */
+ ASSERT (bl & 1);
+
+ if (bl == 1)
+ /* (a|1) = 1 */
+ return 1 - (bit & 2);
+
+ /* Subtract the single limb bl from the two-limb a and strip factors
+ of two, until a fits in one limb as well. */
+ while (ah > 0)
+ {
+ ah -= (al < bl);
+ al -= bl;
+ if (al == 0)
+ {
+ if (ah == 0)
+ return 0;
+ count_trailing_zeros (c, ah);
+ bit ^= ((GMP_NUMB_BITS + c) << 1) & (bl ^ (bl >> 1));
+ al = ah >> c;
+ goto ab_reduced;
+ }
+ count_trailing_zeros (c, al);
+
+ al = ((ah << (GMP_NUMB_BITS - c)) & GMP_NUMB_MASK) | (al >> c);
+ ah >>= c;
+ bit ^= (c << 1) & (bl ^ (bl >> 1));
+ }
+ ab_reduced:
+ /* Both values are single limbs; bit already has the sign in bit 1. */
+ ASSERT (bl & 1);
+ ASSERT (bl > 1);
+
+ return mpn_jacobi_base (al, bl, bit);
+}
+#else
+#error Unsupported value for JACOBI_2_METHOD
+#endif
{
s0[rn] = r1[rn] - mpn_sub_n (s0, r1, r0, rn);
s0s = 1; /* s4 = -r0 + r1 - r2 + r3 */
- /* Reverse sign! */
+ /* Reverse sign! */
}
else
{
--- /dev/null
+/* matrix22_mul1_inverse_vector.c
+
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
+ SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2008, 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Multiplies the vector (a; b) from the left by
+   M^{-1} = (u11, -u01; -u10, u00), storing the first component in
+   {rp, n} and the second in place in {bp, n}.  A separate result
+   area is used for the first component so that no copy of a is needed.
+   Returns n, or n - 1 when the top limbs of both results are zero. */
+mp_size_t
+mpn_matrix22_mul1_inverse_vector (const struct hgcd_matrix1 *M,
+                                  mp_ptr rp, mp_srcptr ap, mp_ptr bp, mp_size_t n)
+{
+  mp_limb_t mul_hi, sub_hi;
+
+  /* r = u11 * a - u01 * b.  This has to come first, since it reads b
+     before b is overwritten.  The two high limbs must cancel. */
+  mul_hi = mpn_mul_1 (rp, ap, n, M->u[1][1]);
+  sub_hi = mpn_submul_1 (rp, bp, n, M->u[0][1]);
+  ASSERT (mul_hi == sub_hi);
+
+  /* b = u00 * b - u10 * a, in place. */
+  mul_hi = mpn_mul_1 (bp, bp, n, M->u[0][0]);
+  sub_hi = mpn_submul_1 (bp, ap, n, M->u[1][0]);
+  ASSERT (mul_hi == sub_hi);
+
+  if ((rp[n-1] | bp[n-1]) == 0)
+    n--;
+
+  return n;
+}
Return the single-limb remainder.
There are no constraints on the value of the divisor.
-Copyright 1991, 1993, 1994, 1999, 2000, 2002, 2007, 2008, 2009 Free
+Copyright 1991, 1993, 1994, 1999, 2000, 2002, 2007, 2008, 2009, 2012 Free
Software Foundation, Inc.
This file is part of the GNU MP Library.
#define MOD_1_2_TO_MOD_1_4_THRESHOLD 20
#endif
+#if TUNE_PROGRAM_BUILD && !HAVE_NATIVE_mpn_mod_1_1p
+/* Duplicates declarations in tune/speed.h */
+mp_limb_t mpn_mod_1_1p_1 (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [4]);
+mp_limb_t mpn_mod_1_1p_2 (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [4]);
+
+void mpn_mod_1_1p_cps_1 (mp_limb_t [4], mp_limb_t);
+void mpn_mod_1_1p_cps_2 (mp_limb_t [4], mp_limb_t);
+
+/* In this configuration mpn_mod_1_1p and mpn_mod_1_1p_cps are redefined
+ to dispatch on the value of mod_1_1p_method: 1 selects the _1
+ variants, 2 selects the _2 variants, and any other value calls the
+ __gmpn_ functions. */
+#undef mpn_mod_1_1p
+#define mpn_mod_1_1p(ap, n, b, pre) \
+ (mod_1_1p_method == 1 ? mpn_mod_1_1p_1 (ap, n, b, pre) \
+ : (mod_1_1p_method == 2 ? mpn_mod_1_1p_2 (ap, n, b, pre) \
+ : __gmpn_mod_1_1p (ap, n, b, pre)))
+
+#undef mpn_mod_1_1p_cps
+#define mpn_mod_1_1p_cps(pre, b) \
+ (mod_1_1p_method == 1 ? mpn_mod_1_1p_cps_1 (pre, b) \
+ : (mod_1_1p_method == 2 ? mpn_mod_1_1p_cps_2 (pre, b) \
+ : __gmpn_mod_1_1p_cps (pre, b)))
+#endif /* TUNE_PROGRAM_BUILD && !HAVE_NATIVE_mpn_mod_1_1p */
+
/* The comments in mpn/generic/divrem_1.c apply here too.
if (UDIV_NEEDS_NORMALIZATION
&& BELOW_THRESHOLD (un, MOD_1_UNNORM_THRESHOLD))
{
+ mp_limb_t nshift;
for (i = un - 2; i >= 0; i--)
{
n0 = up[i] << GMP_NAIL_BITS;
- udiv_qrnnd (dummy, r, r,
- (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt)),
- d);
+ nshift = (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt));
+ udiv_qrnnd (dummy, r, r, nshift, d);
r >>= GMP_NAIL_BITS;
n1 = n0;
}
}
else
{
- mp_limb_t inv;
+ mp_limb_t inv, nshift;
invert_limb (inv, d);
for (i = un - 2; i >= 0; i--)
{
n0 = up[i] << GMP_NAIL_BITS;
- udiv_qrnnd_preinv (dummy, r, r,
- (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt)),
- d, inv);
+ nshift = (n1 << cnt) | (n0 >> (GMP_NUMB_BITS - cnt));
+ udiv_rnnd_preinv (r, r, nshift, d, inv);
r >>= GMP_NAIL_BITS;
n1 = n0;
}
- udiv_qrnnd_preinv (dummy, r, r, n1 << cnt, d, inv);
+ udiv_rnnd_preinv (r, r, n1 << cnt, d, inv);
r >>= GMP_NAIL_BITS;
return r >> cnt;
}
for (i = un - 1; i >= 0; i--)
{
n0 = up[i] << GMP_NAIL_BITS;
- udiv_qrnnd_preinv (dummy, r, r, n0, d, inv);
+ udiv_rnnd_preinv (r, r, n0, d, inv);
r >>= GMP_NAIL_BITS;
}
return r;
/* mpn_mod_1_1p (ap, n, b, cps)
Divide (ap,,n) by b. Return the single-limb remainder.
- Contributed to the GNU project by Torbjorn Granlund.
+ Contributed to the GNU project by Torbjorn Granlund and Niels Möller.
Based on a suggestion by Peter L. Montgomery.
THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-Copyright 2008, 2009 Free Software Foundation, Inc.
+Copyright 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
#include "gmp-impl.h"
#include "longlong.h"
+#ifndef MOD_1_1P_METHOD
+# define MOD_1_1P_METHOD 1 /* need to make sure this is 2 for asm testing */
+#endif
+
+/* Define some longlong.h-style macros, but for wider operations.
+ * add_mssaaaa is like longlong.h's add_ssaaaa, but also generates
+ * carry out, in the form of a mask.
+ *
+ * add_mssaaaa (m, s1, s0, a1, a0, b1, b0) forms the two-word sum
+ * (s1,s0) = (a1,a0) + (b1,b0) and sets m to 0 when that sum does not
+ * carry out of the high word, or to all ones (i.e. -1) when it does.
+ *
+ * With GCC, inline-assembly versions are provided below for a number of
+ * CPU / word-size combinations; any configuration that does not match
+ * one of them gets the portable C definition at the end of this section
+ * (the #ifndef add_mssaaaa fallback). */
+
+#if defined (__GNUC__)
+
+#if HAVE_HOST_CPU_FAMILY_x86 && W_TYPE_SIZE == 32
+#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0) \
+ __asm__ ( "add %6, %k2\n\t" \
+ "adc %4, %k1\n\t" \
+ "sbb %k0, %k0" \
+ : "=r" (m), "=r" (s1), "=&r" (s0) \
+ : "1" ((USItype)(a1)), "g" ((USItype)(b1)), \
+ "%2" ((USItype)(a0)), "g" ((USItype)(b0)))
+#endif
+
+#if HAVE_HOST_CPU_FAMILY_x86_64 && W_TYPE_SIZE == 64
+#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0) \
+ __asm__ ( "add %6, %q2\n\t" \
+ "adc %4, %q1\n\t" \
+ "sbb %q0, %q0" \
+ : "=r" (m), "=r" (s1), "=&r" (s0) \
+ : "1" ((UDItype)(a1)), "rme" ((UDItype)(b1)), \
+ "%2" ((UDItype)(a0)), "rme" ((UDItype)(b0)))
+#endif
+
+#if defined (__sparc__) && W_TYPE_SIZE == 32
+#define add_mssaaaa(m, sh, sl, ah, al, bh, bl) \
+ __asm__ ( "addcc %r5, %6, %2\n\t" \
+ "addxcc %r3, %4, %1\n\t" \
+ "subx %%g0, %%g0, %0" \
+ : "=r" (m), "=r" (sh), "=&r" (sl) \
+ : "rJ" (ah), "rI" (bh), "%rJ" (al), "rI" (bl) \
+ __CLOBBER_CC)
+#endif
+
+#if defined (__sparc__) && W_TYPE_SIZE == 64
+#define add_mssaaaa(m, sh, sl, ah, al, bh, bl) \
+ __asm__ ( "addcc %r5, %6, %2\n\t" \
+ "addccc %r7, %8, %%g0\n\t" \
+ "addccc %r3, %4, %1\n\t" \
+ "clr %0\n\t" \
+ "movcs %%xcc, -1, %0" \
+ : "=r" (m), "=r" (sh), "=&r" (sl) \
+ : "rJ" (ah), "rI" (bh), "%rJ" (al), "rI" (bl), \
+ "rJ" ((al) >> 32), "rI" ((bl) >> 32) \
+ __CLOBBER_CC)
+#endif
+
+#if HAVE_HOST_CPU_FAMILY_powerpc && !defined (_LONG_LONG_LIMB)
+/* This works fine for 32-bit and 64-bit limbs, except for 64-bit limbs with a
+ processor running in 32-bit mode, since the carry flag then gets the 32-bit
+ carry. */
+#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0) \
+ __asm__ ( "add%I6c %2, %5, %6\n\t" \
+ "adde %1, %3, %4\n\t" \
+ "subfe %0, %0, %0\n\t" \
+ "nor %0, %0, %0" \
+ : "=r" (m), "=r" (s1), "=&r" (s0) \
+ : "r" (a1), "r" (b1), "%r" (a0), "rI" (b0))
+#endif
+
+#if defined (__s390x__) && W_TYPE_SIZE == 64
+#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0) \
+ __asm__ ( "algr %2, %6\n\t" \
+ "alcgr %1, %4\n\t" \
+ "lghi %0, 0\n\t" \
+ "alcgr %0, %0\n\t" \
+ "lcgr %0, %0" \
+ : "=r" (m), "=r" (s1), "=&r" (s0) \
+ : "1" ((UDItype)(a1)), "r" ((UDItype)(b1)), \
+ "%2" ((UDItype)(a0)), "r" ((UDItype)(b0)) __CLOBBER_CC)
+#endif
+
+#if defined (__arm__) && W_TYPE_SIZE == 32
+#define add_mssaaaa(m, sh, sl, ah, al, bh, bl) \
+ __asm__ ( "adds %2, %5, %6\n\t" \
+ "adcs %1, %3, %4\n\t" \
+ "movcc %0, #0\n\t" \
+ "movcs %0, #-1" \
+ : "=r" (m), "=r" (sh), "=&r" (sl) \
+ : "r" (ah), "rI" (bh), "%r" (al), "rI" (bl) __CLOBBER_CC)
+#endif
+#endif /* defined (__GNUC__) */
+/* Portable C fallback, used when none of the assembly versions above was
+ selected. __c0 is the carry out of the low-word addition a0 + b0 and
+ __c1 the carry out of a1 + b1; folding __c0 into the high word can
+ carry once more, which is detected by (__s1 < __c0). The two high-word
+ carries are added and negated to form the mask m. */
+#ifndef add_mssaaaa
+#define add_mssaaaa(m, s1, s0, a1, a0, b1, b0) \
+ do { \
+ UWtype __s0, __s1, __c0, __c1; \
+ __s0 = (a0) + (b0); \
+ __s1 = (a1) + (b1); \
+ __c0 = __s0 < (a0); \
+ __c1 = __s1 < (a1); \
+ (s0) = __s0; \
+ __s1 = __s1 + __c0; \
+ (s1) = __s1; \
+ (m) = - (__c1 + (__s1 < __c0)); \
+ } while (0)
+#endif
+
+#if MOD_1_1P_METHOD == 1
void
mpn_mod_1_1p_cps (mp_limb_t cps[4], mp_limb_t b)
{
b <<= cnt;
invert_limb (bi, b);
- if (UNLIKELY (cnt == 0))
- B1modb = -b;
- else
- B1modb = -b * ((bi >> (GMP_LIMB_BITS-cnt)) | (CNST_LIMB(1) << cnt));
- ASSERT (B1modb <= b); /* NB: not fully reduced mod b */
- udiv_rnd_preinv (B2modb, B1modb, b, bi);
-
cps[0] = bi;
cps[1] = cnt;
+
+ B1modb = -b;
+ if (LIKELY (cnt != 0))
+ B1modb *= ((bi >> (GMP_LIMB_BITS-cnt)) | (CNST_LIMB(1) << cnt));
+ ASSERT (B1modb <= b); /* NB: not fully reduced mod b */
cps[2] = B1modb >> cnt;
+
+ /* In the normalized case, this can be simplified to
+ *
+ * B2modb = - b * bi;
+ * ASSERT (B2modb <= b); // NB: equality iff b = B/2
+ */
+ udiv_rnnd_preinv (B2modb, B1modb, 0, b, bi);
cps[3] = B2modb >> cnt;
}
mp_limb_t
mpn_mod_1_1p (mp_srcptr ap, mp_size_t n, mp_limb_t b, mp_limb_t bmodb[4])
{
- mp_limb_t rh, rl, bi, q, ph, pl, r;
+ mp_limb_t rh, rl, bi, ph, pl, r;
mp_limb_t B1modb, B2modb;
mp_size_t i;
int cnt;
B1modb = bmodb[2];
B2modb = bmodb[3];
- umul_ppmm (ph, pl, ap[n - 1], B1modb);
+ rl = ap[n - 1];
+ umul_ppmm (ph, pl, rl, B1modb);
add_ssaaaa (rh, rl, ph, pl, 0, ap[n - 2]);
for (i = n - 3; i >= 0; i -= 1)
add_ssaaaa (rh, rl, rh, rl, ph, pl);
}
- bi = bmodb[0];
cnt = bmodb[1];
+ bi = bmodb[0];
if (LIKELY (cnt != 0))
rh = (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt));
mask = -(mp_limb_t) (rh >= b);
rh -= mask & b;
- udiv_qrnnd_preinv (q, r, rh, rl << cnt, b, bi);
+ udiv_rnnd_preinv (r, rh, rl << cnt, b, bi);
+
+ return r >> cnt;
+}
+#endif /* MOD_1_1P_METHOD == 1 */
+
+#if MOD_1_1P_METHOD == 2
+void
+mpn_mod_1_1p_cps (mp_limb_t cps[4], mp_limb_t b)
+{
+ mp_limb_t bi;
+ mp_limb_t B2modb;
+ int cnt;
+ /* Method 2 precomputation for divisor b. On return:
+ cps[0] = bi, the value invert_limb produces for b shifted left by cnt,
+ cps[1] = cnt, the leading-zero count of the b passed in,
+ cps[2] = B1modb >> cnt, written ONLY when cnt != 0,
+ cps[3] = B2modb, stored as computed (no >> cnt).
+ The method 2 mpn_mod_1_1p in this file reads bmodb[2] only inside its
+ own cnt != 0 branch, so the unwritten cps[2] is never consumed when
+ cnt == 0. */
+ count_leading_zeros (cnt, b);
+
+ b <<= cnt; /* from here on b is the left-shifted divisor */
+ invert_limb (bi, b);
+
+ cps[0] = bi;
+ cps[1] = cnt;
+
+ if (LIKELY (cnt != 0))
+ {
+ mp_limb_t B1modb = -b;
+ B1modb *= ((bi >> (GMP_LIMB_BITS-cnt)) | (CNST_LIMB(1) << cnt));
+ ASSERT (B1modb <= b); /* NB: not fully reduced mod b */
+ cps[2] = B1modb >> cnt;
+ }
+ B2modb = - b * bi;
+ ASSERT (B2modb <= b); /* NB: equality iff b = B/2 */
+ cps[3] = B2modb; /* unlike cps[2], not shifted right by cnt */
+}
+/* Method 2 variant of mpn_mod_1_1p: return the single-limb remainder of
+ {ap,n} divided by b, for n >= 2. bmodb[] is the table filled in by
+ mpn_mod_1_1p_cps: bmodb[0] = bi, bmodb[1] = cnt, bmodb[2] = B1modb >> cnt
+ (read only when cnt != 0) and bmodb[3] = B2modb.
+ NOTE(review): the closing udiv_rnnd_preinv followed by r >> cnt suggests
+ that b is passed in already shifted left by cnt -- confirm against the
+ callers. */
+mp_limb_t
+mpn_mod_1_1p (mp_srcptr ap, mp_size_t n, mp_limb_t b, mp_limb_t bmodb[4])
+{
+ int cnt;
+ mp_limb_t bi, B1modb; /* NOTE(review): this B1modb is never read; the cnt != 0 branch below declares its own */
+ mp_limb_t r0, r1;
+ mp_limb_t r;
+
+ ASSERT (n >= 2); /* fix tuneup.c if this is changed */
+
+ /* Start from the two most significant limbs; (r1,r0) is the running
+ two-limb value that the rest of the function keeps folding into. */
+ r0 = ap[n-2];
+ r1 = ap[n-1];
+
+ if (n > 2)
+ {
+ mp_limb_t B2modb, B2mb;
+ mp_limb_t p0, p1;
+ mp_limb_t r2;
+ mp_size_t j;
+
+ B2modb = bmodb[3];
+ B2mb = B2modb - b; /* only referenced by the commented-out cmov alternative in the loop */
+
+ umul_ppmm (p1, p0, r1, B2modb);
+ add_mssaaaa (r2, r1, r0, r0, ap[n-3], p1, p0); /* (r1,r0) = (r0,ap[n-3]) + (p1,p0); r2 = carry mask, 0 or all ones */
+
+ for (j = n-4; j >= 0; j--)
+ {
+ mp_limb_t cy;
+ /* mp_limb_t t = r0 + B2mb; */
+ umul_ppmm (p1, p0, r1, B2modb);
+
+ ADDC_LIMB (cy, r0, r0, r2 & B2modb); /* adds B2modb to r0 only when the previous step carried; cy presumably receives the carry out */
+ /* Alternative, for cmov: if (cy) r0 = t; */
+ r0 -= (-cy) & b; /* subtracts b when cy is set */
+ add_mssaaaa (r2, r1, r0, r0, ap[j], p1, p0); /* fold in the next lower limb, as before the loop */
+ }
+
+ r1 -= (r2 & b); /* subtract b from r1 when the last addition carried */
+ }
+
+ cnt = bmodb[1];
+
+ if (LIKELY (cnt != 0))
+ {
+ mp_limb_t t;
+ mp_limb_t B1modb = bmodb[2];
+
+ umul_ppmm (r1, t, r1, B1modb);
+ r0 += t;
+ r1 += (r0 < t); /* propagate the carry of r0 + t */
+
+ /* Normalize: shift the two-limb value (r1,r0) left by cnt bits. */
+ r1 = (r1 << cnt) | (r0 >> (GMP_LIMB_BITS - cnt));
+ r0 <<= cnt;
+
+ /* NOTE: Might get r1 == b here, but udiv_rnnd_preinv allows
+ that. */
+ }
+ else
+ {
+ mp_limb_t mask = -(mp_limb_t) (r1 >= b); /* all ones when r1 >= b, else 0 */
+ r1 -= mask & b;
+ }
+
+ bi = bmodb[0];
+ udiv_rnnd_preinv (r, r1, r0, b, bi);
return r >> cnt;
}
+#endif /* MOD_1_1P_METHOD == 2 */
Requires that b < B / 2.
Contributed to the GNU project by Torbjorn Granlund.
+ Based on a suggestion by Peter L. Montgomery.
THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-Copyright 2008, 2009 Free Software Foundation, Inc.
+Copyright 2008, 2009, 2010 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
b <<= cnt;
invert_limb (bi, b);
- B1modb = -b * ((bi >> (GMP_LIMB_BITS-cnt)) | (CNST_LIMB(1) << cnt));
- ASSERT (B1modb <= b); /* NB: not fully reduced mod b */
- udiv_rnd_preinv (B2modb, B1modb, b, bi);
- udiv_rnd_preinv (B3modb, B2modb, b, bi);
-
cps[0] = bi;
cps[1] = cnt;
+
+ B1modb = -b * ((bi >> (GMP_LIMB_BITS-cnt)) | (CNST_LIMB(1) << cnt));
+ ASSERT (B1modb <= b); /* NB: not fully reduced mod b */
cps[2] = B1modb >> cnt;
+
+ udiv_rnnd_preinv (B2modb, B1modb, 0, b, bi);
cps[3] = B2modb >> cnt;
+
+ udiv_rnnd_preinv (B3modb, B2modb, 0, b, bi);
cps[4] = B3modb >> cnt;
#if WANT_ASSERT
mp_limb_t
mpn_mod_1s_2p (mp_srcptr ap, mp_size_t n, mp_limb_t b, mp_limb_t cps[5])
{
- mp_limb_t rh, rl, bi, q, ph, pl, ch, cl, r;
+ mp_limb_t rh, rl, bi, ph, pl, ch, cl, r;
mp_limb_t B1modb, B2modb, B3modb;
mp_size_t i;
int cnt;
rl = ap[n - 1];
bi = cps[0];
cnt = cps[1];
- udiv_qrnnd_preinv (q, r, rl >> (GMP_LIMB_BITS - cnt),
+ udiv_rnnd_preinv (r, rl >> (GMP_LIMB_BITS - cnt),
rl << cnt, b, bi);
return r >> cnt;
}
}
else
{
- umul_ppmm (rh, rl, ap[n - 1], B1modb);
- add_ssaaaa (rh, rl, rh, rl, 0, ap[n - 2]);
+ rh = ap[n - 1];
+ rl = ap[n - 2];
}
for (i = n - 4; i >= 0; i -= 2)
add_ssaaaa (rh, rl, rh, rl, ph, pl);
}
- bi = cps[0];
- cnt = cps[1];
-
-#if 1
umul_ppmm (rh, cl, rh, B1modb);
add_ssaaaa (rh, rl, rh, rl, 0, cl);
- r = (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt));
-#else
- udiv_qrnnd_preinv (q, r, rh >> (GMP_LIMB_BITS - cnt),
- (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt)), b, bi);
- ASSERT (q <= 2); /* optimize for small quotient? */
-#endif
- udiv_qrnnd_preinv (q, r, r, rl << cnt, b, bi);
+ cnt = cps[1];
+ bi = cps[0];
+
+ r = (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt));
+ udiv_rnnd_preinv (r, r, rl << cnt, b, bi);
return r >> cnt;
}
Requires that d < B / 3.
Contributed to the GNU project by Torbjorn Granlund.
+ Based on a suggestion by Peter L. Montgomery.
THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-Copyright 2008, 2009, 2010 Free Software Foundation, Inc.
+Copyright 2008, 2009, 2010, 2013 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
b <<= cnt;
invert_limb (bi, b);
- B1modb = -b * ((bi >> (GMP_LIMB_BITS-cnt)) | (CNST_LIMB(1) << cnt));
- ASSERT (B1modb <= b); /* NB: not fully reduced mod b */
- udiv_rnd_preinv (B2modb, B1modb, b, bi);
- udiv_rnd_preinv (B3modb, B2modb, b, bi);
- udiv_rnd_preinv (B4modb, B3modb, b, bi);
-
cps[0] = bi;
cps[1] = cnt;
+
+ B1modb = -b * ((bi >> (GMP_LIMB_BITS-cnt)) | (CNST_LIMB(1) << cnt));
+ ASSERT (B1modb <= b); /* NB: not fully reduced mod b */
cps[2] = B1modb >> cnt;
+
+ udiv_rnnd_preinv (B2modb, B1modb, 0, b, bi);
cps[3] = B2modb >> cnt;
+
+ udiv_rnnd_preinv (B3modb, B2modb, 0, b, bi);
cps[4] = B3modb >> cnt;
+
+ udiv_rnnd_preinv (B4modb, B3modb, 0, b, bi);
cps[5] = B4modb >> cnt;
#if WANT_ASSERT
mp_limb_t
mpn_mod_1s_3p (mp_srcptr ap, mp_size_t n, mp_limb_t b, mp_limb_t cps[6])
{
- mp_limb_t rh, rl, bi, q, ph, pl, ch, cl, r;
+ mp_limb_t rh, rl, bi, ph, pl, ch, cl, r;
mp_limb_t B1modb, B2modb, B3modb, B4modb;
mp_size_t i;
int cnt;
n -= 1;
break;
case 1: /* n mod 3 = 2 */
- umul_ppmm (ph, pl, ap[n - 1], B1modb);
- add_ssaaaa (rh, rl, ph, pl, 0, ap[n - 2]);
+ rh = ap[n - 1];
+ rl = ap[n - 2];
n -= 2;
break;
}
add_ssaaaa (rh, rl, rh, rl, ph, pl);
}
- bi = cps[0];
- cnt = cps[1];
-
-#if 1
umul_ppmm (rh, cl, rh, B1modb);
add_ssaaaa (rh, rl, rh, rl, 0, cl);
- r = (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt));
-#else
- udiv_qrnnd_preinv (q, r, rh >> (GMP_LIMB_BITS - cnt),
- (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt)), b, bi);
- ASSERT (q <= 3); /* optimize for small quotient? */
-#endif
- udiv_qrnnd_preinv (q, r, r, rl << cnt, b, bi);
+ cnt = cps[1];
+ bi = cps[0];
+
+ r = (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt));
+ udiv_rnnd_preinv (r, r, rl << cnt, b, bi);
return r >> cnt;
}
-/* mpn_mod_1s_3p (ap, n, b, cps)
+/* mpn_mod_1s_4p (ap, n, b, cps)
Divide (ap,,n) by b. Return the single-limb remainder.
Requires that d < B / 4.
Contributed to the GNU project by Torbjorn Granlund.
+ Based on a suggestion by Peter L. Montgomery.
THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-Copyright 2008, 2009 Free Software Foundation, Inc.
+Copyright 2008, 2009, 2010 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
b <<= cnt;
invert_limb (bi, b);
- B1modb = -b * ((bi >> (GMP_LIMB_BITS-cnt)) | (CNST_LIMB(1) << cnt));
- ASSERT (B1modb <= b); /* NB: not fully reduced mod b */
- udiv_rnd_preinv (B2modb, B1modb, b, bi);
- udiv_rnd_preinv (B3modb, B2modb, b, bi);
- udiv_rnd_preinv (B4modb, B3modb, b, bi);
- udiv_rnd_preinv (B5modb, B4modb, b, bi);
-
cps[0] = bi;
cps[1] = cnt;
+
+ B1modb = -b * ((bi >> (GMP_LIMB_BITS-cnt)) | (CNST_LIMB(1) << cnt));
+ ASSERT (B1modb <= b); /* NB: not fully reduced mod b */
cps[2] = B1modb >> cnt;
+
+ udiv_rnnd_preinv (B2modb, B1modb, 0, b, bi);
cps[3] = B2modb >> cnt;
+
+ udiv_rnnd_preinv (B3modb, B2modb, 0, b, bi);
cps[4] = B3modb >> cnt;
+
+ udiv_rnnd_preinv (B4modb, B3modb, 0, b, bi);
cps[5] = B4modb >> cnt;
+
+ udiv_rnnd_preinv (B5modb, B4modb, 0, b, bi);
cps[6] = B5modb >> cnt;
#if WANT_ASSERT
mp_limb_t
mpn_mod_1s_4p (mp_srcptr ap, mp_size_t n, mp_limb_t b, mp_limb_t cps[7])
{
- mp_limb_t rh, rl, bi, q, ph, pl, ch, cl, r;
+ mp_limb_t rh, rl, bi, ph, pl, ch, cl, r;
mp_limb_t B1modb, B2modb, B3modb, B4modb, B5modb;
mp_size_t i;
int cnt;
n -= 1;
break;
case 2:
- umul_ppmm (ph, pl, ap[n - 1], B1modb);
- add_ssaaaa (rh, rl, ph, pl, 0, ap[n - 2]);
+ rh = ap[n - 1];
+ rl = ap[n - 2];
n -= 2;
break;
case 3:
add_ssaaaa (rh, rl, rh, rl, ph, pl);
}
- bi = cps[0];
- cnt = cps[1];
-
-#if 1
umul_ppmm (rh, cl, rh, B1modb);
add_ssaaaa (rh, rl, rh, rl, 0, cl);
- r = (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt));
-#else
- udiv_qrnnd_preinv (q, r, rh >> (GMP_LIMB_BITS - cnt),
- (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt)), b, bi);
- ASSERT (q <= 4); /* optimize for small quotient? */
-#endif
- udiv_qrnnd_preinv (q, r, r, rl << cnt, b, bi);
+ cnt = cps[1];
+ bi = cps[0];
+
+ r = (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt));
+ udiv_rnnd_preinv (r, r, rl << cnt, b, bi);
return r >> cnt;
}
SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
-Copyright 2005, 2006, 2007, 2009, 2010 Free Software Foundation, Inc.
+Copyright 2005, 2006, 2007, 2009, 2010, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
mp_size_t qn;
mp_size_t in;
mp_limb_t cy, c0;
- int k;
mp_size_t tn, wn;
- mp_size_t i;
qn = nn - dn;
if (qh != 0)
mpn_sub_n (rp, np, dp, dn);
else
- MPN_COPY (rp, np, dn);
+ MPN_COPY_INCR (rp, np, dn);
if (qn == 0)
return qh; /* Degenerate use. Should we allow this? */
Contributed to the GNU project by Torbjorn Granlund.
Copyright 1991, 1993, 1994, 1996, 1997, 1999, 2000, 2001, 2002, 2003, 2005,
-2006, 2007, 2009, 2010 Free Software Foundation, Inc.
+2006, 2007, 2009, 2010, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
#define MUL_BASECASE_MAX_UN 500
#endif
+/* Areas where the different toom algorithms can be called (extracted
+ from the t-toom*.c files, and ignoring small constant offsets):
+
+ 1/6 1/5 1/4 4/13 1/3 3/8 2/5 5/11 1/2 3/5 2/3 3/4 4/5 1 vn/un
+ 4/7 6/7
+ 6/11
+ |--------------------| toom22 (small)
+ || toom22 (large)
+ |xxxx| toom22 called
+ |-------------------------------------| toom32
+ |xxxxxxxxxxxxxxxx| | toom32 called
+ |------------| toom33
+ |x| toom33 called
+ |---------------------------------| | toom42
+ |xxxxxxxxxxxxxxxxxxxxxxxx| | toom42 called
+ |--------------------| toom43
+ |xxxxxxxxxx| toom43 called
+ |-----------------------------| toom52 (unused)
+ |--------| toom44
+ |xxxxxxxx| toom44 called
+ |--------------------| | toom53
+ |xxxxxx| toom53 called
+ |-------------------------| toom62 (unused)
+ |----------------| toom54 (unused)
+ |--------------------| toom63
+ |xxxxxxxxx| | toom63 called
+ |---------------------------------| toom6h
+ |xxxxxxxx| toom6h called
+ |-------------------------| toom8h (32 bit)
+ |------------------------------------------| toom8h (64 bit)
+ |xxxxxxxx| toom8h called
+*/
+
#define TOOM33_OK(an,bn) (6 + 2 * an < 3 * bn)
#define TOOM44_OK(an,bn) (12 + 3 * an < 4 * bn)
/* FIXME: This condition (repeated in the loop below) leaves from a vn*vn
square to a (3vn-1)*vn rectangle. Leaving such a rectangle is hardly
wise; we would get better balance by slightly moving the bound. We
- will sometimes end up with un < vn, like the the X3 arm below. */
+ will sometimes end up with un < vn, like in the X3 arm below. */
if (un >= 3 * vn)
{
mp_limb_t cy;
THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE. IT IS ONLY
SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
-
Copyright 1991, 1992, 1993, 1994, 1996, 1997, 2000, 2001, 2002 Free Software
Foundation, Inc.
GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
Copyright 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008,
-2009, 2010 Free Software Foundation, Inc.
+2009, 2010, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
#define HAVE_NATIVE_mpn_add_n_sub_n 1
#endif
-static mp_limb_t mpn_mul_fft_internal
-__GMP_PROTO ((mp_ptr, mp_size_t, int, mp_ptr *, mp_ptr *,
- mp_ptr, mp_ptr, mp_size_t, mp_size_t, mp_size_t, int **, mp_ptr, int));
-static void mpn_mul_fft_decompose
-__GMP_PROTO ((mp_ptr, mp_ptr *, int, int, mp_srcptr, mp_size_t, int, int, mp_ptr));
+static mp_limb_t mpn_mul_fft_internal (mp_ptr, mp_size_t, int, mp_ptr *,
+ mp_ptr *, mp_ptr, mp_ptr, mp_size_t,
+ mp_size_t, mp_size_t, int **, mp_ptr, int);
+static void mpn_mul_fft_decompose (mp_ptr, mp_ptr *, int, int, mp_srcptr,
+ mp_size_t, int, int, mp_ptr);
/* Find the best k to use for a mod 2^(m*GMP_NUMB_BITS)+1 FFT for m >= n.
#if TUNE_PROGRAM_BUILD || (defined (MUL_FFT_TABLE3) && defined (SQR_FFT_TABLE3))
-#ifndef FFT_TABLE3_SIZE /* When tuning, this is define in gmp-impl.h */
+#ifndef FFT_TABLE3_SIZE /* When tuning this is defined in gmp-impl.h */
#if defined (MUL_FFT_TABLE3_SIZE) && defined (SQR_FFT_TABLE3_SIZE)
#if MUL_FFT_TABLE3_SIZE > SQR_FFT_TABLE3_SIZE
#define FFT_TABLE3_SIZE MUL_FFT_TABLE3_SIZE
FUNCTION THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST GUARANTEED
THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-Copyright 2004, 2005, 2009, 2010 Free Software Foundation, Inc.
+Copyright 2004, 2005, 2009, 2010, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
#define MULLO_MUL_N_THRESHOLD MUL_FFT_THRESHOLD
#endif
-#if TUNE_PROGRAM_BUILD
+#if TUNE_PROGRAM_BUILD || WANT_FAT_BINARY
#define MAYBE_range_basecase 1
#define MAYBE_range_toom22 1
#else
--- /dev/null
+/* mpn_mulmid -- middle product
+
+ Contributed by David Harvey.
+
+ THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
+ SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+#define CHUNK (200 + MULMID_TOOM42_THRESHOLD)
+
+/* mpn_mulmid (rp, ap, an, bp, bn): middle product of {ap,an} and {bp,bn}.
+ Requires an >= bn >= 1, and the an-bn+3 limbs at rp must overlap neither
+ input operand (see the ASSERTs below).
+
+ Strategy, writing rn = an-bn+1:
+ - bn < MULMID_TOOM42_THRESHOLD: use mpn_mulmid_basecase, applied to
+ column chunks of CHUNK limbs of ap once an >= CHUNK;
+ - otherwise, rn < MULMID_TOOM42_THRESHOLD: use mpn_mulmid_basecase,
+ applied to row chunks of CHUNK limbs of bp once bn >= CHUNK, the
+ partial results being summed with mpn_add_n;
+ - otherwise: use mpn_toom42_mulmid on square chunks of size
+ min (rn, bn), with a recursive mpn_mulmid call for a leftover piece. */
+void
+mpn_mulmid (mp_ptr rp,
+ mp_srcptr ap, mp_size_t an,
+ mp_srcptr bp, mp_size_t bn)
+{
+ mp_size_t rn, k;
+ mp_ptr scratch, temp;
+
+ ASSERT (an >= bn);
+ ASSERT (bn >= 1);
+ ASSERT (! MPN_OVERLAP_P (rp, an - bn + 3, ap, an));
+ ASSERT (! MPN_OVERLAP_P (rp, an - bn + 3, bp, bn));
+
+ if (bn < MULMID_TOOM42_THRESHOLD)
+ {
+ /* region not tall enough to make toom42 worthwhile for any portion */
+
+ if (an < CHUNK)
+ {
+ /* region not too wide either, just call basecase directly */
+ mpn_mulmid_basecase (rp, ap, an, bp, bn);
+ return;
+ }
+
+ /* Region quite wide. For better locality, use basecase on chunks:
+
+ AAABBBCC..
+ .AAABBBCC.
+ ..AAABBBCC
+ */
+
+ k = CHUNK - bn + 1; /* number of diagonals per chunk */
+
+ /* first chunk (marked A in the above diagram) */
+ mpn_mulmid_basecase (rp, ap, CHUNK, bp, bn);
+
+ /* remaining chunks (B, C, etc) */
+ an -= k;
+
+ while (an >= CHUNK)
+ {
+ mp_limb_t t0, t1, cy;
+ ap += k, rp += k;
+ t0 = rp[0], t1 = rp[1]; /* top two limbs of the previous chunk's output; the next call overwrites them */
+ mpn_mulmid_basecase (rp, ap, CHUNK, bp, bn);
+ ADDC_LIMB (cy, rp[0], rp[0], t0); /* add back saved limbs */
+ MPN_INCR_U (rp + 1, k + 1, t1 + cy);
+ an -= k;
+ }
+
+ if (an >= bn)
+ {
+ /* last remaining chunk; it is narrower than CHUNK, so pass the
+ remaining an instead of CHUNK */
+ mp_limb_t t0, t1, cy;
+ ap += k, rp += k;
+ t0 = rp[0], t1 = rp[1];
+ mpn_mulmid_basecase (rp, ap, an, bp, bn);
+ ADDC_LIMB (cy, rp[0], rp[0], t0);
+ MPN_INCR_U (rp + 1, an - bn + 2, t1 + cy);
+ }
+
+ return;
+ }
+
+ /* region is tall enough for toom42 */
+
+ rn = an - bn + 1;
+
+ if (rn < MULMID_TOOM42_THRESHOLD)
+ {
+ /* region not wide enough to make toom42 worthwhile for any portion */
+
+ TMP_DECL;
+
+ if (bn < CHUNK)
+ {
+ /* region not too tall either, just call basecase directly */
+ mpn_mulmid_basecase (rp, ap, an, bp, bn);
+ return;
+ }
+
+ /* Region quite tall. For better locality, use basecase on chunks:
+
+ AAAAA....
+ .AAAAA...
+ ..BBBBB..
+ ...BBBBB.
+ ....CCCCC
+
+ Every chunk yields a full rn+2 limb partial result; the first is
+ written straight to rp, later ones go to temp and are added in.
+ */
+
+ TMP_MARK;
+
+ temp = TMP_ALLOC_LIMBS (rn + 2);
+
+ /* first chunk (marked A in the above diagram) */
+ bp += bn - CHUNK, an -= bn - CHUNK;
+ mpn_mulmid_basecase (rp, ap, an, bp, CHUNK);
+
+ /* remaining chunks (B, C, etc) */
+ bn -= CHUNK;
+
+ while (bn >= CHUNK)
+ {
+ ap += CHUNK, bp -= CHUNK;
+ mpn_mulmid_basecase (temp, ap, an, bp, CHUNK);
+ mpn_add_n (rp, rp, temp, rn + 2);
+ bn -= CHUNK;
+ }
+
+ if (bn)
+ {
+ /* last remaining chunk */
+ ap += CHUNK, bp -= bn;
+ mpn_mulmid_basecase (temp, ap, rn + bn - 1, bp, bn);
+ mpn_add_n (rp, rp, temp, rn + 2);
+ }
+
+ TMP_FREE;
+ return;
+ }
+
+ /* we're definitely going to use toom42 somewhere */
+
+ if (bn > rn)
+ {
+ /* slice region into chunks, use toom42 on all chunks except possibly
+ the last:
+
+ AA....
+ .AA...
+ ..BB..
+ ...BB.
+ ....CC
+
+ A single allocation holds both the rn+2 limb partial result (temp)
+ and, right after it, the toom42 scratch space.
+ */
+
+ TMP_DECL;
+ TMP_MARK;
+
+ temp = TMP_ALLOC_LIMBS (rn + 2 + mpn_toom42_mulmid_itch (rn));
+ scratch = temp + rn + 2;
+
+ /* first chunk (marked A in the above diagram) */
+ bp += bn - rn;
+ mpn_toom42_mulmid (rp, ap, bp, rn, scratch);
+
+ /* remaining chunks (B, C, etc) */
+ bn -= rn;
+
+ while (bn >= rn)
+ {
+ ap += rn, bp -= rn;
+ mpn_toom42_mulmid (temp, ap, bp, rn, scratch);
+ mpn_add_n (rp, rp, temp, rn + 2);
+ bn -= rn;
+ }
+
+ if (bn)
+ {
+ /* last remaining chunk; handled by a recursive mpn_mulmid call */
+ ap += rn, bp -= bn;
+ mpn_mulmid (temp, ap, rn + bn - 1, bp, bn);
+ mpn_add_n (rp, rp, temp, rn + 2);
+ }
+
+ TMP_FREE;
+ }
+ else
+ {
+ /* slice region into chunks, use toom42 on all chunks except possibly
+ the last:
+
+ AAABBBCC..
+ .AAABBBCC.
+ ..AAABBBCC
+ */
+
+ TMP_DECL;
+ TMP_MARK;
+
+ scratch = TMP_ALLOC_LIMBS (mpn_toom42_mulmid_itch (bn));
+
+ /* first chunk (marked A in the above diagram) */
+ mpn_toom42_mulmid (rp, ap, bp, bn, scratch);
+
+ /* remaining chunks (B, C, etc) */
+ rn -= bn;
+
+ while (rn >= bn)
+ {
+ mp_limb_t t0, t1, cy;
+ ap += bn, rp += bn;
+ t0 = rp[0], t1 = rp[1]; /* top two limbs of the previous chunk's output; the next call overwrites them */
+ mpn_toom42_mulmid (rp, ap, bp, bn, scratch);
+ ADDC_LIMB (cy, rp[0], rp[0], t0); /* add back saved limbs */
+ MPN_INCR_U (rp + 1, bn + 1, t1 + cy);
+ rn -= bn;
+ }
+
+ TMP_FREE;
+
+ if (rn)
+ {
+ /* last remaining chunk; scratch has already been released above,
+ the recursive call does not use it */
+ mp_limb_t t0, t1, cy;
+ ap += bn, rp += bn;
+ t0 = rp[0], t1 = rp[1];
+ mpn_mulmid (rp, ap, rn + bn - 1, bp, bn);
+ ADDC_LIMB (cy, rp[0], rp[0], t0);
+ MPN_INCR_U (rp + 1, rn + 1, t1 + cy);
+ }
+ }
+}
--- /dev/null
+/* mpn_mulmid_basecase -- classical middle product algorithm
+
+ Contributed by David Harvey.
+
+ THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
+ SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* Middle product of {up,un} and {vp,vn}, write result to {rp,un-vn+3}.
+ Must have un >= vn >= 1.
+
+ Neither input buffer may overlap with the output buffer.
+
+ The low un-vn+1 result limbs are accumulated in place in rp, one row
+ per limb of vp (mpn_mul_1 for the first row, mpn_addmul_1 for the
+ others). The limbs returned by those calls are summed in the two-limb
+ accumulator (hi,lo), which is stored as the top two result limbs. */
+
+void
+mpn_mulmid_basecase (mp_ptr rp,
+ mp_srcptr up, mp_size_t un,
+ mp_srcptr vp, mp_size_t vn)
+{
+ mp_limb_t lo, hi; /* last two limbs of output */
+ mp_limb_t cy;
+
+ ASSERT (un >= vn);
+ ASSERT (vn >= 1);
+ ASSERT (! MPN_OVERLAP_P (rp, un - vn + 3, up, un));
+ ASSERT (! MPN_OVERLAP_P (rp, un - vn + 3, vp, vn));
+
+ up += vn - 1; /* first row starts vn-1 limbs into up ... */
+ un -= vn - 1; /* ... and from here on un is the row length, un-vn+1 */
+
+ /* multiply by first limb, store result */
+ lo = mpn_mul_1 (rp, up, un, vp[0]);
+ hi = 0;
+
+ /* accumulate remaining rows; each row starts one limb lower in up and
+ uses the next limb of vp */
+ for (vn--; vn; vn--)
+ {
+ up--, vp++;
+ cy = mpn_addmul_1 (rp, up, un, vp[0]);
+ add_ssaaaa (hi, lo, hi, lo, 0, cy);
+ }
+
+ /* store final limbs */
+#if GMP_NAIL_BITS != 0
+ hi = (hi << GMP_NAIL_BITS) + (lo >> GMP_NUMB_BITS);
+ lo &= GMP_NUMB_MASK;
+#endif
+
+ rp[un] = lo;
+ rp[un + 1] = hi;
+}
--- /dev/null
+/* mpn_mulmid_n -- balanced middle product
+
+ Contributed by David Harvey.
+
+ THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
+ SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* mpn_mulmid_n (rp, ap, bp, n): balanced middle product of {ap,2n-1} and
+ {bp,n}. Requires n >= 1, and the n+2 limbs at rp must overlap neither
+ input (see the ASSERTs below).
+
+ Below MULMID_TOOM42_THRESHOLD the work is done by mpn_mulmid_basecase;
+ otherwise by mpn_toom42_mulmid, with scratch space of
+ mpn_toom42_mulmid_itch (n) limbs allocated and released here. */
+void
+mpn_mulmid_n (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n)
+{
+ ASSERT (n >= 1);
+ ASSERT (! MPN_OVERLAP_P (rp, n + 2, ap, 2*n - 1));
+ ASSERT (! MPN_OVERLAP_P (rp, n + 2, bp, n));
+
+ if (n < MULMID_TOOM42_THRESHOLD)
+ {
+ mpn_mulmid_basecase (rp, ap, 2*n - 1, bp, n);
+ }
+ else
+ {
+ mp_ptr scratch;
+ TMP_DECL;
+ TMP_MARK;
+ scratch = TMP_ALLOC_LIMBS (mpn_toom42_mulmid_itch (n));
+ mpn_toom42_mulmid (rp, ap, bp, n, scratch);
+ TMP_FREE;
+ }
+}
SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-Copyright 2009, 2010 Free Software Foundation, Inc.
+Copyright 2009, 2010, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
Contributed to the GNU project by Martin Boij.
-Copyright 2009, 2010 Free Software Foundation, Inc.
+Copyright 2009, 2010, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
#define SMALL 20
#define MEDIUM 100
-/*
- Returns non-zero if {np,nn} == {xp,xn} ^ k.
+/* Return non-zero if {np,nn} == {xp,xn} ^ k.
Algorithm:
- For s = 1, 2, 4, ..., s_max, compute the s least significant
- limbs of {xp,xn}^k. Stop if they don't match the s least
- significant limbs of {np,nn}.
-*/
+ For s = 1, 2, 4, ..., s_max, compute the s least significant limbs of
+ {xp,xn}^k. Stop if they don't match the s least significant limbs of
+ {np,nn}.
+
+ FIXME: Low xn limbs can be expected to always match, if computed as a mod
+ B^{xn} root. So instead of using mpn_powlo, compute an approximation of the
+ most significant (normalized) limb of {xp,xn} ^ k (and an error bound), and
+ compare to {np,n}. Or use an even cruder approximation based on a
+ fixed-point base 2 logarithm. */
static int
-pow_equals (mp_srcptr np, mp_size_t nn,
+pow_equals (mp_srcptr np, mp_size_t n,
mp_srcptr xp,mp_size_t xn,
mp_limb_t k, mp_bitcnt_t f,
mp_ptr tp)
{
mp_limb_t *tp2;
- mp_bitcnt_t y, z, count;
+ mp_bitcnt_t y, z;
mp_size_t i, bn;
int ans;
mp_limb_t h, l;
TMP_DECL;
- ASSERT (nn > 1 || (nn == 1 && np[0] > 1));
- ASSERT (np[nn - 1] > 0);
+ ASSERT (n > 1 || (n == 1 && np[0] > 1));
+ ASSERT (np[n - 1] > 0);
ASSERT (xn > 0);
if (xn == 1 && xp[0] == 1)
return 0;
- z = 1 + (nn >> 1);
+ z = 1 + (n >> 1);
for (bn = 1; bn < z; bn <<= 1)
{
mpn_powlo (tp, xp, &k, 1, bn, tp + bn);
TMP_MARK;
- /* Final check. Estimate the size of {xp,xn}^k before computing
- the power with full precision.
- Optimization: It might pay off to make a more accurate estimation of
- the logarithm of {xp,xn}, rather than using the index of the MSB.
- */
+ /* Final check. Estimate the size of {xp,xn}^k before computing the power
+ with full precision. Optimization: It might pay off to make a more
+ accurate estimation of the logarithm of {xp,xn}, rather than using the
+ index of the MSB. */
- count_leading_zeros (count, xp[xn - 1]);
- y = xn * GMP_LIMB_BITS - count - 1; /* msb_index (xp, xn) */
+ MPN_SIZEINBASE_2EXP(y, xp, xn, 1);
+ y -= 1; /* msb_index (xp, xn) */
umul_ppmm (h, l, k, y);
h -= l == 0; l--; /* two-limb decrement */
- z = f - 1; /* msb_index (np, nn) */
+ z = f - 1; /* msb_index (np, n) */
if (h == 0 && l <= z)
{
mp_limb_t size;
tp2 = TMP_ALLOC_LIMBS (y);
i = mpn_pow_1 (tp, xp, xn, k, tp2);
- if (i == nn && mpn_cmp (tp, np, nn) == 0)
+ if (i == n && mpn_cmp (tp, np, n) == 0)
ans = 1;
else
ans = 0;
return ans;
}
-/*
- Computes rp such that rp^k * yp = 1 (mod 2^b).
- Algorithm:
- Apply Hensel lifting repeatedly, each time
- doubling (approx.) the number of known bits in rp.
-*/
-static void
-binv_root (mp_ptr rp, mp_srcptr yp,
- mp_limb_t k, mp_size_t bn,
- mp_bitcnt_t b, mp_ptr tp)
-{
- mp_limb_t *tp2 = tp + bn, *tp3 = tp + 2 * bn, di, k2 = k + 1;
- mp_bitcnt_t order[GMP_LIMB_BITS * 2];
- int i, d = 0;
-
- ASSERT (bn > 0);
- ASSERT (b > 0);
- ASSERT ((k & 1) != 0);
-
- binvert_limb (di, k);
-
- rp[0] = 1;
- for (; b != 1; b = (b + 1) >> 1)
- order[d++] = b;
-
- for (i = d - 1; i >= 0; i--)
- {
- b = order[i];
- bn = 1 + (b - 1) / GMP_LIMB_BITS;
-
- mpn_mul_1 (tp, rp, bn, k2);
-
- mpn_powlo (tp2, rp, &k2, 1, bn, tp3);
- mpn_mullo_n (rp, yp, tp2, bn);
-
- mpn_sub_n (tp2, tp, rp, bn);
- mpn_pi1_bdiv_q_1 (rp, tp2, bn, k, di, 0);
- if ((b % GMP_LIMB_BITS) != 0)
- rp[(b - 1) / GMP_LIMB_BITS] &= (((mp_limb_t) 1) << (b % GMP_LIMB_BITS)) - 1;
- }
- return;
-}
-/*
- Computes rp such that rp^2 * yp = 1 (mod 2^{b+1}).
- Returns non-zero if such an integer rp exists.
-*/
-static int
-binv_sqroot (mp_ptr rp, mp_srcptr yp,
- mp_size_t bn, mp_bitcnt_t b,
- mp_ptr tp)
-{
- mp_limb_t k = 3, *tp2 = tp + bn, *tp3 = tp + (bn << 1);
- mp_bitcnt_t order[GMP_LIMB_BITS * 2];
- int i, d = 0;
-
- ASSERT (bn > 0);
- ASSERT (b > 0);
-
- rp[0] = 1;
- if (b == 1)
- {
- if ((yp[0] & 3) != 1)
- return 0;
- }
- else
- {
- if ((yp[0] & 7) != 1)
- return 0;
-
- for (; b != 2; b = (b + 2) >> 1)
- order[d++] = b;
-
- for (i = d - 1; i >= 0; i--)
- {
- b = order[i];
- bn = 1 + b / GMP_LIMB_BITS;
-
- mpn_mul_1 (tp, rp, bn, k);
-
- mpn_powlo (tp2, rp, &k, 1, bn, tp3);
- mpn_mullo_n (rp, yp, tp2, bn);
-
-#if HAVE_NATIVE_mpn_rsh1sub_n
- mpn_rsh1sub_n (rp, tp, rp, bn);
-#else
- mpn_sub_n (tp2, tp, rp, bn);
- mpn_rshift (rp, tp2, bn, 1);
-#endif
- rp[b / GMP_LIMB_BITS] &= (((mp_limb_t) 1) << (b % GMP_LIMB_BITS)) - 1;
- }
- }
- return 1;
-}
-
-/*
- Returns non-zero if {np,nn} is a kth power.
-*/
+/* Return non-zero if N = {np,n} is a kth power.
+ I = {ip,n} = N^(-1) mod B^n. */
static int
is_kth_power (mp_ptr rp, mp_srcptr np,
- mp_limb_t k, mp_srcptr yp,
- mp_size_t nn, mp_bitcnt_t f,
+ mp_limb_t k, mp_srcptr ip,
+ mp_size_t n, mp_bitcnt_t f,
mp_ptr tp)
{
- mp_limb_t x, c;
mp_bitcnt_t b;
- mp_size_t i, rn, xn;
+ mp_size_t rn, xn;
- ASSERT (nn > 0);
- ASSERT (((k & 1) != 0) || (k == 2));
+ ASSERT (n > 0);
+ ASSERT ((k & 1) != 0 || k == 2);
ASSERT ((np[0] & 1) != 0);
if (k == 2)
{
b = (f + 1) >> 1;
rn = 1 + b / GMP_LIMB_BITS;
- if (binv_sqroot (rp, yp, rn, b, tp) != 0)
+ if (mpn_bsqrtinv (rp, ip, b, tp) != 0)
{
+ rp[rn - 1] &= (CNST_LIMB(1) << (b % GMP_LIMB_BITS)) - 1;
xn = rn;
MPN_NORMALIZE (rp, xn);
- if (pow_equals (np, nn, rp, xn, k, f, tp) != 0)
+ if (pow_equals (np, n, rp, xn, k, f, tp) != 0)
return 1;
- /* Check if (2^b - rp)^2 == np */
- c = 0;
- for (i = 0; i < rn; i++)
- {
- x = rp[i];
- rp[i] = -x - c;
- c |= (x != 0);
- }
- rp[rn - 1] &= (((mp_limb_t) 1) << (b % GMP_LIMB_BITS)) - 1;
+ /* Check if (2^b - r)^2 == n */
+ mpn_neg (rp, rp, rn);
+ rp[rn - 1] &= (CNST_LIMB(1) << (b % GMP_LIMB_BITS)) - 1;
MPN_NORMALIZE (rp, rn);
- if (pow_equals (np, nn, rp, rn, k, f, tp) != 0)
+ if (pow_equals (np, n, rp, rn, k, f, tp) != 0)
return 1;
}
}
{
b = 1 + (f - 1) / k;
rn = 1 + (b - 1) / GMP_LIMB_BITS;
- binv_root (rp, yp, k, rn, b, tp);
+ mpn_brootinv (rp, ip, rn, k, tp);
+ if ((b % GMP_LIMB_BITS) != 0)
+ rp[rn - 1] &= (CNST_LIMB(1) << (b % GMP_LIMB_BITS)) - 1;
MPN_NORMALIZE (rp, rn);
- if (pow_equals (np, nn, rp, rn, k, f, tp) != 0)
+ if (pow_equals (np, n, rp, rn, k, f, tp) != 0)
return 1;
}
MPN_ZERO (rp, rn); /* Untrash rp */
}
static int
-perfpow (mp_srcptr np, mp_size_t nn,
+perfpow (mp_srcptr np, mp_size_t n,
mp_limb_t ub, mp_limb_t g,
mp_bitcnt_t f, int neg)
{
- mp_limb_t *yp, *tp, k = 0, *rp1;
- int ans = 0;
+ mp_ptr ip, tp, rp;
+ mp_limb_t k;
+ int ans;
mp_bitcnt_t b;
gmp_primesieve_t ps;
TMP_DECL;
- ASSERT (nn > 0);
+ ASSERT (n > 0);
ASSERT ((np[0] & 1) != 0);
ASSERT (ub > 0);
gmp_init_primesieve (&ps);
b = (f + 3) >> 1;
- yp = TMP_ALLOC_LIMBS (nn);
- rp1 = TMP_ALLOC_LIMBS (nn);
- tp = TMP_ALLOC_LIMBS (5 * nn); /* FIXME */
- MPN_ZERO (rp1, nn);
-
- mpn_binvert (yp, np, 1 + (b - 1) / GMP_LIMB_BITS, tp);
+ ip = TMP_ALLOC_LIMBS (n);
+ rp = TMP_ALLOC_LIMBS (n);
+ tp = TMP_ALLOC_LIMBS (5 * n); /* FIXME */
+ MPN_ZERO (rp, n);
+
+ /* FIXME: It seems the inverse in ip is needed only to get non-inverted
+ roots. I.e., is_kth_power computes n^{1/2} as (n^{-1})^{-1/2} and
+ similarly for kth roots. It should be more efficient to compute n^{1/2} as
+ n * n^{-1/2}, with a mullo instead of a binvert. And we can do something
+ similar for kth roots if we switch to an iteration converging to
+ n^{1/k - 1}, and we can then eliminate this binvert call. */
+ mpn_binvert (ip, np, 1 + (b - 1) / GMP_LIMB_BITS, tp);
if (b % GMP_LIMB_BITS)
- yp[(b - 1) / GMP_LIMB_BITS] &= (((mp_limb_t) 1) << (b % GMP_LIMB_BITS)) - 1;
+ ip[(b - 1) / GMP_LIMB_BITS] &= (CNST_LIMB(1) << (b % GMP_LIMB_BITS)) - 1;
if (neg)
gmp_nextprime (&ps);
+ ans = 0;
if (g > 0)
{
ub = MIN (ub, g + 1);
{
if ((g % k) == 0)
{
- if (is_kth_power (rp1, np, k, yp, nn, f, tp) != 0)
+ if (is_kth_power (rp, np, k, ip, n, f, tp) != 0)
{
ans = 1;
goto ret;
{
while ((k = gmp_nextprime (&ps)) < ub)
{
- if (is_kth_power (rp1, np, k, yp, nn, f, tp) != 0)
+ if (is_kth_power (rp, np, k, ip, n, f, tp) != 0)
{
ans = 1;
goto ret;
static const unsigned short nrtrial[] = { 100, 500, 1000 };
-/* Table of (log_{p_i} 2) values, where p_i is
- the (nrtrial[i] + 1)'th prime number.
-*/
-static const double logs[] = { 0.1099457228193620, 0.0847016403115322, 0.0772048195144415 };
+/* Table of (log_{p_i} 2) values, where p_i is the (nrtrial[i] + 1)'th prime
+ number. */
+static const double logs[] =
+ { 0.1099457228193620, 0.0847016403115322, 0.0772048195144415 };
int
-mpn_perfect_power_p (mp_srcptr np, mp_size_t nn)
+mpn_perfect_power_p (mp_srcptr np, mp_size_t n)
{
mp_size_t ncn, s, pn, xn;
- mp_limb_t *nc, factor, g = 0;
+ mp_limb_t *nc, factor, g;
mp_limb_t exp, *prev, *next, d, l, r, c, *tp, cry;
- mp_bitcnt_t twos = 0, count;
- int ans, where = 0, neg = 0, trial;
+ mp_bitcnt_t twos, count;
+ int ans, where, neg, trial;
TMP_DECL;
nc = (mp_ptr) np;
- if (nn < 0)
+ neg = 0;
+ if (n < 0)
{
neg = 1;
- nn = -nn;
+ n = -n;
}
- if (nn == 0 || (nn == 1 && np[0] == 1))
+ if (n == 0 || (n == 1 && np[0] == 1))
return 1;
TMP_MARK;
- ncn = nn;
+ g = 0;
+
+ ncn = n;
twos = mpn_scan1 (np, 0);
if (twos > 0)
{
goto ret;
}
s = twos / GMP_LIMB_BITS;
- if (s + 1 == nn && POW2_P (np[s]))
+ if (s + 1 == n && POW2_P (np[s]))
{
ans = ! (neg && POW2_P (twos));
goto ret;
}
count = twos % GMP_LIMB_BITS;
- ncn = nn - s;
+ ncn = n - s;
nc = TMP_ALLOC_LIMBS (ncn);
if (count > 0)
{
else
trial = 2;
+ where = 0;
factor = mpn_trialdiv (nc, ncn, nrtrial[trial], &where);
if (factor != 0)
MPN_COPY (nc, np, ncn);
}
- /* Remove factors found by trialdiv.
- Optimization: Perhaps better to use
- the strategy in mpz_remove ().
- */
+ /* Remove factors found by trialdiv. Optimization: Perhaps better to use
+ the strategy in mpz_remove (). */
prev = TMP_ALLOC_LIMBS (ncn + 2);
next = TMP_ALLOC_LIMBS (ncn + 2);
tp = TMP_ALLOC_LIMBS (4 * ncn);
while (factor != 0);
}
- count_leading_zeros (count, nc[ncn-1]);
- count = GMP_LIMB_BITS * ncn - count; /* log (nc) + 1 */
+ MPN_SIZEINBASE_2EXP(count, nc, ncn, 1); /* log (nc) + 1 */
d = (mp_limb_t) (count * logs[trial] + 1e-9) + 1;
ans = perfpow (nc, ncn, d, g, count, neg);
/* mpn_perfect_square_p(u,usize) -- Return non-zero if U is a perfect square,
zero otherwise.
-Copyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2002, 2005 Free Software
-Foundation, Inc.
+Copyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2002, 2005, 2012 Free
+Software Foundation, Inc.
This file is part of the GNU MP Library.
/* FIXME: The %= here isn't good, and might destroy any savings from keeping
the PERFSQR_MOD_IDX stuff within a limb (rather than needing umul_ppmm).
Maybe a new sort of mpn_preinv_mod_1 could accept an unnormalized divisor
- and a shift count, like mpn_preinv_divrem_1. But mod_34lsub1 is our
- normal case, so lets not worry too much about mod_1. */
+ and a shift count, like mpn_preinv_divrem_1. But mod_34lsub1 is our
+ normal case, so let's not worry too much about mod_1. */
#define PERFSQR_MOD_PP(r, up, usize) \
do { \
if (BELOW_THRESHOLD (usize, PREINV_MOD_1_TO_MOD_1_THRESHOLD)) \
} while (0)
/* The expression "(int) idx - GMP_LIMB_BITS < 0" lets the compiler use the
- sign bit from "idx-GMP_LIMB_BITS", which might help avoid a branch. */
+ sign bit from "idx-GMP_LIMB_BITS", which might help avoid a branch. */
#define PERFSQR_MOD_2(r, d, inv, mhi, mlo) \
do { \
mp_limb_t m; \
/* Check that we have even multiplicity of 2, and then check that the rest is
a possible perfect square. Leave disabled until we can determine this
really is an improvement. It it is, it could completely replace the
- simple probe above, since this should through out more non-squares, but at
+ simple probe above, since this should throw out more non-squares, but at
the expense of somewhat more cycles. */
{
mp_limb_t lo;
/* mpn_popcount, mpn_hamdist -- mpn bit population count/hamming distance.
-Copyright 1994, 1996, 2000, 2001, 2002, 2005, 2011 Free Software Foundation, Inc.
+Copyright 1994, 1996, 2000, 2001, 2002, 2005, 2011, 2012 Free Software
+Foundation, Inc.
This file is part of the GNU MP Library.
/* mpn_powlo -- Compute R = U^E mod B^n, where B is the limb base.
-Copyright 2007, 2008, 2009 Free Software Foundation, Inc.
+Copyright 2007, 2008, 2009, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
TMP_MARK;
- count_leading_zeros (cnt, ep[en - 1]);
- ebi = (mp_bitcnt_t) en * GMP_LIMB_BITS - cnt;
+ MPN_SIZEINBASE_2EXP(ebi, ep, en, 1);
windowsize = win_size (ebi);
SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-Copyright 2007, 2008, 2009 Free Software Foundation, Inc.
+Copyright 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
#include "gmp-impl.h"
#include "longlong.h"
+#undef MPN_REDC_1
+#define MPN_REDC_1(rp, up, mp, n, invm) \
+ do { \
+ mp_limb_t cy; \
+ cy = mpn_redc_1 (rp, up, mp, n, invm); \
+ if (cy != 0) \
+ mpn_sub_n (rp, rp, mp, n); \
+ } while (0)
+
+#undef MPN_REDC_2
+#define MPN_REDC_2(rp, up, mp, n, mip) \
+ do { \
+ mp_limb_t cy; \
+ cy = mpn_redc_2 (rp, up, mp, n, mip); \
+ if (cy != 0) \
+ mpn_sub_n (rp, rp, mp, n); \
+ } while (0)
+
#if HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2
#define WANT_REDC_2 1
#endif
TMP_MARK;
- count_leading_zeros (cnt, ep[en - 1]);
- ebi = (mp_bitcnt_t) en * GMP_LIMB_BITS - cnt;
+ MPN_SIZEINBASE_2EXP(ebi, ep, en, 1);
#if 0
if (bn < n)
mpn_sqr (tp, this_pp, n);
#if WANT_REDC_2
if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
- mpn_redc_1 (rp, tp, mp, n, mip[0]);
+ MPN_REDC_1 (rp, tp, mp, n, mip[0]);
else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
- mpn_redc_2 (rp, tp, mp, n, mip);
+ MPN_REDC_2 (rp, tp, mp, n, mip);
#else
if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
- mpn_redc_1 (rp, tp, mp, n, mip[0]);
+ MPN_REDC_1 (rp, tp, mp, n, mip[0]);
#endif
else
mpn_redc_n (rp, tp, mp, n, mip);
this_pp += n;
#if WANT_REDC_2
if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
- mpn_redc_1 (this_pp, tp, mp, n, mip[0]);
+ MPN_REDC_1 (this_pp, tp, mp, n, mip[0]);
else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
- mpn_redc_2 (this_pp, tp, mp, n, mip);
+ MPN_REDC_2 (this_pp, tp, mp, n, mip);
#else
if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
- mpn_redc_1 (this_pp, tp, mp, n, mip[0]);
+ MPN_REDC_1 (this_pp, tp, mp, n, mip[0]);
#endif
else
mpn_redc_n (this_pp, tp, mp, n, mip);
{
if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
{
+ if (REDC_1_TO_REDC_2_THRESHOLD < SQR_BASECASE_THRESHOLD
+ || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
+ {
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
+#define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
+ INNERLOOP;
+ }
+ else
+ {
#undef MPN_MUL_N
#undef MPN_SQR
#undef MPN_REDUCE
#define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
#define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_1 (rp, tp, mp, n, mip[0])
- INNERLOOP;
+#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
+ INNERLOOP;
+ }
}
else if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
{
+ if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
+ || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
+ {
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
+#define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_2 (rp, tp, mp, n, mip)
+ INNERLOOP;
+ }
+ else
+ {
#undef MPN_MUL_N
#undef MPN_SQR
#undef MPN_REDUCE
#define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
#define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_2 (rp, tp, mp, n, mip)
- INNERLOOP;
+#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_2 (rp, tp, mp, n, mip)
+ INNERLOOP;
+ }
}
else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
{
#undef MPN_REDUCE
#define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n)
#define MPN_SQR(r,a,n) mpn_sqr (r,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_2 (rp, tp, mp, n, mip)
+#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_2 (rp, tp, mp, n, mip)
INNERLOOP;
}
else
{
if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
{
+ if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
+ || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
+ {
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
+#define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
+ INNERLOOP;
+ }
+ else
+ {
#undef MPN_MUL_N
#undef MPN_SQR
#undef MPN_REDUCE
#define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
#define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_1 (rp, tp, mp, n, mip[0])
- INNERLOOP;
+#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
+ INNERLOOP;
+ }
}
else if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
{
#undef MPN_REDUCE
#define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n)
#define MPN_SQR(r,a,n) mpn_sqr (r,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_1 (rp, tp, mp, n, mip[0])
+#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
INNERLOOP;
}
else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
#undef MPN_REDUCE
#define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n)
#define MPN_SQR(r,a,n) mpn_sqr (r,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_2 (rp, tp, mp, n, mip)
+#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_2 (rp, tp, mp, n, mip)
INNERLOOP;
}
else
{
if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
{
+ if (REDC_1_TO_REDC_N_THRESHOLD < SQR_BASECASE_THRESHOLD
+ || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
+ {
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
+#define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
+ INNERLOOP;
+ }
+ else
+ {
#undef MPN_MUL_N
#undef MPN_SQR
#undef MPN_REDUCE
#define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
#define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_1 (rp, tp, mp, n, mip[0])
- INNERLOOP;
+#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
+ INNERLOOP;
+ }
}
else if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
{
+ if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
+ || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
+ {
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
+#define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip)
+ INNERLOOP;
+ }
+ else
+ {
#undef MPN_MUL_N
#undef MPN_SQR
#undef MPN_REDUCE
#define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
#define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n)
#define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip)
- INNERLOOP;
+ INNERLOOP;
+ }
}
else
{
{
if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
{
+ if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
+ || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
+ {
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
+#define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
+ INNERLOOP;
+ }
+ else
+ {
#undef MPN_MUL_N
#undef MPN_SQR
#undef MPN_REDUCE
#define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
#define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_1 (rp, tp, mp, n, mip[0])
- INNERLOOP;
+#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
+ INNERLOOP;
+ }
}
else if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
{
#undef MPN_REDUCE
#define MPN_MUL_N(r,a,b,n) mpn_mul_n (r,a,b,n)
#define MPN_SQR(r,a,n) mpn_sqr (r,a,n)
-#define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_1 (rp, tp, mp, n, mip[0])
+#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
INNERLOOP;
}
else
#if WANT_REDC_2
if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
- mpn_redc_1 (rp, tp, mp, n, mip[0]);
+ MPN_REDC_1 (rp, tp, mp, n, mip[0]);
else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
- mpn_redc_2 (rp, tp, mp, n, mip);
+ MPN_REDC_2 (rp, tp, mp, n, mip);
#else
if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
- mpn_redc_1 (rp, tp, mp, n, mip[0]);
+ MPN_REDC_1 (rp, tp, mp, n, mip[0]);
#endif
else
mpn_redc_n (rp, tp, mp, n, mip);
* Choose window size without looping. (Superoptimize or think(tm).)
- * Call new division functions, not mpn_tdiv_qr.
+ * Call side-channel silent division function for converting to REDC residue.
+
+ * REDC_1_TO_REDC_2_THRESHOLD might actually represent the cutoff between
+ redc_1 and redc_n. On such systems, we will switch to redc_2 causing
+ slowdown.
*/
#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"
-#define WANT_CACHE_SECURITY 1
+#undef MPN_REDC_1_SEC
+#define MPN_REDC_1_SEC(rp, up, mp, n, invm) \
+ do { \
+ mp_limb_t cy; \
+ cy = mpn_redc_1 (rp, up, mp, n, invm); \
+ mpn_subcnd_n (rp, rp, mp, n, cy); \
+ } while (0)
+#undef MPN_REDC_2_SEC
+#define MPN_REDC_2_SEC(rp, up, mp, n, mip) \
+ do { \
+ mp_limb_t cy; \
+ cy = mpn_redc_2 (rp, up, mp, n, mip); \
+ mpn_subcnd_n (rp, rp, mp, n, cy); \
+ } while (0)
+
+#if HAVE_NATIVE_mpn_addmul_2 || HAVE_NATIVE_mpn_redc_2
+#define WANT_REDC_2 1
+#endif
/* Define our own mpn squaring function. We do this since we cannot use a
native mpn_sqr_basecase over TUNE_SQR_TOOM2_MAX, or a non-native one over
#endif
#define getbit(p,bi) \
- ((p[(bi - 1) / GMP_LIMB_BITS] >> (bi - 1) % GMP_LIMB_BITS) & 1)
+ ((p[(bi - 1) / GMP_NUMB_BITS] >> (bi - 1) % GMP_NUMB_BITS) & 1)
static inline mp_limb_t
getbits (const mp_limb_t *p, mp_bitcnt_t bi, int nbits)
else
{
bi -= nbits; /* bit index of low bit to extract */
- i = bi / GMP_LIMB_BITS; /* word index of low bit to extract */
- bi %= GMP_LIMB_BITS; /* bit index in low word */
+ i = bi / GMP_NUMB_BITS; /* word index of low bit to extract */
+ bi %= GMP_NUMB_BITS; /* bit index in low word */
r = p[i] >> bi; /* extract (low) bits */
- nbits_in_r = GMP_LIMB_BITS - bi; /* number of bits now in r */
+ nbits_in_r = GMP_NUMB_BITS - bi; /* number of bits now in r */
if (nbits_in_r < nbits) /* did we get enough bits? */
r += p[i + 1] << nbits_in_r; /* prepend bits from higher word */
return r & (((mp_limb_t ) 1 << nbits) - 1);
}
}
+#ifndef POWM_SEC_TABLE
+#if GMP_NUMB_BITS < 50
+#define POWM_SEC_TABLE 2,33,96,780,2741
+#else
+#define POWM_SEC_TABLE 2,130,524,2578
+#endif
+#endif
+
+#if TUNE_PROGRAM_BUILD
+extern int win_size (mp_bitcnt_t);
+#else
static inline int
win_size (mp_bitcnt_t eb)
{
int k;
- static mp_bitcnt_t x[] = {0,4,27,100,325,1026,2905,7848,20457,51670,~(mp_bitcnt_t)0};
+ static mp_bitcnt_t x[] = {0,POWM_SEC_TABLE,~(mp_bitcnt_t)0};
for (k = 1; eb > x[k]; k++)
;
return k;
}
+#endif
-/* Convert U to REDC form, U_r = B^n * U mod M */
+/* Convert U to REDC form, U_r = B^n * U mod M.
+ Uses scratch space at tp of size 2un + n + 1. */
static void
redcify (mp_ptr rp, mp_srcptr up, mp_size_t un, mp_srcptr mp, mp_size_t n, mp_ptr tp)
{
+#if 0
mp_ptr qp;
- qp = tp + un + n;
+ qp = tp + un + n; /* un + n - n + 1 = un + 1 limbs */
MPN_ZERO (tp, n);
MPN_COPY (tp + n, up, un);
+
mpn_tdiv_qr (qp, rp, 0L, tp, un + n, mp, n);
+#else
+ /* FIXME: Use passed scratch space instead of allocating our own! */
+ mp_ptr scratch;
+ TMP_DECL;
+ TMP_MARK;
+
+ MPN_ZERO (tp, n);
+ MPN_COPY (tp + n, up, un);
+
+ scratch = TMP_ALLOC_LIMBS ((un + n) + 2 * n + 2);
+ mpn_sb_div_r_sec (tp, un + n, mp, n, scratch);
+ MPN_COPY (rp, tp, n);
+ TMP_FREE;
+#endif
}
/* rp[n-1..0] = bp[bn-1..0] ^ ep[en-1..0] mod mp[n-1..0]
- Requires that mp[n-1..0] is odd. FIXME: is this true?
- Requires that ep[en-1..0] is > 1.
- Uses scratch space at tp of 3n+1 limbs. */
+ Requires that mp[n-1..0] is odd.
+ Requires that ep[en-1..0] > 1.
+ Uses scratch space at tp as defined by mpn_powm_sec_itch. */
void
mpn_powm_sec (mp_ptr rp, mp_srcptr bp, mp_size_t bn,
mp_srcptr ep, mp_size_t en,
mp_srcptr mp, mp_size_t n, mp_ptr tp)
{
- mp_limb_t minv;
- int cnt;
+ mp_limb_t ip[2], *mip;
mp_bitcnt_t ebi;
int windowsize, this_windowsize;
mp_limb_t expbits;
ASSERT (en > 1 || (en == 1 && ep[0] > 0));
ASSERT (n >= 1 && ((mp[0] & 1) != 0));
- count_leading_zeros (cnt, ep[en - 1]);
- ebi = (mp_bitcnt_t) en * GMP_LIMB_BITS - cnt;
+ MPN_SIZEINBASE_2EXP(ebi, ep, en, 1);
windowsize = win_size (ebi);
- binvert_limb (minv, mp[0]);
- minv = -minv;
+#if WANT_REDC_2
+ if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
+ {
+ mip = ip;
+ binvert_limb (mip[0], mp[0]);
+ mip[0] = -mip[0];
+ }
+ else
+ {
+ mip = ip;
+ mpn_binvert (mip, mp, 2, tp);
+ mip[0] = -mip[0]; mip[1] = ~mip[1];
+ }
+#else
+ mip = ip;
+ binvert_limb (mip[0], mp[0]);
+ mip[0] = -mip[0];
+#endif
+
- pp = tp + 4 * n;
+ pp = tp;
+ tp += (n << windowsize); /* put tp after power table */
+ /* Compute pp[0] table entry */
+ /* scratch: | n | 1 | n+2 | */
+ /* | pp[0] | 1 | redcify | */
this_pp = pp;
this_pp[n] = 1;
- redcify (this_pp, this_pp + n, 1, mp, n, tp + 6 * n);
+ redcify (this_pp, this_pp + n, 1, mp, n, this_pp + n + 1);
this_pp += n;
- redcify (this_pp, bp, bn, mp, n, tp + 6 * n);
+
+ /* Compute pp[1] table entry. To avoid excessive scratch usage in the
+ degenerate situation where B >> M, we let redcify use scratch space which
+ will later be used by the pp table (element 2 and up). */
+ /* scratch: | n | n | bn + n + 1 | */
+ /* | pp[0] | pp[1] | redcify | */
+ redcify (this_pp, bp, bn, mp, n, this_pp + n);
/* Precompute powers of b and put them in the temporary area at pp. */
+ /* scratch: | n | n | ... | | 2n | */
+ /* | pp[0] | pp[1] | ... | pp[2^windowsize-1] | product | */
for (i = (1 << windowsize) - 2; i > 0; i--)
{
mpn_mul_basecase (tp, this_pp, n, pp + n, n);
this_pp += n;
- mpn_redc_1_sec (this_pp, tp, mp, n, minv);
+#if WANT_REDC_2
+ if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
+ MPN_REDC_1_SEC (this_pp, tp, mp, n, mip[0]);
+ else
+ MPN_REDC_2_SEC (this_pp, tp, mp, n, mip);
+#else
+ MPN_REDC_1_SEC (this_pp, tp, mp, n, mip[0]);
+#endif
}
expbits = getbits (ep, ebi, windowsize);
else
ebi -= windowsize;
-#if WANT_CACHE_SECURITY
mpn_tabselect (rp, pp, n, 1 << windowsize, expbits);
-#else
- MPN_COPY (rp, pp + n * expbits, n);
-#endif
- while (ebi != 0)
- {
- expbits = getbits (ep, ebi, windowsize);
- this_windowsize = windowsize;
- if (ebi < windowsize)
- {
- this_windowsize -= windowsize - ebi;
- ebi = 0;
- }
- else
- ebi -= windowsize;
-
- do
- {
- mpn_local_sqr (tp, rp, n, tp + 2 * n);
- mpn_redc_1_sec (rp, tp, mp, n, minv);
- this_windowsize--;
- }
- while (this_windowsize != 0);
+ /* Main exponentiation loop. */
+ /* scratch: | n | n | ... | | 3n-4n | */
+ /* | pp[0] | pp[1] | ... | pp[2^windowsize-1] | loop scratch | */
+
+#define INNERLOOP \
+ while (ebi != 0) \
+ { \
+ expbits = getbits (ep, ebi, windowsize); \
+ this_windowsize = windowsize; \
+ if (ebi < windowsize) \
+ { \
+ this_windowsize -= windowsize - ebi; \
+ ebi = 0; \
+ } \
+ else \
+ ebi -= windowsize; \
+ \
+ do \
+ { \
+ mpn_local_sqr (tp, rp, n, tp + 2 * n); \
+ MPN_REDUCE (rp, tp, mp, n, mip); \
+ this_windowsize--; \
+ } \
+ while (this_windowsize != 0); \
+ \
+ mpn_tabselect (tp + 2*n, pp, n, 1 << windowsize, expbits); \
+ mpn_mul_basecase (tp, rp, n, tp + 2*n, n); \
+ \
+ MPN_REDUCE (rp, tp, mp, n, mip); \
+ }
-#if WANT_CACHE_SECURITY
- mpn_tabselect (tp + 2*n, pp, n, 1 << windowsize, expbits);
- mpn_mul_basecase (tp, rp, n, tp + 2*n, n);
+#if WANT_REDC_2
+ if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
+ {
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
+#define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1_SEC (rp, tp, mp, n, mip[0])
+ INNERLOOP;
+ }
+ else
+ {
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
+#define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_2_SEC (rp, tp, mp, n, mip)
+ INNERLOOP;
+ }
#else
- mpn_mul_basecase (tp, rp, n, pp + n * expbits, n);
+#undef MPN_MUL_N
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_MUL_N(r,a,b,n) mpn_mul_basecase (r,a,n,b,n)
+#define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1_SEC (rp, tp, mp, n, mip[0])
+ INNERLOOP;
#endif
- mpn_redc_1_sec (rp, tp, mp, n, minv);
- }
MPN_COPY (tp, rp, n);
MPN_ZERO (tp + n, n);
- mpn_redc_1_sec (rp, tp, mp, n, minv);
+
+#if WANT_REDC_2
+ if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
+ MPN_REDC_1_SEC (rp, tp, mp, n, mip[0]);
+ else
+ MPN_REDC_2_SEC (rp, tp, mp, n, mip);
+#else
+ MPN_REDC_1_SEC (rp, tp, mp, n, mip[0]);
+#endif
cnd = mpn_sub_n (tp, rp, mp, n); /* we need just retval */
mpn_subcnd_n (rp, rp, mp, n, !cnd);
}
-#if ! HAVE_NATIVE_mpn_tabselect
-/* Select entry `which' from table `tab', which has nents entries, each `n'
- limbs. Store the selected entry at rp. Reads entire table to avoid
- side-channel information leaks. O(n*nents).
- FIXME: Move to its own file. */
-void
-mpn_tabselect (volatile mp_limb_t *rp, volatile mp_limb_t *tab, mp_size_t n,
- mp_size_t nents, mp_size_t which)
-{
- mp_size_t k, i;
- mp_limb_t mask;
- volatile mp_limb_t *tp;
-
- for (k = 0; k < nents; k++)
- {
- mask = -(mp_limb_t) (which == k);
- tp = tab + n * k;
- for (i = 0; i < n; i++)
- {
- rp[i] = (rp[i] & ~mask) | (tp[i] & mask);
- }
- }
-}
-#endif
-
mp_size_t
mpn_powm_sec_itch (mp_size_t bn, mp_size_t en, mp_size_t n)
{
int windowsize;
mp_size_t redcify_itch, itch;
+ /* The peak scratch usage occurs either when reducing B in the 2nd redcify
+ call or, more typically, in the main loop, where it is n*2^windowsize plus
+ 3n or 4n. (It is 3n or 4n depending on whether we use mpn_local_sqr or a
+ native mpn_sqr_basecase. We assume 4n always for now.) */
+
windowsize = win_size (en * GMP_NUMB_BITS); /* slight over-estimate of exp */
- itch = 4 * n + (n << windowsize);
- redcify_itch = 2 * bn + n + 1;
- /* The 6n is due to the placement of reduce scratch 6n into the start of the
- scratch area. */
- return MAX (itch, redcify_itch + 6 * n);
+
+ /* The 2n term is due to pp[0] and pp[1] at the time of the 2nd redcify call,
+ the 2bn + n + 1 term is due to redcify's own usage. */
+ redcify_itch = (2 * n) + (2 * bn + n + 1);
+
+ /* The n * 2^windowsize term is due to the power table, the 4n term is due to
+ scratch needs of squaring/multiplication in the exponentiation loop. */
+ itch = (n << windowsize) + (4 * n);
+
+ return MAX (itch, redcify_itch);
}
{
mp_size_t i;
mp_limb_t n0, r;
- mp_limb_t dummy;
ASSERT (un >= 1);
ASSERT (d & GMP_LIMB_HIGHBIT);
for (i = un - 2; i >= 0; i--)
{
n0 = up[i];
- udiv_qrnnd_preinv (dummy, r, r, n0, d, dinv);
+ udiv_rnnd_preinv (r, r, n0, d, dinv);
}
return r;
}
/* mpn_random2 -- Generate random numbers with relatively long strings
of ones and zeroes. Suitable for border testing.
-Copyright 1992, 1993, 1994, 1996, 2000, 2001, 2002, 2004 Free Software
+Copyright 1992, 1993, 1994, 1996, 2000, 2001, 2002, 2004, 2012 Free Software
Foundation, Inc.
This file is part of the GNU MP Library.
#include "gmp.h"
#include "gmp-impl.h"
-static void gmp_rrandomb __GMP_PROTO ((mp_ptr, gmp_randstate_t, mp_bitcnt_t));
+static void gmp_rrandomb (mp_ptr, gmp_randstate_t, mp_bitcnt_t);
/* Ask _gmp_rand for 32 bits per call unless that's more than a limb can hold.
Thus, we get the same random number sequence in the common cases.
-/* mpn_redc_1. Set cp[] <- up[]/R^n mod mp[]. Clobber up[].
+/* mpn_redc_1. Set rp[] <- up[]/R^n mod mp[]. Clobber up[].
mp[] is n limbs; up[] is 2n limbs.
THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE. IT IS ONLY
SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
-Copyright (C) 2000, 2001, 2002, 2004, 2008, 2009 Free Software Foundation, Inc.
+Copyright (C) 2000, 2001, 2002, 2004, 2008, 2009, 2012 Free Software
+Foundation, Inc.
This file is part of the GNU MP Library.
#include "gmp.h"
#include "gmp-impl.h"
-void
+mp_limb_t
mpn_redc_1 (mp_ptr rp, mp_ptr up, mp_srcptr mp, mp_size_t n, mp_limb_t invm)
{
mp_size_t j;
up[0] = cy;
up++;
}
+
cy = mpn_add_n (rp, up, up - n, n);
- if (cy != 0)
- mpn_sub_n (rp, rp, mp, n);
+ return cy;
}
+++ /dev/null
-/* mpn_redc_1_sec. Set cp[] <- up[]/R^n mod mp[]. Clobber up[].
- mp[] is n limbs; up[] is 2n limbs.
-
- THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE. IT IS ONLY
- SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
-
-Copyright (C) 2000, 2001, 2002, 2004, 2008, 2009 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-void
-mpn_redc_1_sec (mp_ptr rp, mp_ptr up, mp_srcptr mp, mp_size_t n, mp_limb_t invm)
-{
- mp_size_t j;
- mp_limb_t cy;
-
- ASSERT (n > 0);
- ASSERT_MPN (up, 2*n);
-
- for (j = n - 1; j >= 0; j--)
- {
- cy = mpn_addmul_1 (up, mp, n, (up[0] * invm) & GMP_NUMB_MASK);
- ASSERT (up[0] == 0);
- up[0] = cy;
- up++;
- }
- cy = mpn_add_n (rp, up, up - n, n);
- mpn_subcnd_n (rp, rp, mp, n, cy);
-}
-/* mpn_redc_2. Set cp[] <- up[]/R^n mod mp[]. Clobber up[].
+/* mpn_redc_2. Set rp[] <- up[]/R^n mod mp[]. Clobber up[].
mp[] is n limbs; up[] is 2n limbs.
THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE. IT IS ONLY
SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
-Copyright (C) 2000, 2001, 2002, 2004, 2008 Free Software Foundation, Inc.
+Copyright (C) 2000, 2001, 2002, 2004, 2008, 2012 Free Software Foundation,
+Inc.
This file is part of the GNU MP Library.
/* For testing purposes, define our own mpn_addmul_2 if there is none already
available. */
#ifndef HAVE_NATIVE_mpn_addmul_2
-mp_limb_t
+#undef mpn_addmul_2
+static mp_limb_t
mpn_addmul_2 (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_srcptr vp)
{
rp[n] = mpn_addmul_1 (rp, up, n, vp[0]);
} while (0)
#endif
-void
+mp_limb_t
mpn_redc_2 (mp_ptr rp, mp_ptr up, mp_srcptr mp, mp_size_t n, mp_srcptr mip)
{
mp_limb_t q[2];
up[n] = upn;
up += 2;
}
+
cy = mpn_add_n (rp, up, up - n, n);
- if (cy != 0)
- mpn_sub_n (rp, rp, mp, n);
+ return cy;
}
-/* mpn_redc_n. Set cp[] <- up[]/R^n mod mp[]. Clobber up[].
+/* mpn_redc_n. Set rp[] <- up[]/R^n mod mp[]. Clobber up[].
mp[] is n limbs; up[] is 2n limbs, the inverse ip[] is n limbs.
THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE. IT IS ONLY
SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
-Copyright (C) 2009 Free Software Foundation, Inc.
+Copyright 2009, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
assumption.
* Decrease scratch usage.
+
+ * Consider removing the residue canonicalisation.
*/
void
TMP_DECL;
TMP_MARK;
+ ASSERT (n > 8);
+
rn = mpn_mulmod_bnm1_next_size (n);
scratch = TMP_ALLOC_LIMBS (n + rn + mpn_mulmod_bnm1_itch (rn, n, n));
SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
-Copyright 2009 Free Software Foundation, Inc.
+Copyright 2009, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
* If we allow ourselves to clobber U, we could save the other of qp and qp2.
*/
+/* FIXME: We need to wrap mpn_bdiv_qr due to the itch interface. This need
+ indicates a flaw in the current itch mechanism: Which operands not greater
+ than un,un will incur the worst itch? We need a parallel foo_maxitch set
+ of functions. */
+static void
+mpn_bdiv_qr_wrap (mp_ptr qp, mp_ptr rp,
+ mp_srcptr np, mp_size_t nn,
+ mp_srcptr dp, mp_size_t dn)
+{
+ mp_ptr scratch_out;
+ TMP_DECL;
+
+ TMP_MARK;
+ scratch_out = TMP_ALLOC_LIMBS (mpn_bdiv_qr_itch (nn, dn));
+ mpn_bdiv_qr (qp, rp, np, nn, dp, dn, scratch_out);
+
+ TMP_FREE;
+}
+
mp_bitcnt_t
mpn_remove (mp_ptr wp, mp_size_t *wn,
mp_ptr up, mp_size_t un, mp_ptr vp, mp_size_t vn,
mp_ptr pwpsp[LOG];
mp_size_t pwpsn[LOG];
mp_size_t npowers;
- mp_ptr tp, qp, np, pp, qp2, scratch_out;
+ mp_ptr tp, qp, np, pp, qp2;
mp_size_t pn, nn, qn, i;
mp_bitcnt_t pwr;
TMP_DECL;
TMP_MARK;
- tp = TMP_ALLOC_LIMBS ((un + vn) / 2); /* remainder */
- qp = TMP_ALLOC_LIMBS (un); /* quotient, alternating */
- qp2 = TMP_ALLOC_LIMBS (un); /* quotient, alternating */
+ tp = TMP_ALLOC_LIMBS ((un + 1 + vn) / 2); /* remainder */
+ qp = TMP_ALLOC_LIMBS (un + 1); /* quotient, alternating */
+ qp2 = TMP_ALLOC_LIMBS (un + 1); /* quotient, alternating */
np = TMP_ALLOC_LIMBS (un + LOG); /* powers of V */
pp = vp;
pn = vn;
- /* FIXME: This allocation need indicate a flaw in the current itch mechanism:
- Which operands not greater than un,un will incur the worst itch? We need
- a parallel foo_maxitch set of functions. */
- scratch_out = TMP_ALLOC_LIMBS (mpn_bdiv_qr_itch (un, un >> 1));
-
MPN_COPY (qp, up, un);
qn = un;
npowers = 0;
while (qn >= pn)
{
- mpn_bdiv_qr (qp2, tp, qp, qn, pp, pn, scratch_out);
+ qp[qn] = 0;
+ mpn_bdiv_qr_wrap (qp2, tp, qp, qn + 1, pp, pn);
if (!mpn_zero_p (tp, pn))
break; /* could not divide by V^npowers */
if (((mp_bitcnt_t) 2 << npowers) - 1 > cap)
break;
- nn = 2 * pn - 1; /* next power will be at least this many limbs */
+ nn = 2 * pn - 1; /* next power will be at least this large */
if (nn > qn)
break; /* next power would be overlarge */
if (pwr + ((mp_bitcnt_t) 1 << i) > cap)
continue; /* V^i would bring us past cap */
- mpn_bdiv_qr (qp2, tp, qp, qn, pp, pn, scratch_out);
+ qp[qn] = 0;
+ mpn_bdiv_qr_wrap (qp2, tp, qp, qn + 1, pp, pn);
if (!mpn_zero_p (tp, pn))
continue; /* could not divide by V^i */
ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT'S ALMOST
GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-Copyright 2002, 2005, 2009, 2010 Free Software Foundation, Inc.
+Copyright 2002, 2005, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
mpn_rootrem (mp_ptr rootp, mp_ptr remp,
mp_srcptr up, mp_size_t un, mp_limb_t k)
{
+ mp_size_t m;
ASSERT (un > 0);
ASSERT (up[un - 1] != 0);
ASSERT (k > 1);
- if ((remp == NULL) && (un / k > 2))
- /* call mpn_rootrem recursively, padding {up,un} with k zero limbs,
- which will produce an approximate root with one more limb,
- so that in most cases we can conclude. */
+ m = (un - 1) / k; /* ceil(un/k) - 1 */
+ if (remp == NULL && m > 2)
+ /* Pad {up,un} with k zero limbs. This will produce an approximate root
+ with one more limb, allowing us to compute the exact integral result. */
{
mp_ptr sp, wp;
mp_size_t rn, sn, wn;
TMP_MARK;
wn = un + k;
wp = TMP_ALLOC_LIMBS (wn); /* will contain the padded input */
- sn = (un - 1) / k + 2; /* ceil(un/k) + 1 */
+ sn = m + 2; /* ceil(un/k) + 1 */
sp = TMP_ALLOC_LIMBS (sn); /* approximate root of padded input */
MPN_COPY (wp + k, up, un);
MPN_ZERO (wp, k);
rn = mpn_rootrem_internal (sp, NULL, wp, wn, k, 1);
- /* the approximate root S = {sp,sn} is either the correct root of
- {sp,sn}, or one too large. Thus unless the least significant limb
- of S is 0 or 1, we can deduce the root of {up,un} is S truncated by
- one limb. (In case sp[0]=1, we can deduce the root, but not decide
+ /* The approximate root S = {sp,sn} is either the correct root of
+ {wp,wn}, or 1 too large. Thus unless the least significant limb of
+ S is 0 or 1, we can deduce the root of {up,un} is S truncated by one
+ limb. (In case sp[0]=1, we can deduce the root, but not decide
whether it is exact or not.) */
MPN_COPY (rootp, sp + 1, sn - 1);
TMP_FREE;
return rn;
}
- else /* remp <> NULL */
+ else
{
return mpn_rootrem_internal (rootp, remp, up, un, k, 0);
}
mp_limb_t save, save2, cy;
unsigned long int unb; /* number of significant bits of {up,un} */
unsigned long int xnb; /* number of significant bits of the result */
- unsigned int cnt;
unsigned long b, kk;
unsigned long sizes[GMP_NUMB_BITS + 1];
int ni, i;
TMP_MARK;
- /* qp and wp need enough space to store S'^k where S' is an approximate
- root. Since S' can be as large as S+2, the worst case is when S=2 and
- S'=4. But then since we know the number of bits of S in advance, S'
- can only be 3 at most. Similarly for S=4, then S' can be 6 at most.
- So the worst case is S'/S=3/2, thus S'^k <= (3/2)^k * S^k. Since S^k
- fits in un limbs, the number of extra limbs needed is bounded by
- ceil(k*log2(3/2)/GMP_NUMB_BITS). */
-#define EXTRA 2 + (mp_size_t) (0.585 * (double) k / (double) GMP_NUMB_BITS)
- qp = TMP_ALLOC_LIMBS (un + EXTRA); /* will contain quotient and remainder
- of R/(k*S^(k-1)), and S^k */
if (remp == NULL)
{
rp = TMP_ALLOC_LIMBS (un + 1); /* will contain the remainder */
rp = remp;
}
sp = rootp;
- wp = TMP_ALLOC_LIMBS (un + EXTRA); /* will contain S^(k-1), k*S^(k-1),
- and temporary for mpn_pow_1 */
- count_leading_zeros (cnt, up[un - 1]);
- unb = un * GMP_NUMB_BITS - cnt + GMP_NAIL_BITS;
+
+ MPN_SIZEINBASE_2EXP(unb, up, un, 1);
/* unb is the number of bits of the input U */
xnb = (unb - 1) / k + 1; /* ceil (unb / k) */
Newton iteration will first compute sizes[ni-1] extra bits,
then sizes[ni-2], ..., then sizes[0] = b. */
+ /* qp and wp need enough space to store S'^k where S' is an approximate
+ root. Since S' can be as large as S+2, the worst case is when S=2 and
+ S'=4. But then since we know the number of bits of S in advance, S'
+ can only be 3 at most. Similarly for S=4, then S' can be 6 at most.
+ So the worst case is S'/S=3/2, thus S'^k <= (3/2)^k * S^k. Since S^k
+ fits in un limbs, the number of extra limbs needed is bounded by
+ ceil(k*log2(3/2)/GMP_NUMB_BITS). */
+#define EXTRA 2 + (mp_size_t) (0.585 * (double) k / (double) GMP_NUMB_BITS)
+ qp = TMP_ALLOC_LIMBS (un + EXTRA); /* will contain quotient and remainder
+ of R/(k*S^(k-1)), and S^k */
+ wp = TMP_ALLOC_LIMBS (un + EXTRA); /* will contain S^(k-1), k*S^(k-1),
+ and temporary for mpn_pow_1 */
+
wp[0] = 1; /* {sp,sn}^(k-1) = 1 */
wn = 1;
for (i = ni; i != 0; i--)
}
else
{
- mp_ptr tp;
qn = rn - wn; /* expected quotient size */
- /* tp must have space for wn limbs.
- The quotient needs rn-wn+1 limbs, thus quotient+remainder
- need altogether rn+1 limbs. */
- tp = qp + qn + 1; /* put remainder in Q buffer */
mpn_div_q (qp, rp, rn, wp, wn, scratch);
qn += qp[qn] != 0;
}
ASSERT_ALWAYS (rn >= qn);
/* R = R - Q = floor(U/2^kk) - S^k */
- if ((i > 1) || (approx == 0))
+ if (i > 1 || approx == 0)
{
mpn_sub (rp, rp, rn, qp, qn);
MPN_NORMALIZE (rp, rn);
--- /dev/null
+/* mpn_sb_div_qr_sec, mpn_sb_div_r_sec -- Compute Q = floor(U / V), U = U mod
+ V. Side-channel silent under the assumption that the used instructions are
+ side-channel silent.
+
+ Contributed to the GNU project by Torbjorn Granlund.
+
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
+ SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2011, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published by the
+Free Software Foundation; either version 3 of the License, or (at your option)
+any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
+for more details.
+
+You should have received a copy of the GNU Lesser General Public License along
+with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#if OPERATION_sb_div_qr_sec
+/* Needs (nn + dn + 1) + mpn_sbpi1_div_qr_sec's needs of (2nn' - dn + 1) for a
+ total of 3nn + 4 limbs at tp. Note that mpn_sbpi1_div_qr_sec's nn is one
+ greater than ours, therefore +4 and not just +2. */
+#define FNAME mpn_sb_div_qr_sec
+#define Q(q) q,
+#endif
+#if OPERATION_sb_div_r_sec
+/* Needs (nn + dn + 1) + mpn_sbpi1_div_r_sec's needs of (dn + 1) for a total of
+ nn + 2dn + 2 limbs at tp. */
+#define FNAME mpn_sb_div_r_sec
+#define Q(q)
+#endif
+
+void
+FNAME (Q(mp_ptr qp)
+ mp_ptr np, mp_size_t nn,
+ mp_srcptr dp, mp_size_t dn,
+ mp_ptr tp)
+{
+ mp_limb_t d1, d0, qh;
+ unsigned int cnt;
+ mp_ptr np2, dp2;
+ gmp_pi1_t dinv;
+ mp_limb_t inv32;
+ mp_limb_t cy;
+
+ ASSERT (dn >= 1);
+ ASSERT (nn >= dn);
+ ASSERT (dp[dn - 1] != 0);
+
+ d1 = dp[dn - 1];
+ count_leading_zeros (cnt, d1);
+
+ if (cnt != 0)
+ {
+ dp2 = tp; /* dn limbs */
+ mpn_lshift (dp2, dp, dn, cnt);
+
+ np2 = tp + dn; /* (nn + 1) limbs */
+ cy = mpn_lshift (np2, np, nn, cnt);
+ np2[nn++] = cy;
+ }
+ else
+ {
+ /* FIXME: Consider copying np->np2 here, adding a 0-limb at the top.
+ That would simplify the underlying sbpi1 function, since then it
+ could assume nn > dn. */
+ dp2 = (mp_ptr) dp;
+ np2 = np;
+ }
+
+ d0 = dp2[dn - 1];
+ d0 += (~d0 != 0);
+ invert_limb (inv32, d0);
+
+ /* We add nn + dn to tp here, not nn + 1 + dn, as expected. This is since nn
+ here will have been incremented. */
+#if OPERATION_sb_div_qr_sec
+ qh = mpn_sbpi1_div_qr_sec (qp, np2, nn, dp2, dn, inv32, tp + nn + dn);
+#else
+ mpn_sbpi1_div_r_sec (np2, nn, dp2, dn, inv32, tp + nn + dn);
+#endif
+
+ if (cnt == 0)
+ ; /* we have np = np2 here. */
+ else
+ mpn_rshift (np, np2, dn, cnt);
+
+#if OPERATION_sb_div_qr_sec
+ if (cnt == 0)
+ qp[nn - dn] = qh;
+#endif
+}
IT IS ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS
ALMOST GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
-Copyright 2005, 2006, 2009 Free Software Foundation, Inc.
+Copyright 2005, 2006, 2009, 2011, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
ASSERT (dn > 0);
ASSERT (nn >= dn);
ASSERT ((dp[0] & 1) != 0);
+ /* FIXME: Add ASSERTs for allowable overlapping; i.e., that qp = np is OK,
+ but some other N/Q overlaps will not work. */
for (i = nn - dn; i > 0; i--)
{
q = dinv * np[0];
- qp[0] = ~q;
- qp++;
cy = mpn_addmul_1 (np, dp, dn, q);
mpn_add_1 (np + dn, np + dn, i, cy);
ASSERT (np[0] == 0);
+ qp[0] = ~q;
+ qp++;
np++;
}
for (i = dn; i > 1; i--)
{
q = dinv * np[0];
- qp[0] = ~q;
- qp++;
mpn_addmul_1 (np, dp, i, q);
ASSERT (np[0] == 0);
+ qp[0] = ~q;
+ qp++;
np++;
}
IT IS ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS
ALMOST GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GMP RELEASE.
-Copyright 2006, 2009 Free Software Foundation, Inc.
+Copyright 2006, 2009, 2011, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
ASSERT (dn > 0);
ASSERT (nn > dn);
ASSERT ((dp[0] & 1) != 0);
+ /* FIXME: Add ASSERTs for allowable overlapping; i.e., that qp = np is OK,
+ but some other N/Q overlaps will not work. */
qn = nn - dn;
mp_limb_t q;
q = dinv * np[i];
- qp[i] = ~q;
-
np[i] = mpn_addmul_1 (np + i, dp, dn, q);
+ qp[i] = ~q;
}
rh += mpn_add (np + dn, np + dn, qn, np, dn);
ql = mpn_add_1 (qp, qp, dn, ql);
mp_limb_t q;
q = dinv * np[i];
- qp[i] = ~q;
-
np[i] = mpn_addmul_1 (np + i, dp, dn, q);
+ qp[i] = ~q;
}
rh += mpn_add_n (np + dn, np + dn, np, qn);
--- /dev/null
+/* mpn_sbpi1_div_qr_sec, mpn_sbpi1_div_r_sec -- Compute Q = floor(U / V), U = U
+ mod V. Side-channel silent under the assumption that the used instructions
+ are side-channel silent.
+
+ Contributed to the GNU project by Torbjorn Granlund.
+
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
+ SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2011, 2012, 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published by the
+Free Software Foundation; either version 3 of the License, or (at your option)
+any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
+for more details.
+
+You should have received a copy of the GNU Lesser General Public License along
+with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* This side-channel silent division algorithm reduces the partial remainder by
+ GMP_NUMB_BITS/2 bits at a time, compared to GMP_NUMB_BITS for the main
+ division algorithm. We do not insist on reducing by exactly
+ GMP_NUMB_BITS/2, but may leave a partial remainder that is D*B^i to 3D*B^i
+ too large (B is the limb base, D is the divisor, and i is the induction
+ variable); the subsequent step will handle the extra partial remainder bits.
+
+ With that partial remainder reduction, each step generates a quotient "half
+ limb". The outer loop generates two quotient half limbs, an upper (q1h) and
+ a lower (q0h) which are stored sparsely in separate limb arrays. These
+ arrays are added at the end; using separate arrays avoids data-dependent
+ carry propagation which could else pose a side-channel leakage problem.
+
+ The quotient half limbs may be between -3 and 0 from the accurate value
+ ("accurate" being the one which corresponds to a reduction to a principal
+ partial remainder). Too small quotient half limbs correspond to too large
+ remainders, which we reduce later, as described above.
+
+ In order to keep quotients from getting too big, corresponding to a negative
+ partial remainder, we use an inverse which is slightly smaller than usual.
+*/
+
+#if OPERATION_sbpi1_div_qr_sec
+/* Needs (dn + 1) + (nn - dn) + (nn - dn) = 2nn - dn + 1 limbs at tp. */
+#define FNAME mpn_sbpi1_div_qr_sec
+#define Q(q) q,
+#define RETTYPE mp_limb_t
+#endif
+#if OPERATION_sbpi1_div_r_sec
+/* Needs (dn + 1) limbs at tp. */
+#define FNAME mpn_sbpi1_div_r_sec
+#define Q(q)
+#define RETTYPE void
+#endif
+
+RETTYPE
+FNAME (Q(mp_ptr qp)
+ mp_ptr np, mp_size_t nn,
+ mp_srcptr dp, mp_size_t dn,
+ mp_limb_t dinv,
+ mp_ptr tp)
+{
+ mp_limb_t nh, cy, q1h, q0h, dummy, cnd;
+ mp_size_t i;
+ mp_ptr hp;
+#if OPERATION_sbpi1_div_qr_sec
+ mp_limb_t qh;
+ mp_ptr qlp, qhp;
+#endif
+
+ ASSERT (dn >= 1);
+ ASSERT (nn >= dn);
+ ASSERT ((dp[dn - 1] & GMP_NUMB_HIGHBIT) != 0);
+
+ if (nn == dn)
+ {
+ cy = mpn_sub_n (np, np, dp, dn);
+ mpn_addcnd_n (np, np, dp, dn, cy);
+#if OPERATION_sbpi1_div_qr_sec
+ return 1 - cy;
+#else
+ return;
+#endif
+ }
+
+ /* Create a divisor copy shifted half a limb. */
+ hp = tp; /* (dn + 1) limbs */
+ hp[dn] = mpn_lshift (hp, dp, dn, GMP_NUMB_BITS / 2);
+
+#if OPERATION_sbpi1_div_qr_sec
+ qlp = tp + (dn + 1); /* (nn - dn) limbs */
+ qhp = tp + (nn + 1); /* (nn - dn) limbs */
+#endif
+
+ np += nn - dn;
+ nh = 0;
+
+ for (i = nn - dn - 1; i >= 0; i--)
+ {
+ np--;
+
+ nh = (nh << GMP_NUMB_BITS/2) + (np[dn] >> GMP_NUMB_BITS/2);
+ umul_ppmm (q1h, dummy, nh, dinv);
+ q1h += nh;
+#if OPERATION_sbpi1_div_qr_sec
+ qhp[i] = q1h;
+#endif
+ mpn_submul_1 (np, hp, dn + 1, q1h);
+
+ nh = np[dn];
+ umul_ppmm (q0h, dummy, nh, dinv);
+ q0h += nh;
+#if OPERATION_sbpi1_div_qr_sec
+ qlp[i] = q0h;
+#endif
+ nh -= mpn_submul_1 (np, dp, dn, q0h);
+ }
+
+ /* 1st adjustment depends on extra high remainder limb. */
+ cnd = nh != 0; /* FIXME: cmp-to-int */
+#if OPERATION_sbpi1_div_qr_sec
+ qlp[0] += cnd;
+#endif
+ nh -= mpn_subcnd_n (np, np, dp, dn, cnd);
+
+ /* 2nd adjustment depends on remainder/divisor comparison as well as whether
+ extra remainder limb was nullified by previous subtract. */
+ cy = mpn_sub_n (np, np, dp, dn);
+ cy = cy - nh;
+#if OPERATION_sbpi1_div_qr_sec
+ qlp[0] += 1 - cy;
+#endif
+ mpn_addcnd_n (np, np, dp, dn, cy);
+
+ /* 3rd adjustment depends on remainder/divisor comparison. */
+ cy = mpn_sub_n (np, np, dp, dn);
+#if OPERATION_sbpi1_div_qr_sec
+ qlp[0] += 1 - cy;
+#endif
+ mpn_addcnd_n (np, np, dp, dn, cy);
+
+#if OPERATION_sbpi1_div_qr_sec
+ /* Combine quotient halves into final quotient. */
+ qh = mpn_lshift (qhp, qhp, nn - dn, GMP_NUMB_BITS/2);
+ qh += mpn_add_n (qp, qhp, qlp, nn - dn);
+
+ return qh;
+#else
+ return;
+#endif
+}
GNU MP RELEASE.
Copyright 1991, 1992, 1993, 1994, 1996, 2000, 2001, 2002, 2004, 2006, 2007,
-2008 Free Software Foundation, Inc.
+2008, 2012, 2013 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
long i, pi;
mp_size_t n;
mp_ptr p, t;
- unsigned normalization_steps;
- mp_limb_t big_base, big_base_inverted;
+ mp_limb_t big_base;
int chars_per_limb;
size_t digits_in_base;
mp_size_t shift;
chars_per_limb = mp_bases[base].chars_per_limb;
big_base = mp_bases[base].big_base;
- big_base_inverted = mp_bases[base].big_base_inverted;
- count_leading_zeros (normalization_steps, big_base);
p = powtab_mem_ptr;
powtab_mem_ptr += 1;
if (hn == 0)
{
- MPN_ZERO (rp, powtab->n + sn);
+ /* Zero +1 limb here, to avoid reading an allocated but uninitialised
+ limb in mpn_incr_u below. */
+ MPN_ZERO (rp, powtab->n + sn + 1);
}
else
{
CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
FUTURE GNU MP RELEASES.
-Copyright 1991, 1993, 1994, 1995, 2001, 2002 Free Software Foundation, Inc.
+Copyright 1991, 1993, 1994, 1995, 2001, 2002, 2011, 2012 Free Software
+Foundation, Inc.
This file is part of the GNU MP Library.
size_t
mpn_sizeinbase (mp_srcptr xp, mp_size_t xsize, int base)
{
- int lb_base, cnt;
- mp_size_t totbits;
-
- ASSERT (xsize >= 0);
- ASSERT (base >= 2);
- ASSERT (base < numberof (mp_bases));
-
- /* Special case for X == 0. */
- if (xsize == 0)
- return 1;
-
- /* Calculate the total number of significant bits of X. */
- count_leading_zeros (cnt, xp[xsize-1]);
- totbits = xsize * GMP_LIMB_BITS - cnt;
-
- if (POW2_P (base))
- {
- /* Special case for powers of 2, giving exact result. */
- lb_base = mp_bases[base].big_base;
- return (totbits + lb_base - 1) / lb_base;
- }
- else
- return (size_t) (totbits * mp_bases[base].chars_per_bit_exactly) + 1;
+ size_t result;
+ MPN_SIZEINBASE (result, xp, xsize, base);
+ return result;
}
Copyright 1991, 1992, 1993, 1994, 1996, 1997, 2000, 2001, 2002, 2003, 2004,
-2005, 2008 Free Software Foundation, Inc.
+2005, 2008, 2010, 2011 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
} while (0)
#endif
+#if HAVE_NATIVE_mpn_sqr_diag_addlsh1
+#define MPN_SQR_DIAG_ADDLSH1(rp, tp, up, n) \
+ mpn_sqr_diag_addlsh1 (rp, tp, up, n)
+#else
+#if HAVE_NATIVE_mpn_addlsh1_n
+#define MPN_SQR_DIAG_ADDLSH1(rp, tp, up, n) \
+ do { \
+ mp_limb_t cy; \
+ MPN_SQR_DIAGONAL (rp, up, n); \
+ cy = mpn_addlsh1_n (rp + 1, rp + 1, tp, 2 * n - 2); \
+ rp[2 * n - 1] += cy; \
+ } while (0)
+#else
+#define MPN_SQR_DIAG_ADDLSH1(rp, tp, up, n) \
+ do { \
+ mp_limb_t cy; \
+ MPN_SQR_DIAGONAL (rp, up, n); \
+ cy = mpn_lshift (tp, tp, 2 * n - 2, 1); \
+ cy += mpn_add_n (rp + 1, rp + 1, tp, 2 * n - 2); \
+ rp[2 * n - 1] += cy; \
+ } while (0)
+#endif
+#endif
+
#undef READY_WITH_mpn_sqr_basecase
{
if (n == 2)
{
+#if HAVE_NATIVE_mpn_mul_2
+ rp[3] = mpn_mul_2 (rp, up, 2, up);
+#else
rp[0] = 0;
rp[1] = 0;
rp[3] = mpn_addmul_2 (rp, up, 2, up);
+#endif
return;
}
tp[2 * n - 3] = cy;
}
- MPN_SQR_DIAGONAL (rp, up, n);
-
-#if HAVE_NATIVE_mpn_addlsh1_n
- cy = mpn_addlsh1_n (rp + 1, rp + 1, tp, 2 * n - 2);
-#else
- cy = mpn_lshift (tp, tp, 2 * n - 2, 1);
- cy += mpn_add_n (rp + 1, rp + 1, tp, 2 * n - 2);
-#endif
- rp[2 * n - 1] += cy;
+ MPN_SQR_DIAG_ADDLSH1 (rp, tp, up, n);
}
#define READY_WITH_mpn_sqr_basecase
#endif
if (n == 2)
{
+#if HAVE_NATIVE_mpn_mul_2
+ rp[3] = mpn_mul_2 (rp, up, 2, up);
+#else
rp[0] = 0;
rp[1] = 0;
rp[3] = mpn_addmul_2 (rp, up, 2, up);
+#endif
return;
}
cy = mpn_addmul_1 (tp + 2 * i - 2, up + i, n - i, up[i - 1]);
tp[n + i - 2] = cy;
}
- MPN_SQR_DIAGONAL (rp + 2, up + 1, n - 1);
- {
- mp_limb_t cy;
-#if HAVE_NATIVE_mpn_addlsh1_n
- cy = mpn_addlsh1_n (rp + 1, rp + 1, tp, 2 * n - 2);
-#else
- cy = mpn_lshift (tp, tp, 2 * n - 2, 1);
- cy += mpn_add_n (rp + 1, rp + 1, tp, 2 * n - 2);
-#endif
- rp[2 * n - 1] += cy;
- }
+ MPN_SQR_DIAG_ADDLSH1 (rp, tp, up, n);
}
}
#endif
SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-Copyright 2009, 2010 Free Software Foundation, Inc.
+Copyright 2009, 2010, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
INTERFACES. IN FACT, IT IS ALMOST GUARANTEED THAT THEY WILL CHANGE OR
DISAPPEAR IN A FUTURE GMP RELEASE.
-Copyright 1999, 2000, 2001, 2002, 2004, 2005, 2008, 2010 Free Software
+Copyright 1999, 2000, 2001, 2002, 2004, 2005, 2008, 2010, 2012 Free Software
Foundation, Inc.
This file is part of the GNU MP Library.
#include "gmp-impl.h"
#include "longlong.h"
-static const unsigned short invsqrttab[384] =
+static const unsigned char invsqrttab[384] = /* The common 0x100 was removed */
{
- 0x1ff,0x1fd,0x1fb,0x1f9,0x1f7,0x1f5,0x1f3,0x1f2, /* sqrt(1/80)..sqrt(1/87) */
- 0x1f0,0x1ee,0x1ec,0x1ea,0x1e9,0x1e7,0x1e5,0x1e4, /* sqrt(1/88)..sqrt(1/8f) */
- 0x1e2,0x1e0,0x1df,0x1dd,0x1db,0x1da,0x1d8,0x1d7, /* sqrt(1/90)..sqrt(1/97) */
- 0x1d5,0x1d4,0x1d2,0x1d1,0x1cf,0x1ce,0x1cc,0x1cb, /* sqrt(1/98)..sqrt(1/9f) */
- 0x1c9,0x1c8,0x1c6,0x1c5,0x1c4,0x1c2,0x1c1,0x1c0, /* sqrt(1/a0)..sqrt(1/a7) */
- 0x1be,0x1bd,0x1bc,0x1ba,0x1b9,0x1b8,0x1b7,0x1b5, /* sqrt(1/a8)..sqrt(1/af) */
- 0x1b4,0x1b3,0x1b2,0x1b0,0x1af,0x1ae,0x1ad,0x1ac, /* sqrt(1/b0)..sqrt(1/b7) */
- 0x1aa,0x1a9,0x1a8,0x1a7,0x1a6,0x1a5,0x1a4,0x1a3, /* sqrt(1/b8)..sqrt(1/bf) */
- 0x1a2,0x1a0,0x19f,0x19e,0x19d,0x19c,0x19b,0x19a, /* sqrt(1/c0)..sqrt(1/c7) */
- 0x199,0x198,0x197,0x196,0x195,0x194,0x193,0x192, /* sqrt(1/c8)..sqrt(1/cf) */
- 0x191,0x190,0x18f,0x18e,0x18d,0x18c,0x18c,0x18b, /* sqrt(1/d0)..sqrt(1/d7) */
- 0x18a,0x189,0x188,0x187,0x186,0x185,0x184,0x183, /* sqrt(1/d8)..sqrt(1/df) */
- 0x183,0x182,0x181,0x180,0x17f,0x17e,0x17e,0x17d, /* sqrt(1/e0)..sqrt(1/e7) */
- 0x17c,0x17b,0x17a,0x179,0x179,0x178,0x177,0x176, /* sqrt(1/e8)..sqrt(1/ef) */
- 0x176,0x175,0x174,0x173,0x172,0x172,0x171,0x170, /* sqrt(1/f0)..sqrt(1/f7) */
- 0x16f,0x16f,0x16e,0x16d,0x16d,0x16c,0x16b,0x16a, /* sqrt(1/f8)..sqrt(1/ff) */
- 0x16a,0x169,0x168,0x168,0x167,0x166,0x166,0x165, /* sqrt(1/100)..sqrt(1/107) */
- 0x164,0x164,0x163,0x162,0x162,0x161,0x160,0x160, /* sqrt(1/108)..sqrt(1/10f) */
- 0x15f,0x15e,0x15e,0x15d,0x15c,0x15c,0x15b,0x15a, /* sqrt(1/110)..sqrt(1/117) */
- 0x15a,0x159,0x159,0x158,0x157,0x157,0x156,0x156, /* sqrt(1/118)..sqrt(1/11f) */
- 0x155,0x154,0x154,0x153,0x153,0x152,0x152,0x151, /* sqrt(1/120)..sqrt(1/127) */
- 0x150,0x150,0x14f,0x14f,0x14e,0x14e,0x14d,0x14d, /* sqrt(1/128)..sqrt(1/12f) */
- 0x14c,0x14b,0x14b,0x14a,0x14a,0x149,0x149,0x148, /* sqrt(1/130)..sqrt(1/137) */
- 0x148,0x147,0x147,0x146,0x146,0x145,0x145,0x144, /* sqrt(1/138)..sqrt(1/13f) */
- 0x144,0x143,0x143,0x142,0x142,0x141,0x141,0x140, /* sqrt(1/140)..sqrt(1/147) */
- 0x140,0x13f,0x13f,0x13e,0x13e,0x13d,0x13d,0x13c, /* sqrt(1/148)..sqrt(1/14f) */
- 0x13c,0x13b,0x13b,0x13a,0x13a,0x139,0x139,0x139, /* sqrt(1/150)..sqrt(1/157) */
- 0x138,0x138,0x137,0x137,0x136,0x136,0x135,0x135, /* sqrt(1/158)..sqrt(1/15f) */
- 0x135,0x134,0x134,0x133,0x133,0x132,0x132,0x132, /* sqrt(1/160)..sqrt(1/167) */
- 0x131,0x131,0x130,0x130,0x12f,0x12f,0x12f,0x12e, /* sqrt(1/168)..sqrt(1/16f) */
- 0x12e,0x12d,0x12d,0x12d,0x12c,0x12c,0x12b,0x12b, /* sqrt(1/170)..sqrt(1/177) */
- 0x12b,0x12a,0x12a,0x129,0x129,0x129,0x128,0x128, /* sqrt(1/178)..sqrt(1/17f) */
- 0x127,0x127,0x127,0x126,0x126,0x126,0x125,0x125, /* sqrt(1/180)..sqrt(1/187) */
- 0x124,0x124,0x124,0x123,0x123,0x123,0x122,0x122, /* sqrt(1/188)..sqrt(1/18f) */
- 0x121,0x121,0x121,0x120,0x120,0x120,0x11f,0x11f, /* sqrt(1/190)..sqrt(1/197) */
- 0x11f,0x11e,0x11e,0x11e,0x11d,0x11d,0x11d,0x11c, /* sqrt(1/198)..sqrt(1/19f) */
- 0x11c,0x11b,0x11b,0x11b,0x11a,0x11a,0x11a,0x119, /* sqrt(1/1a0)..sqrt(1/1a7) */
- 0x119,0x119,0x118,0x118,0x118,0x118,0x117,0x117, /* sqrt(1/1a8)..sqrt(1/1af) */
- 0x117,0x116,0x116,0x116,0x115,0x115,0x115,0x114, /* sqrt(1/1b0)..sqrt(1/1b7) */
- 0x114,0x114,0x113,0x113,0x113,0x112,0x112,0x112, /* sqrt(1/1b8)..sqrt(1/1bf) */
- 0x112,0x111,0x111,0x111,0x110,0x110,0x110,0x10f, /* sqrt(1/1c0)..sqrt(1/1c7) */
- 0x10f,0x10f,0x10f,0x10e,0x10e,0x10e,0x10d,0x10d, /* sqrt(1/1c8)..sqrt(1/1cf) */
- 0x10d,0x10c,0x10c,0x10c,0x10c,0x10b,0x10b,0x10b, /* sqrt(1/1d0)..sqrt(1/1d7) */
- 0x10a,0x10a,0x10a,0x10a,0x109,0x109,0x109,0x109, /* sqrt(1/1d8)..sqrt(1/1df) */
- 0x108,0x108,0x108,0x107,0x107,0x107,0x107,0x106, /* sqrt(1/1e0)..sqrt(1/1e7) */
- 0x106,0x106,0x106,0x105,0x105,0x105,0x104,0x104, /* sqrt(1/1e8)..sqrt(1/1ef) */
- 0x104,0x104,0x103,0x103,0x103,0x103,0x102,0x102, /* sqrt(1/1f0)..sqrt(1/1f7) */
- 0x102,0x102,0x101,0x101,0x101,0x101,0x100,0x100 /* sqrt(1/1f8)..sqrt(1/1ff) */
+ 0xff,0xfd,0xfb,0xf9,0xf7,0xf5,0xf3,0xf2, /* sqrt(1/80)..sqrt(1/87) */
+ 0xf0,0xee,0xec,0xea,0xe9,0xe7,0xe5,0xe4, /* sqrt(1/88)..sqrt(1/8f) */
+ 0xe2,0xe0,0xdf,0xdd,0xdb,0xda,0xd8,0xd7, /* sqrt(1/90)..sqrt(1/97) */
+ 0xd5,0xd4,0xd2,0xd1,0xcf,0xce,0xcc,0xcb, /* sqrt(1/98)..sqrt(1/9f) */
+ 0xc9,0xc8,0xc6,0xc5,0xc4,0xc2,0xc1,0xc0, /* sqrt(1/a0)..sqrt(1/a7) */
+ 0xbe,0xbd,0xbc,0xba,0xb9,0xb8,0xb7,0xb5, /* sqrt(1/a8)..sqrt(1/af) */
+ 0xb4,0xb3,0xb2,0xb0,0xaf,0xae,0xad,0xac, /* sqrt(1/b0)..sqrt(1/b7) */
+ 0xaa,0xa9,0xa8,0xa7,0xa6,0xa5,0xa4,0xa3, /* sqrt(1/b8)..sqrt(1/bf) */
+ 0xa2,0xa0,0x9f,0x9e,0x9d,0x9c,0x9b,0x9a, /* sqrt(1/c0)..sqrt(1/c7) */
+ 0x99,0x98,0x97,0x96,0x95,0x94,0x93,0x92, /* sqrt(1/c8)..sqrt(1/cf) */
+ 0x91,0x90,0x8f,0x8e,0x8d,0x8c,0x8c,0x8b, /* sqrt(1/d0)..sqrt(1/d7) */
+ 0x8a,0x89,0x88,0x87,0x86,0x85,0x84,0x83, /* sqrt(1/d8)..sqrt(1/df) */
+ 0x83,0x82,0x81,0x80,0x7f,0x7e,0x7e,0x7d, /* sqrt(1/e0)..sqrt(1/e7) */
+ 0x7c,0x7b,0x7a,0x79,0x79,0x78,0x77,0x76, /* sqrt(1/e8)..sqrt(1/ef) */
+ 0x76,0x75,0x74,0x73,0x72,0x72,0x71,0x70, /* sqrt(1/f0)..sqrt(1/f7) */
+ 0x6f,0x6f,0x6e,0x6d,0x6d,0x6c,0x6b,0x6a, /* sqrt(1/f8)..sqrt(1/ff) */
+ 0x6a,0x69,0x68,0x68,0x67,0x66,0x66,0x65, /* sqrt(1/100)..sqrt(1/107) */
+ 0x64,0x64,0x63,0x62,0x62,0x61,0x60,0x60, /* sqrt(1/108)..sqrt(1/10f) */
+ 0x5f,0x5e,0x5e,0x5d,0x5c,0x5c,0x5b,0x5a, /* sqrt(1/110)..sqrt(1/117) */
+ 0x5a,0x59,0x59,0x58,0x57,0x57,0x56,0x56, /* sqrt(1/118)..sqrt(1/11f) */
+ 0x55,0x54,0x54,0x53,0x53,0x52,0x52,0x51, /* sqrt(1/120)..sqrt(1/127) */
+ 0x50,0x50,0x4f,0x4f,0x4e,0x4e,0x4d,0x4d, /* sqrt(1/128)..sqrt(1/12f) */
+ 0x4c,0x4b,0x4b,0x4a,0x4a,0x49,0x49,0x48, /* sqrt(1/130)..sqrt(1/137) */
+ 0x48,0x47,0x47,0x46,0x46,0x45,0x45,0x44, /* sqrt(1/138)..sqrt(1/13f) */
+ 0x44,0x43,0x43,0x42,0x42,0x41,0x41,0x40, /* sqrt(1/140)..sqrt(1/147) */
+ 0x40,0x3f,0x3f,0x3e,0x3e,0x3d,0x3d,0x3c, /* sqrt(1/148)..sqrt(1/14f) */
+ 0x3c,0x3b,0x3b,0x3a,0x3a,0x39,0x39,0x39, /* sqrt(1/150)..sqrt(1/157) */
+ 0x38,0x38,0x37,0x37,0x36,0x36,0x35,0x35, /* sqrt(1/158)..sqrt(1/15f) */
+ 0x35,0x34,0x34,0x33,0x33,0x32,0x32,0x32, /* sqrt(1/160)..sqrt(1/167) */
+ 0x31,0x31,0x30,0x30,0x2f,0x2f,0x2f,0x2e, /* sqrt(1/168)..sqrt(1/16f) */
+ 0x2e,0x2d,0x2d,0x2d,0x2c,0x2c,0x2b,0x2b, /* sqrt(1/170)..sqrt(1/177) */
+ 0x2b,0x2a,0x2a,0x29,0x29,0x29,0x28,0x28, /* sqrt(1/178)..sqrt(1/17f) */
+ 0x27,0x27,0x27,0x26,0x26,0x26,0x25,0x25, /* sqrt(1/180)..sqrt(1/187) */
+ 0x24,0x24,0x24,0x23,0x23,0x23,0x22,0x22, /* sqrt(1/188)..sqrt(1/18f) */
+ 0x21,0x21,0x21,0x20,0x20,0x20,0x1f,0x1f, /* sqrt(1/190)..sqrt(1/197) */
+ 0x1f,0x1e,0x1e,0x1e,0x1d,0x1d,0x1d,0x1c, /* sqrt(1/198)..sqrt(1/19f) */
+ 0x1c,0x1b,0x1b,0x1b,0x1a,0x1a,0x1a,0x19, /* sqrt(1/1a0)..sqrt(1/1a7) */
+ 0x19,0x19,0x18,0x18,0x18,0x18,0x17,0x17, /* sqrt(1/1a8)..sqrt(1/1af) */
+ 0x17,0x16,0x16,0x16,0x15,0x15,0x15,0x14, /* sqrt(1/1b0)..sqrt(1/1b7) */
+ 0x14,0x14,0x13,0x13,0x13,0x12,0x12,0x12, /* sqrt(1/1b8)..sqrt(1/1bf) */
+ 0x12,0x11,0x11,0x11,0x10,0x10,0x10,0x0f, /* sqrt(1/1c0)..sqrt(1/1c7) */
+ 0x0f,0x0f,0x0f,0x0e,0x0e,0x0e,0x0d,0x0d, /* sqrt(1/1c8)..sqrt(1/1cf) */
+ 0x0d,0x0c,0x0c,0x0c,0x0c,0x0b,0x0b,0x0b, /* sqrt(1/1d0)..sqrt(1/1d7) */
+ 0x0a,0x0a,0x0a,0x0a,0x09,0x09,0x09,0x09, /* sqrt(1/1d8)..sqrt(1/1df) */
+ 0x08,0x08,0x08,0x07,0x07,0x07,0x07,0x06, /* sqrt(1/1e0)..sqrt(1/1e7) */
+ 0x06,0x06,0x06,0x05,0x05,0x05,0x04,0x04, /* sqrt(1/1e8)..sqrt(1/1ef) */
+ 0x04,0x04,0x03,0x03,0x03,0x03,0x02,0x02, /* sqrt(1/1f0)..sqrt(1/1f7) */
+ 0x02,0x02,0x01,0x01,0x01,0x01,0x00,0x00 /* sqrt(1/1f8)..sqrt(1/1ff) */
};
/* Compute s = floor(sqrt(a0)), and *rp = a0 - s^2. */
iteration convert from 1/sqrt(a) to sqrt(a). */
abits = a0 >> (GMP_LIMB_BITS - 1 - 8); /* extract bits for table lookup */
- x0 = invsqrttab[abits - 0x80]; /* initial 1/sqrt(a) */
+ x0 = 0x100 | invsqrttab[abits - 0x80]; /* initial 1/sqrt(a) */
/* x0 is now an 8 bits approximation of 1/sqrt(a0) */
t = (mp_limb_signed_t) (CNST_LIMB(0x2000000000000) - 0x30000 - a1 * x0 * x0) >> 16;
x0 = (x0 << 16) + ((mp_limb_signed_t) (x0 * t) >> (16+2));
- /* x0 is now an 16 bits approximation of 1/sqrt(a0) */
+ /* x0 is now a 16 bits approximation of 1/sqrt(a0) */
t2 = x0 * (a0 >> (32-8));
t = t2 >> 25;
if (c < 0)
{
+#if HAVE_NATIVE_mpn_addlsh1_n
+ c += mpn_addlsh1_n (np, np, sp, n) + 2 * q;
+#else
c += mpn_addmul_1 (np, sp, n, CNST_LIMB(2)) + 2 * q;
+#endif
c -= mpn_sub_1 (np, np, n, CNST_LIMB(1));
q -= mpn_sub_1 (sp, sp, n, CNST_LIMB(1));
}
--- /dev/null
+/* mpn_sub_err1_n -- sub_n with one error term
+
+ Contributed by David Harvey.
+
+ THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
+ SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/*
+ Computes:
+
+ (1) {rp,n} := {up,n} - {vp,n} (just like mpn_sub_n) with incoming borrow cy,
+ return value is borrow out.
+
+ (2) Let c[i+1] = borrow from i-th limb subtraction (c[0] = cy).
+ Computes c[1]*yp[n-1] + ... + c[n]*yp[0], stores two-limb result at ep.
+
+ Requires n >= 1.
+
+ None of the outputs may overlap each other or any of the inputs, except
+ that {rp,n} may be equal to {up,n} or {vp,n}.
+
+ Implementation notes: {yp,n} is walked from its top limb downwards while
+ {up,n}, {vp,n} and {rp,n} are walked upwards, so the borrow out of limb i
+ is paired with yp[n-1-i]. The pairing is done with a mask, (-cy) & yl,
+ rather than a branch; this presumes each borrow is exactly 0 or 1. The
+ low limb is stored at ep[0] and the high limb at ep[1]. On nail builds
+ the bits that the low limb accumulated above GMP_NUMB_BITS are moved into
+ the high limb just before the result is stored.
+*/
+mp_limb_t
+mpn_sub_err1_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
+ mp_ptr ep, mp_srcptr yp,
+ mp_size_t n, mp_limb_t cy)
+{
+ mp_limb_t el, eh, ul, vl, yl, zl, rl, sl, cy1, cy2;
+
+ ASSERT (n >= 1);
+ ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
+ ASSERT (MPN_SAME_OR_SEPARATE_P (rp, vp, n));
+ ASSERT (! MPN_OVERLAP_P (rp, n, yp, n));
+ ASSERT (! MPN_OVERLAP_P (ep, 2, up, n));
+ ASSERT (! MPN_OVERLAP_P (ep, 2, vp, n));
+ ASSERT (! MPN_OVERLAP_P (ep, 2, yp, n));
+ ASSERT (! MPN_OVERLAP_P (ep, 2, rp, n));
+
+ yp += n - 1; /* start at the top limb of {yp,n} */
+ el = eh = 0; /* (eh:el) is the running error sum */
+
+ do
+ {
+ yl = *yp--;
+ ul = *up++;
+ vl = *vp++;
+
+ /* ordinary sub_n: subtract vl, then the incoming borrow; the borrow out
+ is the OR of the borrows of the two steps */
+ SUBC_LIMB (cy1, sl, ul, vl);
+ SUBC_LIMB (cy2, rl, sl, cy);
+ cy = cy1 | cy2;
+ *rp++ = rl;
+
+ /* update (eh:el) */
+ zl = (-cy) & yl; /* yl if this limb borrowed, else 0 */
+ el += zl;
+ eh += el < zl; /* wraparound of el carries into eh */
+ }
+ while (--n);
+
+#if GMP_NAIL_BITS != 0
+ eh = (eh << GMP_NAIL_BITS) + (el >> GMP_NUMB_BITS); /* move el's excess bits into eh */
+ el &= GMP_NUMB_MASK;
+#endif
+
+ ep[0] = el;
+ ep[1] = eh;
+
+ return cy;
+}
--- /dev/null
+/* mpn_sub_err2_n -- sub_n with two error terms
+
+ Contributed by David Harvey.
+
+ THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
+ SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/*
+ Computes:
+
+ (1) {rp,n} := {up,n} - {vp,n} (just like mpn_sub_n) with incoming borrow cy,
+ return value is borrow out.
+
+ (2) Let c[i+1] = borrow from i-th limb subtraction (c[0] = cy).
+ Computes c[1]*yp1[n-1] + ... + c[n]*yp1[0],
+ c[1]*yp2[n-1] + ... + c[n]*yp2[0],
+ stores two-limb results at {ep,2} and {ep+2,2} respectively.
+
+ Requires n >= 1.
+
+ None of the outputs may overlap each other or any of the inputs, except
+ that {rp,n} may be equal to {up,n} or {vp,n}.
+
+ Implementation notes: {yp1,n} and {yp2,n} are walked from their top limbs
+ downwards while {up,n}, {vp,n} and {rp,n} are walked upwards, so the
+ borrow out of limb i is paired with yp1[n-1-i] and yp2[n-1-i]. The
+ pairing is done with a mask, (-cy) & yl, rather than a branch; this
+ presumes each borrow is exactly 0 or 1. Each result is stored low limb
+ first. On nail builds the bits that a low limb accumulated above
+ GMP_NUMB_BITS are moved into its high limb just before storing.
+*/
+mp_limb_t
+mpn_sub_err2_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
+ mp_ptr ep, mp_srcptr yp1, mp_srcptr yp2,
+ mp_size_t n, mp_limb_t cy)
+{
+ mp_limb_t el1, eh1, el2, eh2, ul, vl, yl1, yl2, zl1, zl2, rl, sl, cy1, cy2;
+
+ ASSERT (n >= 1);
+ ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
+ ASSERT (MPN_SAME_OR_SEPARATE_P (rp, vp, n));
+ ASSERT (! MPN_OVERLAP_P (rp, n, yp1, n));
+ ASSERT (! MPN_OVERLAP_P (rp, n, yp2, n));
+ ASSERT (! MPN_OVERLAP_P (ep, 4, up, n));
+ ASSERT (! MPN_OVERLAP_P (ep, 4, vp, n));
+ ASSERT (! MPN_OVERLAP_P (ep, 4, yp1, n));
+ ASSERT (! MPN_OVERLAP_P (ep, 4, yp2, n));
+ ASSERT (! MPN_OVERLAP_P (ep, 4, rp, n));
+
+ yp1 += n - 1; /* start at the top limb of {yp1,n} */
+ yp2 += n - 1; /* start at the top limb of {yp2,n} */
+ el1 = eh1 = 0; /* (eh1:el1) is the running sum for yp1 */
+ el2 = eh2 = 0; /* (eh2:el2) is the running sum for yp2 */
+
+ do
+ {
+ yl1 = *yp1--;
+ yl2 = *yp2--;
+ ul = *up++;
+ vl = *vp++;
+
+ /* ordinary sub_n: subtract vl, then the incoming borrow; the borrow out
+ is the OR of the borrows of the two steps */
+ SUBC_LIMB (cy1, sl, ul, vl);
+ SUBC_LIMB (cy2, rl, sl, cy);
+ cy = cy1 | cy2;
+ *rp++ = rl;
+
+ /* update (eh1:el1) */
+ zl1 = (-cy) & yl1; /* yl1 if this limb borrowed, else 0 */
+ el1 += zl1;
+ eh1 += el1 < zl1; /* wraparound of el1 carries into eh1 */
+
+ /* update (eh2:el2) */
+ zl2 = (-cy) & yl2; /* yl2 if this limb borrowed, else 0 */
+ el2 += zl2;
+ eh2 += el2 < zl2; /* wraparound of el2 carries into eh2 */
+ }
+ while (--n);
+
+#if GMP_NAIL_BITS != 0
+ eh1 = (eh1 << GMP_NAIL_BITS) + (el1 >> GMP_NUMB_BITS); /* move el1's excess bits into eh1 */
+ el1 &= GMP_NUMB_MASK;
+ eh2 = (eh2 << GMP_NAIL_BITS) + (el2 >> GMP_NUMB_BITS); /* move el2's excess bits into eh2 */
+ el2 &= GMP_NUMB_MASK;
+#endif
+
+ ep[0] = el1;
+ ep[1] = eh1;
+ ep[2] = el2;
+ ep[3] = eh2;
+
+ return cy;
+}
--- /dev/null
+/* mpn_sub_err3_n -- sub_n with three error terms
+
+ Contributed by David Harvey.
+
+ THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
+ SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/*
+ Computes:
+
+ (1) {rp,n} := {up,n} - {vp,n} (just like mpn_sub_n) with incoming borrow cy,
+ return value is borrow out.
+
+ (2) Let c[i+1] = borrow from i-th limb subtraction (c[0] = cy).
+ Computes c[1]*yp1[n-1] + ... + c[n]*yp1[0],
+ c[1]*yp2[n-1] + ... + c[n]*yp2[0],
+ c[1]*yp3[n-1] + ... + c[n]*yp3[0],
+ stores two-limb results at {ep,2}, {ep+2,2} and {ep+4,2} respectively.
+
+ Requires n >= 1.
+
+ None of the outputs may overlap each other or any of the inputs, except
+ that {rp,n} may be equal to {up,n} or {vp,n}.
+
+ Implementation notes: {yp1,n}, {yp2,n} and {yp3,n} are walked from their
+ top limbs downwards while {up,n}, {vp,n} and {rp,n} are walked upwards, so
+ the borrow out of limb i is paired with limb n-1-i of each y operand. The
+ pairing is done with a mask, (-cy) & yl, rather than a branch; this
+ presumes each borrow is exactly 0 or 1. Each result is stored low limb
+ first. On nail builds the bits that a low limb accumulated above
+ GMP_NUMB_BITS are moved into its high limb just before storing.
+*/
+mp_limb_t
+mpn_sub_err3_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
+ mp_ptr ep, mp_srcptr yp1, mp_srcptr yp2, mp_srcptr yp3,
+ mp_size_t n, mp_limb_t cy)
+{
+ mp_limb_t el1, eh1, el2, eh2, el3, eh3, ul, vl, yl1, yl2, yl3, zl1, zl2, zl3, rl, sl, cy1, cy2;
+
+ ASSERT (n >= 1);
+ ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
+ ASSERT (MPN_SAME_OR_SEPARATE_P (rp, vp, n));
+ ASSERT (! MPN_OVERLAP_P (rp, n, yp1, n));
+ ASSERT (! MPN_OVERLAP_P (rp, n, yp2, n));
+ ASSERT (! MPN_OVERLAP_P (rp, n, yp3, n));
+ ASSERT (! MPN_OVERLAP_P (ep, 6, up, n));
+ ASSERT (! MPN_OVERLAP_P (ep, 6, vp, n));
+ ASSERT (! MPN_OVERLAP_P (ep, 6, yp1, n));
+ ASSERT (! MPN_OVERLAP_P (ep, 6, yp2, n));
+ ASSERT (! MPN_OVERLAP_P (ep, 6, yp3, n));
+ ASSERT (! MPN_OVERLAP_P (ep, 6, rp, n));
+
+ yp1 += n - 1; /* start at the top limb of {yp1,n} */
+ yp2 += n - 1; /* start at the top limb of {yp2,n} */
+ yp3 += n - 1; /* start at the top limb of {yp3,n} */
+ el1 = eh1 = 0; /* (eh1:el1) is the running sum for yp1 */
+ el2 = eh2 = 0; /* (eh2:el2) is the running sum for yp2 */
+ el3 = eh3 = 0; /* (eh3:el3) is the running sum for yp3 */
+
+ do
+ {
+ yl1 = *yp1--;
+ yl2 = *yp2--;
+ yl3 = *yp3--;
+ ul = *up++;
+ vl = *vp++;
+
+ /* ordinary sub_n: subtract vl, then the incoming borrow; the borrow out
+ is the OR of the borrows of the two steps */
+ SUBC_LIMB (cy1, sl, ul, vl);
+ SUBC_LIMB (cy2, rl, sl, cy);
+ cy = cy1 | cy2;
+ *rp++ = rl;
+
+ /* update (eh1:el1) */
+ zl1 = (-cy) & yl1; /* yl1 if this limb borrowed, else 0 */
+ el1 += zl1;
+ eh1 += el1 < zl1; /* wraparound of el1 carries into eh1 */
+
+ /* update (eh2:el2) */
+ zl2 = (-cy) & yl2; /* yl2 if this limb borrowed, else 0 */
+ el2 += zl2;
+ eh2 += el2 < zl2; /* wraparound of el2 carries into eh2 */
+
+ /* update (eh3:el3) */
+ zl3 = (-cy) & yl3; /* yl3 if this limb borrowed, else 0 */
+ el3 += zl3;
+ eh3 += el3 < zl3; /* wraparound of el3 carries into eh3 */
+ }
+ while (--n);
+
+#if GMP_NAIL_BITS != 0
+ eh1 = (eh1 << GMP_NAIL_BITS) + (el1 >> GMP_NUMB_BITS); /* move el1's excess bits into eh1 */
+ el1 &= GMP_NUMB_MASK;
+ eh2 = (eh2 << GMP_NAIL_BITS) + (el2 >> GMP_NUMB_BITS); /* move el2's excess bits into eh2 */
+ el2 &= GMP_NUMB_MASK;
+ eh3 = (eh3 << GMP_NAIL_BITS) + (el3 >> GMP_NUMB_BITS); /* move el3's excess bits into eh3 */
+ el3 &= GMP_NUMB_MASK;
+#endif
+
+ ep[0] = el1;
+ ep[1] = eh1;
+ ep[2] = el2;
+ ep[3] = eh2;
+ ep[4] = el3;
+ ep[5] = eh3;
+
+ return cy;
+}
/* mpn_subcnd_n -- Compute R = U - V if CND != 0 or R = U if CND == 0.
+ Both cases should take the same time and perform the exact same memory
+ accesses, since this function is intended to be used where side-channel
+ attack resilience is relevant.
THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE. IT IS ONLY
SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
-Copyright 1992, 1993, 1994, 1996, 2000, 2002, 2008, 2009 Free Software
+Copyright 1992, 1993, 1994, 1996, 2000, 2002, 2008, 2009, 2011 Free Software
Foundation, Inc.
This file is part of the GNU MP Library.
#include "gmp.h"
#include "gmp-impl.h"
-
-#if GMP_NAIL_BITS == 0
-
mp_limb_t
mpn_subcnd_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t cnd)
{
{
ul = *up++;
vl = *vp++ & mask;
+#if GMP_NAIL_BITS == 0
sl = ul - vl;
cy1 = sl > ul;
rl = sl - cy;
cy2 = rl > sl;
cy = cy1 | cy2;
*rp++ = rl;
- }
- while (--n != 0);
-
- return cy;
-}
-
-#endif
-
-#if GMP_NAIL_BITS >= 1
-
-mp_limb_t
-mpn_subcnd_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t cnd)
-{
- mp_limb_t ul, vl, rl, cy, mask;
-
- ASSERT (n >= 1);
- ASSERT (MPN_SAME_OR_SEPARATE_P (rp, up, n));
- ASSERT (MPN_SAME_OR_SEPARATE_P (rp, vp, n));
-
- mask = -(mp_limb_t) (cnd != 0);
- cy = 0;
- do
- {
- ul = *up++;
- vl = *vp++ & mask;
- rl = ul - vl - cy;
+#else
+ rl = ul - vl;
+ rl -= cy;
cy = rl >> (GMP_LIMB_BITS - 1);
*rp++ = rl & GMP_NUMB_MASK;
+#endif
}
while (--n != 0);
return cy;
}
-
-#endif
--- /dev/null
+/* mpn_tabselect.
+
+ THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE. IT IS ONLY
+ SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
+
+Copyright 2007, 2008, 2009, 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* Select entry `which' from table `tab', which has nents entries, each `n'
+ limbs. Store the selected entry at rp. Reads entire table to avoid
+ side-channel information leaks. O(n*nents).
+ Every limb of rp is rewritten once per table entry, using an all-ones or
+ all-zeros mask instead of a branch on `which'. If `which' is outside
+ [0, nents) no entry matches and rp ends up holding the values it had on
+ entry.
+ NOTE(review): the pointers are volatile, presumably so that the compiler
+ keeps every load and store -- confirm.
+ FIXME: Move to its own file. (NOTE(review): this already looks like a
+ file dedicated to mpn_tabselect, so the FIXME may be stale -- confirm.) */
+void
+mpn_tabselect (volatile mp_limb_t *rp, volatile mp_limb_t *tab, mp_size_t n,
+ mp_size_t nents, mp_size_t which)
+{
+ mp_size_t k, i;
+ mp_limb_t mask;
+ volatile mp_limb_t *tp;
+
+ for (k = 0; k < nents; k++)
+ {
+ mask = -(mp_limb_t) (which == k); /* all ones for the wanted entry, else 0 */
+ tp = tab + n * k; /* start of entry k */
+ for (i = 0; i < n; i++)
+ {
+ rp[i] = (rp[i] & ~mask) | (tp[i] & mask); /* keep rp[i] unless this is the wanted entry */
+ }
+ }
+}
SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-Copyright 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
+Copyright 2006, 2007, 2008, 2009, 2010, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
vinf= a1 * b1 # A(inf)*B(inf)
*/
-#if TUNE_PROGRAM_BUILD
+#if TUNE_PROGRAM_BUILD || WANT_FAT_BINARY
#define MAYBE_mul_toom22 1
#else
#define MAYBE_mul_toom22 \
mp_srcptr bp, mp_size_t bn,
mp_ptr scratch)
{
+ const int __gmpn_cpuvec_initialized = 1;
mp_size_t n, s, t;
int vm1_neg;
mp_limb_t cy, cy2;
SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-Copyright 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
+Copyright 2006, 2007, 2008, 2009, 2010, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
vinf= a1 ^2 # A(inf)^2
*/
-#if TUNE_PROGRAM_BUILD
+#if TUNE_PROGRAM_BUILD || WANT_FAT_BINARY
#define MAYBE_sqr_toom2 1
#else
#define MAYBE_sqr_toom2 \
mp_srcptr ap, mp_size_t an,
mp_ptr scratch)
{
+ const int __gmpn_cpuvec_initialized = 1;
mp_size_t n, s;
mp_limb_t cy, cy2;
mp_ptr asm1;
mp_size_t n, s, t;
int vm1_neg;
mp_limb_t cy;
- int hi;
+ mp_limb_signed_t hi;
mp_limb_t ap1_hi, bp1_hi;
#define a0 ap
SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-Copyright 2006, 2007, 2008, 2010 Free Software Foundation, Inc.
+Copyright 2006, 2007, 2008, 2010, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
vinf= a2 * b2 # A(inf)*B(inf)
*/
-#if TUNE_PROGRAM_BUILD
+#if TUNE_PROGRAM_BUILD || WANT_FAT_BINARY
#define MAYBE_mul_basecase 1
#define MAYBE_mul_toom33 1
#else
mp_srcptr bp, mp_size_t bn,
mp_ptr scratch)
{
+ const int __gmpn_cpuvec_initialized = 1;
mp_size_t n, s, t;
int vm1_neg;
mp_limb_t cy, vinf0;
SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-Copyright 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
+Copyright 2006, 2007, 2008, 2009, 2010, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
vinf= a2 ^2 # A(inf)^2
*/
-#if TUNE_PROGRAM_BUILD
+#if TUNE_PROGRAM_BUILD || WANT_FAT_BINARY
#define MAYBE_sqr_basecase 1
#define MAYBE_sqr_toom3 1
#else
mp_srcptr ap, mp_size_t an,
mp_ptr scratch)
{
+ const int __gmpn_cpuvec_initialized = 1;
mp_size_t n, s;
mp_limb_t cy, vinf0;
mp_ptr gp;
SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-Copyright 2006, 2007, 2008 Free Software Foundation, Inc.
+Copyright 2006, 2007, 2008, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
mp_size_t n, s, t;
int vm1_neg;
mp_limb_t cy, vinf0;
- mp_ptr a0_a2, a1_a3;
+ mp_ptr a0_a2;
mp_ptr as1, asm1, as2;
mp_ptr bs1, bsm1, bs2;
TMP_DECL;
bs2 = TMP_SALLOC_LIMBS (n + 1);
a0_a2 = pp;
- a1_a3 = pp + n + 1;
/* Compute as1 and asm1. */
vm1_neg = mpn_toom_eval_dgr3_pm1 (as1, asm1, ap, n, s, a0_a2) & 1;
--- /dev/null
+/* mpn_toom42_mulmid -- toom42 middle product
+
+ Contributed by David Harvey.
+
+ THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
+ SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+
+/*
+ Middle product of {ap,2n-1} and {bp,n}, output written to {rp,n+2}.
+
+ Neither ap nor bp may overlap rp.
+
+ Must have n >= 4.
+
+ Amount of scratch space required is given by mpn_toom42_mulmid_itch().
+
+ FIXME: this code assumes that n is small compared to GMP_NUMB_MAX. The exact
+ requirements should be clarified.
+
+ Outline: with m = floor(n/2), three middle products of size m are formed
+ (by calling this function recursively when m >= MULMID_TOOM42_THRESHOLD,
+ by mpn_mulmid_basecase otherwise), the two-limb correction terms e0..e5
+ are applied, and for odd n the remaining row and diagonal are added last.
+*/
+void
+mpn_toom42_mulmid (mp_ptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n,
+ mp_ptr scratch)
+{
+ mp_limb_t cy, e[12], zh, zl;
+ mp_size_t m;
+ int neg;
+
+ ASSERT (n >= 4);
+ ASSERT (! MPN_OVERLAP_P (rp, n + 2, ap, 2*n - 1));
+ ASSERT (! MPN_OVERLAP_P (rp, n + 2, bp, n));
+
+ ap += n & 1; /* handle odd row and diagonal later */
+ m = n / 2; /* size of each of the three sub-products */
+
+ /* (e0h:e0l) etc are correction terms, in 2's complement */
+#define e0l (e[0])
+#define e0h (e[1])
+#define e1l (e[2])
+#define e1h (e[3])
+#define e2l (e[4])
+#define e2h (e[5])
+#define e3l (e[6])
+#define e3h (e[7])
+#define e4l (e[8])
+#define e4h (e[9])
+#define e5l (e[10])
+#define e5h (e[11])
+
+#define s (scratch + 2)
+#define t (rp + m + 2)
+#define p0 rp
+#define p1 scratch
+#define p2 (rp + m)
+#define next_scratch (scratch + 3*m + 1)
+
+ /*
+ rp scratch
+ |---------|-----------| |---------|---------|----------|
+ 0 m 2m+2 0 m 2m 3m+1
+ <----p2----> <-------------s------------->
+ <----p0----><---t----> <----p1---->
+ */
+
+ /* compute {s,3m-1} = {a,3m-1} + {a+m,3m-1} and error terms e0, e1, e2, e3 */
+ cy = mpn_add_err1_n (s, ap, ap + m, &e0l, bp + m, m - 1, 0);
+ cy = mpn_add_err2_n (s + m - 1, ap + m - 1, ap + 2*m - 1, &e1l,
+ bp + m, bp, m, cy);
+ mpn_add_err1_n (s + 2*m - 1, ap + 2*m - 1, ap + 3*m - 1, &e3l, bp, m, cy);
+
+ /* compute t = (-1)^neg * ({b+m,m} - {b,m}) and error terms e4, e5;
+ the operands are ordered so that the subtraction never borrows */
+ if (mpn_cmp (bp + m, bp, m) < 0)
+ {
+ ASSERT_NOCARRY (mpn_sub_err2_n (t, bp, bp + m, &e4l,
+ ap + m - 1, ap + 2*m - 1, m, 0));
+ neg = 1; /* t = {b,m} - {b+m,m} */
+ }
+ else
+ {
+ ASSERT_NOCARRY (mpn_sub_err2_n (t, bp + m, bp, &e4l,
+ ap + m - 1, ap + 2*m - 1, m, 0));
+ neg = 0; /* t = {b+m,m} - {b,m} */
+ }
+
+ /* recursive middle products. The picture is:
+
+ b[2m-1] A A A B B B - - - - -
+ ... - A A A B B B - - - -
+ b[m] - - A A A B B B - - -
+ b[m-1] - - - C C C D D D - -
+ ... - - - - C C C D D D -
+ b[0] - - - - - C C C D D D
+ a[0] ... a[m] ... a[2m] ... a[4m-2]
+ */
+
+ if (m < MULMID_TOOM42_THRESHOLD)
+ {
+ /* A + B */
+ mpn_mulmid_basecase (p0, s, 2*m - 1, bp + m, m);
+ /* accumulate high limbs of p0 into e1 */
+ ADDC_LIMB (cy, e1l, e1l, p0[m]);
+ e1h += p0[m + 1] + cy;
+ /* (-1)^neg * (B - C) (overwrites first m limbs of s) */
+ mpn_mulmid_basecase (p1, ap + m, 2*m - 1, t, m);
+ /* C + D (overwrites t) */
+ mpn_mulmid_basecase (p2, s + m, 2*m - 1, bp, m);
+ }
+ else
+ {
+ /* as above, but use toom42 instead */
+ mpn_toom42_mulmid (p0, s, bp + m, m, next_scratch);
+ ADDC_LIMB (cy, e1l, e1l, p0[m]);
+ e1h += p0[m + 1] + cy;
+ mpn_toom42_mulmid (p1, ap + m, t, m, next_scratch);
+ mpn_toom42_mulmid (p2, s + m, bp, m, next_scratch);
+ }
+
+ /* apply error terms */
+
+ /* -e0 at rp[0] */
+ SUBC_LIMB (cy, rp[0], rp[0], e0l);
+ SUBC_LIMB (cy, rp[1], rp[1], e0h + cy);
+ if (UNLIKELY (cy))
+ {
+ cy = (m > 2) ? mpn_sub_1 (rp + 2, rp + 2, m - 2, 1) : 1;
+ SUBC_LIMB (cy, e1l, e1l, cy);
+ e1h -= cy;
+ }
+
+ /* z = e1 - e2 + high(p0) */
+ SUBC_LIMB (cy, zl, e1l, e2l);
+ zh = e1h - e2h - cy;
+
+ /* z at rp[m] */
+ ADDC_LIMB (cy, rp[m], rp[m], zl);
+ zh = (zh + cy) & GMP_NUMB_MASK;
+ ADDC_LIMB (cy, rp[m + 1], rp[m + 1], zh);
+ cy -= (zh >> (GMP_NUMB_BITS - 1)); /* top bit of zh set: z is negative, so take 1 back */
+ if (UNLIKELY (cy))
+ {
+ if (cy == 1)
+ mpn_add_1 (rp + m + 2, rp + m + 2, m, 1);
+ else /* cy == -1 */
+ mpn_sub_1 (rp + m + 2, rp + m + 2, m, 1);
+ }
+
+ /* e3 at rp[2*m] */
+ ADDC_LIMB (cy, rp[2*m], rp[2*m], e3l);
+ rp[2*m + 1] = (rp[2*m + 1] + e3h + cy) & GMP_NUMB_MASK;
+
+ /* e4 at p1[0] */
+ ADDC_LIMB (cy, p1[0], p1[0], e4l);
+ ADDC_LIMB (cy, p1[1], p1[1], e4h + cy);
+ if (UNLIKELY (cy))
+ mpn_add_1 (p1 + 2, p1 + 2, m, 1);
+
+ /* -e5 at p1[m] */
+ SUBC_LIMB (cy, p1[m], p1[m], e5l);
+ p1[m + 1] = (p1[m + 1] - e5h - cy) & GMP_NUMB_MASK;
+
+ /* adjustment if p1 ends up negative */
+ cy = (p1[m + 1] >> (GMP_NUMB_BITS - 1));
+
+ /* add (-1)^neg * (B^m * p1 - p1) to output: for neg == 0 p1 is subtracted
+ at rp and added at rp + m, for neg == 1 the other way round */
+ if (neg)
+ {
+ mpn_sub_1 (rp + m + 2, rp + m + 2, m, cy);
+ mpn_add (rp, rp, 2*m + 2, p1, m + 2); /* A + C */
+ mpn_sub_n (rp + m, rp + m, p1, m + 2); /* B + D */
+ }
+ else
+ {
+ mpn_add_1 (rp + m + 2, rp + m + 2, m, cy);
+ mpn_sub (rp, rp, 2*m + 2, p1, m + 2); /* A + C */
+ mpn_add_n (rp + m, rp + m, p1, m + 2); /* B + D */
+ }
+
+ /* odd row and diagonal */
+ if (n & 1)
+ {
+ /*
+ Products marked E are already done. We need to do products marked O.
+
+ OOOOO----
+ -EEEEO---
+ --EEEEO--
+ ---EEEEO-
+ ----EEEEO
+ */
+
+ /* first row of O's */
+ cy = mpn_addmul_1 (rp, ap - 1, n, bp[n - 1]);
+ ADDC_LIMB (rp[n + 1], rp[n], rp[n], cy);
+
+ /* O's on diagonal */
+ /* FIXME: should probably define an interface "mpn_mulmid_diag_1"
+ that can handle the sum below. Currently we're relying on
+ mulmid_basecase being pretty fast for a diagonal sum like this,
+ which is true at least for the K8 asm version, but surely false
+ for the generic version. */
+ mpn_mulmid_basecase (e, ap + n - 1, n - 1, bp, n - 1);
+ mpn_add_n (rp + n - 1, rp + n - 1, e, 3); /* e[] is reused here as the destination of the diagonal sum */
+ }
+}
#define b1d bsm1
/* Compute as2 and asm2. */
- flags = toom6_vm2_neg & mpn_toom_eval_dgr3_pm2 (as2, asm2, ap, n, s, a1a3);
+ flags = (enum toom6_flags) (toom6_vm2_neg & mpn_toom_eval_dgr3_pm2 (as2, asm2, ap, n, s, a1a3));
/* Compute bs2 and bsm2. */
b1d[n] = mpn_lshift (b1d, b1, n, 1); /* 2b1 */
if (mpn_cmp (b0b2, b1d, n+1) < 0)
{
mpn_add_n_sub_n (bs2, bsm2, b1d, b0b2, n+1);
- flags ^= toom6_vm2_neg;
+ flags = (enum toom6_flags) (flags ^ toom6_vm2_neg);
}
else
{
if (mpn_cmp (b0b2, b1d, n+1) < 0)
{
mpn_sub_n (bsm2, b1d, b0b2, n+1);
- flags ^= toom6_vm2_neg;
+ flags = (enum toom6_flags) (flags ^ toom6_vm2_neg);
}
else
{
#endif
/* Compute as1 and asm1. */
- flags ^= toom6_vm1_neg & mpn_toom_eval_dgr3_pm1 (as1, asm1, ap, n, s, a0a2);
+ flags = (enum toom6_flags) (flags ^ toom6_vm1_neg & mpn_toom_eval_dgr3_pm1 (as1, asm1, ap, n, s, a0a2));
/* Compute bs1 and bsm1. */
bsm1[n] = mpn_add (bsm1, b0, n, b2, t);
{
cy = mpn_add_n_sub_n (bs1, bsm1, b1, bsm1, n);
bs1[n] = cy >> 1;
- flags ^= toom6_vm1_neg;
+ flags = (enum toom6_flags) (flags ^ toom6_vm1_neg);
}
else
{
if (bsm1[n] == 0 && mpn_cmp (bsm1, b1, n) < 0)
{
mpn_sub_n (bsm1, b1, bsm1, n);
- flags ^= toom6_vm1_neg;
+ flags = (enum toom6_flags) (flags ^ toom6_vm1_neg);
}
else
{
SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-Copyright 2006, 2007, 2008 Free Software Foundation, Inc.
+Copyright 2006, 2007, 2008, 2013 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
#define MAYBE_mul_toom22 \
(MUL_TOOM44_THRESHOLD < 4 * MUL_TOOM33_THRESHOLD)
#define MAYBE_mul_toom44 \
- (MUL_FFT_THRESHOLD >= 4 * MUL_TOOM44_THRESHOLD)
+ (MUL_TOOM6H_THRESHOLD >= 4 * MUL_TOOM44_THRESHOLD)
#endif
#define TOOM44_MUL_N_REC(p, a, b, n, ws) \
gives roughly 32 n/3 + log term. */
/* Compute apx = a0 + 2 a1 + 4 a2 + 8 a3 and amx = a0 - 2 a1 + 4 a2 - 8 a3. */
- flags = toom7_w1_neg & mpn_toom_eval_dgr3_pm2 (apx, amx, ap, n, s, tp);
+ flags = (enum toom7_flags) (toom7_w1_neg & mpn_toom_eval_dgr3_pm2 (apx, amx, ap, n, s, tp));
/* Compute bpx = b0 + 2 b1 + 4 b2 + 8 b3 and bmx = b0 - 2 b1 + 4 b2 - 8 b3. */
- flags ^= toom7_w1_neg & mpn_toom_eval_dgr3_pm2 (bpx, bmx, bp, n, t, tp);
+ flags = (enum toom7_flags) (flags ^ toom7_w1_neg & mpn_toom_eval_dgr3_pm2 (bpx, bmx, bp, n, t, tp));
TOOM44_MUL_N_REC (v2, apx, bpx, n + 1, tp); /* v2, 2n+1 limbs */
TOOM44_MUL_N_REC (vm2, amx, bmx, n + 1, tp); /* vm2, 2n+1 limbs */
TOOM44_MUL_N_REC (vh, apx, bpx, n + 1, tp); /* vh, 2n+1 limbs */
/* Compute apx = a0 + a1 + a2 + a3 and amx = a0 - a1 + a2 - a3. */
- flags |= toom7_w3_neg & mpn_toom_eval_dgr3_pm1 (apx, amx, ap, n, s, tp);
+ flags = (enum toom7_flags) (flags | toom7_w3_neg & mpn_toom_eval_dgr3_pm1 (apx, amx, ap, n, s, tp));
/* Compute bpx = b0 + b1 + b2 + b3 bnd bmx = b0 - b1 + b2 - b3. */
- flags ^= toom7_w3_neg & mpn_toom_eval_dgr3_pm1 (bpx, bmx, bp, n, t, tp);
+ flags = (enum toom7_flags) (flags ^ toom7_w3_neg & mpn_toom_eval_dgr3_pm1 (bpx, bmx, bp, n, t, tp));
TOOM44_MUL_N_REC (vm1, amx, bmx, n + 1, tp); /* vm1, 2n+1 limbs */
/* Clobbers amx, bmx. */
SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-Copyright 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
+Copyright 2006, 2007, 2008, 2009, 2010, 2013 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
#define MAYBE_sqr_toom2 \
(SQR_TOOM4_THRESHOLD < 4 * SQR_TOOM3_THRESHOLD)
#define MAYBE_sqr_toom4 \
- (SQR_FFT_THRESHOLD >= 4 * SQR_TOOM4_THRESHOLD)
+ (SQR_TOOM6_THRESHOLD >= 4 * SQR_TOOM4_THRESHOLD)
#endif
#define TOOM4_SQR_REC(p, a, n, ws) \
TOOM4_SQR_REC (v0, a0, n, tp);
TOOM4_SQR_REC (vinf, a3, s, tp); /* vinf, 2s limbs */
- mpn_toom_interpolate_7pts (pp, n, 0, vm2, vm1, v2, vh, 2*s, tp);
+ mpn_toom_interpolate_7pts (pp, n, (enum toom7_flags) 0, vm2, vm1, v2, vh, 2*s, tp);
}
#define a1a3 asm1
/* Compute as2 and asm2. */
- flags = toom6_vm2_neg & mpn_toom_eval_pm2 (as2, asm2, 4, ap, n, s, a1a3);
+ flags = (enum toom6_flags) (toom6_vm2_neg & mpn_toom_eval_pm2 (as2, asm2, 4, ap, n, s, a1a3));
/* Compute bs1 and bsm1. */
if (t == n)
if (mpn_cmp (b0, b1, n) < 0)
{
cy = mpn_add_n_sub_n (bs1, bsm1, b1, b0, n);
- flags ^= toom6_vm1_neg;
+ flags = (enum toom6_flags) (flags ^ toom6_vm1_neg);
}
else
{
if (mpn_cmp (b0, b1, n) < 0)
{
mpn_sub_n (bsm1, b1, b0, n);
- flags ^= toom6_vm1_neg;
+ flags = (enum toom6_flags) (flags ^ toom6_vm1_neg);
}
else
{
{
mpn_sub_n (bsm1, b1, b0, t);
MPN_ZERO (bsm1 + t, n - t);
- flags ^= toom6_vm1_neg;
+ flags = (enum toom6_flags) (flags ^ toom6_vm1_neg);
}
else
{
if (flags & toom6_vm1_neg )
{
bsm2[n] = mpn_add (bsm2, bsm1, n, b1, t);
- flags ^= toom6_vm2_neg;
+ flags = (enum toom6_flags) (flags ^ toom6_vm2_neg);
}
else
{
if (mpn_cmp (bsm1, b1, n) < 0)
{
mpn_sub_n (bsm2, b1, bsm1, n);
- flags ^= toom6_vm2_neg;
+ flags = (enum toom6_flags) (flags ^ toom6_vm2_neg);
}
else
{
{
mpn_sub_n (bsm2, b1, bsm1, t);
MPN_ZERO (bsm2 + t, n - t);
- flags ^= toom6_vm2_neg;
+ flags = (enum toom6_flags) (flags ^ toom6_vm2_neg);
}
else
{
}
/* Compute as1 and asm1. */
- flags ^= toom6_vm1_neg & mpn_toom_eval_pm1 (as1, asm1, 4, ap, n, s, a0a2);
+ flags = (enum toom6_flags) (flags ^ toom6_vm1_neg & mpn_toom_eval_pm1 (as1, asm1, 4, ap, n, s, a0a2));
ASSERT (as1[n] <= 4);
ASSERT (bs1[n] <= 1);
SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-Copyright 2006, 2007, 2008 Free Software Foundation, Inc.
+Copyright 2006, 2007, 2008, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
gp = pp;
/* Compute as1 and asm1. */
- flags = toom7_w3_neg & mpn_toom_eval_pm1 (as1, asm1, 4, ap, n, s, gp);
+ flags = (enum toom7_flags) (toom7_w3_neg & mpn_toom_eval_pm1 (as1, asm1, 4, ap, n, s, gp));
/* Compute as2 and asm2. */
- flags |= toom7_w1_neg & mpn_toom_eval_pm2 (as2, asm2, 4, ap, n, s, gp);
+ flags = (enum toom7_flags) (flags | toom7_w1_neg & mpn_toom_eval_pm2 (as2, asm2, 4, ap, n, s, gp));
/* Compute ash = 16 a0 + 8 a1 + 4 a2 + 2 a3 + a4
= 2*(2*(2*(2*a0 + a1) + a2) + a3) + a4 */
{
bs1[n] = mpn_add_n_sub_n (bs1, bsm1, b1, bs1, n) >> 1;
bsm1[n] = 0;
- flags ^= toom7_w3_neg;
+ flags = (enum toom7_flags) (flags ^ toom7_w3_neg);
}
else
{
{
mpn_sub_n (bsm1, b1, bs1, n);
bsm1[n] = 0;
- flags ^= toom7_w3_neg;
+ flags = (enum toom7_flags) (flags ^ toom7_w3_neg);
}
else
{
if (mpn_cmp (bs2, gp, n+1) < 0)
{
ASSERT_NOCARRY (mpn_add_n_sub_n (bs2, bsm2, gp, bs2, n+1));
- flags ^= toom7_w1_neg;
+ flags = (enum toom7_flags) (flags ^ toom7_w1_neg);
}
else
{
if (mpn_cmp (bs2, gp, n+1) < 0)
{
ASSERT_NOCARRY (mpn_sub_n (bsm2, gp, bs2, n+1));
- flags ^= toom7_w1_neg;
+ flags = (enum toom7_flags) (flags ^ toom7_w1_neg);
}
else
{
mpn_add_n (bs2, bs2, gp, n+1);
#endif
- /* Compute bsh = 4 b0 + 2 b1 + b0 = 2*(2*b0 + b1)+b0. */
+ /* Compute bsh = 4 b0 + 2 b1 + b2 = 2*(2*b0 + b1)+b2. */
#if HAVE_NATIVE_mpn_addlsh1_n
cy = mpn_addlsh1_n (bsh, b1, b0, n);
if (t < n)
--- /dev/null
+/* Implementation of the algorithm for Toom-Cook 4.5-way.
+
+ Contributed to the GNU project by Marco Bodrato.
+
+ THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY
+ SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2009, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* Toom-4.5, the splitting 5x4 unbalanced version.
+ Evaluate in: infinity, +4, -4, +2, -2, +1, -1, 0.
+
+ <--s-><--n--><--n--><--n--><--n-->
+ ____ ______ ______ ______ ______
+ |_a4_|__a3__|__a2__|__a1__|__a0__|
+ |b3_|__b2__|__b1__|__b0__|
+ <-t-><--n--><--n--><--n-->
+
+ mpn_toom54_mul splits {ap,an} into five pieces a0..a4 and {bp,bn} into
+ four pieces b0..b3. The low pieces have n limbs each; the top pieces a4
+ and b3 have s = an - 4n and t = bn - 3n limbs. The function asserts
+ an >= bn, 0 < s <= n, 0 < t <= n, s + t >= n, s + t > 4 and n > 2.
+
+ For each of the point pairs +-4, +-1 and +-2, both operands are evaluated,
+ the two products of n+1 limb operands are computed, and the results are
+ handed to mpn_toom_couple_handling together with the combined sign.
+ A(0)*B(0) = a0*b0 and the "infinity" product a4*b3 are computed directly,
+ and mpn_toom_interpolate_8pts is then called on pp, r3 and r7.
+
+ The two recursion macros below simply forward to mpn_mul_n and mpn_mul;
+ their ws argument is accepted but not used.
+
+ scratch holds r7 (3n+1 limbs) followed by r3 (3n+1 limbs) followed by ws;
+ a comment inside the function asks for a further 3n+1 limbs for ws.
+
+ NOTE(review): the result presumably occupies an+bn limbs at pp; confirm
+ against the callers.
+
+*/
+#define TOOM_54_MUL_N_REC(p, a, b, n, ws) \
+ do { mpn_mul_n (p, a, b, n); \
+ } while (0)
+
+#define TOOM_54_MUL_REC(p, a, na, b, nb, ws) \
+ do { mpn_mul (p, a, na, b, nb); \
+ } while (0)
+
+void
+mpn_toom54_mul (mp_ptr pp,
+ mp_srcptr ap, mp_size_t an,
+ mp_srcptr bp, mp_size_t bn, mp_ptr scratch)
+{
+ mp_size_t n, s, t;
+ int sign;
+
+ /***************************** decomposition *******************************/
+#define a4 (ap + 4 * n)
+#define b3 (bp + 3 * n)
+
+ ASSERT (an >= bn);
+ n = 1 + (4 * an >= 5 * bn ? (an - 1) / (size_t) 5 : (bn - 1) / (size_t) 4);
+
+ s = an - 4 * n;
+ t = bn - 3 * n;
+
+ ASSERT (0 < s && s <= n);
+ ASSERT (0 < t && t <= n);
+ /* Required by mpn_toom_interpolate_8pts. */
+ ASSERT ( s + t >= n );
+ ASSERT ( s + t > 4);
+ ASSERT ( n > 2);
+
+#define r8 pp /* 2n */
+#define r7 scratch /* 3n+1 */
+#define r5 (pp + 3*n) /* 3n+1 */
+#define v0 (pp + 3*n) /* n+1 */
+#define v1 (pp + 4*n+1) /* n+1 */
+#define v2 (pp + 5*n+2) /* n+1 */
+#define v3 (pp + 6*n+3) /* n+1 */
+#define r3 (scratch + 3 * n + 1) /* 3n+1 */
+#define r1 (pp + 7*n) /* s+t <= 2*n */
+#define ws (scratch + 6 * n + 2) /* 3n+1, see the note below */
+
+ /* Alloc also 3n+1 limbs for ws... mpn_toom_interpolate_8pts may
+ need all of them, when DO_mpn_sublsh_n uses a scratch */
+ /********************** evaluation and recursive calls *********************/
+ /* $\pm4$: the xor of the two evaluation results is kept as the sign that
+ is passed to mpn_toom_couple_handling. */
+ sign = mpn_toom_eval_pm2exp (v2, v0, 4, ap, n, s, 2, pp)
+ ^ mpn_toom_eval_pm2exp (v3, v1, 3, bp, n, t, 2, pp);
+ TOOM_54_MUL_N_REC(pp, v0, v1, n + 1, ws); /* A(-4)*B(-4) */
+ TOOM_54_MUL_N_REC(r3, v2, v3, n + 1, ws); /* A(+4)*B(+4) */
+ mpn_toom_couple_handling (r3, 2*n+1, pp, sign, n, 2, 4);
+
+ /* $\pm1$: b has four pieces, so the degree-3 evaluation routine is used
+ for it. */
+ sign = mpn_toom_eval_pm1 (v2, v0, 4, ap, n, s, pp)
+ ^ mpn_toom_eval_dgr3_pm1 (v3, v1, bp, n, t, pp);
+ TOOM_54_MUL_N_REC(pp, v0, v1, n + 1, ws); /* A(-1)*B(-1) */
+ TOOM_54_MUL_N_REC(r7, v2, v3, n + 1, ws); /* A(1)*B(1) */
+ mpn_toom_couple_handling (r7, 2*n+1, pp, sign, n, 0, 0);
+
+ /* $\pm2$: v0 and r5 are both defined as pp + 3*n, so the A(+2)*B(+2)
+ product below is written to the address v0 was read from. */
+ sign = mpn_toom_eval_pm2 (v2, v0, 4, ap, n, s, pp)
+ ^ mpn_toom_eval_dgr3_pm2 (v3, v1, bp, n, t, pp);
+ TOOM_54_MUL_N_REC(pp, v0, v1, n + 1, ws); /* A(-2)*B(-2) */
+ TOOM_54_MUL_N_REC(r5, v2, v3, n + 1, ws); /* A(+2)*B(+2) */
+ mpn_toom_couple_handling (r5, 2*n+1, pp, sign, n, 1, 2);
+
+ /* A(0)*B(0): the product of the two low n-limb pieces, a0 * b0. */
+ TOOM_54_MUL_N_REC(pp, ap, bp, n, ws);
+
+ /* Infinity: a4 * b3, passing the longer operand first to mpn_mul. */
+ if (s > t) {
+ TOOM_54_MUL_REC(r1, a4, s, b3, t, ws);
+ } else {
+ TOOM_54_MUL_REC(r1, b3, t, a4, s, ws);
+ };
+
+ mpn_toom_interpolate_8pts (pp, n, r3, r7, s + t, ws);
+
+#undef a4
+#undef b3
+#undef r1
+#undef r3
+#undef r5
+#undef v0
+#undef v1
+#undef v2
+#undef v3
+#undef r7
+#undef r8
+#undef ws
+}
SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-Copyright 2006, 2007, 2008 Free Software Foundation, Inc.
+Copyright 2006, 2007, 2008, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
gp = pp;
/* Compute as1 and asm1. */
- aflags = toom7_w3_neg & mpn_toom_eval_pm1 (as1, asm1, 5, ap, n, s, gp);
+ aflags = (enum toom7_flags) (toom7_w3_neg & mpn_toom_eval_pm1 (as1, asm1, 5, ap, n, s, gp));
/* Compute as2 and asm2. */
- aflags |= toom7_w1_neg & mpn_toom_eval_pm2 (as2, asm2, 5, ap, n, s, gp);
+ aflags = (enum toom7_flags) (aflags | toom7_w1_neg & mpn_toom_eval_pm2 (as2, asm2, 5, ap, n, s, gp));
/* Compute ash = 32 a0 + 16 a1 + 8 a2 + 4 a3 + 2 a4 + a5
= 2*(2*(2*(2*(2*a0 + a1) + a2) + a3) + a4) + a5 */
else
{
cy = mpn_add_n_sub_n (bs1, bsm1, b0, b1, n);
- bflags = 0;
+ bflags = (enum toom7_flags) 0;
}
bs1[n] = cy >> 1;
#else
else
{
mpn_sub_n (bsm1, b0, b1, n);
- bflags = 0;
+ bflags = (enum toom7_flags) 0;
}
#endif
}
else
{
mpn_sub (bsm1, b0, n, b1, t);
- bflags = 0;
+ bflags = (enum toom7_flags) 0;
}
}
if (bflags & toom7_w3_neg)
{
bsm2[n] = mpn_add (bsm2, bsm1, n, b1, t);
- bflags |= toom7_w1_neg;
+ bflags = (enum toom7_flags) (bflags | toom7_w1_neg);
}
else
{
{
ASSERT_NOCARRY (mpn_sub_n (bsm2, b1, bsm1, t));
MPN_ZERO (bsm2 + t, n + 1 - t);
- bflags |= toom7_w1_neg;
+ bflags = (enum toom7_flags) (bflags | toom7_w1_neg);
}
else
{
if (mpn_cmp (bsm1, b1, n) < 0)
{
ASSERT_NOCARRY (mpn_sub_n (bsm2, b1, bsm1, n));
- bflags |= toom7_w1_neg;
+ bflags = (enum toom7_flags) (bflags | toom7_w1_neg);
}
else
{
- ASSERT_NOCARRY (mpn_sub (bsm2, bsm1, n, b1, n));
+ ASSERT_NOCARRY (mpn_sub_n (bsm2, bsm1, b1, n));
}
bsm2[n] = 0;
}
}
- /* Compute bsh, recycling bs1 and bsm1. bsh=bs1+b0; */
- mpn_add (bsh, bs1, n + 1, b0, n);
+ /* Compute bsh, recycling bs1. bsh=bs1+b0; */
+ bsh[n] = bs1[n] + mpn_add_n (bsh, bs1, b0, n);
ASSERT (as1[n] <= 5);
ASSERT (bs1[n] <= 1);
if (s > t) mpn_mul (vinf, a5, s, b1, t);
else mpn_mul (vinf, b1, t, a5, s);
- mpn_toom_interpolate_7pts (pp, n, aflags ^ bflags,
+ mpn_toom_interpolate_7pts (pp, n, (enum toom7_flags) (aflags ^ bflags),
vm2, vm1, v2, vh, s + t, scratch_out);
TMP_FREE;
SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-Copyright 2009, 2010 Free Software Foundation, Inc.
+Copyright 2009, 2010, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
(MUL_FFT_THRESHOLD >= 6 * MUL_TOOM6H_THRESHOLD)
#endif
-#define TOOM6H_MUL_N_REC(p, a, b, n, ws) \
+#define TOOM6H_MUL_N_REC(p, a, b, f, p2, a2, b2, n, ws) \
do { \
if (MAYBE_mul_basecase \
- && BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD)) \
+ && BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD)) { \
mpn_mul_basecase (p, a, n, b, n); \
- else if (MAYBE_mul_toom22 \
- && BELOW_THRESHOLD (n, MUL_TOOM33_THRESHOLD)) \
+ if (f) \
+ mpn_mul_basecase (p2, a2, n, b2, n); \
+ } else if (MAYBE_mul_toom22 \
+ && BELOW_THRESHOLD (n, MUL_TOOM33_THRESHOLD)) { \
mpn_toom22_mul (p, a, n, b, n, ws); \
- else if (MAYBE_mul_toom33 \
- && BELOW_THRESHOLD (n, MUL_TOOM44_THRESHOLD)) \
+ if (f) \
+ mpn_toom22_mul (p2, a2, n, b2, n, ws); \
+ } else if (MAYBE_mul_toom33 \
+ && BELOW_THRESHOLD (n, MUL_TOOM44_THRESHOLD)) { \
mpn_toom33_mul (p, a, n, b, n, ws); \
- else if (! MAYBE_mul_toom6h \
- || BELOW_THRESHOLD (n, MUL_TOOM6H_THRESHOLD)) \
+ if (f) \
+ mpn_toom33_mul (p2, a2, n, b2, n, ws); \
+ } else if (! MAYBE_mul_toom6h \
+ || BELOW_THRESHOLD (n, MUL_TOOM6H_THRESHOLD)) { \
mpn_toom44_mul (p, a, n, b, n, ws); \
- else \
+ if (f) \
+ mpn_toom44_mul (p2, a2, n, b2, n, ws); \
+ } else { \
mpn_toom6h_mul (p, a, n, b, n, ws); \
+ if (f) \
+ mpn_toom6h_mul (p2, a2, n, b2, n, ws); \
+ } \
} while (0)
#define TOOM6H_MUL_REC(p, a, na, b, nb, ws) \
- do { mpn_mul (p, a, na, b, nb); \
+ do { mpn_mul (p, a, na, b, nb); \
} while (0)
/* Toom-6.5 , compute the product {pp,an+bn} <- {ap,an} * {bp,bn}
/***************************** decomposition *******************************/
- ASSERT( an >= bn);
+ ASSERT (an >= bn);
/* Can not handle too much unbalancement */
- ASSERT( bn >= 42 );
+ ASSERT (bn >= 42);
/* Can not handle too much unbalancement */
- ASSERT((an*3 < bn * 8) || ( bn >= 46 && an*6 < bn * 17 ));
+ ASSERT ((an*3 < bn * 8) || (bn >= 46 && an * 6 < bn * 17));
/* Limit num/den is a rational number between
(12/11)^(log(4)/log(2*4-1)) and (12/11)^(log(6)/log(2*6-1)) */
#define LIMIT_numerator (18)
#define LIMIT_denominat (17)
- if( an * LIMIT_denominat < LIMIT_numerator * bn ) /* is 6*... < 6*... */
- { p = q = 6; }
- else if( an * 5 * LIMIT_numerator < LIMIT_denominat * 7 * bn )
- { p = 7; q = 6; }
- else if( an * 5 * LIMIT_denominat < LIMIT_numerator * 7 * bn )
- { p = 7; q = 5; }
- else if( an * LIMIT_numerator < LIMIT_denominat * 2 * bn ) /* is 4*... < 8*... */
- { p = 8; q = 5; }
- else if( an * LIMIT_denominat < LIMIT_numerator * 2 * bn ) /* is 4*... < 8*... */
- { p = 8; q = 4; }
- else
- { p = 9; q = 4; }
-
- half = (p ^ q) & 1;
- n = 1 + (q * an >= p * bn ? (an - 1) / (size_t) p : (bn - 1) / (size_t) q);
- p--; q--;
-
- s = an - p * n;
- t = bn - q * n;
-
- /* With LIMIT = 16/15, the following recover is needed only if bn<=73*/
- if (half) { /* Recover from badly chosen splitting */
- if (s<1) {p--; s+=n; half=0;}
- else if (t<1) {q--; t+=n; half=0;}
+ if (LIKELY (an * LIMIT_denominat < LIMIT_numerator * bn)) /* is 6*... < 6*... */
+ {
+ n = 1 + (an - 1) / (size_t) 6;
+ p = q = 5;
+ half = 0;
+
+ s = an - 5 * n;
+ t = bn - 5 * n;
+ }
+ else {
+ if (an * 5 * LIMIT_numerator < LIMIT_denominat * 7 * bn)
+ { p = 7; q = 6; }
+ else if (an * 5 * LIMIT_denominat < LIMIT_numerator * 7 * bn)
+ { p = 7; q = 5; }
+ else if (an * LIMIT_numerator < LIMIT_denominat * 2 * bn) /* is 4*... < 8*... */
+ { p = 8; q = 5; }
+ else if (an * LIMIT_denominat < LIMIT_numerator * 2 * bn) /* is 4*... < 8*... */
+ { p = 8; q = 4; }
+ else
+ { p = 9; q = 4; }
+
+ half = (p ^ q) & 1;
+ n = 1 + (q * an >= p * bn ? (an - 1) / (size_t) p : (bn - 1) / (size_t) q);
+ p--; q--;
+
+ s = an - p * n;
+ t = bn - q * n;
+
+ /* With LIMIT = 16/15, the following recover is needed only if bn<=73*/
+ if (half) { /* Recover from badly chosen splitting */
+ if (UNLIKELY (s<1)) {p--; s+=n; half=0;}
+ else if (UNLIKELY (t<1)) {q--; t+=n; half=0;}
+ }
}
#undef LIMIT_numerator
#undef LIMIT_denominat
/* $\pm1/2$ */
sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 1, pp) ^
mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 1, pp);
- TOOM6H_MUL_N_REC(pp, v0, v1, n + 1, wse); /* A(-1/2)*B(-1/2)*2^. */
- TOOM6H_MUL_N_REC(r5, v2, v3, n + 1, wse); /* A(+1/2)*B(+1/2)*2^. */
+ /* A(-1/2)*B(-1/2)*2^. */ /* A(+1/2)*B(+1/2)*2^. */
+ TOOM6H_MUL_N_REC(pp, v0, v1, 2, r5, v2, v3, n + 1, wse);
mpn_toom_couple_handling (r5, 2 * n + 1, pp, sign, n, 1+half , half);
/* $\pm1$ */
sign = mpn_toom_eval_pm1 (v2, v0, p, ap, n, s, pp);
- if (q == 3)
+ if (UNLIKELY (q == 3))
sign ^= mpn_toom_eval_dgr3_pm1 (v3, v1, bp, n, t, pp);
else
sign ^= mpn_toom_eval_pm1 (v3, v1, q, bp, n, t, pp);
- TOOM6H_MUL_N_REC(pp, v0, v1, n + 1, wse); /* A(-1)*B(-1) */
- TOOM6H_MUL_N_REC(r3, v2, v3, n + 1, wse); /* A(1)*B(1) */
+ /* A(-1)*B(-1) */ /* A(1)*B(1) */
+ TOOM6H_MUL_N_REC(pp, v0, v1, 2, r3, v2, v3, n + 1, wse);
mpn_toom_couple_handling (r3, 2 * n + 1, pp, sign, n, 0, 0);
/* $\pm4$ */
sign = mpn_toom_eval_pm2exp (v2, v0, p, ap, n, s, 2, pp) ^
mpn_toom_eval_pm2exp (v3, v1, q, bp, n, t, 2, pp);
- TOOM6H_MUL_N_REC(pp, v0, v1, n + 1, wse); /* A(-4)*B(-4) */
- TOOM6H_MUL_N_REC(r1, v2, v3, n + 1, wse); /* A(+4)*B(+4) */
+ /* A(-4)*B(-4) */
+ TOOM6H_MUL_N_REC(pp, v0, v1, 2, r1, v2, v3, n + 1, wse); /* A(+4)*B(+4) */
mpn_toom_couple_handling (r1, 2 * n + 1, pp, sign, n, 2, 4);
/* $\pm1/4$ */
sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 2, pp) ^
mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 2, pp);
- TOOM6H_MUL_N_REC(pp, v0, v1, n + 1, wse); /* A(-1/4)*B(-1/4)*4^. */
- TOOM6H_MUL_N_REC(r4, v2, v3, n + 1, wse); /* A(+1/4)*B(+1/4)*4^. */
+ /* A(-1/4)*B(-1/4)*4^. */ /* A(+1/4)*B(+1/4)*4^. */
+ TOOM6H_MUL_N_REC(pp, v0, v1, 2, r4, v2, v3, n + 1, wse);
mpn_toom_couple_handling (r4, 2 * n + 1, pp, sign, n, 2*(1+half), 2*(half));
/* $\pm2$ */
sign = mpn_toom_eval_pm2 (v2, v0, p, ap, n, s, pp) ^
mpn_toom_eval_pm2 (v3, v1, q, bp, n, t, pp);
- TOOM6H_MUL_N_REC(pp, v0, v1, n + 1, wse); /* A(-2)*B(-2) */
- TOOM6H_MUL_N_REC(r2, v2, v3, n + 1, wse); /* A(+2)*B(+2) */
+ /* A(-2)*B(-2) */ /* A(+2)*B(+2) */
+ TOOM6H_MUL_N_REC(pp, v0, v1, 2, r2, v2, v3, n + 1, wse);
mpn_toom_couple_handling (r2, 2 * n + 1, pp, sign, n, 1, 2);
#undef v0
#undef wse
/* A(0)*B(0) */
- TOOM6H_MUL_N_REC(pp, ap, bp, n, wsi);
+ TOOM6H_MUL_N_REC(pp, ap, bp, 0, pp, ap, bp, n, wsi);
/* Infinity */
- if( half != 0) {
- if(s>t) {
+ if (UNLIKELY (half != 0)) {
+ if (s > t) {
TOOM6H_MUL_REC(r0, ap + p * n, s, bp + q * n, t, wsi);
} else {
TOOM6H_MUL_REC(r0, bp + q * n, t, ap + p * n, s, wsi);
SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-Copyright 2009 Free Software Foundation, Inc.
+Copyright 2009, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
(SQR_TOOM8_MAX >= SQR_TOOM8_THRESHOLD)
#endif
-#define TOOM8_SQR_REC(p, a, n, ws) \
+#define TOOM8_SQR_REC(p, a, f, p2, a2, n, ws) \
do { \
if (MAYBE_sqr_basecase && ( !MAYBE_sqr_above_basecase \
- || BELOW_THRESHOLD (n, SQR_TOOM2_THRESHOLD))) \
+ || BELOW_THRESHOLD (n, SQR_TOOM2_THRESHOLD))) { \
mpn_sqr_basecase (p, a, n); \
- else if (MAYBE_sqr_toom2 && ( !MAYBE_sqr_above_toom2 \
- || BELOW_THRESHOLD (n, SQR_TOOM3_THRESHOLD))) \
+ if (f) mpn_sqr_basecase (p2, a2, n); \
+ } else if (MAYBE_sqr_toom2 && ( !MAYBE_sqr_above_toom2 \
+ || BELOW_THRESHOLD (n, SQR_TOOM3_THRESHOLD))) { \
mpn_toom2_sqr (p, a, n, ws); \
- else if (MAYBE_sqr_toom3 && ( !MAYBE_sqr_above_toom3 \
- || BELOW_THRESHOLD (n, SQR_TOOM4_THRESHOLD))) \
+ if (f) mpn_toom2_sqr (p2, a2, n, ws); \
+ } else if (MAYBE_sqr_toom3 && ( !MAYBE_sqr_above_toom3 \
+ || BELOW_THRESHOLD (n, SQR_TOOM4_THRESHOLD))) { \
mpn_toom3_sqr (p, a, n, ws); \
- else if (MAYBE_sqr_toom4 && ( !MAYBE_sqr_above_toom4 \
- || BELOW_THRESHOLD (n, SQR_TOOM6_THRESHOLD))) \
+ if (f) mpn_toom3_sqr (p2, a2, n, ws); \
+ } else if (MAYBE_sqr_toom4 && ( !MAYBE_sqr_above_toom4 \
+ || BELOW_THRESHOLD (n, SQR_TOOM6_THRESHOLD))) { \
mpn_toom4_sqr (p, a, n, ws); \
- else if (! MAYBE_sqr_above_toom6 \
- || BELOW_THRESHOLD (n, SQR_TOOM8_THRESHOLD)) \
+ if (f) mpn_toom4_sqr (p2, a2, n, ws); \
+ } else if (! MAYBE_sqr_above_toom6 \
+ || BELOW_THRESHOLD (n, SQR_TOOM8_THRESHOLD)) { \
mpn_toom6_sqr (p, a, n, ws); \
- else \
+ if (f) mpn_toom6_sqr (p2, a2, n, ws); \
+ } else { \
mpn_toom8_sqr (p, a, n, ws); \
+ if (f) mpn_toom8_sqr (p2, a2, n, ws); \
+ } \
} while (0)
void
/********************** evaluation and recursive calls *********************/
/* $\pm1/8$ */
mpn_toom_eval_pm2rexp (v2, v0, 7, ap, n, s, 3, pp);
- TOOM8_SQR_REC(pp, v0, n + 1, wse); /* A(-1/8)*B(-1/8)*8^. */
- TOOM8_SQR_REC(r7, v2, n + 1, wse); /* A(+1/8)*B(+1/8)*8^. */
+ /* A(-1/8)*B(-1/8)*8^. */ /* A(+1/8)*B(+1/8)*8^. */
+ TOOM8_SQR_REC(pp, v0, 2, r7, v2, n + 1, wse);
mpn_toom_couple_handling (r7, 2 * n + 1 + BIT_CORRECTION, pp, 0, n, 3, 0);
/* $\pm1/4$ */
mpn_toom_eval_pm2rexp (v2, v0, 7, ap, n, s, 2, pp);
- TOOM8_SQR_REC(pp, v0, n + 1, wse); /* A(-1/4)*B(-1/4)*4^. */
- TOOM8_SQR_REC(r5, v2, n + 1, wse); /* A(+1/4)*B(+1/4)*4^. */
+ /* A(-1/4)*B(-1/4)*4^. */ /* A(+1/4)*B(+1/4)*4^. */
+ TOOM8_SQR_REC(pp, v0, 2, r5, v2, n + 1, wse);
mpn_toom_couple_handling (r5, 2 * n + 1, pp, 0, n, 2, 0);
/* $\pm2$ */
mpn_toom_eval_pm2 (v2, v0, 7, ap, n, s, pp);
- TOOM8_SQR_REC(pp, v0, n + 1, wse); /* A(-2)*B(-2) */
- TOOM8_SQR_REC(r3, v2, n + 1, wse); /* A(+2)*B(+2) */
+ /* A(-2)*B(-2) */ /* A(+2)*B(+2) */
+ TOOM8_SQR_REC(pp, v0, 2, r3, v2, n + 1, wse);
mpn_toom_couple_handling (r3, 2 * n + 1, pp, 0, n, 1, 2);
/* $\pm8$ */
mpn_toom_eval_pm2exp (v2, v0, 7, ap, n, s, 3, pp);
- TOOM8_SQR_REC(pp, v0, n + 1, wse); /* A(-8)*B(-8) */
- TOOM8_SQR_REC(r1, v2, n + 1, wse); /* A(+8)*B(+8) */
+ /* A(-8)*B(-8) */ /* A(+8)*B(+8) */
+ TOOM8_SQR_REC(pp, v0, 2, r1, v2, n + 1, wse);
mpn_toom_couple_handling (r1, 2 * n + 1 + BIT_CORRECTION, pp, 0, n, 3, 6);
/* $\pm1/2$ */
mpn_toom_eval_pm2rexp (v2, v0, 7, ap, n, s, 1, pp);
- TOOM8_SQR_REC(pp, v0, n + 1, wse); /* A(-1/2)*B(-1/2)*2^. */
- TOOM8_SQR_REC(r6, v2, n + 1, wse); /* A(+1/2)*B(+1/2)*2^. */
+ /* A(-1/2)*B(-1/2)*2^. */ /* A(+1/2)*B(+1/2)*2^. */
+ TOOM8_SQR_REC(pp, v0, 2, r6, v2, n + 1, wse);
mpn_toom_couple_handling (r6, 2 * n + 1, pp, 0, n, 1, 0);
/* $\pm1$ */
mpn_toom_eval_pm1 (v2, v0, 7, ap, n, s, pp);
- TOOM8_SQR_REC(pp, v0, n + 1, wse); /* A(-1)*B(-1) */
- TOOM8_SQR_REC(r4, v2, n + 1, wse); /* A(1)*B(1) */
+ /* A(-1)*B(-1) */ /* A(1)*B(1) */
+ TOOM8_SQR_REC(pp, v0, 2, r4, v2, n + 1, wse);
mpn_toom_couple_handling (r4, 2 * n + 1, pp, 0, n, 0, 0);
/* $\pm4$ */
mpn_toom_eval_pm2exp (v2, v0, 7, ap, n, s, 2, pp);
- TOOM8_SQR_REC(pp, v0, n + 1, wse); /* A(-4)*B(-4) */
- TOOM8_SQR_REC(r2, v2, n + 1, wse); /* A(+4)*B(+4) */
+ /* A(-4)*B(-4) */ /* A(+4)*B(+4) */
+ TOOM8_SQR_REC(pp, v0, 2, r2, v2, n + 1, wse);
mpn_toom_couple_handling (r2, 2 * n + 1, pp, 0, n, 2, 4);
#undef v0
#undef v2
/* A(0)*B(0) */
- TOOM8_SQR_REC(pp, ap, n, wse);
+ TOOM8_SQR_REC(pp, ap, 0, pp, ap, n, wse);
mpn_toom_interpolate_16pts (pp, r1, r3, r5, r7, n, 2 * s, 0, wse);
SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-Copyright 2009, 2010 Free Software Foundation, Inc.
+Copyright 2009, 2010, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
(MUL_FFT_THRESHOLD >= 8 * MUL_TOOM8H_THRESHOLD)
#endif
-#define TOOM8H_MUL_N_REC(p, a, b, n, ws) \
+#define TOOM8H_MUL_N_REC(p, a, b, f, p2, a2, b2, n, ws) \
do { \
if (MAYBE_mul_basecase \
- && BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD)) \
+ && BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD)) { \
mpn_mul_basecase (p, a, n, b, n); \
- else if (MAYBE_mul_toom22 \
- && BELOW_THRESHOLD (n, MUL_TOOM33_THRESHOLD)) \
+ if (f) mpn_mul_basecase (p2, a2, n, b2, n); \
+ } else if (MAYBE_mul_toom22 \
+ && BELOW_THRESHOLD (n, MUL_TOOM33_THRESHOLD)) { \
mpn_toom22_mul (p, a, n, b, n, ws); \
- else if (MAYBE_mul_toom33 \
- && BELOW_THRESHOLD (n, MUL_TOOM44_THRESHOLD)) \
+ if (f) mpn_toom22_mul (p2, a2, n, b2, n, ws); \
+ } else if (MAYBE_mul_toom33 \
+ && BELOW_THRESHOLD (n, MUL_TOOM44_THRESHOLD)) { \
mpn_toom33_mul (p, a, n, b, n, ws); \
- else if (MAYBE_mul_toom44 \
- && BELOW_THRESHOLD (n, MUL_TOOM6H_THRESHOLD)) \
+ if (f) mpn_toom33_mul (p2, a2, n, b2, n, ws); \
+ } else if (MAYBE_mul_toom44 \
+ && BELOW_THRESHOLD (n, MUL_TOOM6H_THRESHOLD)) { \
mpn_toom44_mul (p, a, n, b, n, ws); \
- else if (! MAYBE_mul_toom8h \
- || BELOW_THRESHOLD (n, MUL_TOOM8H_THRESHOLD)) \
+ if (f) mpn_toom44_mul (p2, a2, n, b2, n, ws); \
+ } else if (! MAYBE_mul_toom8h \
+ || BELOW_THRESHOLD (n, MUL_TOOM8H_THRESHOLD)) { \
mpn_toom6h_mul (p, a, n, b, n, ws); \
- else \
+ if (f) mpn_toom6h_mul (p2, a2, n, b2, n, ws); \
+ } else { \
mpn_toom8h_mul (p, a, n, b, n, ws); \
+ if (f) mpn_toom8h_mul (p2, a2, n, b2, n, ws); \
+ } \
} while (0)
#define TOOM8H_MUL_REC(p, a, na, b, nb, ws) \
- do { mpn_mul (p, a, na, b, nb); \
- } while (0)
+ do { mpn_mul (p, a, na, b, nb); } while (0)
/* Toom-8.5 , compute the product {pp,an+bn} <- {ap,an} * {bp,bn}
With: an >= bn >= 86, an*5 < bn * 11.
/* Can not handle too small operands */
ASSERT (bn >= 86);
/* Can not handle too much unbalancement */
- ASSERT (an*4 <= bn*13);
- ASSERT (GMP_NUMB_BITS > 12*3 || an*4 <= bn*12);
- ASSERT (GMP_NUMB_BITS > 11*3 || an*5 <= bn*11);
- ASSERT (GMP_NUMB_BITS > 10*3 || an*6 <= bn*10);
- ASSERT (GMP_NUMB_BITS > 9*3 || an*7 <= bn* 9);
+ ASSERT (an <= bn*4);
+ ASSERT (GMP_NUMB_BITS > 11*3 || an*4 <= bn*11);
+ ASSERT (GMP_NUMB_BITS > 10*3 || an*1 <= bn* 2);
+ ASSERT (GMP_NUMB_BITS > 9*3 || an*2 <= bn* 3);
/* Limit num/den is a rational number between
(16/15)^(log(6)/log(2*6-1)) and (16/15)^(log(8)/log(2*8-1)) */
half = 0;
n = 1 + ((an - 1)>>3);
p = q = 7;
- s = an - p * n;
- t = bn - q * n;
+ s = an - 7 * n;
+ t = bn - 7 * n;
}
else
{
else if (GMP_NUMB_BITS <= 11*3 ||
an * 4 < 9 * bn)
{ p =11; q = 5; }
- else if (an *(LIMIT_numerator/3) < LIMIT_denominat * bn ) /* is 4*... <12*... */
+ else if (an *(LIMIT_numerator/3) < LIMIT_denominat * bn) /* is 4*... <12*... */
{ p =12; q = 5; }
else if (GMP_NUMB_BITS <= 12*3 ||
an * 9 < 28 * bn ) /* is 4*... <12*... */
t = bn - q * n;
if(half) { /* Recover from badly chosen splitting */
- if (s<1) {p--; s+=n; half=0;}
- else if (t<1) {q--; t+=n; half=0;}
+ if (UNLIKELY (s<1)) {p--; s+=n; half=0;}
+ else if (UNLIKELY (t<1)) {q--; t+=n; half=0;}
}
}
#undef LIMIT_numerator
/* $\pm1/8$ */
sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 3, pp) ^
mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 3, pp);
- TOOM8H_MUL_N_REC(pp, v0, v1, n + 1, wse); /* A(-1/8)*B(-1/8)*8^. */
- TOOM8H_MUL_N_REC(r7, v2, v3, n + 1, wse); /* A(+1/8)*B(+1/8)*8^. */
+ /* A(-1/8)*B(-1/8)*8^. */ /* A(+1/8)*B(+1/8)*8^. */
+ TOOM8H_MUL_N_REC(pp, v0, v1, 2, r7, v2, v3, n + 1, wse);
mpn_toom_couple_handling (r7, 2 * n + 1 + BIT_CORRECTION, pp, sign, n, 3*(1+half), 3*(half));
/* $\pm1/4$ */
sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 2, pp) ^
mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 2, pp);
- TOOM8H_MUL_N_REC(pp, v0, v1, n + 1, wse); /* A(-1/4)*B(-1/4)*4^. */
- TOOM8H_MUL_N_REC(r5, v2, v3, n + 1, wse); /* A(+1/4)*B(+1/4)*4^. */
+ /* A(-1/4)*B(-1/4)*4^. */ /* A(+1/4)*B(+1/4)*4^. */
+ TOOM8H_MUL_N_REC(pp, v0, v1, 2, r5, v2, v3, n + 1, wse);
mpn_toom_couple_handling (r5, 2 * n + 1, pp, sign, n, 2*(1+half), 2*(half));
/* $\pm2$ */
sign = mpn_toom_eval_pm2 (v2, v0, p, ap, n, s, pp) ^
mpn_toom_eval_pm2 (v3, v1, q, bp, n, t, pp);
- TOOM8H_MUL_N_REC(pp, v0, v1, n + 1, wse); /* A(-2)*B(-2) */
- TOOM8H_MUL_N_REC(r3, v2, v3, n + 1, wse); /* A(+2)*B(+2) */
+ /* A(-2)*B(-2) */ /* A(+2)*B(+2) */
+ TOOM8H_MUL_N_REC(pp, v0, v1, 2, r3, v2, v3, n + 1, wse);
mpn_toom_couple_handling (r3, 2 * n + 1, pp, sign, n, 1, 2);
/* $\pm8$ */
sign = mpn_toom_eval_pm2exp (v2, v0, p, ap, n, s, 3, pp) ^
mpn_toom_eval_pm2exp (v3, v1, q, bp, n, t, 3, pp);
- TOOM8H_MUL_N_REC(pp, v0, v1, n + 1, wse); /* A(-8)*B(-8) */
- TOOM8H_MUL_N_REC(r1, v2, v3, n + 1, wse); /* A(+8)*B(+8) */
+ /* A(-8)*B(-8) */ /* A(+8)*B(+8) */
+ TOOM8H_MUL_N_REC(pp, v0, v1, 2, r1, v2, v3, n + 1, wse);
mpn_toom_couple_handling (r1, 2 * n + 1 + BIT_CORRECTION, pp, sign, n, 3, 6);
/* $\pm1/2$ */
sign = mpn_toom_eval_pm2rexp (v2, v0, p, ap, n, s, 1, pp) ^
mpn_toom_eval_pm2rexp (v3, v1, q, bp, n, t, 1, pp);
- TOOM8H_MUL_N_REC(pp, v0, v1, n + 1, wse); /* A(-1/2)*B(-1/2)*2^. */
- TOOM8H_MUL_N_REC(r6, v2, v3, n + 1, wse); /* A(+1/2)*B(+1/2)*2^. */
+ /* A(-1/2)*B(-1/2)*2^. */ /* A(+1/2)*B(+1/2)*2^. */
+ TOOM8H_MUL_N_REC(pp, v0, v1, 2, r6, v2, v3, n + 1, wse);
mpn_toom_couple_handling (r6, 2 * n + 1, pp, sign, n, 1+half, half);
/* $\pm1$ */
sign = mpn_toom_eval_pm1 (v2, v0, p, ap, n, s, pp);
- if (q == 3)
+ if (GMP_NUMB_BITS > 12*3 && UNLIKELY (q == 3))
sign ^= mpn_toom_eval_dgr3_pm1 (v3, v1, bp, n, t, pp);
else
sign ^= mpn_toom_eval_pm1 (v3, v1, q, bp, n, t, pp);
- TOOM8H_MUL_N_REC(pp, v0, v1, n + 1, wse); /* A(-1)*B(-1) */
- TOOM8H_MUL_N_REC(r4, v2, v3, n + 1, wse); /* A(1)*B(1) */
+ /* A(-1)*B(-1) */ /* A(1)*B(1) */
+ TOOM8H_MUL_N_REC(pp, v0, v1, 2, r4, v2, v3, n + 1, wse);
mpn_toom_couple_handling (r4, 2 * n + 1, pp, sign, n, 0, 0);
/* $\pm4$ */
sign = mpn_toom_eval_pm2exp (v2, v0, p, ap, n, s, 2, pp) ^
mpn_toom_eval_pm2exp (v3, v1, q, bp, n, t, 2, pp);
- TOOM8H_MUL_N_REC(pp, v0, v1, n + 1, wse); /* A(-4)*B(-4) */
- TOOM8H_MUL_N_REC(r2, v2, v3, n + 1, wse); /* A(+4)*B(+4) */
+ /* A(-4)*B(-4) */ /* A(+4)*B(+4) */
+ TOOM8H_MUL_N_REC(pp, v0, v1, 2, r2, v2, v3, n + 1, wse);
mpn_toom_couple_handling (r2, 2 * n + 1, pp, sign, n, 2, 4);
#undef v0
#undef wse
/* A(0)*B(0) */
- TOOM8H_MUL_N_REC(pp, ap, bp, n, wsi);
+ TOOM8H_MUL_N_REC(pp, ap, bp, 0, pp, ap, bp, n, wsi);
/* Infinity */
- if( half != 0) {
- if(s>t) {
+ if (UNLIKELY (half != 0)) {
+ if (s > t) {
TOOM8H_MUL_REC(r0, ap + p * n, s, bp + q * n, t, wsi);
} else {
TOOM8H_MUL_REC(r0, bp + q * n, t, ap + p * n, s, wsi);
} while (0)
#else
/* The following is not a general substitute for addlsh2.
- It is correct if d == b, but it is not if d == a. */
+ It is correct if d == b, but it is not if d == a. */
#define DO_addlsh2(d, a, b, n, cy) \
do { \
(cy) <<= 2; \
SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-Copyright 2009, 2010 Free Software Foundation, Inc.
+Copyright 2009, 2010, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
mpn_sub_n (r4, r4, r5, n3p1); /* can be negative */
DO_mpn_sublsh_n (r4, r5, n3p1, 8, wsi); /* can be negative */
#endif
- /* A division by 2835x4 followsi. Warning: the operand can be negative! */
+ /* A division by 2835x4 follows. Warning: the operand can be negative! */
mpn_divexact_by2835x4(r4, r4, n3p1);
if ((r4[n3] & (GMP_NUMB_MAX << (GMP_NUMB_BITS-3))) != 0)
r4[n3] |= (GMP_NUMB_MAX << (GMP_NUMB_BITS-2));
mpn_divexact_by255x188513325(r7, r7, n3p1);
mpn_submul_1 (r5, r7, n3p1, 12567555); /* can be negative */
- /* A division by 2835x64 followsi. Warning: the operand can be negative! */
+ /* A division by 2835x64 follows. Warning: the operand can be negative! */
mpn_divexact_by2835x64(r5, r5, n3p1);
if ((r5[n3] & (GMP_NUMB_MAX << (GMP_NUMB_BITS-7))) != 0)
r5[n3] |= (GMP_NUMB_MAX << (GMP_NUMB_BITS-6));
DO_mpn_addlsh_n (r6, r5, n3p1, 8, wsi); /* can give a carry */
DO_mpn_sublsh_n (r6, r5, n3p1, 4, wsi); /* can be negative */
#endif
- /* A division by 255x4 followsi. Warning: the operand can be negative! */
+ /* A division by 255x4 follows. Warning: the operand can be negative! */
mpn_divexact_by255x4(r6, r6, n3p1);
if ((r6[n3] & (GMP_NUMB_MAX << (GMP_NUMB_BITS-3))) != 0)
r6[n3] |= (GMP_NUMB_MAX << (GMP_NUMB_BITS-2));
result is v2 >= 0 */
saved = vinf[0]; /* Remember v1's highest byte (will be overwritten). */
vinf[0] = vinf0; /* Set the right value for vinf0 */
-#ifdef HAVE_NATIVE_mpn_sublsh1_n
- cy = mpn_sublsh1_n (v2, v2, vinf, twor);
+#ifdef HAVE_NATIVE_mpn_sublsh1_n_ip1
+ cy = mpn_sublsh1_n_ip1 (v2, vinf, twor);
#else
/* Overwrite unused vm1 */
cy = mpn_lshift (vm1, vinf, twor, 1);
SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-Copyright 2009, 2010 Free Software Foundation, Inc.
+Copyright 2009, 2010, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
MPN_INCR_U (pp + 3 * n + 1, n, cy);
/* W2 -= W0<<2 */
-#if HAVE_NATIVE_mpn_sublsh_n || HAVE_NATIVE_mpn_sublsh2_n
-#if HAVE_NATIVE_mpn_sublsh2_n
- cy = mpn_sublsh2_n(w2, w2, w0, w0n);
+#if HAVE_NATIVE_mpn_sublsh_n || HAVE_NATIVE_mpn_sublsh2_n_ip1
+#if HAVE_NATIVE_mpn_sublsh2_n_ip1
+ cy = mpn_sublsh2_n_ip1 (w2, w0, w0n);
#else
- cy = mpn_sublsh_n(w2, w2, w0, w0n, 2);
+ cy = mpn_sublsh_n (w2, w2, w0, w0n, 2);
#endif
#else
/* {W4,2*n+1} is now free and can be overwritten. */
embankment = w0[w0n - 1] - 1;
w0[w0n - 1] = 1;
if (LIKELY (w0n > n)) {
- if ( LIKELY(cy4 > cy6) )
+ if (cy4 > cy6)
MPN_INCR_U (pp + 4 * n, w0n + n, cy4 - cy6);
else
MPN_DECR_U (pp + 4 * n, w0n + n, cy6 - cy4);
SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-Copyright 2009 Free Software Foundation, Inc.
+Copyright 2009, 2011, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
#endif
#endif
+#if HAVE_NATIVE_mpn_sublsh2_n_ip1
+#define DO_mpn_sublsh2_n(dst,src,n,ws) mpn_sublsh2_n_ip1(dst,src,n)
+#else
+#define DO_mpn_sublsh2_n(dst,src,n,ws) DO_mpn_sublsh_n(dst,src,n,2,ws)
+#endif
+
#if HAVE_NATIVE_mpn_sublsh_n
-#define DO_mpn_sublsh_n(dst,src,n,s,ws) mpn_sublsh_n (dst,src,n,s)
+#define DO_mpn_sublsh_n(dst,src,n,s,ws) mpn_sublsh_n (dst,dst,src,n,s)
#else
static mp_limb_t
DO_mpn_sublsh_n (mp_ptr dst, mp_srcptr src, mp_size_t n, unsigned int s, mp_ptr ws)
{
-#if USE_MUL_1
+#if USE_MUL_1 && 0
return mpn_submul_1(dst,src,n,CNST_LIMB(1) <<(s));
#else
mp_limb_t __cy;
__cy = mpn_lshift (ws,src,n,s);
- return __cy + mpn_sub_n (dst,dst,ws,n);
+ return __cy + mpn_sub_n (dst,dst,ws,n);
#endif
}
#endif
ASSERT_NOCARRY(mpn_divexact_by3 (r5, r5, 3 * n + 1));
- ASSERT_NOCARRY(DO_mpn_sublsh_n (r5, r3, 3 * n + 1, 2, ws));
+ ASSERT_NOCARRY(DO_mpn_sublsh2_n (r5, r3, 3 * n + 1, ws));
/* last interpolation steps... */
/* ... are mixed with recomposition */
cy = mpn_add_1 (pp + 6*n, r3 + n, n, pp[6*n]);
MPN_INCR_U (r3 + 2*n, n + 1, cy);
- cy = r3[3*n] + mpn_add_n (pp + 7*n, pp + 7*n, r3 + 2*n, n);
+ cy = mpn_add_n (pp + 7*n, pp + 7*n, r3 + 2*n, n);
if (LIKELY(spt != n))
- MPN_INCR_U (pp + 8*n, spt - n, cy);
+ MPN_INCR_U (pp + 8*n, spt - n, cy + r3[3*n]);
else
- ASSERT (cy == 0);
+ /* spt == n: no limbs remain above pp + 8*n to absorb a carry, so both
+ the top limb of r3 and cy must be zero. Parenthesised because ==
+ binds tighter than |. */
+ ASSERT ((r3[3*n] | cy) == 0);
}
SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-Copyright 2009 Free Software Foundation, Inc.
+Copyright 2009, 2010, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
License for more details.
You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
/*
- Fast, division-free trial division for GMP.
-
- This function will find the first (smallest) factor represented in
+ This function finds the first (smallest) factor represented in
trialdivtab.h. It does not stop the factoring effort just because it has
reached some sensible limit, such as the square root of the input number.
The caller can limit the factoring effort by passing NPRIMES. The function
- well then divide to *at least* that limit. A position which only
- mpn_trialdiv can make sense of is returned in the WHERE parameter. It can
- be used for restarting the factoring effort; the first call should pass 0
- here.
+ will then divide until that limit, or perhaps a few primes more. A position
+ which only mpn_trialdiv can make sense of is returned in the WHERE
+ parameter. It can be used for restarting the factoring effort; the first
+ call should pass 0 here.
+
+ Input: 1. A non-negative number T = {tp,tn}
+ 2. NPRIMES as described above,
+ 3. *WHERE as described above.
+ Output: 1. *WHERE updated as described above.
+ 2. Return value is non-zero if we found a factor, else zero
+ To get the actual prime factor, compute the mod B inverse
+ of the return value.
*/
#include "gmp.h"
#define PTAB_LINES (sizeof (gmp_primes_ptab) / sizeof (gmp_primes_ptab[0]))
-/* Attempt to find a factor of T using trial division.
- Input: A non-negative number T.
- Output: non-zero if we found a factor, zero otherwise. To get the actual
- prime factor, compute the mod B inverse of the return value. */
/* FIXME: We could optimize out one of the outer loop conditions if we
had a final ptab entry with a huge nd field. */
mp_limb_t
ppp = gmp_primes_ptab[i].ppp;
cps = gmp_primes_ptab[i].cps;
-#if __GNU_MP_VERSION == 4 && __GNU_MP_VERSION_MINOR < 4
- if (tn < 4)
- r = mpn_mod_1 (tp, tn, ppp); /* FIXME */
- else
-#endif
- r = mpn_mod_1s_4p (tp, tn, ppp << cps[1], cps);
+ r = mpn_mod_1s_4p (tp, tn, ppp << cps[1], cps);
idx = gmp_primes_ptab[i].idx;
np = gmp_primes_ptab[i].np;
--- /dev/null
+dnl IA-64 mpn_add_n_sub_n -- mpn parallel addition and subtraction.
+
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
+dnl Copyright 2010 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C Itanium: ?
+C Itanium 2: 2.25
+
+C INPUT PARAMETERS
+define(`sp', `r32')
+define(`dp', `r33')
+define(`up', `r34')
+define(`vp', `r35')
+define(`n', `r36')
+
+C Some useful aliases for registers we use
+define(`u0',`r16') define(`u1',`r17') define(`u2',`r18') define(`u3',`r19')
+define(`v0',`r20') define(`v1',`r21') define(`v2',`r22') define(`v3',`r23')
+define(`s0',`r24') define(`s1',`r25') define(`s2',`r26') define(`s3',`r27')
+define(`d0',`r28') define(`d1',`r29') define(`d2',`r30') define(`d3',`r31')
+define(`up0',`up')
+define(`up1',`r14')
+define(`vp0',`vp')
+define(`vp1',`r15')
+
+define(`cmpltu', `cmp.ltu')
+define(`cmpeqor', `cmp.eq.or')
+
+ASM_START()
+PROLOGUE(mpn_add_n_sub_n)
+ .prologue
+ .save ar.lc, r2
+ .body
+ifdef(`HAVE_ABI_32',`
+ addp4 sp = 0, sp C M I
+ addp4 dp = 0, dp C M I
+ addp4 up = 0, up C M I
+ addp4 vp = 0, vp C M I
+ zxt4 n = n C I
+ ;;
+')
+
+ and r9 = 3, n C M I
+ mov.i r2 = ar.lc C I0
+ add up1 = 8, up0 C M I
+ add vp1 = 8, vp0 C M I
+ add r8 = -2, n C M I
+ add r10 = 256, up C M I
+ ;;
+ shr.u r8 = r8, 2 C I0
+ cmp.eq p10, p0 = 0, r9 C M I
+ cmp.eq p11, p0 = 2, r9 C M I
+ cmp.eq p12, p0 = 3, r9 C M I
+ add r11 = 256, vp C M I
+ ;;
+ mov.i ar.lc = r8 C I0
+ (p10) br L(b0) C B
+ (p11) br L(b2) C B
+ (p12) br L(b3) C B
+
+L(b1): ld8 u3 = [up0], 8 C M01
+ add up1 = 8, up1 C M I
+ cmpltu p14, p15 = 4, n C M I
+ ld8 v3 = [vp0], 8 C M01
+ add vp1 = 8, vp1 C M I
+ ;;
+ add s3 = u3, v3 C M I
+ sub d3 = u3, v3 C M I
+ mov r8 = 0 C M I
+ ;;
+ cmpltu p9, p0 = s3, v3 C carry from add3 M I
+ cmpltu p13, p0 = u3, v3 C borrow from sub3 M I
+ (p15) br L(cj1) C B
+ st8 [sp] = s3, 8 C M23
+ st8 [dp] = d3, 8 C M23
+ br L(c0) C B
+
+L(b0): cmp.ne p9, p0 = r0, r0 C M I
+ cmp.ne p13, p0 = r0, r0 C M I
+L(c0): ld8 u0 = [up0], 16 C M01
+ ld8 u1 = [up1], 16 C M01
+ ;;
+ ld8 v0 = [vp0], 16 C M01
+ ld8 v1 = [vp1], 16 C M01
+ ;;
+ ld8 u2 = [up0], 16 C M01
+ ld8 u3 = [up1], 16 C M01
+ ;;
+ ld8 v2 = [vp0], 16 C M01
+ ld8 v3 = [vp1], 16 C M01
+ ;;
+ add s0 = u0, v0 C M I
+ add s1 = u1, v1 C M I
+ sub d0 = u0, v0 C M I
+ sub d1 = u1, v1 C M I
+ ;;
+ cmpltu p6, p0 = s0, v0 C carry from add0 M I
+ cmpltu p7, p0 = s1, v1 C carry from add1 M I
+ cmpltu p10, p0 = u0, v0 C borrow from sub0 M I
+ cmpltu p11, p0 = u1, v1 C borrow from sub1 M I
+ ;;
+ nop 0 C
+ br.cloop.dptk L(top) C B
+ br L(end) C B
+
+L(b3): ld8 u1 = [up0], 8 C M01
+ add up1 = 8, up1 C M I
+ ld8 v1 = [vp0], 8 C M01
+ ;;
+ add vp1 = 8, vp1 C M I
+ add s1 = u1, v1 C M I
+ sub d1 = u1, v1 C M I
+ ;;
+ cmpltu p7, p0 = s1, v1 C carry from add1 M I
+ cmpltu p11, p0 = u1, v1 C borrow from sub1 M I
+ ;;
+ st8 [sp] = s1, 8 C M23
+ st8 [dp] = d1, 8 C M23
+ br L(c2) C B
+
+ ALIGN(32)
+C Entry for n mod 4 == 2: no limb has been processed yet, so the incoming
+C carry (p7) and borrow (p11) predicates are cleared (r0 != r0 is false).
+L(b2): cmp.ne p7, p0 = r0, r0 C M I
+ cmp.ne p11, p0 = r0, r0 C M I
+ nop 0
+C L(c2) is also reached from L(b3) after it has handled one limb.
+L(c2): ld8 u2 = [up0], 16 C M01
+ ld8 u3 = [up1], 16 C M01
+ cmpltu p14, p0 = 4, n C p14 = (4 < n) M I
+ ;;
+ ld8 v2 = [vp0], 16 C M01
+ ld8 v3 = [vp1], 16 C M01
+ (p14) br L(gt4) C B
+ ;;
+C At most 4 limbs in total: compute the last pair and go to the wind-down.
+ add s2 = u2, v2 C M I
+ add s3 = u3, v3 C M I
+ sub d2 = u2, v2 C M I
+ sub d3 = u3, v3 C M I
+ ;;
+ cmpltu p8, p0 = s2, v2 C carry from add2 M I
+ cmpltu p9, p0 = s3, v3 C carry from add3 M I
+ cmpltu p12, p0 = u2, v2 C borrow from sub2 M I
+ cmpltu p13, p0 = u3, v3 C borrow from sub3 M I
+ br L(cj2) C B
+ ;;
+C More than 4 limbs: preload the next u0/u1/v0/v1 pair, then enter the
+C main loop at L(mid).
+L(gt4): ld8 u0 = [up0], 16 C M01
+ ld8 u1 = [up1], 16 C M01
+ ;;
+ ld8 v0 = [vp0], 16 C M01
+ ld8 v1 = [vp1], 16 C M01
+ ;;
+ add s2 = u2, v2 C M I
+ add s3 = u3, v3 C M I
+ sub d2 = u2, v2 C M I
+ sub d3 = u3, v3 C M I
+ ;;
+ cmpltu p8, p0 = s2, v2 C carry from add2 M I
+ cmpltu p9, p0 = s3, v3 C carry from add3 M I
+ cmpltu p12, p0 = u2, v2 C borrow from sub2 M I
+ cmpltu p13, p0 = u3, v3 C borrow from sub3 M I
+ br.cloop.dptk L(mid) C B
+
+ ALIGN(32)
+L(top):
+ ld8 u0 = [up0], 16 C M01
+ ld8 u1 = [up1], 16 C M01
+ (p9) cmpeqor p6, p0 = -1, s0 C M I
+ (p9) add s0 = 1, s0 C M I
+ (p13) cmpeqor p10, p0 = 0, d0 C M I
+ (p13) add d0 = -1, d0 C M I
+ ;;
+ ld8 v0 = [vp0], 16 C M01
+ ld8 v1 = [vp1], 16 C M01
+ (p6) cmpeqor p7, p0 = -1, s1 C M I
+ (p6) add s1 = 1, s1 C M I
+ (p10) cmpeqor p11, p0 = 0, d1 C M I
+ (p10) add d1 = -1, d1 C M I
+ ;;
+ st8 [sp] = s0, 8 C M23
+ st8 [dp] = d0, 8 C M23
+ add s2 = u2, v2 C M I
+ add s3 = u3, v3 C M I
+ sub d2 = u2, v2 C M I
+ sub d3 = u3, v3 C M I
+ ;;
+ st8 [sp] = s1, 8 C M23
+ st8 [dp] = d1, 8 C M23
+ cmpltu p8, p0 = s2, v2 C carry from add2 M I
+ cmpltu p9, p0 = s3, v3 C carry from add3 M I
+ cmpltu p12, p0 = u2, v2 C borrow from sub2 M I
+ cmpltu p13, p0 = u3, v3 C borrow from sub3 M I
+ ;;
+L(mid):
+ ld8 u2 = [up0], 16 C M01
+ ld8 u3 = [up1], 16 C M01
+ (p7) cmpeqor p8, p0 = -1, s2 C M I
+ (p7) add s2 = 1, s2 C M I
+ (p11) cmpeqor p12, p0 = 0, d2 C M I
+ (p11) add d2 = -1, d2 C M I
+ ;;
+ ld8 v2 = [vp0], 16 C M01
+ ld8 v3 = [vp1], 16 C M01
+ (p8) cmpeqor p9, p0 = -1, s3 C M I
+ (p8) add s3 = 1, s3 C M I
+ (p12) cmpeqor p13, p0 = 0, d3 C M I
+ (p12) add d3 = -1, d3 C M I
+ ;;
+ st8 [sp] = s2, 8 C M23
+ st8 [dp] = d2, 8 C M23
+ add s0 = u0, v0 C M I
+ add s1 = u1, v1 C M I
+ sub d0 = u0, v0 C M I
+ sub d1 = u1, v1 C M I
+ ;;
+ st8 [sp] = s3, 8 C M23
+ st8 [dp] = d3, 8 C M23
+ cmpltu p6, p0 = s0, v0 C carry from add0 M I
+ cmpltu p7, p0 = s1, v1 C carry from add1 M I
+ cmpltu p10, p0 = u0, v0 C borrow from sub0 M I
+ cmpltu p11, p0 = u1, v1 C borrow from sub1 M I
+ ;;
+ lfetch [r10], 32 C M?
+ lfetch [r11], 32 C M?
+ br.cloop.dptk L(top) C B
+ ;;
+
+L(end):
+ nop 0
+ nop 0
+ (p9) cmpeqor p6, p0 = -1, s0 C M I
+ (p9) add s0 = 1, s0 C M I
+ (p13) cmpeqor p10, p0 = 0, d0 C M I
+ (p13) add d0 = -1, d0 C M I
+ ;;
+ nop 0
+ nop 0
+ (p6) cmpeqor p7, p0 = -1, s1 C M I
+ (p6) add s1 = 1, s1 C M I
+ (p10) cmpeqor p11, p0 = 0, d1 C M I
+ (p10) add d1 = -1, d1 C M I
+ ;;
+ st8 [sp] = s0, 8 C M23
+ st8 [dp] = d0, 8 C M23
+ add s2 = u2, v2 C M I
+ add s3 = u3, v3 C M I
+ sub d2 = u2, v2 C M I
+ sub d3 = u3, v3 C M I
+ ;;
+ st8 [sp] = s1, 8 C M23
+ st8 [dp] = d1, 8 C M23
+ cmpltu p8, p0 = s2, v2 C carry from add2 M I
+ cmpltu p9, p0 = s3, v3 C carry from add3 M I
+ cmpltu p12, p0 = u2, v2 C borrow from sub2 M I
+ cmpltu p13, p0 = u3, v3 C borrow from sub3 M I
+ ;;
+L(cj2):
+ (p7) cmpeqor p8, p0 = -1, s2 C M I
+ (p7) add s2 = 1, s2 C M I
+ (p11) cmpeqor p12, p0 = 0, d2 C M I
+ (p11) add d2 = -1, d2 C M I
+ mov r8 = 0 C M I
+ nop 0
+ ;;
+ st8 [sp] = s2, 8 C M23
+ st8 [dp] = d2, 8 C M23
+ (p8) cmpeqor p9, p0 = -1, s3 C M I
+ (p8) add s3 = 1, s3 C M I
+ (p12) cmpeqor p13, p0 = 0, d3 C M I
+ (p12) add d3 = -1, d3 C M I
+ ;;
+C Final wind-down. r8 was zeroed on every path reaching here; it becomes
+C 2*carry (p9, from the last add) + borrow (p13, from the last sub).
+C NOTE(review): r8 is presumably the function's return value register --
+C confirm against the IA-64 calling convention.
+L(cj1):
+ (p9) mov r8 = 2 C M I
+ ;;
+ mov.i ar.lc = r2 C restore loop counter saved in r2 I0
+ (p13) add r8 = 1, r8 C M I
+ st8 [sp] = s3 C last sum limb M23
+ st8 [dp] = d3 C last difference limb M23
+ br.ret.sptk.many b0 C B
+EPILOGUE()
+ASM_END()
dnl IA-64 mpn_addmul_1 -- Multiply a limb vector with a limb and add the
dnl result to a second limb vector.
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
dnl Copyright 2000, 2001, 2002, 2003, 2004, 2005, 2007 Free Software
dnl Foundation, Inc.
dnl IA-64 mpn_addmul_2 -- Multiply a n-limb number with a 2-limb number and
dnl add the result to a (n+1)-limb number.
-dnl Copyright 2004, 2005 Free Software Foundation, Inc.
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
+dnl Copyright 2004, 2005, 2011 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
C Itanium: 3.65
C Itanium 2: 1.625
-C Note that this is very similar to mul_2.asm. If you change this file,
-C please change that file too.
-
C TODO
C * Clean up variable names, and try to decrease the number of distinct
C registers used.
-C * Cleanup feed-in code to not require zeroing several registers.
-C * Make sure we don't depend on uninitialized predicate registers.
-C * We currently cross-jump very aggressively, at the expense of a few cycles
-C per operation. Consider changing that.
+C * Clean up feed-in code to not require zeroing several registers.
+C * Make sure we don't depend on uninitialised predicate registers.
C * Could perhaps save a few cycles by using 1 c/l carry propagation in
C wind-down code.
C * Ultimately rewrite. The problem with this code is that it first uses a
define(`uy',`f51')
ASM_START()
+PROLOGUE(mpn_addmul_2s)
+ .prologue
+ .save ar.lc, r2
+ .body
+
+ifdef(`HAVE_ABI_32',`
+.mmi; addp4 rp = 0, rp C M I
+ addp4 up = 0, up C M I
+ addp4 vp = 0, vp C M I
+.mmi; nop 1
+ nop 1
+ zxt4 n = n C I
+ ;;')
+
+.mmi; ldf8 ux = [up], 8 C M
+ ldf8 v0 = [vp], 8 C M
+ mov r2 = ar.lc C I0
+.mmi; ldf8 rx = [rp], 8 C M
+ and r14 = 3, n C M I
+ add n = -2, n C M I
+ ;;
+.mmi; ldf8 uy = [up], 8 C M
+ ldf8 v1 = [vp] C M
+ shr.u n = n, 2 C I0
+.mmi; ldf8 ry = [rp], -8 C M
+ cmp.eq p14, p0 = 1, r14 C M I
+ cmp.eq p11, p0 = 2, r14 C M I
+ ;;
+.mmi; add srp = 16, rp C M I
+ cmp.eq p15, p0 = 3, r14 C M I
+ mov ar.lc = n C I0
+.bbb; (p14) br.dptk L(x01) C B
+ (p11) br.dptk L(x10) C B
+ (p15) br.dptk L(x11) C B
+ ;;
+
+L(x00): cmp.ne p6, p0 = r0, r0 C suppress initial xma pair
+ mov fp2a_3 = f0
+ br L(b00)
+L(x01): cmp.ne p14, p0 = r0, r0 C suppress initial xma pair
+ mov fp2a_2 = f0
+ br L(b01)
+L(x10): cmp.ne p11, p0 = r0, r0 C suppress initial xma pair
+ mov fp2a_1 = f0
+ br L(b10)
+L(x11): cmp.ne p15, p0 = r0, r0 C suppress initial xma pair
+ mov fp2a_0 = f0
+ br L(b11)
+
+EPILOGUE()
+
PROLOGUE(mpn_addmul_2)
.prologue
.save ar.lc, r2
.body
-ifdef(`HAVE_ABI_32',
-` addp4 rp = 0, rp C M I
- addp4 up = 0, up C M I
- addp4 vp = 0, vp C M I
- zxt4 n = n C I
+ifdef(`HAVE_ABI_32',`
+.mmi; addp4 rp = 0, rp C M I
+ addp4 up = 0, up C M I
+ addp4 vp = 0, vp C M I
+.mmi; nop 1
+ nop 1
+ zxt4 n = n C I
;;')
-{.mmi C 00
- ldf8 ux = [up], 8 C M
- ldf8 v0 = [vp], 8 C M
- mov.i r2 = ar.lc C I0
-}{.mmi
- ldf8 rx = [rp], 8 C M
- and r14 = 3, n C M I
- add n = -2, n C M I
- ;;
-}{.mmi C 01
- ldf8 uy = [up], 8 C M
- ldf8 v1 = [vp] C M
- shr.u n = n, 2 C I0
-}{.mmi
- ldf8 ry = [rp], -8 C M
- cmp.eq p10, p0 = 1, r14 C M I
- cmp.eq p11, p0 = 2, r14 C M I
- ;;
-}{.mmi C 02
- add srp = 16, rp C M I
- cmp.eq p12, p0 = 3, r14 C M I
- mov.i ar.lc = n C I0
-}{.bbb
- (p10) br.dptk .Lb01 C B
- (p11) br.dptk .Lb10 C B
- (p12) br.dptk .Lb11 C B
- ;;
-}
+.mmi; ldf8 ux = [up], 8 C M
+ ldf8 v0 = [vp], 8 C M
+ mov r2 = ar.lc C I0
+.mmi; ldf8 rx = [rp], 8 C M
+ and r14 = 3, n C M I
+ add n = -2, n C M I
+ ;;
+.mmi; ldf8 uy = [up], 8 C M
+ ldf8 v1 = [vp] C M
+ shr.u n = n, 2 C I0
+.mmi; ldf8 ry = [rp], -8 C M
+ cmp.eq p14, p0 = 1, r14 C M I
+ cmp.eq p11, p0 = 2, r14 C M I
+ ;;
+.mmi; add srp = 16, rp C M I
+ cmp.eq p15, p6 = 3, r14 C M I
+ mov ar.lc = n C I0
+.bbb; (p14) br.dptk L(b01) C B
+ (p11) br.dptk L(b10) C B
+ (p15) br.dptk L(b11) C B
+ ;;
ALIGN(32)
-.Lb00: ldf8 r_1 = [srp], 8
- ldf8 u_1 = [up], 8
- mov acc1_2 = 0
- mov pr1_2 = 0
- mov pr0_3 = 0
- cmp.ne p8, p9 = r0, r0
- ;;
- ldf8 r_2 = [srp], 8
- xma.l fp0b_3 = ux, v0, rx
- cmp.ne p12, p13 = r0, r0
- ldf8 u_2 = [up], 8
- xma.hu fp1a_3 = ux, v0, rx
- br.cloop.dptk .grt4
-
- xma.l fp0b_0 = uy, v0, ry
- xma.hu fp1a_0 = uy, v0, ry
- ;;
- getf.sig acc0 = fp0b_3
- xma.l fp1b_3 = ux, v1, fp1a_3
- xma.hu fp2a_3 = ux, v1, fp1a_3
- ;;
- xma.l fp0b_1 = u_1, v0, r_1
- xma.hu fp1a_1 = u_1, v0, r_1
- ;;
- getf.sig pr0_0 = fp0b_0
- xma.l fp1b_0 = uy, v1, fp1a_0
- xma.hu fp2a_0 = uy, v1, fp1a_0
- ;;
- getf.sig pr1_3 = fp1b_3
- getf.sig acc1_3 = fp2a_3
- xma.l fp0b_2 = u_2, v0, r_2
- xma.hu fp1a_2 = u_2, v0, r_2
- br .Lcj4
-
-.grt4: xma.l fp0b_0 = uy, v0, ry
- xma.hu fp1a_0 = uy, v0, ry
- ;;
- ldf8 r_3 = [srp], 8
- getf.sig acc0 = fp0b_3
- xma.l fp1b_3 = ux, v1, fp1a_3
- ldf8 u_3 = [up], 8
- xma.hu fp2a_3 = ux, v1, fp1a_3
- ;;
- xma.l fp0b_1 = u_1, v0, r_1
- xma.hu fp1a_1 = u_1, v0, r_1
- ;;
- ldf8 r_0 = [srp], 8
- getf.sig pr0_0 = fp0b_0
- xma.l fp1b_0 = uy, v1, fp1a_0
- xma.hu fp2a_0 = uy, v1, fp1a_0
- ;;
- ldf8 u_0 = [up], 8
- getf.sig pr1_3 = fp1b_3
- ;;
- getf.sig acc1_3 = fp2a_3
- xma.l fp0b_2 = u_2, v0, r_2
- xma.hu fp1a_2 = u_2, v0, r_2
- br .LL00
+L(b00):
+.mmi; ldf8 r_1 = [srp], 8
+ ldf8 u_1 = [up], 8
+ mov acc1_2 = 0
+.mmi; mov pr1_2 = 0
+ mov pr0_3 = 0
+ cmp.ne p8, p9 = r0, r0
+ ;;
+.mfi; ldf8 r_2 = [srp], 8
+ xma.l fp0b_3 = ux, v0, rx
+ cmp.ne p12, p13 = r0, r0
+.mfb; ldf8 u_2 = [up], 8
+ xma.hu fp1b_3 = ux, v0, rx
+ br.cloop.dptk L(gt4)
+
+ xma.l fp0b_0 = uy, v0, ry
+ xma.hu fp1a_0 = uy, v0, ry
+ ;;
+ getfsig acc0 = fp0b_3
+ (p6) xma.hu fp2a_3 = ux, v1, fp1b_3 C suppressed for addmul_2s
+ (p6) xma.l fp1b_3 = ux, v1, fp1b_3 C suppressed for addmul_2s
+ ;;
+ xma.l fp0b_1 = u_1, v0, r_1
+ xma.hu fp1a_1 = u_1, v0, r_1
+ ;;
+ getfsig pr0_0 = fp0b_0
+ xma.l fp1b_0 = uy, v1, fp1a_0
+ xma.hu fp2a_0 = uy, v1, fp1a_0
+ ;;
+ getfsig pr1_3 = fp1b_3
+ getfsig acc1_3 = fp2a_3
+ xma.l fp0b_2 = u_2, v0, r_2
+ xma.hu fp1a_2 = u_2, v0, r_2
+ br L(cj4)
+
+L(gt4): xma.l fp0b_0 = uy, v0, ry
+ xma.hu fp1a_0 = uy, v0, ry
+ ;;
+ ldf8 r_3 = [srp], 8
+ getfsig acc0 = fp0b_3
+ (p6) xma.hu fp2a_3 = ux, v1, fp1b_3 C suppressed for addmul_2s
+ ldf8 u_3 = [up], 8
+ (p6) xma.l fp1b_3 = ux, v1, fp1b_3 C suppressed for addmul_2s
+ ;;
+ xma.l fp0b_1 = u_1, v0, r_1
+ xma.hu fp1a_1 = u_1, v0, r_1
+ ;;
+ ldf8 r_0 = [srp], 8
+ getfsig pr0_0 = fp0b_0
+ xma.l fp1b_0 = uy, v1, fp1a_0
+ xma.hu fp2a_0 = uy, v1, fp1a_0
+ ;;
+ ldf8 u_0 = [up], 8
+ getfsig pr1_3 = fp1b_3
+ xma.l fp0b_2 = u_2, v0, r_2
+ ;;
+ getfsig acc1_3 = fp2a_3
+ xma.hu fp1a_2 = u_2, v0, r_2
+ br L(00)
ALIGN(32)
-.Lb01: ldf8 r_0 = [srp], 8 C M
- ldf8 u_0 = [up], 8 C M
- mov acc1_1 = 0 C M I
- mov pr1_1 = 0 C M I
- mov pr0_2 = 0 C M I
- cmp.ne p6, p7 = r0, r0 C M I
- ;;
- ldf8 r_1 = [srp], 8 C M
- xma.l fp0b_2 = ux, v0, rx C F
- cmp.ne p10, p11 = r0, r0 C M I
- ldf8 u_1 = [up], 8 C M
- xma.hu fp1a_2 = ux, v0, rx C F
- ;;
- xma.l fp0b_3 = uy, v0, ry C F
- xma.hu fp1a_3 = uy, v0, ry C F
- ;;
- getf.sig acc0 = fp0b_2 C M
- ldf8 r_2 = [srp], 8 C M
- xma.l fp1b_2 = ux, v1,fp1a_2 C F
- xma.hu fp2a_2 = ux, v1,fp1a_2 C F
- ldf8 u_2 = [up], 8 C M
- br.cloop.dptk .grt5
-
- xma.l fp0b_0 = u_0, v0, r_0 C F
- xma.hu fp1a_0 = u_0, v0, r_0 C F
- ;;
- getf.sig pr0_3 = fp0b_3 C M
- xma.l fp1b_3 = uy, v1,fp1a_3 C F
- xma.hu fp2a_3 = uy, v1,fp1a_3 C F
- ;;
- getf.sig pr1_2 = fp1b_2 C M
- getf.sig acc1_2 = fp2a_2 C M
- xma.l fp0b_1 = u_1, v0, r_1 C F
- xma.hu fp1a_1 = u_1, v0, r_1 C F
- br .Lcj5
-
-.grt5: xma.l fp0b_0 = u_0, v0, r_0
- xma.hu fp1a_0 = u_0, v0, r_0
- ;;
- getf.sig pr0_3 = fp0b_3
- ldf8 r_3 = [srp], 8
- xma.l fp1b_3 = uy, v1, fp1a_3
- xma.hu fp2a_3 = uy, v1, fp1a_3
- ;;
- ldf8 u_3 = [up], 8
- getf.sig pr1_2 = fp1b_2
- ;;
- getf.sig acc1_2 = fp2a_2
- xma.l fp0b_1 = u_1, v0, r_1
- xma.hu fp1a_1 = u_1, v0, r_1
- br .LL01
+L(b01):
+.mmi; ldf8 r_0 = [srp], 8 C M
+ ldf8 u_0 = [up], 8 C M
+ mov acc1_1 = 0 C M I
+.mmi; mov pr1_1 = 0 C M I
+ mov pr0_2 = 0 C M I
+ cmp.ne p6, p7 = r0, r0 C M I
+ ;;
+.mfi; ldf8 r_1 = [srp], 8 C M
+ xma.l fp0b_2 = ux, v0, rx C F
+ cmp.ne p10, p11 = r0, r0 C M I
+.mfi; ldf8 u_1 = [up], 8 C M
+ xma.hu fp1b_2 = ux, v0, rx C F
+ nop 1
+ ;;
+ xma.l fp0b_3 = uy, v0, ry C F
+ xma.hu fp1a_3 = uy, v0, ry C F
+ ;;
+.mmf; getfsig acc0 = fp0b_2 C M
+ ldf8 r_2 = [srp], 8 C M
+ (p14) xma.hu fp2a_2 = ux, v1,fp1b_2 C F suppressed for addmul_2s
+.mfb; ldf8 u_2 = [up], 8 C M
+ (p14) xma.l fp1b_2 = ux, v1,fp1b_2 C F suppressed for addmul_2s
+ br.cloop.dptk L(gt5)
+
+ xma.l fp0b_0 = u_0, v0, r_0 C F
+ xma.hu fp1a_0 = u_0, v0, r_0 C F
+ ;;
+ getfsig pr0_3 = fp0b_3 C M
+ xma.l fp1b_3 = uy, v1,fp1a_3 C F
+ xma.hu fp2a_3 = uy, v1,fp1a_3 C F
+ ;;
+ getfsig pr1_2 = fp1b_2 C M
+ getfsig acc1_2 = fp2a_2 C M
+ xma.l fp0b_1 = u_1, v0, r_1 C F
+ xma.hu fp1a_1 = u_1, v0, r_1 C F
+ br L(cj5)
+
+L(gt5): xma.l fp0b_0 = u_0, v0, r_0
+ xma.hu fp1a_0 = u_0, v0, r_0
+ ;;
+ getfsig pr0_3 = fp0b_3
+ ldf8 r_3 = [srp], 8
+ xma.l fp1b_3 = uy, v1, fp1a_3
+ xma.hu fp2a_3 = uy, v1, fp1a_3
+ ;;
+ ldf8 u_3 = [up], 8
+ getfsig pr1_2 = fp1b_2
+ xma.l fp0b_1 = u_1, v0, r_1
+ ;;
+ getfsig acc1_2 = fp2a_2
+ xma.hu fp1a_1 = u_1, v0, r_1
+ br L(01)
ALIGN(32)
-.Lb10: C 03
- br.cloop.dptk .grt2
- C 04
- C 05
- C 06
- xma.l fp0b_1 = ux, v0, rx
- xma.hu fp1a_1 = ux, v0, rx
- ;; C 07
- xma.l fp0b_2 = uy, v0, ry
- xma.hu fp1a_2 = uy, v0, ry
- ;; C 08
- C 09
- C 10
- stf8 [rp] = fp0b_1, 8
- xma.l fp1b_1 = ux, v1, fp1a_1
- xma.hu fp2a_1 = ux, v1, fp1a_1
- ;; C 11
- getf.sig acc0 = fp0b_2
- xma.l fp1b_2 = uy, v1, fp1a_2
- xma.hu fp2a_2 = uy, v1, fp1a_2
- ;; C 12
- C 13
- C 14
- getf.sig pr1_1 = fp1b_1
- C 15
- getf.sig acc1_1 = fp2a_1
- C 16
- getf.sig pr1_2 = fp1b_2
- C 17
- getf.sig r8 = fp2a_2
- ;; C 18
- C 19
- add s0 = pr1_1, acc0
- ;; C 20
- st8 [rp] = s0, 8
- cmp.ltu p8, p9 = s0, pr1_1
- sub r31 = -1, acc1_1
- ;; C 21
- .pred.rel "mutex", p8, p9
- (p8) add acc0 = pr1_2, acc1_1, 1
- (p9) add acc0 = pr1_2, acc1_1
- (p8) cmp.leu p10, p0 = r31, pr1_2
- (p9) cmp.ltu p10, p0 = r31, pr1_2
- ;; C 22
- st8 [rp] = acc0, 8
- mov.i ar.lc = r2
- (p10) add r8 = 1, r8
- br.ret.sptk.many b0
-
-
-.grt2: ldf8 r_3 = [srp], 8
- ldf8 u_3 = [up], 8
- mov acc1_0 = 0
- ;;
- ldf8 r_0 = [srp], 8
- xma.l fp0b_1 = ux, v0, rx
- mov pr1_0 = 0
- ldf8 u_0 = [up], 8
- xma.hu fp1a_1 = ux, v0, rx
- mov pr0_1 = 0
- ;;
- xma.l fp0b_2 = uy, v0, ry
- xma.hu fp1a_2 = uy, v0, ry
- ;;
- getf.sig acc0 = fp0b_1
- ldf8 r_1 = [srp], 8
- xma.l fp1b_1 = ux, v1, fp1a_1
- xma.hu fp2a_1 = ux, v1, fp1a_1
- ;;
- ldf8 u_1 = [up], 8
- xma.l fp0b_3 = u_3, v0, r_3
- xma.hu fp1a_3 = u_3, v0, r_3
- ;;
- getf.sig pr0_2 = fp0b_2
- ldf8 r_2 = [srp], 8
- xma.l fp1b_2 = uy, v1, fp1a_2
- xma.hu fp2a_2 = uy, v1, fp1a_2
- ;;
- ldf8 u_2 = [up], 8
- getf.sig pr1_1 = fp1b_1
- ;;
- getf.sig acc1_1 = fp2a_1
- xma.l fp0b_0 = u_0, v0, r_0
- cmp.ne p8, p9 = r0, r0
- cmp.ne p12, p13 = r0, r0
- xma.hu fp1a_0 = u_0, v0, r_0
- br .LL10
+L(b10): br.cloop.dptk L(gt2)
+ xma.l fp0b_1 = ux, v0, rx
+ xma.hu fp1b_1 = ux, v0, rx
+ ;;
+ xma.l fp0b_2 = uy, v0, ry
+ xma.hu fp1a_2 = uy, v0, ry
+ ;;
+ stf8 [rp] = fp0b_1, 8
+ (p11) xma.hu fp2a_1 = ux, v1, fp1b_1 C suppressed for addmul_2s
+ (p11) xma.l fp1b_1 = ux, v1, fp1b_1 C suppressed for addmul_2s
+ ;;
+ getfsig acc0 = fp0b_2
+ xma.l fp1b_2 = uy, v1, fp1a_2
+ xma.hu fp2a_2 = uy, v1, fp1a_2
+ ;;
+ getfsig pr1_1 = fp1b_1
+ getfsig acc1_1 = fp2a_1
+ mov ar.lc = r2
+ getfsig pr1_2 = fp1b_2
+ getfsig r8 = fp2a_2
+ ;;
+ add s0 = pr1_1, acc0
+ ;;
+ st8 [rp] = s0, 8
+ cmp.ltu p8, p9 = s0, pr1_1
+ sub r31 = -1, acc1_1
+ ;;
+ .pred.rel "mutex", p8, p9
+ (p8) add acc0 = pr1_2, acc1_1, 1
+ (p9) add acc0 = pr1_2, acc1_1
+ (p8) cmp.leu p10, p0 = r31, pr1_2
+ (p9) cmp.ltu p10, p0 = r31, pr1_2
+ ;;
+ st8 [rp] = acc0, 8
+ (p10) add r8 = 1, r8
+ br.ret.sptk.many b0
+
+
+L(gt2):
+.mmi; ldf8 r_3 = [srp], 8
+ ldf8 u_3 = [up], 8
+ mov acc1_0 = 0
+ ;;
+.mfi; ldf8 r_0 = [srp], 8
+ xma.l fp0b_1 = ux, v0, rx
+ mov pr1_0 = 0
+.mfi; ldf8 u_0 = [up], 8
+ xma.hu fp1b_1 = ux, v0, rx
+ mov pr0_1 = 0
+ ;;
+ xma.l fp0b_2 = uy, v0, ry
+ xma.hu fp1a_2 = uy, v0, ry
+ ;;
+ getfsig acc0 = fp0b_1
+ ldf8 r_1 = [srp], 8
+ (p11) xma.hu fp2a_1 = ux, v1, fp1b_1 C suppressed for addmul_2s
+ (p11) xma.l fp1b_1 = ux, v1, fp1b_1 C suppressed for addmul_2s
+ ;;
+ ldf8 u_1 = [up], 8
+ xma.l fp0b_3 = u_3, v0, r_3
+ xma.hu fp1a_3 = u_3, v0, r_3
+ ;;
+ getfsig pr0_2 = fp0b_2
+ ldf8 r_2 = [srp], 8
+ xma.l fp1b_2 = uy, v1, fp1a_2
+ xma.hu fp2a_2 = uy, v1, fp1a_2
+ ;;
+ ldf8 u_2 = [up], 8
+ getfsig pr1_1 = fp1b_1
+ ;;
+.mfi; getfsig acc1_1 = fp2a_1
+ xma.l fp0b_0 = u_0, v0, r_0
+ cmp.ne p8, p9 = r0, r0
+.mfb; cmp.ne p12, p13 = r0, r0
+ xma.hu fp1a_0 = u_0, v0, r_0
+ br.cloop.sptk.clr L(top)
+ br.many L(end)
ALIGN(32)
-.Lb11: mov acc1_3 = 0
- mov pr1_3 = 0
- mov pr0_0 = 0
- cmp.ne p6, p7 = r0, r0
+L(b11): ldf8 r_2 = [srp], 8
+ mov pr1_3 = 0
+ mov pr0_0 = 0
;;
- ldf8 r_2 = [srp], 8
- ldf8 u_2 = [up], 8
- br.cloop.dptk .grt3
+ ldf8 u_2 = [up], 8
+ mov acc1_3 = 0
+ br.cloop.dptk L(gt3)
;;
- xma.l fp0b_0 = ux, v0, rx
- xma.hu fp1a_0 = ux, v0, rx
+ cmp.ne p6, p7 = r0, r0
+ xma.l fp0b_0 = ux, v0, rx
+ xma.hu fp1b_0 = ux, v0, rx
;;
- cmp.ne p10, p11 = r0, r0
- xma.l fp0b_1 = uy, v0, ry
- xma.hu fp1a_1 = uy, v0, ry
+ cmp.ne p10, p11 = r0, r0
+ xma.l fp0b_1 = uy, v0, ry
+ xma.hu fp1a_1 = uy, v0, ry
;;
- getf.sig acc0 = fp0b_0
- xma.l fp1b_0 = ux, v1, fp1a_0
- xma.hu fp2a_0 = ux, v1, fp1a_0
+ getfsig acc0 = fp0b_0
+ (p15) xma.hu fp2a_0 = ux, v1, fp1b_0 C suppressed for addmul_2s
+ (p15) xma.l fp1b_0 = ux, v1, fp1b_0 C suppressed for addmul_2s
;;
- xma.l fp0b_2 = u_2, v0, r_2
- xma.hu fp1a_2 = u_2, v0, r_2
+ xma.l fp0b_2 = uy, v1, r_2
+ xma.hu fp1a_2 = uy, v1, r_2
;;
- getf.sig pr0_1 = fp0b_1
- xma.l fp1b_1 = uy, v1, fp1a_1
- xma.hu fp2a_1 = uy, v1, fp1a_1
+ getfsig pr0_1 = fp0b_1
+ xma.l fp1b_1 = u_2, v0, fp1a_1
+ xma.hu fp2a_1 = u_2, v0, fp1a_1
;;
- getf.sig pr1_0 = fp1b_0
- getf.sig acc1_0 = fp2a_0
- br .Lcj3
+ getfsig pr1_0 = fp1b_0
+ getfsig acc1_0 = fp2a_0
+ br L(cj3)
-.grt3: ldf8 r_3 = [srp], 8
- xma.l fp0b_0 = ux, v0, rx
- cmp.ne p10, p11 = r0, r0
- ldf8 u_3 = [up], 8
- xma.hu fp1a_0 = ux, v0, rx
+L(gt3): ldf8 r_3 = [srp], 8
+ xma.l fp0b_0 = ux, v0, rx
+ cmp.ne p10, p11 = r0, r0
+ ldf8 u_3 = [up], 8
+ xma.hu fp1b_0 = ux, v0, rx
+ cmp.ne p6, p7 = r0, r0
;;
- xma.l fp0b_1 = uy, v0, ry
- xma.hu fp1a_1 = uy, v0, ry
+ xma.l fp0b_1 = uy, v0, ry
+ xma.hu fp1a_1 = uy, v0, ry
;;
- getf.sig acc0 = fp0b_0
- ldf8 r_0 = [srp], 8
- xma.l fp1b_0 = ux, v1, fp1a_0
- ldf8 u_0 = [up], 8
- xma.hu fp2a_0 = ux, v1, fp1a_0
+ getfsig acc0 = fp0b_0
+ ldf8 r_0 = [srp], 8
+ (p15) xma.hu fp2a_0 = ux, v1, fp1b_0 C suppressed for addmul_2s
+ ldf8 u_0 = [up], 8
+ (p15) xma.l fp1b_0 = ux, v1, fp1b_0 C suppressed for addmul_2s
;;
- xma.l fp0b_2 = u_2, v0, r_2
- xma.hu fp1a_2 = u_2, v0, r_2
+ xma.l fp0b_2 = u_2, v0, r_2
+ xma.hu fp1a_2 = u_2, v0, r_2
;;
- getf.sig pr0_1 = fp0b_1
- ldf8 r_1 = [srp], 8
- xma.l fp1b_1 = uy, v1, fp1a_1
- xma.hu fp2a_1 = uy, v1, fp1a_1
+ getfsig pr0_1 = fp0b_1
+ ldf8 r_1 = [srp], 8
+ xma.l fp1b_1 = uy, v1, fp1a_1
+ xma.hu fp2a_1 = uy, v1, fp1a_1
;;
- ldf8 u_1 = [up], 8
- getf.sig pr1_0 = fp1b_0
+ ldf8 u_1 = [up], 8
+ getfsig pr1_0 = fp1b_0
;;
- getf.sig acc1_0 = fp2a_0
- xma.l fp0b_3 = u_3, v0, r_3
- xma.hu fp1a_3 = u_3, v0, r_3
- br .LL11
+ getfsig acc1_0 = fp2a_0
+ xma.l fp0b_3 = u_3, v0, r_3
+ xma.hu fp1a_3 = u_3, v0, r_3
+ br L(11)
C *** MAIN LOOP START ***
ALIGN(32)
-.Loop: C 00
- .pred.rel "mutex", p12, p13
- getf.sig pr0_3 = fp0b_3
- ldf8 r_3 = [srp], 8
- xma.l fp1b_3 = u_3, v1, fp1a_3
- (p12) add s0 = pr1_0, acc0, 1
- (p13) add s0 = pr1_0, acc0
- xma.hu fp2a_3 = u_3, v1, fp1a_3
+L(top): C 00
+ .pred.rel "mutex", p12, p13
+ getfsig pr0_3 = fp0b_3
+ ldf8 r_3 = [srp], 8
+ xma.l fp1b_3 = u_3, v1, fp1a_3
+ (p12) add s0 = pr1_0, acc0, 1
+ (p13) add s0 = pr1_0, acc0
+ xma.hu fp2a_3 = u_3, v1, fp1a_3
;; C 01
- .pred.rel "mutex", p8, p9
- .pred.rel "mutex", p12, p13
- ldf8 u_3 = [up], 8
- getf.sig pr1_2 = fp1b_2
- (p8) cmp.leu p6, p7 = acc0, pr0_1
- (p9) cmp.ltu p6, p7 = acc0, pr0_1
- (p12) cmp.leu p10, p11 = s0, pr1_0
- (p13) cmp.ltu p10, p11 = s0, pr1_0
+ .pred.rel "mutex", p8, p9
+ .pred.rel "mutex", p12, p13
+ ldf8 u_3 = [up], 8
+ getfsig pr1_2 = fp1b_2
+ (p8) cmp.leu p6, p7 = acc0, pr0_1
+ (p9) cmp.ltu p6, p7 = acc0, pr0_1
+ (p12) cmp.leu p10, p11 = s0, pr1_0
+ (p13) cmp.ltu p10, p11 = s0, pr1_0
;; C 02
- .pred.rel "mutex", p6, p7
- getf.sig acc1_2 = fp2a_2
- st8 [rp] = s0, 8
- xma.l fp0b_1 = u_1, v0, r_1
- (p6) add acc0 = pr0_2, acc1_0, 1
- (p7) add acc0 = pr0_2, acc1_0
- xma.hu fp1a_1 = u_1, v0, r_1
+ .pred.rel "mutex", p6, p7
+ getfsig acc1_2 = fp2a_2
+ st8 [rp] = s0, 8
+ xma.l fp0b_1 = u_1, v0, r_1
+ (p6) add acc0 = pr0_2, acc1_0, 1
+ (p7) add acc0 = pr0_2, acc1_0
+ xma.hu fp1a_1 = u_1, v0, r_1
;; C 03
-.LL01:
- .pred.rel "mutex", p10, p11
- getf.sig pr0_0 = fp0b_0
- ldf8 r_0 = [srp], 8
- xma.l fp1b_0 = u_0, v1, fp1a_0
- (p10) add s0 = pr1_1, acc0, 1
- (p11) add s0 = pr1_1, acc0
- xma.hu fp2a_0 = u_0, v1, fp1a_0
+L(01):
+ .pred.rel "mutex", p10, p11
+ getfsig pr0_0 = fp0b_0
+ ldf8 r_0 = [srp], 8
+ xma.l fp1b_0 = u_0, v1, fp1a_0
+ (p10) add s0 = pr1_1, acc0, 1
+ (p11) add s0 = pr1_1, acc0
+ xma.hu fp2a_0 = u_0, v1, fp1a_0
;; C 04
- .pred.rel "mutex", p6, p7
- .pred.rel "mutex", p10, p11
- ldf8 u_0 = [up], 8
- getf.sig pr1_3 = fp1b_3
- (p6) cmp.leu p8, p9 = acc0, pr0_2
- (p7) cmp.ltu p8, p9 = acc0, pr0_2
- (p10) cmp.leu p12, p13 = s0, pr1_1
- (p11) cmp.ltu p12, p13 = s0, pr1_1
+ .pred.rel "mutex", p6, p7
+ .pred.rel "mutex", p10, p11
+ ldf8 u_0 = [up], 8
+ getfsig pr1_3 = fp1b_3
+ (p6) cmp.leu p8, p9 = acc0, pr0_2
+ (p7) cmp.ltu p8, p9 = acc0, pr0_2
+ (p10) cmp.leu p12, p13 = s0, pr1_1
+ (p11) cmp.ltu p12, p13 = s0, pr1_1
;; C 05
- .pred.rel "mutex", p8, p9
- getf.sig acc1_3 = fp2a_3
- st8 [rp] = s0, 8
- xma.l fp0b_2 = u_2, v0, r_2
- (p8) add acc0 = pr0_3, acc1_1, 1
- (p9) add acc0 = pr0_3, acc1_1
- xma.hu fp1a_2 = u_2, v0, r_2
+ .pred.rel "mutex", p8, p9
+ getfsig acc1_3 = fp2a_3
+ st8 [rp] = s0, 8
+ xma.l fp0b_2 = u_2, v0, r_2
+ (p8) add acc0 = pr0_3, acc1_1, 1
+ (p9) add acc0 = pr0_3, acc1_1
+ xma.hu fp1a_2 = u_2, v0, r_2
;; C 06
-.LL00:
- .pred.rel "mutex", p12, p13
- getf.sig pr0_1 = fp0b_1
- ldf8 r_1 = [srp], 8
- xma.l fp1b_1 = u_1, v1, fp1a_1
- (p12) add s0 = pr1_2, acc0, 1
- (p13) add s0 = pr1_2, acc0
- xma.hu fp2a_1 = u_1, v1, fp1a_1
+L(00):
+ .pred.rel "mutex", p12, p13
+ getfsig pr0_1 = fp0b_1
+ ldf8 r_1 = [srp], 8
+ xma.l fp1b_1 = u_1, v1, fp1a_1
+ (p12) add s0 = pr1_2, acc0, 1
+ (p13) add s0 = pr1_2, acc0
+ xma.hu fp2a_1 = u_1, v1, fp1a_1
;; C 07
- .pred.rel "mutex", p8, p9
- .pred.rel "mutex", p12, p13
- ldf8 u_1 = [up], 8
- getf.sig pr1_0 = fp1b_0
- (p8) cmp.leu p6, p7 = acc0, pr0_3
- (p9) cmp.ltu p6, p7 = acc0, pr0_3
- (p12) cmp.leu p10, p11 = s0, pr1_2
- (p13) cmp.ltu p10, p11 = s0, pr1_2
+ .pred.rel "mutex", p8, p9
+ .pred.rel "mutex", p12, p13
+ ldf8 u_1 = [up], 8
+ getfsig pr1_0 = fp1b_0
+ (p8) cmp.leu p6, p7 = acc0, pr0_3
+ (p9) cmp.ltu p6, p7 = acc0, pr0_3
+ (p12) cmp.leu p10, p11 = s0, pr1_2
+ (p13) cmp.ltu p10, p11 = s0, pr1_2
;; C 08
- .pred.rel "mutex", p6, p7
- getf.sig acc1_0 = fp2a_0
- st8 [rp] = s0, 8
- xma.l fp0b_3 = u_3, v0, r_3
- (p6) add acc0 = pr0_0, acc1_2, 1
- (p7) add acc0 = pr0_0, acc1_2
- xma.hu fp1a_3 = u_3, v0, r_3
+ .pred.rel "mutex", p6, p7
+ getfsig acc1_0 = fp2a_0
+ st8 [rp] = s0, 8
+ xma.l fp0b_3 = u_3, v0, r_3
+ (p6) add acc0 = pr0_0, acc1_2, 1
+ (p7) add acc0 = pr0_0, acc1_2
+ xma.hu fp1a_3 = u_3, v0, r_3
;; C 09
-.LL11:
- .pred.rel "mutex", p10, p11
- getf.sig pr0_2 = fp0b_2
- ldf8 r_2 = [srp], 8
- xma.l fp1b_2 = u_2, v1, fp1a_2
- (p10) add s0 = pr1_3, acc0, 1
- (p11) add s0 = pr1_3, acc0
- xma.hu fp2a_2 = u_2, v1, fp1a_2
+L(11):
+ .pred.rel "mutex", p10, p11
+ getfsig pr0_2 = fp0b_2
+ ldf8 r_2 = [srp], 8
+ xma.l fp1b_2 = u_2, v1, fp1a_2
+ (p10) add s0 = pr1_3, acc0, 1
+ (p11) add s0 = pr1_3, acc0
+ xma.hu fp2a_2 = u_2, v1, fp1a_2
;; C 10
- .pred.rel "mutex", p6, p7
- .pred.rel "mutex", p10, p11
- ldf8 u_2 = [up], 8
- getf.sig pr1_1 = fp1b_1
- (p6) cmp.leu p8, p9 = acc0, pr0_0
- (p7) cmp.ltu p8, p9 = acc0, pr0_0
- (p10) cmp.leu p12, p13 = s0, pr1_3
- (p11) cmp.ltu p12, p13 = s0, pr1_3
+ .pred.rel "mutex", p6, p7
+ .pred.rel "mutex", p10, p11
+ ldf8 u_2 = [up], 8
+ getfsig pr1_1 = fp1b_1
+ (p6) cmp.leu p8, p9 = acc0, pr0_0
+ (p7) cmp.ltu p8, p9 = acc0, pr0_0
+ (p10) cmp.leu p12, p13 = s0, pr1_3
+ (p11) cmp.ltu p12, p13 = s0, pr1_3
;; C 11
- .pred.rel "mutex", p8, p9
- getf.sig acc1_1 = fp2a_1
- st8 [rp] = s0, 8
- xma.l fp0b_0 = u_0, v0, r_0
- (p8) add acc0 = pr0_1, acc1_3, 1
- (p9) add acc0 = pr0_1, acc1_3
- xma.hu fp1a_0 = u_0, v0, r_0
-.LL10: br.cloop.dptk .Loop C 12
+ .pred.rel "mutex", p8, p9
+ getfsig acc1_1 = fp2a_1
+ st8 [rp] = s0, 8
+ xma.l fp0b_0 = u_0, v0, r_0
+ (p8) add acc0 = pr0_1, acc1_3, 1
+ (p9) add acc0 = pr0_1, acc1_3
+ xma.hu fp1a_0 = u_0, v0, r_0
+L(10): br.cloop.sptk.clr L(top) C 12
;;
C *** MAIN LOOP END ***
-
-.Lcj6:
- .pred.rel "mutex", p12, p13
- getf.sig pr0_3 = fp0b_3
- xma.l fp1b_3 = u_3, v1, fp1a_3
- (p12) add s0 = pr1_0, acc0, 1
- (p13) add s0 = pr1_0, acc0
- xma.hu fp2a_3 = u_3, v1, fp1a_3
- ;;
- .pred.rel "mutex", p8, p9
- .pred.rel "mutex", p12, p13
- getf.sig pr1_2 = fp1b_2
- (p8) cmp.leu p6, p7 = acc0, pr0_1
- (p9) cmp.ltu p6, p7 = acc0, pr0_1
- (p12) cmp.leu p10, p11 = s0, pr1_0
- (p13) cmp.ltu p10, p11 = s0, pr1_0
- ;;
- .pred.rel "mutex", p6, p7
- getf.sig acc1_2 = fp2a_2
- st8 [rp] = s0, 8
- xma.l fp0b_1 = u_1, v0, r_1
- (p6) add acc0 = pr0_2, acc1_0, 1
- (p7) add acc0 = pr0_2, acc1_0
- xma.hu fp1a_1 = u_1, v0, r_1
- ;;
-.Lcj5:
- .pred.rel "mutex", p10, p11
- getf.sig pr0_0 = fp0b_0
- xma.l fp1b_0 = u_0, v1, fp1a_0
- (p10) add s0 = pr1_1, acc0, 1
- (p11) add s0 = pr1_1, acc0
- xma.hu fp2a_0 = u_0, v1, fp1a_0
- ;;
- .pred.rel "mutex", p6, p7
- .pred.rel "mutex", p10, p11
- getf.sig pr1_3 = fp1b_3
- (p6) cmp.leu p8, p9 = acc0, pr0_2
- (p7) cmp.ltu p8, p9 = acc0, pr0_2
- (p10) cmp.leu p12, p13 = s0, pr1_1
- (p11) cmp.ltu p12, p13 = s0, pr1_1
- ;;
- .pred.rel "mutex", p8, p9
- getf.sig acc1_3 = fp2a_3
- st8 [rp] = s0, 8
- xma.l fp0b_2 = u_2, v0, r_2
- (p8) add acc0 = pr0_3, acc1_1, 1
- (p9) add acc0 = pr0_3, acc1_1
- xma.hu fp1a_2 = u_2, v0, r_2
- ;;
-.Lcj4:
- .pred.rel "mutex", p12, p13
- getf.sig pr0_1 = fp0b_1
- xma.l fp1b_1 = u_1, v1, fp1a_1
- (p12) add s0 = pr1_2, acc0, 1
- (p13) add s0 = pr1_2, acc0
- xma.hu fp2a_1 = u_1, v1, fp1a_1
- ;;
- .pred.rel "mutex", p8, p9
- .pred.rel "mutex", p12, p13
- getf.sig pr1_0 = fp1b_0
- (p8) cmp.leu p6, p7 = acc0, pr0_3
- (p9) cmp.ltu p6, p7 = acc0, pr0_3
- (p12) cmp.leu p10, p11 = s0, pr1_2
- (p13) cmp.ltu p10, p11 = s0, pr1_2
- ;;
- .pred.rel "mutex", p6, p7
- getf.sig acc1_0 = fp2a_0
- st8 [rp] = s0, 8
- (p6) add acc0 = pr0_0, acc1_2, 1
- (p7) add acc0 = pr0_0, acc1_2
- ;;
-.Lcj3:
- .pred.rel "mutex", p10, p11
- getf.sig pr0_2 = fp0b_2
- xma.l fp1b_2 = u_2, v1, fp1a_2
- (p10) add s0 = pr1_3, acc0, 1
- (p11) add s0 = pr1_3, acc0
- xma.hu fp2a_2 = u_2, v1, fp1a_2
- ;;
- .pred.rel "mutex", p6, p7
- .pred.rel "mutex", p10, p11
- getf.sig pr1_1 = fp1b_1
- (p6) cmp.leu p8, p9 = acc0, pr0_0
- (p7) cmp.ltu p8, p9 = acc0, pr0_0
- (p10) cmp.leu p12, p13 = s0, pr1_3
- (p11) cmp.ltu p12, p13 = s0, pr1_3
- ;;
- .pred.rel "mutex", p8, p9
- getf.sig acc1_1 = fp2a_1
- st8 [rp] = s0, 8
- (p8) add acc0 = pr0_1, acc1_3, 1
- (p9) add acc0 = pr0_1, acc1_3
- ;;
-.Lcj2:
- .pred.rel "mutex", p12, p13
- (p12) add s0 = pr1_0, acc0, 1
- (p13) add s0 = pr1_0, acc0
- ;;
- .pred.rel "mutex", p8, p9
- .pred.rel "mutex", p12, p13
- getf.sig pr1_2 = fp1b_2
- (p8) cmp.leu p6, p7 = acc0, pr0_1
- (p9) cmp.ltu p6, p7 = acc0, pr0_1
- (p12) cmp.leu p10, p11 = s0, pr1_0
- (p13) cmp.ltu p10, p11 = s0, pr1_0
- ;;
- .pred.rel "mutex", p6, p7
- getf.sig acc1_2 = fp2a_2
- st8 [rp] = s0, 8
- (p6) add acc0 = pr0_2, acc1_0, 1
- (p7) add acc0 = pr0_2, acc1_0
- ;;
- .pred.rel "mutex", p10, p11
- (p10) add s0 = pr1_1, acc0, 1
- (p11) add s0 = pr1_1, acc0
- ;;
- .pred.rel "mutex", p6, p7
- .pred.rel "mutex", p10, p11
- (p6) cmp.leu p8, p9 = acc0, pr0_2
- (p7) cmp.ltu p8, p9 = acc0, pr0_2
- (p10) cmp.leu p12, p13 = s0, pr1_1
- (p11) cmp.ltu p12, p13 = s0, pr1_1
- ;;
- .pred.rel "mutex", p8, p9
- st8 [rp] = s0, 8
- (p8) add acc0 = pr1_2, acc1_1, 1
- (p9) add acc0 = pr1_2, acc1_1
- ;;
- .pred.rel "mutex", p8, p9
- (p8) cmp.leu p10, p11 = acc0, pr1_2
- (p9) cmp.ltu p10, p11 = acc0, pr1_2
- (p12) add acc0 = 1, acc0
- ;;
- st8 [rp] = acc0, 8
- (p12) cmp.eq.or p10, p0 = 0, acc0
- mov r8 = acc1_2
- ;;
- .pred.rel "mutex", p10, p11
- (p10) add r8 = 1, r8
- mov.i ar.lc = r2
- br.ret.sptk.many b0
+L(end):
+ .pred.rel "mutex", p12, p13
+.mfi; getfsig pr0_3 = fp0b_3
+ xma.l fp1b_3 = u_3, v1, fp1a_3
+ (p12) add s0 = pr1_0, acc0, 1
+.mfi; (p13) add s0 = pr1_0, acc0
+ xma.hu fp2a_3 = u_3, v1, fp1a_3
+ nop 1
+ ;;
+ .pred.rel "mutex", p8, p9
+ .pred.rel "mutex", p12, p13
+.mmi; getfsig pr1_2 = fp1b_2
+ st8 [rp] = s0, 8
+ (p8) cmp.leu p6, p7 = acc0, pr0_1
+.mmi; (p9) cmp.ltu p6, p7 = acc0, pr0_1
+ (p12) cmp.leu p10, p11 = s0, pr1_0
+ (p13) cmp.ltu p10, p11 = s0, pr1_0
+ ;;
+ .pred.rel "mutex", p6, p7
+.mfi; getfsig acc1_2 = fp2a_2
+ xma.l fp0b_1 = u_1, v0, r_1
+ nop 1
+.mmf; (p6) add acc0 = pr0_2, acc1_0, 1
+ (p7) add acc0 = pr0_2, acc1_0
+ xma.hu fp1a_1 = u_1, v0, r_1
+ ;;
+L(cj5):
+ .pred.rel "mutex", p10, p11
+.mfi; getfsig pr0_0 = fp0b_0
+ xma.l fp1b_0 = u_0, v1, fp1a_0
+ (p10) add s0 = pr1_1, acc0, 1
+.mfi; (p11) add s0 = pr1_1, acc0
+ xma.hu fp2a_0 = u_0, v1, fp1a_0
+ nop 1
+ ;;
+ .pred.rel "mutex", p6, p7
+ .pred.rel "mutex", p10, p11
+.mmi; getfsig pr1_3 = fp1b_3
+ st8 [rp] = s0, 8
+ (p6) cmp.leu p8, p9 = acc0, pr0_2
+.mmi; (p7) cmp.ltu p8, p9 = acc0, pr0_2
+ (p10) cmp.leu p12, p13 = s0, pr1_1
+ (p11) cmp.ltu p12, p13 = s0, pr1_1
+ ;;
+ .pred.rel "mutex", p8, p9
+.mfi; getfsig acc1_3 = fp2a_3
+ xma.l fp0b_2 = u_2, v0, r_2
+ nop 1
+.mmf; (p8) add acc0 = pr0_3, acc1_1, 1
+ (p9) add acc0 = pr0_3, acc1_1
+ xma.hu fp1a_2 = u_2, v0, r_2
+ ;;
+L(cj4):
+ .pred.rel "mutex", p12, p13
+.mfi; getfsig pr0_1 = fp0b_1
+ xma.l fp1b_1 = u_1, v1, fp1a_1
+ (p12) add s0 = pr1_2, acc0, 1
+.mfi; (p13) add s0 = pr1_2, acc0
+ xma.hu fp2a_1 = u_1, v1, fp1a_1
+ nop 1
+ ;;
+ .pred.rel "mutex", p8, p9
+ .pred.rel "mutex", p12, p13
+.mmi; getfsig pr1_0 = fp1b_0
+ st8 [rp] = s0, 8
+ (p8) cmp.leu p6, p7 = acc0, pr0_3
+.mmi; (p9) cmp.ltu p6, p7 = acc0, pr0_3
+ (p12) cmp.leu p10, p11 = s0, pr1_2
+ (p13) cmp.ltu p10, p11 = s0, pr1_2
+ ;;
+ .pred.rel "mutex", p6, p7
+.mmi; getfsig acc1_0 = fp2a_0
+ (p6) add acc0 = pr0_0, acc1_2, 1
+ (p7) add acc0 = pr0_0, acc1_2
+ ;;
+L(cj3):
+ .pred.rel "mutex", p10, p11
+.mfi; getfsig pr0_2 = fp0b_2
+ xma.l fp1b_2 = u_2, v1, fp1a_2
+ (p10) add s0 = pr1_3, acc0, 1
+.mfi; (p11) add s0 = pr1_3, acc0
+ xma.hu fp2a_2 = u_2, v1, fp1a_2
+ nop 1
+ ;;
+ .pred.rel "mutex", p6, p7
+ .pred.rel "mutex", p10, p11
+.mmi; getfsig pr1_1 = fp1b_1
+ st8 [rp] = s0, 8
+ (p6) cmp.leu p8, p9 = acc0, pr0_0
+.mmi; (p7) cmp.ltu p8, p9 = acc0, pr0_0
+ (p10) cmp.leu p12, p13 = s0, pr1_3
+ (p11) cmp.ltu p12, p13 = s0, pr1_3
+ ;;
+ .pred.rel "mutex", p8, p9
+.mmi; getfsig acc1_1 = fp2a_1
+ (p8) add acc0 = pr0_1, acc1_3, 1
+ (p9) add acc0 = pr0_1, acc1_3
+ ;;
+ .pred.rel "mutex", p12, p13
+.mmi; (p12) add s0 = pr1_0, acc0, 1
+ (p13) add s0 = pr1_0, acc0
+ nop 1
+ ;;
+ .pred.rel "mutex", p8, p9
+ .pred.rel "mutex", p12, p13
+.mmi; getfsig pr1_2 = fp1b_2
+ st8 [rp] = s0, 8
+ (p8) cmp.leu p6, p7 = acc0, pr0_1
+.mmi; (p9) cmp.ltu p6, p7 = acc0, pr0_1
+ (p12) cmp.leu p10, p11 = s0, pr1_0
+ (p13) cmp.ltu p10, p11 = s0, pr1_0
+ ;;
+ .pred.rel "mutex", p6, p7
+.mmi; getfsig r8 = fp2a_2
+ (p6) add acc0 = pr0_2, acc1_0, 1
+ (p7) add acc0 = pr0_2, acc1_0
+ ;;
+ .pred.rel "mutex", p10, p11
+.mmi; (p10) add s0 = pr1_1, acc0, 1
+ (p11) add s0 = pr1_1, acc0
+ (p6) cmp.leu p8, p9 = acc0, pr0_2
+ ;;
+ .pred.rel "mutex", p10, p11
+.mmi; (p7) cmp.ltu p8, p9 = acc0, pr0_2
+ (p10) cmp.leu p12, p13 = s0, pr1_1
+ (p11) cmp.ltu p12, p13 = s0, pr1_1
+ ;;
+ .pred.rel "mutex", p8, p9
+.mmi; st8 [rp] = s0, 8
+ (p8) add acc0 = pr1_2, acc1_1, 1
+ (p9) add acc0 = pr1_2, acc1_1
+ ;;
+ .pred.rel "mutex", p8, p9
+.mmi; (p8) cmp.leu p10, p11 = acc0, pr1_2
+ (p9) cmp.ltu p10, p11 = acc0, pr1_2
+ (p12) add acc0 = 1, acc0
+ ;;
+.mmi; st8 [rp] = acc0, 8
+ (p12) cmpeqor p10, p0 = 0, acc0
+ nop 1
+ ;;
+.mib; (p10) add r8 = 1, r8
+ mov ar.lc = r2
+ br.ret.sptk.many b0
EPILOGUE()
ASM_END()
dnl IA-64 mpn_add_n/mpn_sub_n -- mpn addition and subtraction.
-dnl Copyright 2003, 2004, 2005 Free Software Foundation, Inc.
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
+dnl Copyright 2003, 2004, 2005, 2010, 2011 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
C TODO
C * Consider using special code for small n, using something like
C "switch (8 * (n >= 8) + (n mod 8))" to enter it and feed-in code.
+C * The non-nc code was trimmed cycle for cycle to its current state. It is
+C probably hard to save more than an odd cycle there. The nc code is much
+C rawer (since tune/speed doesn't have any applicable direct measurements).
+C * Without the nc entry points, this becomes around 1800 bytes of object
+C code; the nc code adds over 1000 bytes. We should perhaps sacrifice a
+C few cycles for the non-nc code and let it fall into the nc code.
C INPUT PARAMETERS
-define(`rp',`r32')
-define(`up',`r33')
-define(`vp',`r34')
-define(`n',`r35')
+C rp, up, vp and n are used by both entry points. cy is read by the func_nc
+C entry code, which copies it to r23 and, on two of the paths, tests it into p13.
+define(`rp', `r32')
+define(`up', `r33')
+define(`vp', `r34')
+define(`n', `r35')
+define(`cy', `r36')
+C Per-operation parameters.
+C   ADDSUB  limb operation applied to each operand pair.
+C   CND     unsigned compare of the result against the first operand that
+C           signals carry-out (add: result ltu operand) or borrow-out
+C           (sub: result gtu operand).
+C   INCR    adjustment added to a result for an incoming carry/borrow.
+C   LIM     result value, tested before INCR is applied, for which applying
+C           INCR carries/borrows out.
+C   LIM2    the same test expressed on the value after INCR was applied
+C           (used where the adjustment is issued one group before the test).
+C   func, func_nc  names of the plain and the carry-in entry points.
ifdef(`OPERATION_add_n',`
define(ADDSUB, add)
- define(PRED, ltu)
+ define(CND, ltu)
define(INCR, 1)
define(LIM, -1)
- define(func, mpn_add_n)
+ define(LIM2, 0)
+ define(func, mpn_add_n)
+ define(func_nc, mpn_add_nc)
')
ifdef(`OPERATION_sub_n',`
define(ADDSUB, sub)
- define(PRED, gtu)
+ define(CND, gtu)
define(INCR, -1)
define(LIM, 0)
- define(func, mpn_sub_n)
+ define(LIM2, -1)
+ define(func, mpn_sub_n)
+ define(func_nc, mpn_sub_nc)
')
+define(cmpeqor, `cmp.eq.or')
+define(PFDIST, 500)
+
C Some useful aliases for registers we use
+C Note that u0 is r14, the same register the entry code uses for n mod 8.
define(`u0',`r14') define(`u1',`r15') define(`u2',`r16') define(`u3',`r17')
-define(`u4',`r18') define(`u5',`r19') define(`u6',`r20') define(`u7',`r21')
define(`v0',`r24') define(`v1',`r25') define(`v2',`r26') define(`v3',`r27')
-define(`v4',`r28') define(`v5',`r29') define(`v6',`r30') define(`v7',`r31')
-define(`w0',`r22') define(`w1',`r9') define(`w2',`r8') define(`w3',`r23')
-define(`w4',`r22') define(`w5',`r9') define(`w6',`r8') define(`w7',`r23')
+define(`w0',`r28') define(`w1',`r29') define(`w2',`r30') define(`w3',`r31')
define(`rpx',`r3')
+C upadv and vpadv are set to up + PFDIST and vp + PFDIST by the entry and
+C feed-in code.
+define(`upadv',`r20') define(`vpadv',`r21')
-MULFUNC_PROLOGUE(mpn_add_n mpn_sub_n)
+MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
ASM_START()
-PROLOGUE(func)
+C Entry point that takes an incoming carry/borrow in cy.
+PROLOGUE(func_nc)
.prologue
.save ar.lc, r2
.body
ifdef(`HAVE_ABI_32',`
- addp4 rp = 0, rp C M I
- addp4 up = 0, up C M I
- addp4 vp = 0, vp C M I
- zxt4 n = n C I
+ addp4 rp = 0, rp C M I
+ addp4 up = 0, up C M I
+ addp4 vp = 0, vp C M I
+ zxt4 n = n C I
;;
')
-{.mmi C 00
- ld8 r11 = [vp], 8 C M01
- ld8 r10 = [up], 8 C M01
- mov.i r2 = ar.lc C I0
-}
-{.mmi
- and r14 = 7, n C M I
- cmp.lt p15, p14 = 8, n C M I
- add n = -8, n C M I
+
+C Load the first limb pair into r10/r11, save ar.lc in r2, and compute
+C r14 = n mod 8, p15 = (n > 8), p14 = (n <= 8). n is reduced by 6 ahead of
+C the shr.u n = n, 3 done in the feed-in code.
+ {.mmi; ld8 r11 = [vp], 8 C M01
+ ld8 r10 = [up], 8 C M01
+ mov r2 = ar.lc C I0
+}{.mmi; and r14 = 7, n C M I
+ cmp.lt p15, p14 = 8, n C M I
+ add n = -6, n C M I
;;
}
-{.mmi C 01
- cmp.eq p6, p0 = 1, r14 C M I
- cmp.eq p7, p0 = 2, r14 C M I
- cmp.eq p8, p0 = 3, r14 C M I
-}
-{.bbb
- (p6) br.dptk .Lb001 C B
- (p7) br.dptk .Lb010 C B
- (p8) br.dptk .Lb011 C B
+.mmi; add upadv = PFDIST, up C Merging these lines into the feed-in
+ add vpadv = PFDIST, vp C code could save a cycle per call at
+ mov r23 = cy C the expense of code size.
+ ;;
+C Dispatch on n mod 8 (r14). Residues 1..6 branch away first. Residues 7
+C and 0 additionally preload the second limb pair into r18/r19 and turn cy
+C into predicate p13 before branching to .Lc111 or falling into .Lc000.
+{.mmi; cmp.eq p6, p0 = 1, r14 C M I
+ cmp.eq p7, p0 = 2, r14 C M I
+ cmp.eq p8, p0 = 3, r14 C M I
+}{.bbb
+ (p6) br.dptk .Lc001 C B
+ (p7) br.dptk .Lc010 C B
+ (p8) br.dptk .Lc011 C B
;;
}
-{.mmi C 02
- cmp.eq p9, p0 = 4, r14 C M I
- cmp.eq p10, p0 = 5, r14 C M I
- cmp.eq p11, p0 = 6, r14 C M I
-}
-{.bbb
- (p9) br.dptk .Lb100 C B
- (p10) br.dptk .Lb101 C B
- (p11) br.dptk .Lb110 C B
- ;;
-} C 03
-{.mmb
- cmp.eq p12, p0 = 7, r14 C M I
- add n = -1, n C loop count M I
- (p12) br.dptk .Lb111 C B
+{.mmi; cmp.eq p9, p0 = 4, r14 C M I
+ cmp.eq p10, p0 = 5, r14 C M I
+ cmp.eq p11, p0 = 6, r14 C M I
+}{.bbb
+ (p9) br.dptk .Lc100 C B
+ (p10) br.dptk .Lc101 C B
+ (p11) br.dptk .Lc110 C B
+ ;;
+}{.mmi; ld8 r19 = [vp], 8 C M01
+ ld8 r18 = [up], 8 C M01
+ cmp.ne p13, p0 = 0, cy C copy cy to p13 M I
+}{.mmb; cmp.eq p12, p0 = 7, r14 C M I
+ nop 0
+ (p12) br.dptk .Lc111 C B
+ ;;
}
+C Carry-in feed-in for n mod 8 = 0 (reached by falling through the dispatch).
+C Forms w1 from the first limb pair (r10, r11), w2 from the preloaded second
+C pair (r18, r19) and w3 from u3/v3, sets ar.lc = n >> 3 (n was already
+C reduced by 6 at entry) and rpx = rp + 8, then branches to L(m0).
+.Lc000:
+.mmi; ld8 v3 = [vp], 8 C M01
+ ld8 u3 = [up], 8 C M01
+ shr.u n = n, 3 C I0
+ ;;
+.mmi; add vpadv = PFDIST, vp C M I
+ ld8 v0 = [vp], 8 C M01
+ mov ar.lc = n C I0
+.mmi; ld8 u0 = [up], 8 C M01
+ ADDSUB w1 = r10, r11 C M I
+ nop 0
+ ;;
+.mmi; add upadv = PFDIST, up C M I
+ ld8 v1 = [vp], 8 C M01
+ cmp.CND p7, p0 = w1, r10 C M I
+.mmi; ld8 u1 = [up], 8 C M01
+ ADDSUB w2 = r18, r19 C M I
+ add rpx = 8, rp C M I
+ ;;
+C Fold the incoming carry (p13) into w1; p7 is also set when w1 equals LIM.
+.mmi; ld8 v2 = [vp], 8 C M01
+ cmp.CND p8, p0 = w2, r18 C M I
+ (p13) cmpeqor p7, p0 = LIM, w1 C M I
+.mmi; ld8 u2 = [up], 8 C M01
+ (p13) add w1 = INCR, w1 C M I
+ ADDSUB w3 = u3, v3 C M I
+ ;;
+C Propagate the carry out of w1 (p7) into w2 the same way.
+.mmi; ld8 v3 = [vp], 8 C M01
+ cmp.CND p9, p0 = w3, u3 C M I
+ (p7) cmpeqor p8, p0 = LIM, w2 C M I
+.mmb; ld8 u3 = [up], 8 C M01
+ (p7) add w2 = INCR, w2 C M I
+ br L(m0)
+
+
+C Carry-in feed-in for n mod 8 = 1. w0 is formed from the first limb pair.
+C When p15 is clear (n <= 8) the carry-in from r23 is folded into w0, r8 is
+C cleared and control goes to L(cj1). Otherwise the code at local label 1
+C loads more limbs, sets ar.lc and rpx = rp + 16, and branches to L(c1) with
+C p9 = (cy != 0) not yet applied to w0 (presumably applied at L(c1), which
+C is outside this section).
+.Lc001:
+.mmi;
+ (p15) ld8 v1 = [vp], 8 C M01
+ (p15) ld8 u1 = [up], 8 C M01
+ ADDSUB w0 = r10, r11 C M I
+.mmb; nop 0
+ nop 0
+ (p15) br 1f
+ ;;
+.mmi; cmp.ne p9, p0 = 0, r23 C M I
+ mov r8 = 0
+ cmp.CND p6, p0 = w0, r10 C M I
+ ;;
+.mmb;
+ (p9) cmpeqor p6, p0 = LIM, w0 C M I
+ (p9) add w0 = INCR, w0 C M I
+ br L(cj1) C B
+1:
+.mmi; ld8 v2 = [vp], 8 C M01
+ ld8 u2 = [up], 8 C M01
+ shr.u n = n, 3 C I0
+ ;;
+.mmi; ld8 v3 = [vp], 8 C M01
+ ld8 u3 = [up], 8 C M01
+ mov ar.lc = n C I0
+.mmi; nop 0
+ cmp.ne p9, p0 = 0, r23 C M I
+ nop 0
+ ;;
+.mmi; ld8 v0 = [vp], 8 C M01
+ cmp.CND p6, p0 = w0, r10 C M I
+ add rpx = 16, rp C M I
+.mmb; ld8 u0 = [up], 8 C M01
+ ADDSUB w1 = u1, v1 C M I
+ br L(c1) C B
+
+
+C Carry-in feed-in for n mod 8 = 2. w3 is formed from the first limb pair,
+C p8 = (cy != 0) and r8 is cleared. When p15 is clear (n <= 8) control goes
+C to L(cj2); otherwise the code at local label 1 sets ar.lc and
+C rpx = rp + 24 and branches to L(m23).
+.Lc010:
+.mmi; ld8 v0 = [vp], 8 C M01
+ ld8 u0 = [up], 8 C M01
+ mov r8 = 0 C M I
+.mmb; ADDSUB w3 = r10, r11 C M I
+ cmp.ne p8, p0 = 0, r23 C M I
+ (p15) br 1f C B
+ ;;
+C Short path: w3 is adjusted one group before the carry test, so the test
+C below compares the adjusted value against LIM2 rather than LIM.
+.mmi; cmp.CND p9, p0 = w3, r10 C M I
+ ADDSUB w0 = u0, v0 C M I
+ (p8) add w3 = INCR, w3 C M I
+ ;;
+.mmb; cmp.CND p6, p0 = w0, u0 C M I
+ (p8) cmpeqor p9, p0 = LIM2, w3 C M I
+ br L(cj2) C B
+1:
+.mmi; ld8 v1 = [vp], 8 C M01
+ ld8 u1 = [up], 8 C M01
+ shr.u n = n, 3 C I0
+ ;;
+.mmi; ld8 v2 = [vp], 8 C M01
+ ld8 u2 = [up], 8 C M01
+ mov ar.lc = n C I0
+ ;;
+.mmi; ld8 v3 = [vp], 8 C M01
+ ld8 u3 = [up], 8 C M01
+ cmp.CND p9, p0 = w3, r10 C M I
+ ;;
+C Long path: test and adjustment share a group, so the test uses LIM.
+.mmi;
+ (p8) cmpeqor p9, p0 = LIM, w3 C M I
+ (p8) add w3 = INCR, w3 C M I
+ ADDSUB w0 = u0, v0 C M I
+.mmb; add rpx = 24, rp C M I
+ nop 0
+ br L(m23) C B
+
+
+C Carry-in feed-in for n mod 8 = 3. w2 is formed from the first limb pair
+C and p7 = (cy != 0). When p15 is clear (n <= 8) the carry-in is folded into
+C w2 and control goes to L(cj3). Otherwise the code at local label 1 also
+C forms w3, stores w2 through rp, sets ar.lc and rpx = rp + 32, and
+C branches to L(m23).
+.Lc011:
+.mmi; ld8 v3 = [vp], 8 C M01
+ ld8 u3 = [up], 8 C M01
+ shr.u n = n, 3 C I0
+.mmi; ADDSUB w2 = r10, r11 C M I
+ cmp.ne p7, p0 = 0, r23 C M I
+ nop 0
+ ;;
+.mmb; ld8 v0 = [vp], 8 C M01
+ ld8 u0 = [up], 8 C M01
+ (p15) br 1f C B
+.mmi; cmp.CND p8, p0 = w2, r10 C M I
+ ADDSUB w3 = u3, v3 C M I
+ nop 0
+ ;;
+.mmb;
+ (p7) cmpeqor p8, p0 = LIM, w2 C M I
+ (p7) add w2 = INCR, w2 C M I
+ br L(cj3) C B
+1:
+.mmi; ld8 v1 = [vp], 8 C M01
+ ld8 u1 = [up], 8 C M01
+ ADDSUB w3 = u3, v3 C M I
+ ;;
+.mmi; ld8 v2 = [vp], 8 C M01
+ ld8 u2 = [up], 8 C M01
+ cmp.CND p8, p0 = w2, r10 C M I
+ ;;
+.mmi; ld8 v3 = [vp], 8 C M01
+ cmp.CND p9, p0 = w3, u3 C M I
+ mov ar.lc = n C I0
+.mmi; ld8 u3 = [up], 8 C M01
+ (p7) cmpeqor p8, p0 = LIM, w2 C M I
+ (p7) add w2 = INCR, w2 C M I
+ ;;
+C w2 is final here; store it and propagate its carry (p8) into w3.
+.mmi; add rpx = 32, rp C M I
+ st8 [rp] = w2, 8 C M23
+ (p8) cmpeqor p9, p0 = LIM, w3 C M I
+.mmb;
+ (p8) add w3 = INCR, w3 C M I
+ ADDSUB w0 = u0, v0 C M I
+ br L(m23)
+
+
+C Carry-in feed-in for n mod 8 = 4. w1 is formed from the first limb pair,
+C w2 from u2/v2, p6 = (cy != 0) is folded into w1, and rpx = rp + 8. When
+C p14 is set (n <= 8) control goes to L(cj4); otherwise ar.lc is set, w3 is
+C formed, the carry out of w1 (p7) is folded into w2, and control goes to
+C L(m4).
+.Lc100:
+.mmi; ld8 v2 = [vp], 8 C M01
+ ld8 u2 = [up], 8 C M01
+ shr.u n = n, 3 C I0
+.mmi; ADDSUB w1 = r10, r11 C M I
+ nop 0
+ nop 0
+ ;;
+.mmi; ld8 v3 = [vp], 8 C M01
+ ld8 u3 = [up], 8 C M01
+ add rpx = 8, rp C M I
+.mmi; cmp.ne p6, p0 = 0, r23 C M I
+ cmp.CND p7, p0 = w1, r10 C M I
+ nop 0
+ ;;
+.mmi; ld8 v0 = [vp], 8 C M01
+ ld8 u0 = [up], 8 C M01
+ ADDSUB w2 = u2, v2 C M I
+.mmb;
+ (p6) cmpeqor p7, p0 = LIM, w1 C M I
+ (p6) add w1 = INCR, w1 C M I
+ (p14) br L(cj4)
+ ;;
+.mmi; ld8 v1 = [vp], 8 C M01
+ ld8 u1 = [up], 8 C M01
+ mov ar.lc = n C I0
+ ;;
+C The compare reads the old u2 before the reload of u2 listed after it in
+C the same group.
+.mmi; ld8 v2 = [vp], 8 C M01
+ cmp.CND p8, p0 = w2, u2 C M I
+ nop 0
+.mmi; ld8 u2 = [up], 8 C M01
+ nop 0
+ ADDSUB w3 = u3, v3 C M I
+ ;;
+.mmi; ld8 v3 = [vp], 8 C M01
+ cmp.CND p9, p0 = w3, u3 C M I
+ (p7) cmpeqor p8, p0 = LIM, w2 C M I
+.mmb; ld8 u3 = [up], 8 C M01
+ (p7) add w2 = INCR, w2 C M I
+ br L(m4)
+
+
+C Carry-in feed-in for n mod 8 = 5. w0 is formed from the first limb pair
+C and w1 from u1/v1; p9 = (cy != 0) and p6 (carry out of w0) are left for
+C the code branched to. ar.lc = n >> 3 (n was already reduced by 6 at
+C entry) and rpx = rp + 16. Control goes to L(c5) when p15 is set (n > 8),
+C otherwise to L(end).
+.Lc101:
+.mmi; ld8 v1 = [vp], 8 C M01
+ ld8 u1 = [up], 8 C M01
+ shr.u n = n, 3 C I0
+ ;;
+.mmi; ld8 v2 = [vp], 8 C M01
+ ld8 u2 = [up], 8 C M01
+ mov ar.lc = n C I0
+ ;;
+.mmi; ld8 v3 = [vp], 8 C M01
+ ld8 u3 = [up], 8 C M01
+ ADDSUB w0 = r10, r11 C M I
+.mmi; cmp.ne p9, p0 = 0, r23 C M I
+ add rpx = 16, rp C M I
+ nop 0
+ ;;
+C Both loads are listed first so that they take the two M slots of the
+C .mmi bundle; the compare can issue in the I slot.
+.mmi; ld8 v0 = [vp], 8 C M01
+ ld8 u0 = [up], 8 C M01
+ cmp.CND p6, p0 = w0, r10 C M I
+.mbb; ADDSUB w1 = u1, v1 C M I
+ (p15) br L(c5) C B
+ br L(end) C B
+
+
+C Carry-in feed-in for n mod 8 = 6. w3 is formed from the first limb pair
+C and w0 from u0/v0; p8 = (cy != 0) is folded into w3. upadv/vpadv are
+C recomputed from the advanced pointers, ar.lc = n >> 3 and rpx = rp + 24.
+C There is no n <= 8 exit here; control always goes to L(m67).
+.Lc110:
+.mmi; ld8 v0 = [vp], 8 C M01
+ ld8 u0 = [up], 8 C M01
+ shr.u n = n, 3 C I0
+ ;;
+.mmi; add upadv = PFDIST, up C M I
+ add vpadv = PFDIST, vp C M I
+ mov ar.lc = n C I0
+.mmi; ld8 v1 = [vp], 8 C M01
+ ld8 u1 = [up], 8 C M01
+ ADDSUB w3 = r10, r11 C M I
+ ;;
+.mmi; ld8 v2 = [vp], 8 C M01
+ ld8 u2 = [up], 8 C M01
+ ADDSUB w0 = u0, v0 C M I
+.mmi; cmp.CND p9, p0 = w3, r10 C M I
+ cmp.ne p8, p0 = 0, r23 C M I
+ add rpx = 24, rp C M I
+ ;;
+.mmi; ld8 v3 = [vp], 8 C M01
+ ld8 u3 = [up], 8 C M01
+ nop 0
+.mmb;
+ (p8) cmpeqor p9, p0 = LIM, w3 C M I
+ (p8) add w3 = INCR, w3 C M I
+ br L(m67) C B
+
+
+C Carry-in feed-in for n mod 8 = 7. w2 is formed from the first limb pair
+C (r10, r11) and w3 from the second pair (r18, r19) preloaded by the
+C dispatch code. The incoming carry p13 is folded into w2, which is then
+C stored through rp, and its carry (p8) is folded into w3. ar.lc = n >> 3
+C and rpx = rp + 32. Control always goes to L(m67).
+.Lc111:
+.mmi; ld8 v0 = [vp], 8 C M01
+ ld8 u0 = [up], 8 C M01
+ shr.u n = n, 3 C I0
+ ;;
+.mmi; add upadv = PFDIST, up C M I
+ ld8 v1 = [vp], 8 C M01
+ mov ar.lc = n C I0
+.mmi; ld8 u1 = [up], 8 C M01
+ ADDSUB w2 = r10, r11 C M I
+ nop 0
+ ;;
+.mmi; add vpadv = PFDIST, vp C M I
+ ld8 v2 = [vp], 8 C M01
+ cmp.CND p8, p0 = w2, r10 C M I
+.mmi; ld8 u2 = [up], 8 C M01
+ ADDSUB w3 = r18, r19 C M I
+ nop 0
+ ;;
+.mmi; ld8 v3 = [vp], 8 C M01
+ cmp.CND p9, p0 = w3, r18 C M I
+ (p13) cmpeqor p8, p0 = LIM, w2 C M I
+.mmi; ld8 u3 = [up], 8 C M01
+ (p13) add w2 = INCR, w2 C M I
+ nop 0
+ ;;
+.mmi; add rpx = 32, rp C M I
+ st8 [rp] = w2, 8 C M23
+ (p8) cmpeqor p9, p0 = LIM, w3 C M I
+.mmb;
+ (p8) add w3 = INCR, w3 C M I
+ ADDSUB w0 = u0, v0 C M I
+ br L(m67)
-.Lb000: ld8 v2 = [vp], 8 C M01
- ld8 u2 = [up], 8 C M01
- add rpx = 8, rp C M I
- ;;
- ld8 v3 = [vp], 8 C M01
- ld8 u3 = [up], 8 C M01
- ADDSUB w1 = r10, r11 C M I
- ;;
- ld8 v4 = [vp], 8 C M01
- ld8 u4 = [up], 8 C M01
- cmp.PRED p7, p0 = w1, r10 C M I
- ;;
- ld8 v5 = [vp], 8 C M01
- ld8 u5 = [up], 8 C M01
- ADDSUB w2 = u2, v2 C M I
- ;;
- ld8 v6 = [vp], 8 C M01
- ld8 u6 = [up], 8 C M01
- cmp.PRED p8, p0 = w2, u2 C M I
- ;;
- ld8 v7 = [vp], 8 C M01
- ld8 u7 = [up], 8 C M01
- ADDSUB w3 = u3, v3 C M I
+EPILOGUE()
+
+ASM_START()
+PROLOGUE(func)
+ .prologue
+ .save ar.lc, r2
+ .body
+ifdef(`HAVE_ABI_32',`
+ addp4 rp = 0, rp C M I
+ addp4 up = 0, up C M I
+ addp4 vp = 0, vp C M I
+ zxt4 n = n C I
;;
- ld8 v0 = [vp], 8 C M01
- ld8 u0 = [up], 8 C M01
- cmp.PRED p9, p0 = w3, u3 C M I
- (p7) cmp.eq.or p8, p0 = LIM, w2 C M I
- (p7) add w2 = INCR, w2 C M I
- (p14) br.cond.dptk .Lcj8 C B
- ;;
-
-.grt8: ld8 v1 = [vp], 8 C M01
- ld8 u1 = [up], 8 C M01
- shr.u n = n, 3 C I0
- ;;
- add r11 = 512, vp
- ld8 v2 = [vp], 8 C M01
- add r10 = 512, up
- ld8 u2 = [up], 8 C M01
- nop.i 0
- nop.b 0
- ;;
- ld8 v3 = [vp], 8 C M01
- ld8 u3 = [up], 8 C M01
- mov.i ar.lc = n C I0
- br .LL000 C B
+')
-.Lb001: add rpx = 16, rp C M I
- ADDSUB w0 = r10, r11 C M I
- (p15) br.cond.dpnt .grt1 C B
+ {.mmi; ld8 r11 = [vp], 8 C M01
+ ld8 r10 = [up], 8 C M01
+ mov r2 = ar.lc C I0
+}{.mmi; and r14 = 7, n C M I
+ cmp.lt p15, p14 = 8, n C M I
+ add n = -6, n C M I
+ ;;
+}{.mmi; cmp.eq p6, p0 = 1, r14 C M I
+ cmp.eq p7, p0 = 2, r14 C M I
+ cmp.eq p8, p0 = 3, r14 C M I
+}{.bbb
+ (p6) br.dptk .Lb001 C B
+ (p7) br.dptk .Lb010 C B
+ (p8) br.dptk .Lb011 C B
+ ;;
+}{.mmi; cmp.eq p9, p0 = 4, r14 C M I
+ cmp.eq p10, p0 = 5, r14 C M I
+ cmp.eq p11, p0 = 6, r14 C M I
+}{.bbb
+ (p9) br.dptk .Lb100 C B
+ (p10) br.dptk .Lb101 C B
+ (p11) br.dptk .Lb110 C B
+ ;;
+}{.mmi; ld8 r19 = [vp], 8 C M01
+ ld8 r18 = [up], 8 C M01
+ cmp.ne p13, p0 = r0, r0 C clear "CF" M I
+}{.mmb; cmp.eq p12, p0 = 7, r14 C M I
+ mov r23 = 0 C M I
+ (p12) br.dptk .Lb111 C B
;;
- cmp.PRED p6, p0 = w0, r10 C M I
- mov r8 = 0 C M I
- br .Lcj1 C B
+}
+
+.Lb000:
+.mmi; ld8 v3 = [vp], 8 C M01
+ ld8 u3 = [up], 8 C M01
+ shr.u n = n, 3 C I0
+ ;;
+.mmi; ld8 v0 = [vp], 8 C M01
+ ld8 u0 = [up], 8 C M01
+ ADDSUB w1 = r10, r11 C M I
+ ;;
+.mmi; ld8 v1 = [vp], 8 C M01
+ cmp.CND p7, p0 = w1, r10 C M I
+ mov ar.lc = n C I0
+.mmi; ld8 u1 = [up], 8 C M01
+ ADDSUB w2 = r18, r19 C M I
+ add rpx = 8, rp C M I
+ ;;
+.mmi; add upadv = PFDIST, up
+ add vpadv = PFDIST, vp
+ cmp.CND p8, p0 = w2, r18 C M I
+.mmi; ld8 v2 = [vp], 8 C M01
+ ld8 u2 = [up], 8 C M01
+ ADDSUB w3 = u3, v3 C M I
+ ;;
+.mmi; ld8 v3 = [vp], 8 C M01
+ cmp.CND p9, p0 = w3, u3 C M I
+ (p7) cmpeqor p8, p0 = LIM, w2 C M I
+.mmb; ld8 u3 = [up], 8 C M01
+ (p7) add w2 = INCR, w2 C M I
+ br L(m0) C B
+
+
+ ALIGN(32)
+.Lb001:
+.mmi; ADDSUB w0 = r10, r11 C M I
+ (p15) ld8 v1 = [vp], 8 C M01
+ mov r8 = 0 C M I
+ ;;
+.mmb; cmp.CND p6, p0 = w0, r10 C M I
+ (p15) ld8 u1 = [up], 8 C M01
+ (p14) br L(cj1) C B
+ ;;
+.mmi; add upadv = PFDIST, up
+ add vpadv = PFDIST, vp
+ shr.u n = n, 3 C I0
+.mmi; ld8 v2 = [vp], 8 C M01
+ ld8 u2 = [up], 8 C M01
+ cmp.CND p6, p0 = w0, r10 C M I
+ ;;
+.mmi; ld8 v3 = [vp], 8 C M01
+ ld8 u3 = [up], 8 C M01
+ mov ar.lc = n C I0
+ ;;
+.mmi; ld8 v0 = [vp], 8 C M01
+ ld8 u0 = [up], 8 C M01
+ ADDSUB w1 = u1, v1 C M I
+ ;;
+.mmi; ld8 v1 = [vp], 8 C M01
+ cmp.CND p7, p0 = w1, u1 C M I
+ ADDSUB w2 = u2, v2 C M I
+.mmb; ld8 u1 = [up], 8 C M01
+ add rpx = 16, rp C M I
+ br L(m1) C B
+
+
+ ALIGN(32)
+.Lb010:
+.mmi; ld8 v0 = [vp], 8 C M01
+ ld8 u0 = [up], 8 C M01
+ shr.u n = n, 3 C I0
+.mmb; ADDSUB w3 = r10, r11 C M I
+ nop 0
+ (p15) br L(gt2) C B
+ ;;
+.mmi; cmp.CND p9, p0 = w3, r10 C M I
+ ADDSUB w0 = u0, v0 C M I
+ mov r8 = 0 C M I
+ ;;
+.mmb; nop 0
+ cmp.CND p6, p0 = w0, u0 C M I
+ br L(cj2) C B
+L(gt2):
+.mmi; ld8 v1 = [vp], 8 C M01
+ ld8 u1 = [up], 8 C M01
+ nop 0
+ ;;
+.mmi; add upadv = PFDIST, up
+ add vpadv = PFDIST, vp
+ mov ar.lc = n C I0
+.mmi; ld8 v2 = [vp], 8 C M01
+ ld8 u2 = [up], 8 C M01
+ nop 0
+ ;;
+.mmi; ld8 v3 = [vp], 8 C M01
+ cmp.CND p9, p0 = w3, r10 C M I
+ ADDSUB w0 = u0, v0 C M I
+.mmb; ld8 u3 = [up], 8 C M01
+ add rpx = 24, rp C M I
+ br L(m23) C B
+
+
+ ALIGN(32)
+.Lb011:
+.mmi; ld8 v3 = [vp], 8 C M01
+ ld8 u3 = [up], 8 C M01
+ ADDSUB w2 = r10, r11 C M I
+ ;;
+.mmb; ld8 v0 = [vp], 8 C M01
+ ld8 u0 = [up], 8 C M01
+ (p15) br 1f C B
+.mmb; cmp.CND p8, p0 = w2, r10 C M I
+ ADDSUB w3 = u3, v3 C M I
+ br L(cj3) C B
+1:
+.mmi; ld8 v1 = [vp], 8 C M01
+ ld8 u1 = [up], 8 C M01
+ shr.u n = n, 3 C I0
+ ;;
+.mmi; add upadv = PFDIST, up
+ add vpadv = PFDIST, vp
+ ADDSUB w3 = u3, v3 C M I
+.mmi; ld8 v2 = [vp], 8 C M01
+ ld8 u2 = [up], 8 C M01
+ cmp.CND p8, p0 = w2, r10 C M I
+ ;;
+.mmi; ld8 v3 = [vp], 8 C M01
+ cmp.CND p9, p0 = w3, u3 C M I
+ mov ar.lc = n C I0
+.mmi; ld8 u3 = [up], 8 C M01
+ nop 0
+ nop 0
+ ;;
+.mmi; add rpx = 32, rp C M I
+ st8 [rp] = w2, 8 C M23
+ (p8) cmpeqor p9, p0 = LIM, w3 C M I
+.mmb;
+ (p8) add w3 = INCR, w3 C M I
+ ADDSUB w0 = u0, v0 C M I
+ br L(m23) C B
+
+
+ ALIGN(32)
+.Lb100:
+.mmi; ld8 v2 = [vp], 8 C M01
+ ld8 u2 = [up], 8 C M01
+ shr.u n = n, 3 C I0
+ ;;
+.mmi; ld8 v3 = [vp], 8 C M01
+ ld8 u3 = [up], 8 C M01
+ ADDSUB w1 = r10, r11 C M I
+ ;;
+.mmi; ld8 v0 = [vp], 8 C M01
+ ld8 u0 = [up], 8 C M01
+ cmp.CND p7, p0 = w1, r10 C M I
+.mmb; nop 0
+ ADDSUB w2 = u2, v2 C M I
+ (p14) br L(cj4) C B
+ ;;
+L(gt4):
+.mmi; add upadv = PFDIST, up
+ add vpadv = PFDIST, vp
+ mov ar.lc = n C I0
+ ld8 v1 = [vp], 8 C M01
+ ld8 u1 = [up], 8 C M01
+ nop 0
+ ;;
+.mmi; ld8 v2 = [vp], 8 C M01
+ cmp.CND p8, p0 = w2, u2 C M I
+ nop 0
+.mmi; ld8 u2 = [up], 8 C M01
+ ADDSUB w3 = u3, v3 C M I
+ add rpx = 8, rp C M I
+ ;;
+.mmi; ld8 v3 = [vp], 8 C M01
+ cmp.CND p9, p0 = w3, u3 C M I
+ (p7) cmpeqor p8, p0 = LIM, w2 C M I
+.mmb; ld8 u3 = [up], 8 C M01
+ (p7) add w2 = INCR, w2 C M I
+ br L(m4) C B
+
+
+ ALIGN(32)
+.Lb101:
+.mmi; ld8 v1 = [vp], 8 C M01
+ ld8 u1 = [up], 8 C M01
+ shr.u n = n, 3 C I0
+ ;;
+.mmi; ld8 v2 = [vp], 8 C M01
+ ld8 u2 = [up], 8 C M01
+ ADDSUB w0 = r10, r11 C M I
+ ;;
+.mmi; add upadv = PFDIST, up
+ add vpadv = PFDIST, vp
+ add rpx = 16, rp C M I
+ ld8 v3 = [vp], 8 C M01
+ ld8 u3 = [up], 8 C M01
+ nop 0
+ ;;
+.mmi; ld8 v0 = [vp], 8 C M01
+ cmp.CND p6, p0 = w0, r10 C M I
+ nop 0
+.mmb; ld8 u0 = [up], 8 C M01
+ ADDSUB w1 = u1, v1 C M I
+ (p14) br L(cj5) C B
+ ;;
+L(gt5):
+.mmi; ld8 v1 = [vp], 8 C M01
+ cmp.CND p7, p0 = w1, u1 C M I
+ mov ar.lc = n C I0
+.mmb; ld8 u1 = [up], 8 C M01
+ ADDSUB w2 = u2, v2 C M I
+ br L(m5) C B
+
+
+ ALIGN(32)
+.Lb110:
+.mmi; ld8 v0 = [vp], 8 C M01
+ ld8 u0 = [up], 8 C M01
+ shr.u n = n, 3 C I0
+ ;;
+.mmi; ld8 v1 = [vp], 8 C M01
+ ld8 u1 = [up], 8 C M01
+ ADDSUB w3 = r10, r11 C M I
+ ;;
+.mmi; add upadv = PFDIST, up
+ add vpadv = PFDIST, vp
+ mov ar.lc = n C I0
+.mmi; ld8 v2 = [vp], 8 C M01
+ ld8 u2 = [up], 8 C M01
+ nop 0
+ ;;
+.mmi; ld8 v3 = [vp], 8 C M01
+ cmp.CND p9, p0 = w3, r10 C M I
+ ADDSUB w0 = u0, v0 C M I
+.mmb; ld8 u3 = [up], 8 C M01
+ add rpx = 24, rp C M I
+ br L(m67) C B
+
+
+ ALIGN(32)
+.Lb111:
+.mmi; ld8 v0 = [vp], 8 C M01
+ ld8 u0 = [up], 8 C M01
+ shr.u n = n, 3 C I0
+ ;;
+.mmi; ld8 v1 = [vp], 8 C M01
+ ld8 u1 = [up], 8 C M01
+ ADDSUB w2 = r10, r11 C M I
+ ;;
+.mmi; ld8 v2 = [vp], 8 C M01
+ cmp.CND p8, p0 = w2, r10 C M I
+ mov ar.lc = n C I0
+.mmi; ld8 u2 = [up], 8 C M01
+ ADDSUB w3 = r18, r19 C M I
+ nop 0
+ ;;
+.mmi; add upadv = PFDIST, up
+ add vpadv = PFDIST, vp
+ nop 0
+.mmi; ld8 v3 = [vp], 8 C M01
+ ld8 u3 = [up], 8 C M01
+ cmp.CND p9, p0 = w3, r18 C M I
+ ;;
+.mmi; add rpx = 32, rp C M I
+ st8 [rp] = w2, 8 C M23
+ (p8) cmpeqor p9, p0 = LIM, w3 C M I
+.mmb;
+ (p8) add w3 = INCR, w3 C M I
+ ADDSUB w0 = u0, v0 C M I
+ br L(m67) C B
-.grt1: ld8 v1 = [vp], 8 C M01
- ld8 u1 = [up], 8 C M01
- shr.u n = n, 3 C I0
- ;;
- ld8 v2 = [vp], 8 C M01
- ld8 u2 = [up], 8 C M01
- cmp.ne p9, p0 = r0, r0 C read near Loop
- ;;
- ld8 v3 = [vp], 8 C M01
- ld8 u3 = [up], 8 C M01
- mov.i ar.lc = n C I0
- ;;
- ld8 v4 = [vp], 8 C M01
- ld8 u4 = [up], 8 C M01
- cmp.PRED p6, p0 = w0, r10 C M I
- ;;
- ld8 v5 = [vp], 8 C M01
- ld8 u5 = [up], 8 C M01
- ADDSUB w1 = u1, v1 C M I
- ;;
- ld8 v6 = [vp], 8 C M01
- ld8 u6 = [up], 8 C M01
- cmp.PRED p7, p0 = w1, u1 C M I
- ;;
- ld8 v7 = [vp], 8 C M01
- ld8 u7 = [up], 8 C M01
- ADDSUB w2 = u2, v2 C M I
- ;;
- add r11 = 512, vp
- ld8 v0 = [vp], 8 C M01
- add r10 = 512, up
- ld8 u0 = [up], 8 C M01
- br.cloop.dptk .Loop C B
- br .Lcj9 C B
-
-.Lb010: ld8 v0 = [vp], 8 C M01
- ld8 u0 = [up], 8 C M01
- add rpx = 24, rp C M I
- ADDSUB w7 = r10, r11 C M I
- (p15) br.cond.dpnt .grt2 C B
- ;;
- cmp.PRED p9, p0 = w7, r10 C M I
- ADDSUB w0 = u0, v0 C M I
- br .Lcj2 C B
-
-.grt2: ld8 v1 = [vp], 8 C M01
- ld8 u1 = [up], 8 C M01
- shr.u n = n, 3 C I0
- ;;
- ld8 v2 = [vp], 8 C M01
- ld8 u2 = [up], 8 C M01
- ;;
- ld8 v3 = [vp], 8 C M01
- ld8 u3 = [up], 8 C M01
- mov.i ar.lc = n C I0
- ;;
- ld8 v4 = [vp], 8 C M01
- ld8 u4 = [up], 8 C M01
- ;;
- ld8 v5 = [vp], 8 C M01
- ld8 u5 = [up], 8 C M01
- cmp.PRED p9, p0 = w7, r10 C M I
- ;;
- ld8 v6 = [vp], 8 C M01
- ld8 u6 = [up], 8 C M01
- ADDSUB w0 = u0, v0 C M I
- ;;
- add r11 = 512, vp
- ld8 v7 = [vp], 8 C M01
- add r10 = 512, up
- ld8 u7 = [up], 8 C M01
- br .LL01x C B
-
-.Lb011: ld8 v7 = [vp], 8 C M01
- ld8 u7 = [up], 8 C M01
- ADDSUB w6 = r10, r11 C M I
- ;;
- ld8 v0 = [vp], 8 C M01
- ld8 u0 = [up], 8 C M01
- (p15) br.cond.dpnt .grt3 C B
- ;;
- cmp.PRED p8, p0 = w6, r10 C M I
- ADDSUB w7 = u7, v7 C M I
- ;;
- st8 [rp] = w6, 8 C M23
- cmp.PRED p9, p0 = w7, u7 C M I
- br .Lcj3 C B
-
-.grt3: ld8 v1 = [vp], 8 C M01
- ld8 u1 = [up], 8 C M01
- add rpx = 32, rp C M I
- ;;
- ld8 v2 = [vp], 8 C M01
- ld8 u2 = [up], 8 C M01
- shr.u n = n, 3 C I0
- ;;
- ld8 v3 = [vp], 8 C M01
- ld8 u3 = [up], 8 C M01
- cmp.PRED p8, p0 = w6, r10 C M I
- ;;
- ld8 v4 = [vp], 8 C M01
- ld8 u4 = [up], 8 C M01
- mov.i ar.lc = n C I0
- ADDSUB w7 = u7, v7 C M I
- nop.i 0
- nop.b 0
- ;;
- ld8 v5 = [vp], 8 C M01
- ld8 u5 = [up], 8 C M01
- cmp.PRED p9, p0 = w7, u7 C M I
- ;;
- add r11 = 512, vp
- ld8 v6 = [vp], 8 C M01
- add r10 = 512, up
- ld8 u6 = [up], 8 C M01
- (p8) cmp.eq.or p9, p0 = LIM, w7 C M I
- ;;
- ld8 v7 = [vp], 8 C M01
- ld8 u7 = [up], 8 C M01
- (p8) add w7 = INCR, w7 C M I
- st8 [rp] = w6, 8 C M23
- ADDSUB w0 = u0, v0 C M I
- br .LL01x C B
-
-.Lb100: ld8 v6 = [vp], 8 C M01
- ld8 u6 = [up], 8 C M01
- add rpx = 8, rp C M I
- ;;
- ld8 v7 = [vp], 8 C M01
- ld8 u7 = [up], 8 C M01
- ADDSUB w5 = r10, r11 C M I
- ;;
- ld8 v0 = [vp], 8 C M01
- ld8 u0 = [up], 8 C M01
- (p15) br.cond.dpnt .grt4 C B
- ;;
- cmp.PRED p7, p0 = w5, r10 C M I
- ADDSUB w6 = u6, v6 C M I
- ;;
- cmp.PRED p8, p0 = w6, u6 C M I
- ADDSUB w7 = u7, v7 C M I
- br .Lcj4 C B
-
-.grt4: ld8 v1 = [vp], 8 C M01
- ld8 u1 = [up], 8 C M01
- shr.u n = n, 3 C I0
- cmp.PRED p7, p0 = w5, r10 C M I
- ;;
- ld8 v2 = [vp], 8 C M01
- ld8 u2 = [up], 8 C M01
- ADDSUB w6 = u6, v6 C M I
- ;;
- ld8 v3 = [vp], 8 C M01
- ld8 u3 = [up], 8 C M01
- cmp.PRED p8, p0 = w6, u6 C M I
- ;;
- ld8 v4 = [vp], 8 C M01
- ld8 u4 = [up], 8 C M01
- mov.i ar.lc = n C I0
- ;;
- ld8 v5 = [vp], 8 C M01
- ld8 u5 = [up], 8 C M01
- ADDSUB w7 = u7, v7 C M I
- ;;
- add r11 = 512, vp
- ld8 v6 = [vp], 8 C M01
- add r10 = 512, up
- ld8 u6 = [up], 8 C M01
- cmp.PRED p9, p0 = w7, u7 C M I
- ;;
- ld8 v7 = [vp], 8 C M01
- ld8 u7 = [up], 8 C M01
- (p7) cmp.eq.or p8, p0 = LIM, w6 C M I
- (p7) add w6 = INCR, w6 C M I
- br .LL100 C B
-
-.Lb101: ld8 v5 = [vp], 8 C M01
- ld8 u5 = [up], 8 C M01
- add rpx = 16, rp C M I
- ;;
- ld8 v6 = [vp], 8 C M01
- ld8 u6 = [up], 8 C M01
- ADDSUB w4 = r10, r11 C M I
- ;;
- ld8 v7 = [vp], 8 C M01
- ld8 u7 = [up], 8 C M01
- cmp.PRED p6, p0 = w4, r10 C M I
- ;;
- ld8 v0 = [vp], 8 C M01
- ld8 u0 = [up], 8 C M01
- ADDSUB w5 = u5, v5 C M I
- shr.u n = n, 3 C I0
- (p15) br.cond.dpnt .grt5 C B
- ;;
- cmp.PRED p7, p0 = w5, u5 C M I
- ADDSUB w6 = u6, v6 C M I
- br .Lcj5 C B
-
-.grt5: ld8 v1 = [vp], 8 C M01
- ld8 u1 = [up], 8 C M01
- ;;
- ld8 v2 = [vp], 8 C M01
- ld8 u2 = [up], 8 C M01
- mov.i ar.lc = n C I0
- ;;
- ld8 v3 = [vp], 8 C M01
- ld8 u3 = [up], 8 C M01
- cmp.PRED p7, p0 = w5, u5 C M I
- ;;
- ld8 v4 = [vp], 8 C M01
- ld8 u4 = [up], 8 C M01
- ADDSUB w6 = u6, v6 C M I
- ;;
- add r11 = 512, vp
- ld8 v5 = [vp], 8 C M01
- add r10 = 512, up
- ld8 u5 = [up], 8 C M01
- br .LL101 C B
-
-.Lb110: ld8 v4 = [vp], 8 C M01
- ld8 u4 = [up], 8 C M01
- add rpx = 24, rp C M I
- ;;
- ld8 v5 = [vp], 8 C M01
- ld8 u5 = [up], 8 C M01
- ADDSUB w3 = r10, r11 C M I
- ;;
- ld8 v6 = [vp], 8 C M01
- ld8 u6 = [up], 8 C M01
- shr.u n = n, 3 C I0
- ;;
- ld8 v7 = [vp], 8 C M01
- ld8 u7 = [up], 8 C M01
- cmp.PRED p9, p0 = w3, r10 C M I
- ;;
- ld8 v0 = [vp], 8 C M01
- ld8 u0 = [up], 8 C M01
- ADDSUB w4 = u4, v4 C M I
- (p14) br.cond.dptk .Lcj67 C B
- ;;
-
-.grt6: ld8 v1 = [vp], 8 C M01
- ld8 u1 = [up], 8 C M01
- mov.i ar.lc = n C I0
- cmp.PRED p9, p0 = w3, r10 C M I
- nop.i 0
- nop.b 0
- ;;
- ld8 v2 = [vp], 8 C M01
- ld8 u2 = [up], 8 C M01
- ADDSUB w4 = u4, v4 C M I
- ;;
- add r11 = 512, vp
- ld8 v3 = [vp], 8 C M01
- add r10 = 512, up
- ld8 u3 = [up], 8 C M01
- br .LL11x C B
-
-.Lb111: ld8 v3 = [vp], 8 C M01
- ld8 u3 = [up], 8 C M01
- add rpx = 32, rp C M I
- ;;
- ld8 v4 = [vp], 8 C M01
- ld8 u4 = [up], 8 C M01
- ADDSUB w2 = r10, r11 C M I
- ;;
- ld8 v5 = [vp], 8 C M01
- ld8 u5 = [up], 8 C M01
- cmp.PRED p8, p0 = w2, r10 C M I
- ;;
- ld8 v6 = [vp], 8 C M01
- ld8 u6 = [up], 8 C M01
- ADDSUB w3 = u3, v3 C M I
- ;;
- ld8 v7 = [vp], 8 C M01
- ld8 u7 = [up], 8 C M01
- cmp.PRED p9, p0 = w3, u3 C M I
- ;;
- ld8 v0 = [vp], 8 C M01
- ld8 u0 = [up], 8 C M01
- (p15) br.cond.dpnt .grt7 C B
- ;;
- st8 [rp] = w2, 8 C M23
- (p8) cmp.eq.or p9, p0 = LIM, w3 C M I
- (p8) add w3 = INCR, w3 C M I
- ADDSUB w4 = u4, v4 C M I
- br .Lcj67 C B
-
-.grt7: ld8 v1 = [vp], 8 C M01
- ld8 u1 = [up], 8 C M01
- shr.u n = n, 3 C I0
- (p8) cmp.eq.or p9, p0 = LIM, w3 C M I
- nop.i 0
- nop.b 0
- ;;
- add r11 = 512, vp
- ld8 v2 = [vp], 8 C M01
- add r10 = 512, up
- ld8 u2 = [up], 8 C M01
- (p8) add w3 = INCR, w3 C M I
- nop.b 0
- ;;
- ld8 v3 = [vp], 8 C M01
- ld8 u3 = [up], 8 C M01
- mov.i ar.lc = n C I0
- st8 [rp] = w2, 8 C M23
- ADDSUB w4 = u4, v4 C M I
- br .LL11x C B
C *** MAIN LOOP START ***
ALIGN(32)
-.Loop: ld8 v1 = [vp], 8 C M01
- cmp.PRED p7, p0 = w1, u1 C M I
- (p9) cmp.eq.or p6, p0 = LIM, w0 C M I
- ld8 u1 = [up], 8 C M01
- (p9) add w0 = INCR, w0 C M I
- ADDSUB w2 = u2, v2 C M I
- ;;
- ld8 v2 = [vp], 8 C M01
- cmp.PRED p8, p0 = w2, u2 C M I
- (p6) cmp.eq.or p7, p0 = LIM, w1 C M I
- ld8 u2 = [up], 8 C M01
- (p6) add w1 = INCR, w1 C M I
- ADDSUB w3 = u3, v3 C M I
- ;;
- st8 [rp] = w0, 8 C M23
- ld8 v3 = [vp], 8 C M01
- cmp.PRED p9, p0 = w3, u3 C M I
- (p7) cmp.eq.or p8, p0 = LIM, w2 C M I
- ld8 u3 = [up], 8 C M01
- (p7) add w2 = INCR, w2 C M I
- ;;
-.LL000: st8 [rp] = w1, 16 C M23
- st8 [rpx] = w2, 32 C M23
- (p8) cmp.eq.or p9, p0 = LIM, w3 C M I
- lfetch [r10], 64
- (p8) add w3 = INCR, w3 C M I
- ADDSUB w4 = u4, v4 C M I
- ;;
-.LL11x: st8 [rp] = w3, 8 C M23
- ld8 v4 = [vp], 8 C M01
- cmp.PRED p6, p0 = w4, u4 C M I
- ld8 u4 = [up], 8 C M01
- ADDSUB w5 = u5, v5 C M I
- ;;
- ld8 v5 = [vp], 8 C M01
- cmp.PRED p7, p0 = w5, u5 C M I
- (p9) cmp.eq.or p6, p0 = LIM, w4 C M I
- ld8 u5 = [up], 8 C M01
- (p9) add w4 = INCR, w4 C M I
- ADDSUB w6 = u6, v6 C M I
- ;;
-.LL101: ld8 v6 = [vp], 8 C M01
- cmp.PRED p8, p0 = w6, u6 C M I
- (p6) cmp.eq.or p7, p0 = LIM, w5 C M I
- ld8 u6 = [up], 8 C M01
- (p6) add w5 = INCR, w5 C M I
- ADDSUB w7 = u7, v7 C M I
- ;;
- st8 [rp] = w4, 8 C M23
- ld8 v7 = [vp], 8 C M01
- cmp.PRED p9, p0 = w7, u7 C M I
- (p7) cmp.eq.or p8, p0 = LIM, w6 C M I
- ld8 u7 = [up], 8 C M01
- (p7) add w6 = INCR, w6 C M I
- ;;
-.LL100: st8 [rp] = w5, 16 C M23
- st8 [rpx] = w6, 32 C M23
- (p8) cmp.eq.or p9, p0 = LIM, w7 C M I
- lfetch [r11], 64
- (p8) add w7 = INCR, w7 C M I
- ADDSUB w0 = u0, v0 C M I
- ;;
-.LL01x: st8 [rp] = w7, 8 C M23
- ld8 v0 = [vp], 8 C M01
- cmp.PRED p6, p0 = w0, u0 C M I
- ld8 u0 = [up], 8 C M01
- ADDSUB w1 = u1, v1 C M I
- br.cloop.dptk .Loop C B
+L(top):
+L(c5): ld8 v1 = [vp], 8 C M01
+ cmp.CND p7, p0 = w1, u1 C M I
+ (p9) cmpeqor p6, p0 = LIM, w0 C M I
+ ld8 u1 = [up], 8 C M01
+ (p9) add w0 = INCR, w0 C M I
+ ADDSUB w2 = u2, v2 C M I
+ ;;
+L(m5): ld8 v2 = [vp], 8 C M01
+ cmp.CND p8, p0 = w2, u2 C M I
+ (p6) cmpeqor p7, p0 = LIM, w1 C M I
+ ld8 u2 = [up], 8 C M01
+ (p6) add w1 = INCR, w1 C M I
+ ADDSUB w3 = u3, v3 C M I
+ ;;
+ st8 [rp] = w0, 8 C M23
+ ld8 v3 = [vp], 8 C M01
+ cmp.CND p9, p0 = w3, u3 C M I
+ (p7) cmpeqor p8, p0 = LIM, w2 C M I
+ ld8 u3 = [up], 8 C M01
+ (p7) add w2 = INCR, w2 C M I
+ ;;
+L(m4): st8 [rp] = w1, 16 C M23
+ st8 [rpx] = w2, 32 C M23
+ (p8) cmpeqor p9, p0 = LIM, w3 C M I
+ lfetch [upadv], 64
+ (p8) add w3 = INCR, w3 C M I
+ ADDSUB w0 = u0, v0 C M I
+ ;;
+L(m23): st8 [rp] = w3, 8 C M23
+ ld8 v0 = [vp], 8 C M01
+ cmp.CND p6, p0 = w0, u0 C M I
+ ld8 u0 = [up], 8 C M01
+ ADDSUB w1 = u1, v1 C M I
+ nop.b 0
+ ;;
+L(c1): ld8 v1 = [vp], 8 C M01
+ cmp.CND p7, p0 = w1, u1 C M I
+ (p9) cmpeqor p6, p0 = LIM, w0 C M I
+ ld8 u1 = [up], 8 C M01
+ (p9) add w0 = INCR, w0 C M I
+ ADDSUB w2 = u2, v2 C M I
+ ;;
+L(m1): ld8 v2 = [vp], 8 C M01
+ cmp.CND p8, p0 = w2, u2 C M I
+ (p6) cmpeqor p7, p0 = LIM, w1 C M I
+ ld8 u2 = [up], 8 C M01
+ (p6) add w1 = INCR, w1 C M I
+ ADDSUB w3 = u3, v3 C M I
+ ;;
+ st8 [rp] = w0, 8 C M23
+ ld8 v3 = [vp], 8 C M01
+ cmp.CND p9, p0 = w3, u3 C M I
+ (p7) cmpeqor p8, p0 = LIM, w2 C M I
+ ld8 u3 = [up], 8 C M01
+ (p7) add w2 = INCR, w2 C M I
+ ;;
+L(m0): st8 [rp] = w1, 16 C M23
+ st8 [rpx] = w2, 32 C M23
+ (p8) cmpeqor p9, p0 = LIM, w3 C M I
+ lfetch [vpadv], 64
+ (p8) add w3 = INCR, w3 C M I
+ ADDSUB w0 = u0, v0 C M I
+ ;;
+L(m67): st8 [rp] = w3, 8 C M23
+ ld8 v0 = [vp], 8 C M01
+ cmp.CND p6, p0 = w0, u0 C M I
+ ld8 u0 = [up], 8 C M01
+ ADDSUB w1 = u1, v1 C M I
+ br.cloop.dptk L(top) C B
;;
C *** MAIN LOOP END ***
- cmp.PRED p7, p0 = w1, u1 C M I
- (p9) cmp.eq.or p6, p0 = LIM, w0 C M I
- (p9) add w0 = INCR, w0 C M I
- ADDSUB w2 = u2, v2 C M I
- ;;
-.Lcj9: cmp.PRED p8, p0 = w2, u2 C M I
- (p6) cmp.eq.or p7, p0 = LIM, w1 C M I
- st8 [rp] = w0, 8 C M23
- (p6) add w1 = INCR, w1 C M I
- ADDSUB w3 = u3, v3 C M I
- ;;
- cmp.PRED p9, p0 = w3, u3 C M I
- (p7) cmp.eq.or p8, p0 = LIM, w2 C M I
- (p7) add w2 = INCR, w2 C M I
- ;;
-.Lcj8: st8 [rp] = w1, 16 C M23
- st8 [rpx] = w2, 32 C M23
- (p8) cmp.eq.or p9, p0 = LIM, w3 C M I
- (p8) add w3 = INCR, w3 C M I
- ADDSUB w4 = u4, v4 C M I
- ;;
-.Lcj67: st8 [rp] = w3, 8 C M23
- cmp.PRED p6, p0 = w4, u4 C M I
- ADDSUB w5 = u5, v5 C M I
- ;;
- cmp.PRED p7, p0 = w5, u5 C M I
- (p9) cmp.eq.or p6, p0 = LIM, w4 C M I
- (p9) add w4 = INCR, w4 C M I
- ADDSUB w6 = u6, v6 C M I
- ;;
-.Lcj5: cmp.PRED p8, p0 = w6, u6 C M I
- (p6) cmp.eq.or p7, p0 = LIM, w5 C M I
- st8 [rp] = w4, 8 C M23
- (p6) add w5 = INCR, w5 C M I
- ADDSUB w7 = u7, v7 C M I
- ;;
-.Lcj4: cmp.PRED p9, p0 = w7, u7 C M I
- (p7) cmp.eq.or p8, p0 = LIM, w6 C M I
- (p7) add w6 = INCR, w6 C M I
- ;;
- st8 [rp] = w5, 16 C M23
- st8 [rpx] = w6, 32 C M23
-.Lcj3:
- (p8) cmp.eq.or p9, p0 = LIM, w7 C M I
- (p8) add w7 = INCR, w7 C M I
- ADDSUB w0 = u0, v0 C M I
- ;;
-.Lcj2: st8 [rp] = w7, 8 C M23
- cmp.PRED p6, p0 = w0, u0 C M I
- ;;
- (p9) cmp.eq.or p6, p0 = LIM, w0 C M I
- (p9) add w0 = INCR, w0 C M I
- mov r8 = 0 C M I
- ;;
-.Lcj1: st8 [rp] = w0, 8 C M23
- mov.i ar.lc = r2 C I0
- (p6) mov r8 = 1 C M I
- br.ret.sptk.many b0 C B
+L(end):
+.mmi;
+ (p9) cmpeqor p6, p0 = LIM, w0 C M I
+ (p9) add w0 = INCR, w0 C M I
+ mov ar.lc = r2 C I0
+L(cj5):
+.mmi; cmp.CND p7, p0 = w1, u1 C M I
+ ADDSUB w2 = u2, v2 C M I
+ nop 0
+ ;;
+.mmi; st8 [rp] = w0, 8 C M23
+ (p6) cmpeqor p7, p0 = LIM, w1 C M I
+ (p6) add w1 = INCR, w1 C M I
+L(cj4):
+.mmi; cmp.CND p8, p0 = w2, u2 C M I
+ ADDSUB w3 = u3, v3 C M I
+ nop 0
+ ;;
+.mmi; st8 [rp] = w1, 8 C M23
+ (p7) cmpeqor p8, p0 = LIM, w2 C M I
+ (p7) add w2 = INCR, w2 C M I
+L(cj3):
+.mmi; cmp.CND p9, p0 = w3, u3 C M I
+ ADDSUB w0 = u0, v0 C M I
+ nop 0
+ ;;
+.mmi; st8 [rp] = w2, 8 C M23
+ (p8) cmpeqor p9, p0 = LIM, w3 C M I
+ (p8) add w3 = INCR, w3 C M I
+.mmi; cmp.CND p6, p0 = w0, u0 C M I
+ nop 0
+ mov r8 = 0 C M I
+ ;;
+L(cj2):
+.mmi; st8 [rp] = w3, 8 C M23
+ (p9) cmpeqor p6, p0 = LIM, w0 C M I
+ (p9) add w0 = INCR, w0 C M I
+ ;;
+L(cj1):
+.mmb; st8 [rp] = w0, 8 C M23
+ (p6) mov r8 = 1 C M I
+ br.ret.sptk.many b0 C B
EPILOGUE()
ASM_END()
dnl IA-64 mpn_addlsh1_n/mpn_sublsh1_n -- rp[] = up[] +- (vp[] << 1).
-dnl Copyright 2003, 2004, 2005 Free Software Foundation, Inc.
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
+dnl Copyright 2003, 2004, 2005, 2010 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
C Itanium: 3.0
C Itanium 2: 1.5
-C TODO
-C * Use shladd in feed-in code (for mpn_addlsh1_n).
-C INPUT PARAMETERS
-define(`rp',`r32')
-define(`up',`r33')
-define(`vp',`r34')
-define(`n',`r35')
+define(LSH, 1)
+define(RSH, 63)
ifdef(`OPERATION_addlsh1_n',`
define(ADDSUB, add)
- define(PRED, ltu)
+ define(ADDP, 1)
+ define(CND, ltu)
define(INCR, 1)
define(LIM, -1)
define(func, mpn_addlsh1_n)
')
ifdef(`OPERATION_sublsh1_n',`
define(ADDSUB, sub)
- define(PRED, gtu)
+ define(CND, gtu)
define(INCR, -1)
define(LIM, 0)
define(func, mpn_sublsh1_n)
')
-C Some useful aliases for registers we use
-define(`u0',`r14') define(`u1',`r15') define(`u2',`r16') define(`u3',`r17')
-define(`v0',`r18') define(`v1',`r19') define(`v2',`r20') define(`v3',`r21')
-define(`w0',`r22') define(`w1',`r23') define(`w2',`r24') define(`w3',`r25')
-define(`s0',`r26') define(`s1',`r27') define(`s2',`r28') define(`s3',`r29')
-define(`x0',`r30') define(`x1',`r31') define(`x2',`r30') define(`x3',`r31')
MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_sublsh1_n)
-ASM_START()
-PROLOGUE(func)
- .prologue
- .save ar.lc, r2
- .body
-ifdef(`HAVE_ABI_32',`
- addp4 rp = 0, rp C M I
- addp4 up = 0, up C M I
- addp4 vp = 0, vp C M I
- zxt4 n = n C I
- ;;
-')
- {.mmi; ld8 r11 = [vp], 8 C M01
- ld8 r10 = [up], 8 C M01
- mov.i r2 = ar.lc C I0
-}{.mmi; and r14 = 3, n C M I
- cmp.lt p15, p0 = 4, n C M I
- add n = -4, n C M I
- ;;
-}{.mmi; cmp.eq p6, p0 = 1, r14 C M I
- cmp.eq p7, p0 = 2, r14 C M I
- cmp.eq p8, p0 = 3, r14 C M I
-}{.bbb
- (p6) br.dptk .Lb01 C B
- (p7) br.dptk .Lb10 C B
- (p8) br.dptk .Lb11 C B
-}
-
-.Lb00: ld8 v0 = [vp], 8 C M01
- ld8 u0 = [up], 8 C M01
- shr.u n = n, 2 C I0
- ;;
- ld8 v1 = [vp], 8 C M01
- ld8 u1 = [up], 8 C M01
- add x3 = r11, r11 C M I
- ;;
- ld8 v2 = [vp], 8 C M01
- ld8 u2 = [up], 8 C M01
- ADDSUB w3 = r10, x3 C M I
- (p15) br.dpnt .grt4 C B
- ;;
- shrp x0 = v0, r11, 63 C I0
- cmp.PRED p8, p0 = w3, r10 C M I
- ;;
- shrp x1 = v1, v0, 63 C I0
- ADDSUB w0 = u0, x0 C M I
- ;;
- cmp.PRED p6, p0 = w0, u0 C M I
- ADDSUB w1 = u1, x1 C M I
- br .Lcj4 C B
-
-.grt4: ld8 v3 = [vp], 8 C M01
- shrp x0 = v0, r11, 63 C I0
- cmp.PRED p8, p0 = w3, r10 C M I
- add n = -1, n
- ;;
- ld8 u3 = [up], 8 C M01
- mov.i ar.lc = n C I0
- shrp x1 = v1, v0, 63 C I0
- ld8 v0 = [vp], 8 C M01
- ADDSUB w0 = u0, x0 C M I
- ;;
- cmp.PRED p6, p0 = w0, u0 C M I
- ld8 u0 = [up], 8 C M01
- ADDSUB w1 = u1, x1 C M I
- br .LL00 C B
-
-.Lb01: add x2 = r11, r11 C M I
- shr.u n = n, 2 C I0
- (p15) br.dpnt .grt1 C B
- ;;
- ADDSUB w2 = r10, x2 C M I
- shr.u r8 = r11, 63 C retval I0
- ;;
- cmp.PRED p6, p0 = w2, r10 C M I
- ;;
- st8 [rp] = w2, 8 C M23
- (p6) add r8 = 1, r8 C M I
- br.ret.sptk.many b0 C B
-
-.grt1: ld8 v3 = [vp], 8 C M01
- ld8 u3 = [up], 8 C M01
- mov.i ar.lc = n C FIXME swap with next I0
- ;;
- ld8 v0 = [vp], 8 C M01
- ld8 u0 = [up], 8 C M01
- ADDSUB w2 = r10, x2
- ;;
- ld8 v1 = [vp], 8 C M01
- ld8 u1 = [up], 8 C M01
- shrp x3 = v3, r11, 63 C I0
- ;;
- ld8 v2 = [vp], 8 C M01
- ld8 u2 = [up], 8 C M01
- cmp.PRED p6, p0 = w2, r10 C M I
- ADDSUB w3 = u3, x3 C M I
- br.cloop.dptk .grt5 C B
- ;;
- shrp x0 = v0, v3, 63 C I0
- cmp.PRED p8, p0 = w3, u3 C M I
- br .Lcj5 C B
-
-.grt5: shrp x0 = v0, v3, 63 C I0
- ld8 v3 = [vp], 8 C M01
- cmp.PRED p8, p0 = w3, u3 C M I
- br .LL01 C B
-
-.Lb10: ld8 v2 = [vp], 8 C M01
- ld8 u2 = [up], 8 C M01
- shr.u n = n, 2 C I0
- add x1 = r11, r11 C M I
- (p15) br.dpnt .grt2 C B
- ;;
- ADDSUB w1 = r10, x1 C M I
- shrp x2 = v2, r11, 63 C I0
- ;;
- cmp.PRED p8, p0 = w1, r10 C M I
- ADDSUB w2 = u2, x2 C M I
- shr.u r8 = v2, 63 C retval I0
- ;;
- cmp.PRED p6, p0 = w2, u2 C M I
- br .Lcj2 C B
-
-.grt2: ld8 v3 = [vp], 8 C M01
- ld8 u3 = [up], 8 C M01
- mov.i ar.lc = n C I0
- ;;
- ld8 v0 = [vp], 8 C M01
- ld8 u0 = [up], 8 C M01
- ADDSUB w1 = r10, x1 C M I
- ;;
- ld8 v1 = [vp], 8 C M01
- shrp x2 = v2, r11, 63 C I0
- cmp.PRED p8, p0 = w1, r10 C M I
- ;;
- ld8 u1 = [up], 8 C M01
- shrp x3 = v3, v2, 63 C I0
- ld8 v2 = [vp], 8 C M01
- ADDSUB w2 = u2, x2 C M I
- ;;
- cmp.PRED p6, p0 = w2, u2 C M I
- ld8 u2 = [up], 8 C M01
- ADDSUB w3 = u3, x3 C M I
- br.cloop.dpnt .Loop C B
- br .Lskip C B
-
-.Lb11: ld8 v1 = [vp], 8 C M01
- ld8 u1 = [up], 8 C M01
- shr.u n = n, 2 C I0
- add x0 = r11, r11 C M I
- ;;
- ld8 v2 = [vp], 8 C M01
- ld8 u2 = [up], 8 C M01
- (p15) br.dpnt .grt3 C B
- ;;
-
- shrp x1 = v1, r11, 63 C I0
- ADDSUB w0 = r10, x0 C M I
- ;;
- cmp.PRED p6, p0 = w0, r10 C M I
- ADDSUB w1 = u1, x1 C M I
- ;;
- shrp x2 = v2, v1, 63 C I0
- cmp.PRED p8, p0 = w1, u1 C M I
- br .Lcj3 C B
-
-.grt3: ld8 v3 = [vp], 8 C M01
- ld8 u3 = [up], 8 C M01
- mov.i ar.lc = n C I0
- shrp x1 = v1, r11, 63 C I0
- ADDSUB w0 = r10, x0 C M I
- ;;
- ld8 v0 = [vp], 8 C M01
- cmp.PRED p6, p0 = w0, r10 C M I
- ld8 u0 = [up], 8 C M01
- ADDSUB w1 = u1, x1 C M I
- ;;
- shrp x2 = v2, v1, 63 C I0
- ld8 v1 = [vp], 8 C M01
- cmp.PRED p8, p0 = w1, u1 C M I
- br .LL11 C B
-
-
-C *** MAIN LOOP START ***
- ALIGN(32)
-.Loop: st8 [rp] = w1, 8 C M23
- shrp x0 = v0, v3, 63 C I0
- (p8) cmp.eq.or p6, p0 = LIM, w2 C M I
- (p8) add w2 = INCR, w2 C M I
- ld8 v3 = [vp], 8 C M01
- cmp.PRED p8, p0 = w3, u3 C M I
- ;;
-.LL01: ld8 u3 = [up], 8 C M01
- shrp x1 = v1, v0, 63 C I0
- (p6) cmp.eq.or p8, p0 = LIM, w3 C M I
- (p6) add w3 = INCR, w3 C M I
- ld8 v0 = [vp], 8 C M01
- ADDSUB w0 = u0, x0 C M I
- ;;
- st8 [rp] = w2, 8 C M23
- cmp.PRED p6, p0 = w0, u0 C M I
- ld8 u0 = [up], 8 C M01
- ADDSUB w1 = u1, x1 C M I
- ;;
-.LL00: st8 [rp] = w3, 8 C M23
- shrp x2 = v2, v1, 63 C I0
- (p8) cmp.eq.or p6, p0 = LIM, w0 C M I
- (p8) add w0 = INCR, w0 C M I
- ld8 v1 = [vp], 8 C M01
- cmp.PRED p8, p0 = w1, u1 C M I
- ;;
-.LL11: ld8 u1 = [up], 8 C M01
- shrp x3 = v3, v2, 63 C I0
- (p6) cmp.eq.or p8, p0 = LIM, w1 C M I
- (p6) add w1 = INCR, w1 C M I
- ld8 v2 = [vp], 8 C M01
- ADDSUB w2 = u2, x2 C M I
- ;;
- st8 [rp] = w0, 8 C M23
- cmp.PRED p6, p0 = w2, u2 C M I
- ld8 u2 = [up], 8 C M01
- ADDSUB w3 = u3, x3 C M I
- br.cloop.dptk .Loop C B
- ;;
-C *** MAIN LOOP END ***
-
-.Lskip: st8 [rp] = w1, 8 C M23
- shrp x0 = v0, v3, 63 C I0
- (p8) cmp.eq.or p6, p0 = LIM, w2 C M I
- (p8) add w2 = INCR, w2 C M I
- cmp.PRED p8, p0 = w3, u3 C M I
- ;;
-.Lcj5: shrp x1 = v1, v0, 63 C I0
- (p6) cmp.eq.or p8, p0 = LIM, w3 C M I
- (p6) add w3 = INCR, w3 C M I
- ADDSUB w0 = u0, x0 C M I
- ;;
- st8 [rp] = w2, 8 C M23
- cmp.PRED p6, p0 = w0, u0 C M I
- ADDSUB w1 = u1, x1 C M I
- ;;
-.Lcj4: st8 [rp] = w3, 8 C M23
- shrp x2 = v2, v1, 63 C I0
- (p8) cmp.eq.or p6, p0 = LIM, w0 C M I
- (p8) add w0 = INCR, w0 C M I
- cmp.PRED p8, p0 = w1, u1 C M I
- ;;
-.Lcj3: shr.u r8 = v2, 63 C I0
- (p6) cmp.eq.or p8, p0 = LIM, w1 C M I
- (p6) add w1 = INCR, w1 C M I
- ADDSUB w2 = u2, x2 C M I
- ;;
- st8 [rp] = w0, 8 C M23
- cmp.PRED p6, p0 = w2, u2 C M I
- ;;
-.Lcj2: st8 [rp] = w1, 8 C M23
- (p8) cmp.eq.or p6, p0 = LIM, w2 C M I
- (p8) add w2 = INCR, w2 C M I
- ;;
-.Lcj1: st8 [rp] = w2, 8 C M23
- mov.i ar.lc = r2 C I0
- (p6) add r8 = 1, r8 C M I
- br.ret.sptk.many b0 C B
-EPILOGUE()
-ASM_END()
+include_mpn(`ia64/aorslshC_n.asm')
--- /dev/null
+dnl IA-64 mpn_addlsh2_n/mpn_sublsh2_n -- rp[] = up[] +- (vp[] << 2).
+
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
+dnl Copyright 2003, 2004, 2005, 2010 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C Itanium: 3.0
+C Itanium 2: 1.5
+
+
+C Shift configuration consumed by ia64/aorslshC_n.asm, which is included at the
+C end of this file.  LSH is the left shift applied to each vp limb and RSH is
+C the complementary right shift (64 - LSH) used there with shrp and shr.u to
+C pick up the bits that cross a limb boundary.
+define(LSH, 2)
+define(RSH, 62)
+
+C Add variant.  ADDSUB is the limb operation, CND the unsigned compare
+C completer applied to the result to detect a carry out, INCR the carry to
+C propagate into the next limb, and LIM the limb value for which applying INCR
+C wraps around.  ADDP is only tested for being defined by the included file,
+C where it selects the shladd based feed-in code.
+ifdef(`OPERATION_addlsh2_n',`
+ define(ADDSUB, add)
+ define(ADDP, 1)
+ define(CND, ltu)
+ define(INCR, 1)
+ define(LIM, -1)
+ define(func, mpn_addlsh2_n)
+')
+C Subtract variant.  Same roles as above with borrow in place of carry; ADDP is
+C left undefined.
+ifdef(`OPERATION_sublsh2_n',`
+ define(ADDSUB, sub)
+ define(CND, gtu)
+ define(INCR, -1)
+ define(LIM, 0)
+ define(func, mpn_sublsh2_n)
+')
+
+
+MULFUNC_PROLOGUE(mpn_addlsh2_n mpn_sublsh2_n)
+
+include_mpn(`ia64/aorslshC_n.asm')
--- /dev/null
+dnl IA-64 mpn_addlshC_n/mpn_sublshC_n -- rp[] = up[] +- (vp[] << C).
+
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
+dnl Copyright 2003, 2004, 2005, 2010 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+C cycles/limb
+C Itanium: ?
+C Itanium 2: 1.5
+
+C TODO
+C * Use shladd in feed-in code (for mpn_addlshC_n).
+
+C INPUT PARAMETERS
+C rp is the destination pointer, up and vp are the two source pointers, and n
+C is the limb count.
+define(`rp', `r32')
+define(`up', `r33')
+define(`vp', `r34')
+define(`n', `r35')
+
+C cmpeqor is shorthand for the cmp.eq.or instruction form used in the carry
+C propagation.  PFDIST is the byte offset ahead of up and vp at which the
+C lfetch prefetch pointers are started.
+define(cmpeqor, `cmp.eq.or')
+define(PFDIST, 500)
+
+C Register aliases: u0-u3 hold limbs loaded from up, v0-v3 limbs loaded from
+C vp, w0-w3 result limbs, and x0-x3 the shifted vp values.  Note that x2
+C shares r30 with x0 and x3 shares r31 with x1, so only two x values are live
+C at a time.  s0-s3 are not referenced by the code in this file.
+define(`u0',`r14') define(`u1',`r15') define(`u2',`r16') define(`u3',`r17')
+define(`v0',`r18') define(`v1',`r19') define(`v2',`r20') define(`v3',`r21')
+define(`w0',`r22') define(`w1',`r23') define(`w2',`r24') define(`w3',`r25')
+define(`s0',`r26') define(`s1',`r27') define(`s2',`r28') define(`s3',`r29')
+define(`x0',`r30') define(`x1',`r31') define(`x2',`r30') define(`x3',`r31')
+
+
+C func computes rp[] = up[] ADDSUB (vp[] << LSH) over n limbs.  The value left
+C in r8 at return is the bits shifted out of the top vp limb plus one if the
+C last limb operation produced a carry or borrow.  ADDSUB, CND, INCR, LIM, LSH,
+C RSH, func and optionally ADDP must be defined by the file including this one.
+ASM_START()
+PROLOGUE(func)
+ .prologue
+ .save ar.lc, r2
+ .body
+C With the 32-bit ABI the pointer arguments are converted with addp4 and n is
+C zero extended with zxt4.
+ifdef(`HAVE_ABI_32',`
+ addp4 rp = 0, rp C M I
+ addp4 up = 0, up C M I
+ addp4 vp = 0, vp C M I
+ zxt4 n = n C I
+ ;;
+')
+C Preload vp[0] into r11 and up[0] into r10, save ar.lc in r2, then dispatch on
+C n mod 4 (1, 2, 3 branch away; 0 falls through to .Lb00).  p15 records n > 4,
+C which the feed-in paths use to choose between finishing directly in the
+C wind-down code and setting up the loop.  n is biased by -5 here; the feed-in
+C paths shift it right by 2 before moving it to ar.lc as the loop count.
+ {.mmi; ld8 r11 = [vp], 8 C M01
+ ld8 r10 = [up], 8 C M01
+ mov.i r2 = ar.lc C I0
+}{.mmi; and r14 = 3, n C M I
+ cmp.lt p15, p0 = 4, n C M I
+ add n = -5, n C M I
+ ;;
+}{.mmi; cmp.eq p6, p0 = 1, r14 C M I
+ cmp.eq p7, p0 = 2, r14 C M I
+ cmp.eq p8, p0 = 3, r14 C M I
+}{.bbb
+ (p6) br.dptk .Lb01 C B
+ (p7) br.dptk .Lb10 C B
+ (p8) br.dptk .Lb11 C B
+}
+
+C Feed-in for n mod 4 == 0.  The first result limb w3 is formed from up[0]
+C (r10) and vp[0] (r11) shifted left by LSH.  Every later x value is produced
+C by shrp from two adjacent vp limbs with count RSH, so it includes the high
+C bits of the lower limb.  When p15 is clear (n == 4) the code falls through,
+C prepares w0, w1 and the carry predicates p7 and p8, and finishes in the
+C wind-down code at .Lcj4.
+.Lb00: ld8 v0 = [vp], 8 C M01
+ ld8 u0 = [up], 8 C M01
+ shr.u n = n, 2 C I0
+ ;;
+.mmi; ld8 v1 = [vp], 8 C M01
+ ld8 u1 = [up], 8 C M01
+ shl x3 = r11, LSH C I0
+ ;;
+.mmi; ld8 v2 = [vp], 8 C M01
+ ld8 u2 = [up], 8 C M01
+ shrp x0 = v0, r11, RSH C I0
+.mmb; ADDSUB w3 = r10, x3 C M I
+ nop 0
+ (p15) br.dpnt .grt4 C B
+ ;;
+.mii; cmp.CND p7, p0 = w3, r10 C M I
+ shrp x1 = v1, v0, RSH C I0
+ ADDSUB w0 = u0, x0 C M I
+ ;;
+.mii; cmp.CND p8, p0 = w0, u0 C M I
+ shrp x2 = v2, v1, RSH C I0
+ ADDSUB w1 = u1, x1 C M I
+.mmb; nop 0
+ nop 0
+ br .Lcj4 C B
+
+C n > 4: load further limbs, repoint r11 and r10 (whose preloaded limbs have
+C been consumed by now) PFDIST bytes ahead of vp and up for the lfetch
+C instructions in the loop, set the loop count, and enter the loop at .LL00.
+ALIGN(32)
+.grt4: ld8 v3 = [vp], 8 C M01
+ shrp x0 = v0, r11, RSH C I0
+ cmp.CND p8, p0 = w3, r10 C M I
+ ;;
+.mmi; ld8 u3 = [up], 8 C M01
+ add r11 = PFDIST, vp
+ shrp x1 = v1, v0, RSH C I0
+.mmi; ld8 v0 = [vp], 8 C M01
+ ADDSUB w0 = u0, x0 C M I
+ nop 0
+ ;;
+.mmi; cmp.CND p6, p0 = w0, u0 C M I
+ add r10 = PFDIST, up
+ mov.i ar.lc = n C I0
+.mmb; ADDSUB w1 = u1, x1 C M I
+ ld8 u0 = [up], 8 C M01
+ br .LL00 C B
+
+
+C Feed-in for n mod 4 == 1.  When ADDP is defined the first result limb is
+C formed with a single shladd (shift vp[0] left by LSH and add up[0]);
+C otherwise the shift and ADDSUB are done separately.  For n == 1 (p15 clear)
+C r8 receives the bits shifted out of vp[0], p6 the carry of the single limb,
+C and the code finishes at .Lcj1.
+ ALIGN(32)
+.Lb01:
+ifdef(`ADDP',
+` shladd w2 = r11, LSH, r10 C M I
+ shr.u r8 = r11, RSH C retval I0
+ (p15) br.dpnt .grt1 C B
+ ;;
+',`
+ shl x2 = r11, LSH C I0
+ (p15) br.dpnt .grt1 C B
+ ;;
+ ADDSUB w2 = r10, x2 C M I
+ shr.u r8 = r11, RSH C retval I0
+ ;;
+')
+ cmp.CND p6, p0 = w2, r10 C M I
+ br .Lcj1
+
+C n > 4: load four more limb pairs and set the loop count.  Without ADDP the
+C first result limb w2 is computed here, since the branch above was taken
+C before the ADDSUB.  The br.cloop below goes to .grt5 when ar.lc is nonzero
+C (decrementing it); otherwise the remaining limbs are finished from .Lcj5.
+.grt1: ld8 v3 = [vp], 8 C M01
+ ld8 u3 = [up], 8 C M01
+ shr.u n = n, 2 C I0
+ ;;
+ ld8 v0 = [vp], 8 C M01
+ ld8 u0 = [up], 8 C M01
+ mov.i ar.lc = n C FIXME swap with next I0
+ifdef(`ADDP',
+`',`
+ ADDSUB w2 = r10, x2
+')
+ ;;
+.mmi; ld8 v1 = [vp], 8 C M01
+ ld8 u1 = [up], 8 C M01
+ shrp x3 = v3, r11, RSH C I0
+ ;;
+.mmi; ld8 v2 = [vp], 8 C M01
+ ld8 u2 = [up], 8 C M01
+ shrp x0 = v0, v3, RSH C I0
+.mmb; cmp.CND p6, p0 = w2, r10 C M I
+ ADDSUB w3 = u3, x3 C M I
+ br.cloop.dptk .grt5 C B
+ ;;
+.mmi; cmp.CND p7, p0 = w3, u3 C M I
+ ADDSUB w0 = u0, x0 C M I
+ shrp x1 = v1, v0, RSH C I0
+.mmb; nop 0
+ nop 0
+ br .Lcj5 C B
+C Set up the prefetch pointers PFDIST bytes ahead of up and vp and enter the
+C loop at .LL01.
+.grt5:
+.mmi; add r10 = PFDIST, up
+ add r11 = PFDIST, vp
+ shrp x0 = v0, v3, RSH C I0
+.mmb; ld8 v3 = [vp], 8 C M01
+ cmp.CND p8, p0 = w3, u3 C M I
+ br .LL01 C B
+
+C Feed-in for n mod 4 == 2.  For n == 2 (p15 clear) both result limbs w1 and
+C w2 are computed here, r8 receives the bits shifted out of the second vp limb,
+C p9 and p6 hold the two carry conditions, and the code finishes at .Lcj2.
+ ALIGN(32)
+.Lb10: ld8 v2 = [vp], 8 C M01
+ ld8 u2 = [up], 8 C M01
+ shl x1 = r11, LSH C I0
+.mmb; nop 0
+ nop 0
+ (p15) br.dpnt .grt2 C B
+ ;;
+.mmi; ADDSUB w1 = r10, x1 C M I
+ nop 0
+ shrp x2 = v2, r11, RSH C I0
+ ;;
+.mmi; cmp.CND p9, p0 = w1, r10 C M I
+ ADDSUB w2 = u2, x2 C M I
+ shr.u r8 = v2, RSH C retval I0
+ ;;
+.mmb; cmp.CND p6, p0 = w2, u2 C M I
+ nop 0
+ br .Lcj2 C B
+
+C n > 4: load further limbs, set the loop count and the prefetch pointers
+C (r10 and r11 are reused for that once their preloaded limbs are consumed),
+C then enter the loop at its top when ar.lc is nonzero or go straight to the
+C wind-down code at L(end) otherwise.
+.grt2: ld8 v3 = [vp], 8 C M01
+ ld8 u3 = [up], 8 C M01
+ shr.u n = n, 2 C I0
+ ;;
+.mmi; ld8 v0 = [vp], 8 C M01
+ ld8 u0 = [up], 8 C M01
+ mov.i ar.lc = n C I0
+.mmi; ADDSUB w1 = r10, x1 C M I
+ nop 0
+ nop 0
+ ;;
+.mii; ld8 v1 = [vp], 8 C M01
+ shrp x2 = v2, r11, RSH C I0
+ cmp.CND p8, p0 = w1, r10 C M I
+ ;;
+.mmi; add r10 = PFDIST, up
+ ld8 u1 = [up], 8 C M01
+ shrp x3 = v3, v2, RSH C I0
+.mmi; add r11 = PFDIST, vp
+ ld8 v2 = [vp], 8 C M01
+ ADDSUB w2 = u2, x2 C M I
+ ;;
+.mmi; cmp.CND p6, p0 = w2, u2 C M I
+ ld8 u2 = [up], 8 C M01
+ shrp x0 = v0, v3, RSH C I0
+.mbb; ADDSUB w3 = u3, x3 C M I
+ br.cloop.dpnt L(top) C B
+ br L(end) C B
+
+C Feed-in for n mod 4 == 3.  For n == 3 (p15 clear) the three result limbs w0,
+C w1 and w2 are computed here with carry conditions in p8 and p9, and the code
+C finishes at .Lcj3, which also forms the r8 return value from v2.
+.Lb11: ld8 v1 = [vp], 8 C M01
+ ld8 u1 = [up], 8 C M01
+ shl x0 = r11, LSH C I0
+ ;;
+.mmi; ld8 v2 = [vp], 8 C M01
+ ld8 u2 = [up], 8 C M01
+ shr.u n = n, 2 C I0
+.mmb; nop 0
+ nop 0
+ (p15) br.dpnt .grt3 C B
+ ;;
+.mii; nop 0
+ shrp x1 = v1, r11, RSH C I0
+ ADDSUB w0 = r10, x0 C M I
+ ;;
+.mii; cmp.CND p8, p0 = w0, r10 C M I
+ shrp x2 = v2, v1, RSH C I0
+ ADDSUB w1 = u1, x1 C M I
+ ;;
+.mmb; cmp.CND p9, p0 = w1, u1 C M I
+ ADDSUB w2 = u2, x2 C M I
+ br .Lcj3 C B
+C n > 4: load further limbs, set the loop count, repoint r10 and r11 PFDIST
+C bytes ahead of up and vp for prefetching, and enter the loop at .LL11.
+.grt3:
+.mmi; ld8 v3 = [vp], 8 C M01
+ ld8 u3 = [up], 8 C M01
+ shrp x1 = v1, r11, RSH C I0
+.mmi; ADDSUB w0 = r10, x0 C M I
+ nop 0
+ nop 0
+ ;;
+.mmi; ld8 v0 = [vp], 8 C M01
+ cmp.CND p6, p0 = w0, r10 C M I
+ mov.i ar.lc = n C I0
+.mmi; ld8 u0 = [up], 8 C M01
+ ADDSUB w1 = u1, x1 C M I
+ nop 0
+ ;;
+.mmi; add r10 = PFDIST, up
+ add r11 = PFDIST, vp
+ shrp x2 = v2, v1, RSH C I0
+.mmb; ld8 v1 = [vp], 8 C M01
+ cmp.CND p8, p0 = w1, u1 C M I
+ br .LL11 C B
+
+
+C *** MAIN LOOP START ***
+C Four-way unrolled loop: each pass stores four result limbs and loads four
+C limbs from each of up and vp.  The feed-in code enters at L(top), .LL01,
+C .LL00 or .LL11 depending on n mod 4.
+C
+C Carry handling per limb: cmp.CND on the raw ADDSUB result and its up operand
+C sets a predicate when that limb operation overflowed.  A predicate from the
+C limb below then guards two instructions: the add of INCR, which applies the
+C incoming carry, and cmpeqor against LIM, which additionally sets the outgoing
+C predicate when applying INCR wraps the limb.  p6 and p8 alternate as
+C incoming and outgoing predicates.
+C
+C Each of the two lfetch instructions advances its pointer by 32 bytes per
+C pass, which matches the four 8-byte limbs consumed from each source.
+ ALIGN(32)
+L(top): st8 [rp] = w1, 8 C M23
+ lfetch [r10], 32
+ (p8) cmpeqor p6, p0 = LIM, w2 C M I
+ (p8) add w2 = INCR, w2 C M I
+ ld8 v3 = [vp], 8 C M01
+ cmp.CND p8, p0 = w3, u3 C M I
+ ;;
+.LL01: ld8 u3 = [up], 8 C M01
+ shrp x1 = v1, v0, RSH C I0
+ (p6) cmpeqor p8, p0 = LIM, w3 C M I
+ (p6) add w3 = INCR, w3 C M I
+ ld8 v0 = [vp], 8 C M01
+ ADDSUB w0 = u0, x0 C M I
+ ;;
+ st8 [rp] = w2, 8 C M23
+ cmp.CND p6, p0 = w0, u0 C M I
+ nop.b 0
+ ld8 u0 = [up], 8 C M01
+ lfetch [r11], 32
+ ADDSUB w1 = u1, x1 C M I
+ ;;
+.LL00: st8 [rp] = w3, 8 C M23
+ shrp x2 = v2, v1, RSH C I0
+ (p8) cmpeqor p6, p0 = LIM, w0 C M I
+ (p8) add w0 = INCR, w0 C M I
+ ld8 v1 = [vp], 8 C M01
+ cmp.CND p8, p0 = w1, u1 C M I
+ ;;
+.LL11: ld8 u1 = [up], 8 C M01
+ shrp x3 = v3, v2, RSH C I0
+ (p6) cmpeqor p8, p0 = LIM, w1 C M I
+ (p6) add w1 = INCR, w1 C M I
+ ld8 v2 = [vp], 8 C M01
+ ADDSUB w2 = u2, x2 C M I
+ ;;
+.mmi; st8 [rp] = w0, 8 C M23
+ cmp.CND p6, p0 = w2, u2 C M I
+ shrp x0 = v0, v3, RSH C I0
+ ld8 u2 = [up], 8 C M01
+ ADDSUB w3 = u3, x3 C M I
+ br.cloop.dptk L(top) C B
+ ;;
+C *** MAIN LOOP END ***
+
+C Wind-down.  The labels below form a fall-through chain; each stage stores one
+C result limb and resolves the carry for the next one using the same
+C cmp.CND / cmpeqor / add INCR scheme as the loop, with the predicates rotating
+C through p6, p7, p8 and p9.  The feed-in code for small n jumps directly to
+C the stage matching the number of limbs still to be stored.
+L(end):
+.mmi; st8 [rp] = w1, 8 C M23
+ (p8) cmpeqor p6, p0 = LIM, w2 C M I
+ shrp x1 = v1, v0, RSH C I0
+.mmi;
+ (p8) add w2 = INCR, w2 C M I
+ cmp.CND p7, p0 = w3, u3 C M I
+ ADDSUB w0 = u0, x0 C M I
+ ;;
+.Lcj5:
+.mmi; st8 [rp] = w2, 8 C M23
+ (p6) cmpeqor p7, p0 = LIM, w3 C M I
+ shrp x2 = v2, v1, RSH C I0
+.mmi;
+ (p6) add w3 = INCR, w3 C M I
+ cmp.CND p8, p0 = w0, u0 C M I
+ ADDSUB w1 = u1, x1 C M I
+ ;;
+C This stage also restores ar.lc from r2, where the prologue saved it.  The
+C entry points further down are only reached directly by feed-in paths that
+C have not written ar.lc.
+.Lcj4:
+.mmi; st8 [rp] = w3, 8 C M23
+ (p7) cmpeqor p8, p0 = LIM, w0 C M I
+ mov.i ar.lc = r2 C I0
+.mmi;
+ (p7) add w0 = INCR, w0 C M I
+ cmp.CND p9, p0 = w1, u1 C M I
+ ADDSUB w2 = u2, x2 C M I
+ ;;
+C Start the return value: r8 = v2 >> RSH.  NOTE(review): v2 appears to be the
+C most significant vp limb on every path reaching this point - confirm.
+.Lcj3:
+.mmi; st8 [rp] = w0, 8 C M23
+ (p8) cmpeqor p9, p0 = LIM, w1 C M I
+ shr.u r8 = v2, RSH C I0
+.mmi;
+ (p8) add w1 = INCR, w1 C M I
+ cmp.CND p6, p0 = w2, u2 C M I
+ nop 0
+ ;;
+.Lcj2:
+.mmi; st8 [rp] = w1, 8 C M23
+ (p9) cmpeqor p6, p0 = LIM, w2 C M I
+ (p9) add w2 = INCR, w2 C M I
+ ;;
+C Store the final limb, add the last carry or borrow (p6) to r8, and return.
+.Lcj1:
+.mmb; st8 [rp] = w2 C M23
+ (p6) add r8 = 1, r8 C M I
+ br.ret.sptk.many b0 C B
+EPILOGUE()
+ASM_END()
dnl IA-64 mpn_bdiv_dbm1.
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
dnl Copyright 2008, 2009 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
dnl IA-64 mpn_copyd -- copy limb vector, decrementing.
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
dnl Copyright 2001, 2002, 2004 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
dnl IA-64 mpn_copyi -- copy limb vector, incrementing.
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
dnl Copyright 2001, 2002, 2004 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
dnl IA-64 mpn_divexact_1 -- mpn by limb exact division.
-dnl Copyright 2003, 2004, 2005 Free Software Foundation, Inc.
+dnl Contributed to the GNU project by Torbjorn Granlund and Kevin Ryde.
+
+dnl Copyright 2003, 2004, 2005, 2010 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
ld8 r21 = [up], 8
br .Lent
-.Loop: ld8 r21 = [up], 8
+.Ltop: ld8 r21 = [up], 8
xma.l f12 = f9, f8, f10 C q = c * -inverse + si
+ nop.b 0
;;
.Lent: add r16 = 160, up
shl r22 = r21, lshift
+ nop.b 0
;;
stf8 [rp] = f12, 8
xma.hu f9 = f12, f6, f9 C c = high(q * divisor + c)
+ nop.b 0
+ nop.m 0
xmpy.l f10 = f11, f7 C si = ulimb * inverse
+ nop.b 0
;;
or r31 = r22, r23
shr.u r23 = r21, rshift
+ nop.b 0
;;
lfetch [r16]
setf.sig f11 = r31
- br.cloop.sptk.few.clr .Loop
+ br.cloop.sptk.few.clr .Ltop
xma.l f12 = f9, f8, f10 C q = c * -inverse + si
dnl IA-64 mpn_divrem_1 and mpn_preinv_divrem_1 -- Divide an mpn number by an
dnl unnormalized limb.
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
dnl Copyright 2002, 2004, 2005 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
-dnl IA-64 mpn_divrem_2 -- Divide an n-limb number by a 2-limb number.
+dnl IA-64 mpn_divrem_2 -- Divide an mpn number by a normalized 2-limb number.
-dnl Copyright 2004, 2005 Free Software Foundation, Inc.
+dnl Copyright 2010, 2013 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
dnl The GNU MP Library is free software; you can redistribute it and/or modify
dnl it under the terms of the GNU Lesser General Public License as published
-dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl by the Free Software Foundation; either version 2.1 of the License, or (at
dnl your option) any later version.
dnl The GNU MP Library is distributed in the hope that it will be useful, but
dnl License for more details.
dnl You should have received a copy of the GNU Lesser General Public License
-dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write
+dnl to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+dnl Boston, MA 02110-1301, USA.
include(`../config.m4')
-C cycles/limb
-C Itanium: 63
-C Itanium 2: 46
+C norm frac
+C itanium 1
+C itanium 2 29 29
C TODO
-C * Further optimize the loop. We could probably do some more trickery with
-C arithmetic in the FPU, or perhaps use a non-zero addend of xma in more
-C places.
-C * Software pipeline for perhaps 5 saved cycles, around the end and start of
-C the loop.
-C * Schedule code outside of loop better.
-C * Update the comments. They are now using the same name for the same
-C logical quantity.
-C * Handle conditional zeroing of r31 in loop more cleanly.
-C * Inline mpn_invert_limb and schedule its insns across the entire init code.
-C * Ultimately, use 2-limb, or perhaps 3-limb or 4-limb inverse.
+C * Inline and interleave limb inversion code with loop setup code.
+C * We should use explicit bundling in much of the code, since it typically
+C cuts some cycles with the GNU assembler.
-define(`qp',`r32')
-define(`qxn',`r33')
-define(`np',`r34')
-define(`nn',`r35')
-define(`dp',`r36')
-
-define(`fnh',`f11')
-define(`fminus1',`f10')
-define(`fd0',`f13')
-define(`fd1',`f14')
-define(`d0',`r39')
-define(`d1',`r36')
-define(`fnl',`f32')
-define(`fdinv',`f12')
-
-define(`R1',`r38') define(`R0',`r37')
-define(`P1',`r28') define(`P0',`r27')
ASM_START()
.global mpn_invert_limb
.type mpn_invert_limb,@function
+C INPUT PARAMETERS
+C qp = r32
+C fn = r33
+C np = r34
+C nn = r35
+C dp = r36
+
+define(`f0x1', `f15')
+
+ASM_START()
PROLOGUE(mpn_divrem_2)
.prologue
- .save ar.pfs, r42
- .save ar.lc, r44
- .save rp, r41
ifdef(`HAVE_ABI_32',
-` addp4 qp = 0, qp C M I
- addp4 np = 0, np C M I
- addp4 dp = 0, dp C M I
- zxt4 nn = nn C I
- zxt4 qxn = qxn C I
+` addp4 r32 = 0, r32 C M I
+ addp4 r34 = 0, r34 C M I
+ addp4 r36 = 0, r36 C M I
+ zxt4 r35 = r35 C I
+ zxt4 r33 = r33 C I
;;
')
-
- alloc r42 = ar.pfs, 5,8,1,0 C M2
- ld8 d0 = [dp], 8 C M0M1 d0
- mov r44 = ar.lc C I0
- shladd np = nn, 3, np C M I
- ;;
- ld8 d1 = [dp] C M0M1 d1
- mov r41 = b0 C I0
- add r15 = -8, np C M I
- add np = -16, np C M I
- mov r40 = r0 C M I
- ;;
- ld8 R1 = [r15] C M0M1 n1
- ld8 R0 = [r34], -8 C M0M1 n0
- ;;
- cmp.ltu p6, p0 = d1, R1 C M I
- cmp.eq p8, p0 = d1, R1 C M I
- ;;
- (p8) cmp.leu p6, p0 = d0, R0
- cmp.ltu p8, p9 = R0, d0
- (p6) br.cond.dpnt .L_high_limb_1 C FIXME: inline!
+ .save ar.pfs, r42
+ alloc r42 = ar.pfs, 5, 9, 1, 0
+ shladd r34 = r35, 3, r34
+ adds r14 = 8, r36
+ mov r43 = r1
+ ;;
+ adds r15 = -8, r34
+ ld8 r39 = [r14]
+ .save ar.lc, r45
+ mov r45 = ar.lc
+ adds r14 = -16, r34
+ mov r40 = r0
+ adds r34 = -24, r34
+ ;;
+ ld8 r38 = [r15]
+ .save rp, r41
+ mov r41 = b0
+ .body
+ ld8 r36 = [r36]
+ ld8 r37 = [r14]
+ ;;
+ cmp.gtu p6, p7 = r39, r38
+ (p6) br.cond.dptk .L8
+ ;;
+ cmp.leu p8, p9 = r36, r37
+ cmp.geu p6, p7 = r39, r38
+ ;;
+ (p8) cmp4.ne.and.orcm p6, p7 = 0, r0
+ (p7) br.cond.dptk .L51
.L8:
-
- mov r45 = d1
- br.call.sptk.many b0 = mpn_invert_limb C FIXME: inline+schedule
+ add r14 = r33, r35 // un + fn
+ mov r46 = r39 // argument to mpn_invert_limb
;;
- setf.sig fd1 = d1 C d1
- setf.sig fd0 = d0 C d0
- add r14 = r33, r35 C nn + qxn
+ adds r35 = -3, r14
;;
- setf.sig fdinv = r8 C dinv
- mov r9 = -1
- add r35 = -3, r14
+ cmp.gt p12, p0 = r0, r35
+ (p12) br.cond.dpnt L(end)
+ br.call.sptk.many b0 = mpn_invert_limb
;;
- setf.sig fminus1 = r9
- cmp.gt p6, p0 = r0, r35
- shladd qp = r35, 3, qp
- mov ar.lc = r35
- mov r31 = 0 C n0
- (p6) br.cond.dpnt .Ldone
+ setf.sig f11 = r8 // di (non-final)
+ setf.sig f34 = r39 // d1
+ setf.sig f33 = r36 // d0
+ mov r1 = r43
;;
- ALIGN(16)
-C *** MAIN LOOP START ***
-.Loop: C 00
- mov r15 = R0 C nadj = n10
- cmp.le p14, p15 = 0, R0 C check high bit of R0
- cmp.le p8, p0 = r33, r35 C dividend limbs remaining?
- ;; C 01
- .pred.rel "mutex", p14, p15
- (p8) ld8 r31 = [r34], -8 C n0
- (p15) add r15 = d1, R0 C nadj = n10 + d1
- (p15) add r14 = 1, R1 C nh + (nl:63)
- (p14) mov r14 = R1 C nh
- cmp.eq p6, p0 = d1, R1 C nh == d1
- (p6) br.cond.spnt .L_R1_eq_d1
- ;; C 02
- setf.sig f8 = r14 C n2 + (nl:63)
- setf.sig f15 = r15 C nadj
- sub r23 = -1, R1 C r23 = ~nh
- ;; C 03
- setf.sig fnh = r23
- setf.sig fnl = R0
- ;; C 08
- xma.hu f7 = fdinv, f8, f15 C xh = HI(dinv*(nh-nmask)+nadj)
- ;; C 12
- xma.l f7 = f7, fminus1, fnh C nh + xh
- ;; C 16
- getf.sig r14 = f7
- xma.hu f9 = f7, fd1, fnl C xh = HI(q1*d1+nl)
- xma.l f33 = f7, fd1, fnl C xh = LO(q1*d1+nl)
- ;; C 20
- getf.sig r16 = f9
- sub r24 = d1, R1
- C 21
- getf.sig r17 = f33
- ;; C 25
- cmp.eq p6, p7 = r16, r24
- ;; C 26
- .pred.rel "mutex", p6, p7
- (p6) xma.l f8 = f7, fminus1, f0 C f8 = -f7
- (p7) xma.l f8 = f7,fminus1,fminus1 C f8 = -f7-1
- ;; C 27
- .pred.rel "mutex", p6, p7
- (p6) sub r18 = 0, r14 C q = -q1
- (p7) sub r18 = -1, r14 C q = -q1-1
- (p6) add r14 = 0, r17 C n1 = xl
- (p7) add r14 = d1, r17 C n1 = xl + d1
- ;; C 30
- xma.hu f9 = fd0, f8, f0 C d0*(-f7-1) = -d0*f7-d0
- xma.l f35 = fd0, f8, f0
- ;; C 34
- getf.sig P1 = f9 C P1
- C 35
- getf.sig P0 = f35 C P0
- ;;
-.L_adj: C 40
- cmp.ltu p8, p0 = r31, P0 C p8 = cy from low limb
- cmp.ltu p6, p0 = r14, P1 C p6 = prel cy from high limb
- sub R0 = r31, P0
- sub R1 = r14, P1
- ;; C 41
- (p8) cmp.eq.or p6, p0 = 0, R1 C p6 = final cy from high limb
- (p8) add R1 = -1, R1
- cmp.ne p10, p0 = r0, r0 C clear p10 FIXME: use unc below!
- cmp.ne p13, p0 = r0, r0 C clear p13 FIXME: use unc below!
- ;; C 42
- (p6) add R0 = R0, d0
- (p6) add R1 = R1, d1
- (p6) add r18 = -1, r18 C q--
- ;; C 43
- (p6) cmp.ltu p10, p0 = R0, d0
- (p6) cmp.ltu p0, p13 = R1, d1
- ;; C 44
- (p10) cmp.ne.and p0, p13 = -1, R1 C p13 = !cy
- (p10) add R1 = 1, R1
- (p13) br.cond.spnt .L_two_too_big C jump if not cy
- ;; C 45
- st8 [qp] = r18, -8
- add r35 = -1, r35
- mov r31 = 0 C n0, next iteration
- br.cloop.sptk .Loop
-C *** MAIN LOOP END ***
- ;;
-.Ldone:
- mov r8 = r40
- mov b0 = r41
- add r21 = 8, r34
- add r22 = 16, r34
- ;;
- st8 [r21] = R0
- st8 [r22] = R1
- mov ar.pfs = r42
- mov ar.lc = r44
- br.ret.sptk.many b0
-
-.L_high_limb_1:
- .pred.rel "mutex", p8, p9
- sub R0 = R0, d0
- (p8) sub R1 = R1, d1, 1
- (p9) sub R1 = R1, d1
- mov r40 = 1
- br.sptk .L8
+ mov r17 = 1
+ setf.sig f9 = r38 // n2
+ xma.l f6 = f11, f34, f0 // t0 = LO(di * d1)
+ ;;
+ setf.sig f10 = r37 // n1
+ setf.sig f15 = r17 // 1
+ xma.hu f8 = f11, f33, f0 // s0 = HI(di * d0)
+ ;;
+ getf.sig r17 = f6
+ getf.sig r16 = f8
+ mov ar.lc = r35
+ ;;
+ sub r18 = r0, r39 // -d1
+ add r14 = r17, r36
+ ;;
+ setf.sig f14 = r18 // -d1
+ cmp.leu p8, p9 = r17, r14
+ add r16 = r14, r16
+ ;;
+ (p9) adds r19 = 0, r0
+ (p8) adds r19 = -1, r0
+ cmp.gtu p6, p7 = r14, r16
+ ;;
+ (p6) adds r19 = 1, r19
+ ;;
+ifelse(1,1,`
+ cmp.gt p7, p6 = r0, r19
+ ;;
+ (p6) adds r8 = -1, r8 // di--
+ (p6) sub r14 = r16, r39 // t0 -= d1
+ (p6) cmp.ltu p6, p7 = r16, r39 // cy for: t0 - d1
+ ;;
+ (p6) cmp.gt p9, p8 = 1, r19
+ (p7) cmp.gt p9, p8 = 0, r19
+ (p6) adds r19 = -1, r19 // t1 -= cy
+ mov r16 = r14
+ ;;
+ (p8) adds r8 = -1, r8 // di--
+ (p8) sub r14 = r16, r39 // t0 -= d1
+ (p8) cmp.ltu p8, p9 = r16, r39 // cy for: t0 - d1
+ ;;
+ (p8) cmp.gt p7, p6 = 1, r19
+ (p9) cmp.gt p7, p6 = 0, r19
+ (p8) adds r19 = -1, r19 // t1 -= cy
+ mov r16 = r14
+ ;;
+ (p6) adds r8 = -1, r8 // di--
+ (p6) sub r14 = r16, r39 // t0 -= d1
+ (p6) cmp.ltu p6, p7 = r16, r39 // cy for: t0 - d1
+ ;;
+ (p6) cmp.gt p9, p8 = 1, r19
+ (p7) cmp.gt p9, p8 = 0, r19
+ (p6) adds r19 = -1, r19 // t1 -= cy
+ mov r16 = r14
+ ;;
+ (p8) adds r8 = -1, r8 // di--
+ (p8) sub r14 = r16, r39 // t0 -= d1
+ (p8) cmp.ltu p8, p9 = r16, r39 // cy for: t0 - d1
+ ;;
+ (p8) adds r19 = -1, r19 // t1 -= cy
+ mov r16 = r14
+',`
+ cmp.gt p8, p9 = r0, r19
+ (p8) br.cond.dpnt .L46
+.L52:
+ cmp.leu p6, p7 = r39, r16
+ sub r14 = r16, r39
+ adds r8 = -1, r8
+ ;;
+ (p7) adds r19 = -1, r19
+ mov r16 = r14
+ ;;
+ (p7) cmp.gt p8, p9 = r0, r19
+ (p9) br.cond.dptk .L52
+.L46:
+')
+ setf.sig f32 = r8 // di
+ shladd r32 = r35, 3, r32
;;
-.L_two_too_big:
- add R0 = R0, d0
- add R1 = R1, d1
- ;;
- add r18 = -1, r18 C q--
- cmp.ltu p10, p0 = R0, d0
- ;;
- (p10) add R1 = 1, R1
- st8 [qp] = r18, -8
- add r35 = -1, r35
- mov r31 = 0 C n0, next iteration
- br.cloop.sptk .Loop
- br.sptk .Ldone
-
-.L_R1_eq_d1:
- add r14 = R0, d1 C r = R0 + d1
- mov r18 = -1 C q = -1
+ ALIGN(16)
+L(top): nop 0
+ nop 0
+ cmp.gt p8, p9 = r33, r35
+ ;;
+ (p8) mov r37 = r0
+ (p9) ld8 r37 = [r34], -8
+ xma.hu f8 = f9, f32, f10 // 0,29
+ xma.l f12 = f9, f32, f10 // 0
+ ;;
+ getf.sig r20 = f12 // q0 4
+ xma.l f13 = f15, f8, f9 // q += n2 4
+ sub r8 = -1, r36 // bitnot d0
+ ;;
+ getf.sig r18 = f13 // 8
+ xma.l f7 = f14, f13, f10 // 8
+ xma.l f6 = f33, f13, f33 // t0 = LO(d0*q+d0) 8
+ xma.hu f9 = f33, f13, f33 // t1 = HI(d0*q+d0) 9
;;
- cmp.leu p6, p0 = R0, r14
- (p6) br.cond.spnt .L20 C jump unless cy
+ getf.sig r38 = f7 // n1 12
+ getf.sig r16 = f6 // 13
+ getf.sig r19 = f9 // 14
;;
- sub P1 = r14, d0
- add R0 = r31, d0
+ sub r38 = r38, r39 // n1 -= d1 17
;;
- cmp.ltu p8, p9 = R0, r31
+ cmp.ne p9, p0 = r0, r0 // clear p9
+ cmp.leu p10, p11 = r16, r37 // cy for: n0 - t0 18
;;
+ sub r37 = r37, r16 // n0 -= t0 19
+ (p11) sub r38 = r38, r19, 1 // n1 -= t1 + cy 19
+ (p10) sub r38 = r38, r19 // n1 -= t1 19
+ ;;
+ cmp.gtu p6, p7 = r20, r38 // p6: q0 > n1, p7: n1 >= q0 20
+ ;;
+ (p7) cmp.ltu p9, p0 = r8, r37 // cy for: n0 + d0 21
+ (p6) add r18 = 1, r18 // q++
+ (p7) add r37 = r37, r36 // n0 += d0 21
+ (p7) add r38 = r38, r39 // n1 += d1 21
+ ;;
+ setf.sig f10 = r37 // n1 22
+ (p9) add r38 = 1, r38 // 22
+ ;;
+ setf.sig f9 = r38 // n2 23
+ cmp.gtu p6, p7 = r39, r38 // 23
+ (p7) br.cond.spnt L(fix)
+L(bck): st8 [r32] = r18, -8
+ adds r35 = -1, r35
+ br.cloop.sptk.few L(top)
+ ;;
+
+L(end): add r14 = 8, r34
+ add r15 = 16, r34
+ mov b0 = r41
+ ;;
+ st8 [r14] = r37
+ st8 [r15] = r38
+ mov ar.pfs = r42
+ mov r8 = r40
+ mov ar.lc = r45
+ br.ret.sptk.many b0
+ ;;
+.L51:
.pred.rel "mutex", p8, p9
- st8 [qp] = r18, -8
- (p8) add R1 = r0, P1, 1 C R1 = n1 - P1 - cy
- (p9) add R1 = r0, P1 C R1 = n1 - P1
- add r35 = -1, r35
- mov r31 = 0 C n0, next iteration
- br.cloop.sptk .Loop
- br.sptk .Ldone
- ;;
-.L20: cmp.ne p6, p7 = 0, d0
- ;;
- .pred.rel "mutex", p6, p7
- (p6) add P1 = -1, d0
- (p7) mov P1 = d0
- sub P0 = r0, d0
- br.sptk .L_adj
+ sub r37 = r37, r36
+ (p9) sub r38 = r38, r39, 1
+ (p8) sub r38 = r38, r39
+ adds r40 = 1, r0
+ br .L8
+ ;;
+
+L(fix): cmp.geu p6, p7 = r39, r38
+ cmp.leu p8, p9 = r36, r37
+ ;;
+ (p8) cmp4.ne.and.orcm p6, p7 = 0, r0
+ (p6) br.cond.dptk L(bck)
+ sub r37 = r37, r36
+ (p9) sub r38 = r38, r39, 1
+ (p8) sub r38 = r38, r39
+ adds r18 = 1, r18
+ ;;
+ setf.sig f9 = r38 // n2
+ setf.sig f10 = r37 // n1
+ br L(bck)
+
EPILOGUE()
ASM_END()
dnl Itanium-2 mpn_gcd_1 -- mpn by 1 gcd.
-dnl Copyright 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
+dnl Contributed to the GNU project by Kevin Ryde, inner loop by Torbjorn
+dnl Granlund.
+
+dnl Copyright 2002, 2003, 2004, 2005, 2012 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
C cycles/bitpair (1x1 gcd)
-C Itanium: 14 (approx)
-C Itanium 2: 6.3
+C Itanium: ?
+C Itanium 2: 5.8 (trimmable to 5.64 with huge ctz_table)
C mpn_gcd_1 (mp_srcptr xp, mp_size_t xsize, mp_limb_t y);
C stripping factors of 2 from abs(x-y). Those factors of two are
C determined from just y-x, without the abs(), since there's the same
C number of trailing zeros on n or -n in twos complement. That makes the
-C dependent chain
-C
-C cycles
-C 1 sub x-y and x-y-1
-C 3 andcm (x-y-1)&~(x-y)
-C 2 popcnt trailing zeros
-C 3 shr.u strip abs(x-y)
-C ---
-C 9
+C dependent chain 8 cycles deep.
C
C The selection of x-y versus y-x for abs(x-y), and the selection of the
-C minimum of x and y, is done in parallel with the above.
+C minimum of x and y, is done in parallel with the critical path.
C
C The algorithm takes about 0.68 iterations per bit (two N bit operands) on
-C average, hence the final 6.3 cycles/bitpair.
-C
-C The loop is not as fast as one might hope, since there's extra latency
-C from andcm going across to the `multimedia' popcnt, and vice versa from
-C multimedia shr.u back to the integer sub.
-C
-C The loop branch is .sptk.clr since we usually expect a good number of
-C iterations, and the iterations are data dependent so it's unlikely past
-C results will predict anything much about the future.
+C average, hence the final 5.8 cycles/bitpair.
C
C Not done:
C
C using add. That would mean keeping track of the lowest not-yet-zeroed
C bit, using some sort of mask.
C
-C Itanium-1:
-C
-C This code is not designed for itanium-1 and in fact doesn't run well on
-C that chip. The loop seems to be about 21 cycles, probably because we end
-C up with a 10 cycle replay for not forcibly scheduling the shr.u latency.
-C Lack of branch hints might introduce a couple of bubbles too.
-C
+C TODO:
+C * Once mod_1_N exists in assembly for Itanium, add conditional calls.
+C * Call bmod_1 even for n=1 when up[0] >> v0, i.e. up[0] is much larger than v0 (like other gcd_1 impls).
+C * Probably avoid popcnt also outside of loop, instead use ctz_table.
ASM_START()
.explicit C What does this mean?
.global mpn_modexact_1c_odd
.type mpn_modexact_1c_odd,@function
+C ctz_table[n] is the number of trailing zeros on n, or MAXSHIFT if n==0.
+
+deflit(MAXSHIFT, 7)
+deflit(MASK, eval((m4_lshift(1,MAXSHIFT))-1))
+
+ .section ".rodata"
+ctz_table:
+ .byte MAXSHIFT
+forloop(i,1,MASK,
+` .byte m4_count_trailing_zeros(i)
+')
+
PROLOGUE(mpn_gcd_1)
C r32 xp
mov out_carry = 0
- C
-
popcnt y_twos = y_twos C I0 y twos
;;
- C
-
{ .mmi; add x_orig_one = -1, x_orig C M0 orig x-1
shr.u out_divisor = y, y_twos C I0 y without twos
}{ shr.u y = y, y_twos C I1 y without twos
mov b0 = save_rp C I0
} ;;
- C
-
popcnt x_orig = x_orig C I0 orig x twos
-
popcnt r9 = r9 C I0 x twos
;;
- C
-
{ cmp.lt p7,p0 = x_orig, y_twos C M0 orig x_twos < y_twos
shr.u x = x, r9 C I0 x odd
} ;;
{ (p7) mov y_twos = x_orig C M0 common twos
add r10 = -1, y C I0 y-1
- (p6) br.dpnt.few .Ldone_y C B0 x%y==0 then result y
-} ;;
-
- C
-
-
- C No noticable difference in speed for the loop aligned to
- C 32 or just 16.
-.Ltop:
- C r8 x
- C r10 y-1
- C r34 y
- C r38 common twos, for use at end
-
-{ .mmi; cmp.gtu p8,p9 = x, y C M0 x>y
- cmp.ne p10,p0 = x, y C M1 x==y
- sub r9 = y, x C I0 d = y - x
-}{ .mmi; sub r10 = r10, x C M2 d-1 = y - x - 1
+ (p6) br.dpnt.few L(done_y) C B0 x%y==0 then result y
} ;;
-{ .mmi; .pred.rel "mutex", p8, p9
- (p8) sub x = x, y C M0 x>y use x=x-y, y unchanged
- (p9) mov y = x C M1 y>=x use y=x
- (p9) mov x = r9 C I0 y>=x use x=y-x
-}{ .mmi; andcm r9 = r10, r9 C M2 (d-1)&~d
+ addl r22 = @ltoffx(ctz_table#), r1
;;
-
- add r10 = -1, y C M0 new y-1
- popcnt r9 = r9 C I0 twos on x-y
-} ;;
-
-{ shr.u x = x, r9 C I0 new x without twos
- (p10) br.sptk.few.clr .Ltop
-} ;;
-
+ ld8.mov r22 = [r22], ctz_table#
+ br L(ent)
+
+
+ ALIGN(32)
+L(top): .pred.rel "mutex", p6,p7
+.mmi; and r20 = MASK, r19 C low MAXSHIFT bits of y-x
+ (p7) mov y = x C p7, x not above y: y = x
+ (p6) sub x = x, y C p6, x above y: x = x-y
+.mmi; (p7) mov x = r19 C p7: x = y-x as computed at the loop entry
+ nop 0
+ nop 0
+ ;;
+L(mid):
+.mmb; add r21 = r22, r20 C r22 + low bits, r22 was loaded from the ctz_table reference
+ cmp.eq p10,p0 = 0, r20 C are the low MAXSHIFT bits all zero
+ (p10) br.spnt.few.clr L(shift_alot)
+ ;;
+.mmi; ld1 r16 = [r21] C shift count byte from the table
+ ;;
+ nop 0
+ shr.u x = x, r16 C shift x right by the table value
+ ;;
+L(ent):
+.mmi; sub r19 = y, x C y-x, consumed at the top of the loop
+ cmp.gtu p6,p7 = x, y C p6 = x above y, p7 = the complement
+ cmp.ne p8,p0 = x, y C p8 = x differs from y
+.mmb; nop 0
+ nop 0
+ (p8) br.sptk.few.clr L(top) C repeat until x equals y
C result is y
-.Ldone_y:
- shl r8 = y, y_twos C I common factors of 2
- ;;
+L(done_y):
mov ar.pfs = save_pfs C I0
+ shl r8 = y, y_twos C I common factors of 2
br.ret.sptk.many b0
+L(shift_alot):
+ extr.u r20 = x, MAXSHIFT, MAXSHIFT
+ shr.u x = x, MAXSHIFT
+ br L(mid)
EPILOGUE()
/* gmp-mparam.h -- Compiler/machine parameter header file.
-Copyright 2000, 2001, 2002, 2003, 2004, 2005, 2009, 2010 Free Software
+Copyright 2000, 2001, 2002, 2003, 2004, 2005, 2009, 2010, 2011 Free Software
Foundation, Inc.
This file is part of the GNU MP Library.
#define GMP_LIMB_BITS 64
#define BYTES_PER_MP_LIMB 8
-/* 1300MHz Itanium2 (babe.fsffrance.org) */
-
+/* 900MHz Itanium2 (titanic.gmplib.org) */
+#define MOD_1_1P_METHOD 2
#define MOD_1_NORM_THRESHOLD 0 /* always */
#define MOD_1_UNNORM_THRESHOLD 0 /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD 4
-#define MOD_1U_TO_MOD_1_1_THRESHOLD 8
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD 0
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD 22
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 22
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 3
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 5
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 26
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD 0 /* never mpn_mod_1s_2p */
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 10
#define USE_PREINV_DIVREM_1 1 /* native */
+#define DIV_QR_2_PI2_THRESHOLD 12
#define DIVEXACT_1_THRESHOLD 0 /* always (native) */
#define BMOD_1_TO_MOD_1_THRESHOLD MP_SIZE_T_MAX /* never */
-#define MUL_TOOM22_THRESHOLD 44
-#define MUL_TOOM33_THRESHOLD 89
-#define MUL_TOOM44_THRESHOLD 232
-#define MUL_TOOM6H_THRESHOLD 351
-#define MUL_TOOM8H_THRESHOLD 454
+#define MUL_TOOM22_THRESHOLD 36
+#define MUL_TOOM33_THRESHOLD 129
+#define MUL_TOOM44_THRESHOLD 214
+#define MUL_TOOM6H_THRESHOLD 318
+#define MUL_TOOM8H_THRESHOLD 430
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD 121
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 138
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 121
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD 145
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD 203
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD 101
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD 160
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD 138
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD 159
+#define SQR_BASECASE_THRESHOLD 11
+#define SQR_TOOM2_THRESHOLD 84
+#define SQR_TOOM3_THRESHOLD 131
+#define SQR_TOOM4_THRESHOLD 494
+#define SQR_TOOM6_THRESHOLD 0 /* always */
+#define SQR_TOOM8_THRESHOLD 0 /* always */
-#define SQR_BASECASE_THRESHOLD 26
-#define SQR_TOOM2_THRESHOLD 119
-#define SQR_TOOM3_THRESHOLD 141
-#define SQR_TOOM4_THRESHOLD 282
-#define SQR_TOOM6_THRESHOLD 375
-#define SQR_TOOM8_THRESHOLD 527
+#define MULMID_TOOM42_THRESHOLD 98
-#define MULMOD_BNM1_THRESHOLD 24
-#define SQRMOD_BNM1_THRESHOLD 19
+#define MULMOD_BNM1_THRESHOLD 21
+#define SQRMOD_BNM1_THRESHOLD 25
-#define MUL_FFT_MODF_THRESHOLD 888 /* k = 5 */
+#define MUL_FFT_MODF_THRESHOLD 468 /* k = 5 */
#define MUL_FFT_TABLE3 \
- { { 888, 5}, { 31, 6}, { 16, 5}, { 33, 6}, \
- { 17, 5}, { 35, 6}, { 28, 7}, { 15, 6}, \
- { 33, 7}, { 17, 6}, { 35, 7}, { 19, 6}, \
- { 39, 7}, { 29, 8}, { 15, 7}, { 33, 8}, \
- { 17, 7}, { 37, 8}, { 19, 7}, { 41, 8}, \
- { 21, 7}, { 43, 8}, { 23, 7}, { 47, 8}, \
- { 27, 7}, { 55, 8}, { 31, 7}, { 63, 8}, \
- { 37, 9}, { 19, 8}, { 43, 9}, { 23, 8}, \
- { 51, 9}, { 27, 8}, { 55, 9}, { 31, 8}, \
- { 63, 9}, { 35, 8}, { 71, 9}, { 39, 8}, \
- { 79, 9}, { 43,10}, { 23, 9}, { 47, 8}, \
- { 95, 9}, { 55,10}, { 31, 9}, { 71,10}, \
+ { { 476, 5}, { 27, 6}, { 14, 5}, { 29, 6}, \
+ { 33, 7}, { 17, 6}, { 37, 7}, { 19, 6}, \
+ { 39, 7}, { 21, 6}, { 43, 7}, { 33, 8}, \
+ { 17, 7}, { 37, 8}, { 19, 7}, { 39, 8}, \
+ { 21, 7}, { 43, 8}, { 37, 9}, { 19, 8}, \
+ { 43, 9}, { 23, 8}, { 51, 9}, { 27, 8}, \
+ { 57, 9}, { 31, 8}, { 63, 9}, { 43,10}, \
+ { 23, 9}, { 59,10}, { 31, 9}, { 71,10}, \
{ 39, 9}, { 83,10}, { 47, 9}, { 99,10}, \
- { 55,11}, { 31,10}, { 63, 9}, { 127,10}, \
- { 71, 9}, { 143,10}, { 87,11}, { 47,10}, \
+ { 55,11}, { 31,10}, { 87,11}, { 47,10}, \
{ 111,12}, { 31,11}, { 63,10}, { 143,11}, \
- { 79,10}, { 167,11}, { 95,10}, { 199,11}, \
- { 111,12}, { 63,11}, { 127,10}, { 255,11}, \
- { 143,10}, { 287,11}, { 159,10}, { 319,12}, \
- { 95,11}, { 223,13}, { 63,12}, { 127,11}, \
- { 287,12}, { 159,11}, { 335,12}, { 191,11}, \
- { 383,10}, { 767,11}, { 399,12}, { 223,13}, \
- { 127,12}, { 255,11}, { 511,10}, { 1023,12}, \
- { 287,11}, { 575,10}, { 1151,12}, { 319,11}, \
- { 639,10}, { 1279,11}, { 671,13}, { 191,12}, \
- { 383,11}, { 767,10}, { 1535,12}, { 415,11}, \
- { 831,14}, { 127,13}, { 255,12}, { 511,11}, \
- { 1023,12}, { 543,11}, { 1087,12}, { 575,13}, \
- { 319,12}, { 639,11}, { 1279,12}, { 671,11}, \
- { 1343,12}, { 703,11}, { 1471,13}, { 383,12}, \
- { 767,11}, { 1535,12}, { 799,11}, { 1599,12}, \
- { 831,13}, { 447,12}, { 959,14}, { 255,13}, \
- { 511,12}, { 1055,11}, { 2111,12}, { 1087,13}, \
- { 575,12}, { 1215,11}, { 2431,12}, { 1247,13}, \
- { 639,12}, { 1279,11}, { 2559,12}, { 1343,13}, \
- { 703,12}, { 1471,14}, { 383,13}, { 767,12}, \
- { 1599,13}, { 831,12}, { 1663,11}, { 3327,12}, \
- { 1727,13}, { 895,12}, { 1791,13}, { 959,15}, \
- { 255,14}, { 511,13}, { 1023,12}, { 2047,13}, \
- { 1087,12}, { 2175,13}, { 1151,12}, { 2303,13}, \
- { 1215,11}, { 4863,12}, { 2495,14}, { 639,13}, \
- { 1343,12}, { 2687,13}, { 1471,12}, { 2943,14}, \
- { 767,13}, { 1599,12}, { 3199,13}, { 1727,12}, \
- { 3455,14}, { 895,13}, { 1983,12}, { 3967,15}, \
- { 511,14}, { 1023,13}, { 2111,12}, { 4223,13}, \
- { 2239,12}, { 4479,13}, { 2495,14}, { 1279,13}, \
- { 2751,14}, { 1407,13}, { 2943,15}, { 767,14}, \
- { 1535,13}, { 3199,14}, { 1663,13}, { 3455,14}, \
- { 1791,12}, { 7167,14}, { 1919,13}, { 3967,16}, \
- { 511,15}, { 1023,14}, { 2175,13}, { 4351,14}, \
- { 2431,15}, { 1279,14}, { 2943,13}, { 5887,15}, \
- { 1535,14}, { 3199,13}, { 6399,14}, { 16384,15}, \
- { 32768,16}, { 65536,17}, { 131072,18}, { 262144,19}, \
- { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \
- {8388608,24} }
-#define MUL_FFT_TABLE3_SIZE 217
-#define MUL_FFT_THRESHOLD 9856
-
-#define SQR_FFT_MODF_THRESHOLD 751 /* k = 5 */
+ { 79,10}, { 167,11}, { 95,10}, { 191,11}, \
+ { 111,12}, { 63,11}, { 143,10}, { 287, 9}, \
+ { 575,10}, { 303,11}, { 159,10}, { 319,12}, \
+ { 95,11}, { 191,10}, { 399,11}, { 207,10}, \
+ { 431,13}, { 63,12}, { 127,11}, { 271,10}, \
+ { 543,11}, { 287,10}, { 575,11}, { 303,12}, \
+ { 159,11}, { 335,10}, { 671,11}, { 367,12}, \
+ { 191,11}, { 399,10}, { 799,11}, { 431,12}, \
+ { 223,11}, { 447,13}, { 127,12}, { 255,11}, \
+ { 543,12}, { 287,11}, { 607,12}, { 319,11}, \
+ { 671,12}, { 351,11}, { 703,13}, { 191,12}, \
+ { 415,11}, { 863,12}, { 447,14}, { 127,13}, \
+ { 255,12}, { 607,13}, { 319,12}, { 735,13}, \
+ { 383,12}, { 799,11}, { 1599,12}, { 863,13}, \
+ { 447,12}, { 927,11}, { 1855,14}, { 255,13}, \
+ { 511,12}, { 1055,13}, { 575,12}, { 1215,13}, \
+ { 639,12}, { 1279,13}, { 703,14}, { 383,13}, \
+ { 767,12}, { 1535,13}, { 831,12}, { 1663,13}, \
+ { 895,12}, { 1791,15}, { 255,14}, { 511,13}, \
+ { 1087,12}, { 2175,13}, { 1215,14}, { 639,13}, \
+ { 1343,12}, { 2687,13}, { 1471,14}, { 767,13}, \
+ { 1599,12}, { 3199,13}, { 1663,14}, { 895,13}, \
+ { 1855,15}, { 511,14}, { 1023,13}, { 2175,14}, \
+ { 1151,13}, { 2431,14}, { 1279,13}, { 2687,14}, \
+ { 1407,15}, { 767,14}, { 1535,13}, { 3199,14}, \
+ { 1663,13}, { 3455,14}, { 1791,16}, { 65536,17}, \
+ { 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \
+ {2097152,22}, {4194304,23}, {8388608,24} }
+#define MUL_FFT_TABLE3_SIZE 155
+#define MUL_FFT_THRESHOLD 6272
+
+#define SQR_FFT_MODF_THRESHOLD 440 /* k = 5 */
#define SQR_FFT_TABLE3 \
- { { 751, 5}, { 35, 6}, { 18, 5}, { 37, 6}, \
- { 29, 7}, { 15, 6}, { 33, 7}, { 17, 6}, \
- { 35, 7}, { 29, 8}, { 15, 7}, { 37, 8}, \
- { 19, 7}, { 41, 8}, { 21, 7}, { 43, 8}, \
- { 23, 7}, { 47, 8}, { 43, 9}, { 23, 8}, \
- { 51, 9}, { 27, 8}, { 55, 9}, { 31, 8}, \
- { 63, 9}, { 39, 8}, { 79, 9}, { 43,10}, \
- { 23, 9}, { 47, 8}, { 95, 9}, { 55,10}, \
- { 31, 9}, { 67,10}, { 39, 9}, { 83,10}, \
+ { { 436, 5}, { 14, 4}, { 29, 5}, { 31, 6}, \
+ { 35, 7}, { 18, 6}, { 37, 7}, { 37, 8}, \
+ { 19, 7}, { 40, 8}, { 37, 9}, { 19, 8}, \
+ { 43, 9}, { 23, 8}, { 49, 9}, { 27, 8}, \
+ { 57, 9}, { 43,10}, { 23, 9}, { 55,10}, \
+ { 31, 9}, { 71,10}, { 39, 9}, { 83,10}, \
{ 47, 9}, { 99,10}, { 55,11}, { 31,10}, \
- { 63, 9}, { 127,10}, { 79,11}, { 47,10}, \
- { 103,12}, { 31,11}, { 63,10}, { 143,11}, \
- { 79,10}, { 159,11}, { 95,10}, { 199,11}, \
- { 111,12}, { 63,11}, { 127,10}, { 255,11}, \
- { 143,10}, { 287,11}, { 159,12}, { 95,11}, \
- { 191,10}, { 383,11}, { 207,13}, { 63,12}, \
- { 127,11}, { 255,10}, { 511,11}, { 271,12}, \
- { 159,11}, { 319,10}, { 639,11}, { 335,12}, \
- { 191,11}, { 383,10}, { 767,12}, { 223,13}, \
- { 127,11}, { 511,10}, { 1023,11}, { 527,12}, \
- { 287,11}, { 575,10}, { 1151,11}, { 591,12}, \
- { 319,11}, { 639,13}, { 191,12}, { 383,11}, \
- { 767,10}, { 1535,11}, { 799,10}, { 1599, 9}, \
- { 3199,14}, { 127,13}, { 255,12}, { 511, 9}, \
- { 4095,10}, { 2111,12}, { 543,11}, { 1087,10}, \
- { 2239,12}, { 575,10}, { 2303,13}, { 319,12}, \
- { 671,11}, { 1471,13}, { 383,11}, { 1599,12}, \
- { 831,11}, { 1663,12}, { 863,10}, { 3455,13}, \
- { 447,12}, { 895,11}, { 1791,14}, { 255,13}, \
- { 511,12}, { 1023,11}, { 2111,12}, { 1087,11}, \
- { 2239,13}, { 575,12}, { 1215,11}, { 2495,13}, \
- { 639,12}, { 1343,13}, { 703,12}, { 1407,14}, \
- { 383,13}, { 767,12}, { 1599,13}, { 831,12}, \
- { 1727,11}, { 3455,12}, { 1791,15}, { 255,14}, \
- { 511,13}, { 1023,12}, { 2111,11}, { 4223,12}, \
- { 2239,11}, { 4479,10}, { 8959,11}, { 4607,13}, \
- { 1215,14}, { 639,13}, { 1343,12}, { 2815,13}, \
- { 1471,12}, { 2943,14}, { 767,13}, { 1599,12}, \
- { 3199,13}, { 1727,12}, { 3455,14}, { 895,13}, \
- { 1855,12}, { 3711,13}, { 1983,12}, { 3967,15}, \
- { 511,14}, { 1023,13}, { 2111,12}, { 4223,13}, \
- { 2239,12}, { 4479,14}, { 1151,13}, { 2495,14}, \
- { 1279,13}, { 2687,14}, { 1407,13}, { 2943,15}, \
- { 767,14}, { 1535,13}, { 3071,14}, { 1663,13}, \
- { 3327,14}, { 1791,16}, { 511,15}, { 1023,14}, \
- { 2047,13}, { 4223,14}, { 2175,13}, { 4479,12}, \
- { 8959,14}, { 2303,13}, { 4735,14}, { 2431,15}, \
- { 1279,14}, { 2943,15}, { 1535,14}, { 3071,13}, \
- { 6143,14}, { 16384,15}, { 32768,16}, { 65536,17}, \
+ { 87,11}, { 47,10}, { 111,12}, { 31,11}, \
+ { 63,10}, { 135,11}, { 79,10}, { 167,11}, \
+ { 95,10}, { 191,11}, { 111,12}, { 63,11}, \
+ { 127,10}, { 255,11}, { 143,10}, { 287, 9}, \
+ { 575,10}, { 303,11}, { 159,10}, { 319,12}, \
+ { 95,11}, { 191,10}, { 399,11}, { 207,10}, \
+ { 431,13}, { 63,12}, { 127,11}, { 271,10}, \
+ { 543,11}, { 303,12}, { 159,11}, { 335,10}, \
+ { 671,11}, { 367,10}, { 735,12}, { 191,11}, \
+ { 399,10}, { 799,11}, { 431,12}, { 223,11}, \
+ { 463,13}, { 127,12}, { 255,11}, { 543,12}, \
+ { 287,11}, { 607,12}, { 319,11}, { 671,12}, \
+ { 351,11}, { 735,13}, { 191,12}, { 383,11}, \
+ { 799,12}, { 415,11}, { 863,12}, { 447,11}, \
+ { 895,14}, { 127,13}, { 255,12}, { 543,11}, \
+ { 1087,12}, { 607,13}, { 319,12}, { 735,13}, \
+ { 383,12}, { 863,13}, { 447,12}, { 959,14}, \
+ { 255,13}, { 511,12}, { 1087,13}, { 575,12}, \
+ { 1183,13}, { 639,12}, { 1279,13}, { 703,12}, \
+ { 1407,14}, { 383,13}, { 767,12}, { 1535,13}, \
+ { 831,12}, { 1663,13}, { 895,12}, { 1791,13}, \
+ { 959,15}, { 255,14}, { 511,13}, { 1087,12}, \
+ { 2175,13}, { 1215,14}, { 639,13}, { 1343,12}, \
+ { 2687,13}, { 1471,14}, { 767,13}, { 1663,14}, \
+ { 895,13}, { 1919,15}, { 511,14}, { 1023,13}, \
+ { 2175,14}, { 1151,13}, { 2431,14}, { 1279,13}, \
+ { 2687,14}, { 1407,15}, { 767,14}, { 1535,13}, \
+ { 3199,14}, { 1663,13}, { 3455,14}, { 1791,13}, \
+ { 8192,14}, { 16384,15}, { 32768,16}, { 65536,17}, \
{ 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \
{2097152,22}, {4194304,23}, {8388608,24} }
-#define SQR_FFT_TABLE3_SIZE 203
-#define SQR_FFT_THRESHOLD 7552
+#define SQR_FFT_TABLE3_SIZE 151
+#define SQR_FFT_THRESHOLD 4032
-#define MULLO_BASECASE_THRESHOLD 17
-#define MULLO_DC_THRESHOLD 91
-#define MULLO_MUL_N_THRESHOLD 19187
+#define MULLO_BASECASE_THRESHOLD 0 /* always */
+#define MULLO_DC_THRESHOLD 62
+#define MULLO_MUL_N_THRESHOLD 12322
-#define DC_DIV_QR_THRESHOLD 72
-#define DC_DIVAPPR_Q_THRESHOLD 254
-#define DC_BDIV_QR_THRESHOLD 117
-#define DC_BDIV_Q_THRESHOLD 292
+#define DC_DIV_QR_THRESHOLD 55
+#define DC_DIVAPPR_Q_THRESHOLD 220
+#define DC_BDIV_QR_THRESHOLD 92
+#define DC_BDIV_Q_THRESHOLD 252
-#define INV_MULMOD_BNM1_THRESHOLD 86
-#define INV_NEWTON_THRESHOLD 178
-#define INV_APPR_THRESHOLD 179
+#define INV_MULMOD_BNM1_THRESHOLD 70
+#define INV_NEWTON_THRESHOLD 156
+#define INV_APPR_THRESHOLD 154
-#define BINV_NEWTON_THRESHOLD 300
-#define REDC_1_TO_REDC_2_THRESHOLD 2
-#define REDC_2_TO_REDC_N_THRESHOLD 167
+#define BINV_NEWTON_THRESHOLD 248
+#define REDC_1_TO_REDC_2_THRESHOLD 0 /* always */
+#define REDC_2_TO_REDC_N_THRESHOLD 149
-#define MU_DIV_QR_THRESHOLD 1787
-#define MU_DIVAPPR_Q_THRESHOLD 1470
+#define MU_DIV_QR_THRESHOLD 1142
+#define MU_DIVAPPR_Q_THRESHOLD 1142
#define MUPI_DIV_QR_THRESHOLD 0 /* always */
-#define MU_BDIV_QR_THRESHOLD 1787
-#define MU_BDIV_Q_THRESHOLD 2089
-
-#define MATRIX22_STRASSEN_THRESHOLD 27
-#define HGCD_THRESHOLD 139
-#define GCD_DC_THRESHOLD 469
-#define GCDEXT_DC_THRESHOLD 496
-#define JACOBI_BASE_METHOD 1
-
-#define GET_STR_DC_THRESHOLD 14
-#define GET_STR_PRECOMPUTE_THRESHOLD 22
-#define SET_STR_DC_THRESHOLD 1474
-#define SET_STR_PRECOMPUTE_THRESHOLD 3495
+#define MU_BDIV_QR_THRESHOLD 1142
+#define MU_BDIV_Q_THRESHOLD 1470
+
+#define POWM_SEC_TABLE 2,29,298,1897
+
+#define MATRIX22_STRASSEN_THRESHOLD 19
+#define HGCD_THRESHOLD 115
+#define HGCD_APPR_THRESHOLD 181
+#define HGCD_REDUCE_THRESHOLD 3014
+#define GCD_DC_THRESHOLD 555
+#define GCDEXT_DC_THRESHOLD 368
+#define JACOBI_BASE_METHOD 4
+
+#define GET_STR_DC_THRESHOLD 13
+#define GET_STR_PRECOMPUTE_THRESHOLD 21
+#define SET_STR_DC_THRESHOLD 1216
+#define SET_STR_PRECOMPUTE_THRESHOLD 3170
+
+#define FAC_DSC_THRESHOLD 746
+#define FAC_ODD_THRESHOLD 0 /* always */
dnl IA-64 mpn_hamdist -- mpn hamming distance.
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
dnl Copyright 2003, 2004, 2005 Free Software Foundation, Inc.
-dnl
+
dnl This file is part of the GNU MP Library.
dnl The GNU MP Library is free software; you can redistribute it and/or modify
')')
define(`ASSERT_label_counter',1)
+define(`getfsig', `getf.sig')
+define(`setfsig', `setf.sig')
+define(`cmpeq', `cmp.eq')
+define(`cmpne', `cmp.ne')
+define(`cmpltu', `cmp.ltu')
+define(`cmpleu', `cmp.leu')
+define(`cmpgtu', `cmp.gtu')
+define(`cmpgeu', `cmp.geu')
+define(`cmple', `cmp.le')
+define(`cmpgt', `cmp.gt')
+define(`cmpeqor', `cmp.eq.or')
+define(`cmpequc', `cmp.eq.unc')
divert
dnl IA-64 mpn_invert_limb -- Invert a normalized limb.
+dnl Contributed to the GNU project by Torbjorn Granlund and Kevin Ryde.
+
dnl Copyright 2000, 2002, 2004 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
dnl IA-64 mpn_and_n, mpn_andn_n, mpn_nand_n, mpn_ior_n, mpn_iorn_n,
dnl mpn_nior_n, mpn_xor_n, mpn_xnor_n -- mpn bitwise logical operations.
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
dnl Copyright 2003, 2004, 2005 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl IA-64 mpn_lshift/mpn_rshift.
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
dnl Copyright 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation,
dnl Inc.
include(`../config.m4')
C cycles/limb
-C Itanium: 2.0
-C Itanium 2: 1.0
+C Itanium: 2
+C Itanium 2: 1
C This code is scheduled deeply since the plain shift instructions shr and shl
C have a latency of 4 (on Itanium) or 3 (on Itanium 2). Poor scheduling of
C these instructions cause a 10 cycle replay trap on Itanium.
-C TODO
-C * Optimize function entry and feed-in code.
+C The ld8 scheduling should probably be decreased to make the function smaller.
+C Good lfetch will make sure we never stall anyway.
+
+C We should actually issue the first ld8 at cycle 0, and the first BSH/FSH pair
+C at cycle 2. Judicious use of predicates could allow us to issue more ld8's
+C in the prologue.
+
C INPUT PARAMETERS
-define(`rp',`r32')
-define(`up',`r33')
-define(`n',`r34')
+define(`rp', `r32')
+define(`up', `r33')
+define(`n', `r34')
define(`cnt',`r35')
define(`tnc',`r9')
ASM_START()
PROLOGUE(func)
.prologue
- .save ar.lc, r2
+ .save ar.lc, r2
.body
ifdef(`HAVE_ABI_32',
-` addp4 rp = 0, rp C M I
- addp4 up = 0, up C M I
- sxt4 n = n C M I
- zxt4 cnt = cnt C I
+` addp4 rp = 0, rp C M I
+ addp4 up = 0, up C M I
+ sxt4 n = n C M I
+ zxt4 cnt = cnt C I
;;
')
- {.mmi; cmp.lt p14, p15 = 4, n C M I
- and r14 = 3, n C M I
- mov.i r2 = ar.lc C I0
-}{.mmi; add r15 = -1, n C M I
- sub tnc = 64, cnt C M I
- add r16 = -5, n
- ;;
-}{.mmi; cmp.eq p6, p0 = 1, r14 C M I
- cmp.eq p7, p0 = 2, r14 C M I
- shr.u n = r16, 2 C I0
-}{.mmi; cmp.eq p8, p0 = 3, r14 C M I
+ {.mmi; cmp.lt p14, p15 = 4, n C M I
+ and r14 = 3, n C M I
+ mov.i r2 = ar.lc C I0
+}{.mmi; add r15 = -1, n C M I
+ sub tnc = 64, cnt C M I
+ add r16 = -5, n
+ ;;
+}{.mmi; cmp.eq p6, p0 = 1, r14 C M I
+ cmp.eq p7, p0 = 2, r14 C M I
+ shr.u n = r16, 2 C I0
+}{.mmi; cmp.eq p8, p0 = 3, r14 C M I
ifdef(`OPERATION_lshift',
-` shladd up = r15, 3, up C M I
- shladd rp = r15, 3, rp') C M I
+` shladd up = r15, 3, up C M I
+ shladd rp = r15, 3, rp') C M I
;;
-}{.mmi; add r11 = POFF, up C M I
- ld8 r10 = [up], UPD C M01
- mov.i ar.lc = n C I0
+}{.mmi; add r11 = POFF, up C M I
+ ld8 r10 = [up], UPD C M01
+ mov.i ar.lc = n C I0
}{.bbb;
- (p6) br.dptk .Lb01
- (p7) br.dptk .Lb10
- (p8) br.dptk .Lb11
- ;;
-}
+ (p6) br.dptk .Lb01
+ (p7) br.dptk .Lb10
+ (p8) br.dptk .Lb11
+ ;; }
-.Lb00: ld8 r19 = [up], UPD
+.Lb00: ld8 r19 = [up], UPD
+ ;;
+ ld8 r16 = [up], UPD
;;
- ld8 r16 = [up], UPD
+ ld8 r17 = [up], UPD
+ BSH r8 = r10, tnc C function return value
;;
- ld8 r17 = [up], UPD
- BSH r8 = r10, tnc C function return value
+ FSH r24 = r10, cnt
+ BSH r25 = r19, tnc
(p14) br.cond.dptk .grt4
-
- FSH r24 = r10, cnt
- BSH r25 = r19, tnc
;;
- FSH r26 = r19, cnt
- BSH r27 = r16, tnc
+ FSH r26 = r19, cnt
+ BSH r27 = r16, tnc
;;
- FSH r20 = r16, cnt
- BSH r21 = r17, tnc
+ FSH r20 = r16, cnt
+ BSH r21 = r17, tnc
;;
- or r14 = r25, r24
- FSH r22 = r17, cnt
- BSH r23 = r10, tnc
- br .Lr4
+ or r14 = r25, r24
+ FSH r22 = r17, cnt
+ BSH r23 = r10, tnc
+ br .Lr4
-.grt4: FSH r24 = r10, cnt
- BSH r25 = r19, tnc
- ;;
- ld8 r18 = [up], UPD
- FSH r26 = r19, cnt
- BSH r27 = r16, tnc
+.grt4: ld8 r18 = [up], UPD
+ FSH r26 = r19, cnt
+ BSH r27 = r16, tnc
;;
- ld8 r19 = [up], UPD
- FSH r20 = r16, cnt
- BSH r21 = r17, tnc
+ ld8 r19 = [up], UPD
+ FSH r20 = r16, cnt
+ BSH r21 = r17, tnc
;;
- ld8 r16 = [up], UPD
- FSH r22 = r17, cnt
- BSH r23 = r18, tnc
+ ld8 r16 = [up], UPD
+ FSH r22 = r17, cnt
+ BSH r23 = r18, tnc
;;
- or r14 = r25, r24
- ld8 r17 = [up], UPD
+ or r14 = r25, r24
+ ld8 r17 = [up], UPD
br.cloop.dpnt .Ltop
- br .Lbot
+ br .Lbot
.Lb01:
- (p15) BSH r8 = r10, tnc C function return value I
- (p15) FSH r22 = r10, cnt C I
- (p15) br.cond.dptk .Lr1 C return B
+ (p15) BSH r8 = r10, tnc C function return value I
+ (p15) FSH r22 = r10, cnt C I
+ (p15) br.cond.dptk .Lr1 C return B
-.grt1: ld8 r18 = [up], UPD
+.grt1: ld8 r18 = [up], UPD
;;
- ld8 r19 = [up], UPD
- BSH r8 = r10, tnc C function return value
+ ld8 r19 = [up], UPD
+ BSH r8 = r10, tnc C function return value
;;
- ld8 r16 = [up], UPD
- FSH r22 = r10, cnt
- BSH r23 = r18, tnc
+ ld8 r16 = [up], UPD
+ FSH r22 = r10, cnt
+ BSH r23 = r18, tnc
;;
- ld8 r17 = [up], UPD
+ ld8 r17 = [up], UPD
+ FSH r24 = r18, cnt
+ BSH r25 = r19, tnc
br.cloop.dpnt .grt5
;;
-
- FSH r24 = r18, cnt
- BSH r25 = r19, tnc
+ or r15 = r23, r22
+ FSH r26 = r19, cnt
+ BSH r27 = r16, tnc
;;
- or r15 = r23, r22
- FSH r26 = r19, cnt
- BSH r27 = r16, tnc
- ;;
- FSH r20 = r16, cnt
- BSH r21 = r17, tnc
- br .Lr5
+ FSH r20 = r16, cnt
+ BSH r21 = r17, tnc
+ br .Lr5
-.grt5: FSH r24 = r18, cnt
- BSH r25 = r19, tnc
- ;;
- ld8 r18 = [up], UPD
- FSH r26 = r19, cnt
- BSH r27 = r16, tnc
+.grt5: ld8 r18 = [up], UPD
+ FSH r26 = r19, cnt
+ BSH r27 = r16, tnc
;;
- ld8 r19 = [up], UPD
- FSH r20 = r16, cnt
- BSH r21 = r17, tnc
+ ld8 r19 = [up], UPD
+ FSH r20 = r16, cnt
+ BSH r21 = r17, tnc
;;
- or r15 = r23, r22
- ld8 r16 = [up], UPD
- br .LL01
+ or r15 = r23, r22
+ ld8 r16 = [up], UPD
+ br .LL01
-.Lb10: ld8 r17 = [up], UPD
+.Lb10: ld8 r17 = [up], UPD
(p14) br.cond.dptk .grt2
- BSH r8 = r10, tnc C function return value
+ BSH r8 = r10, tnc C function return value
;;
- FSH r20 = r10, cnt
- BSH r21 = r17, tnc
+ FSH r20 = r10, cnt
+ BSH r21 = r17, tnc
;;
- or r14 = r21, r20
- FSH r22 = r17, cnt
- br .Lr2 C return
+ or r14 = r21, r20
+ FSH r22 = r17, cnt
+ br .Lr2 C return
-.grt2: ld8 r18 = [up], UPD
- BSH r8 = r10, tnc C function return value
+.grt2: ld8 r18 = [up], UPD
+ BSH r8 = r10, tnc C function return value
;;
- ld8 r19 = [up], UPD
- FSH r20 = r10, cnt
- BSH r21 = r17, tnc
+ ld8 r19 = [up], UPD
+ FSH r20 = r10, cnt
+ BSH r21 = r17, tnc
;;
- ld8 r16 = [up], UPD
- FSH r22 = r17, cnt
- BSH r23 = r18, tnc
+ ld8 r16 = [up], UPD
+ FSH r22 = r17, cnt
+ BSH r23 = r18, tnc
;;
- ld8 r17 = [up], UPD
+ {.mmi; ld8 r17 = [up], UPD
+ or r14 = r21, r20
+ FSH r24 = r18, cnt
+}{.mib; nop 0
+ BSH r25 = r19, tnc
br.cloop.dpnt .grt6
- ;;
+ ;; }
- or r14 = r21, r20
- FSH r24 = r18, cnt
- BSH r25 = r19, tnc
- ;;
- FSH r26 = r19, cnt
- BSH r27 = r16, tnc
- br .Lr6
+ FSH r26 = r19, cnt
+ BSH r27 = r16, tnc
+ br .Lr6
-.grt6: or r14 = r21, r20
- FSH r24 = r18, cnt
- BSH r25 = r19, tnc
+.grt6: ld8 r18 = [up], UPD
+ FSH r26 = r19, cnt
+ BSH r27 = r16, tnc
;;
- ld8 r18 = [up], UPD
- FSH r26 = r19, cnt
- BSH r27 = r16, tnc
- ;;
- ld8 r19 = [up], UPD
- br .LL10
+ ld8 r19 = [up], UPD
+ br .LL10
-.Lb11: ld8 r16 = [up], UPD
+.Lb11: ld8 r16 = [up], UPD
;;
- ld8 r17 = [up], UPD
- BSH r8 = r10, tnc C function return value
+ ld8 r17 = [up], UPD
+ BSH r8 = r10, tnc C function return value
(p14) br.cond.dptk .grt3
;;
- FSH r26 = r10, cnt
- BSH r27 = r16, tnc
+ FSH r26 = r10, cnt
+ BSH r27 = r16, tnc
;;
- FSH r20 = r16, cnt
- BSH r21 = r17, tnc
+ FSH r20 = r16, cnt
+ BSH r21 = r17, tnc
;;
- or r15 = r27, r26
- FSH r22 = r17, cnt
- br .Lr3 C return
+ or r15 = r27, r26
+ FSH r22 = r17, cnt
+ br .Lr3 C return
-.grt3: ld8 r18 = [up], UPD
- FSH r26 = r10, cnt
- BSH r27 = r16, tnc
+.grt3: ld8 r18 = [up], UPD
+ FSH r26 = r10, cnt
+ BSH r27 = r16, tnc
;;
- ld8 r19 = [up], UPD
- FSH r20 = r16, cnt
- BSH r21 = r17, tnc
+ ld8 r19 = [up], UPD
+ FSH r20 = r16, cnt
+ BSH r21 = r17, tnc
;;
- ld8 r16 = [up], UPD
- FSH r22 = r17, cnt
- BSH r23 = r18, tnc
+ ld8 r16 = [up], UPD
+ FSH r22 = r17, cnt
+ BSH r23 = r18, tnc
;;
- ld8 r17 = [up], UPD
+ ld8 r17 = [up], UPD
br.cloop.dpnt .grt7
- or r15 = r27, r26
- FSH r24 = r18, cnt
- BSH r25 = r19, tnc
- br .Lr7
+ or r15 = r27, r26
+ FSH r24 = r18, cnt
+ BSH r25 = r19, tnc
+ br .Lr7
-.grt7: or r15 = r27, r26
- FSH r24 = r18, cnt
- BSH r25 = r19, tnc
- ld8 r18 = [up], UPD
- br .LL11
+.grt7: or r15 = r27, r26
+ FSH r24 = r18, cnt
+ BSH r25 = r19, tnc
+ ld8 r18 = [up], UPD
+ br .LL11
C *** MAIN LOOP START ***
ALIGN(32)
.Ltop:
- {.mmi; st8 [rp] = r14, UPD C M2
- or r15 = r27, r26 C M3
- FSH r24 = r18, cnt C I0
-}{.mmi; ld8 r18 = [up], UPD C M1
- lfetch [r11], PUPD
- BSH r25 = r19, tnc C I1
+ {.mmi; st8 [rp] = r14, UPD C M2
+ or r15 = r27, r26 C M3
+ FSH r24 = r18, cnt C I0
+}{.mmi; ld8 r18 = [up], UPD C M1
+ lfetch [r11], PUPD
+ BSH r25 = r19, tnc C I1
;; }
.LL11:
- {.mmi; st8 [rp] = r15, UPD
- or r14 = r21, r20
- FSH r26 = r19, cnt
-}{.mmi; ld8 r19 = [up], UPD
- nop.m 0
- BSH r27 = r16, tnc
+ {.mmi; st8 [rp] = r15, UPD
+ or r14 = r21, r20
+ FSH r26 = r19, cnt
+}{.mmi; ld8 r19 = [up], UPD
+ nop.m 0
+ BSH r27 = r16, tnc
;; }
.LL10:
- {.mmi; st8 [rp] = r14, UPD
- or r15 = r23, r22
- FSH r20 = r16, cnt
-}{.mmi; ld8 r16 = [up], UPD
- nop.m 0
- BSH r21 = r17, tnc
+ {.mmi; st8 [rp] = r14, UPD
+ or r15 = r23, r22
+ FSH r20 = r16, cnt
+}{.mmi; ld8 r16 = [up], UPD
+ nop.m 0
+ BSH r21 = r17, tnc
;; }
.LL01:
- {.mmi; st8 [rp] = r15, UPD
- or r14 = r25, r24
- FSH r22 = r17, cnt
-}{.mib; ld8 r17 = [up], UPD
- BSH r23 = r18, tnc
+ {.mmi; st8 [rp] = r15, UPD
+ or r14 = r25, r24
+ FSH r22 = r17, cnt
+}{.mib; ld8 r17 = [up], UPD
+ BSH r23 = r18, tnc
br.cloop.dptk .Ltop
;; }
-
C *** MAIN LOOP END ***
-.Lbot: or r15 = r27, r26
- FSH r24 = r18, cnt
- BSH r25 = r19, tnc
- st8 [rp] = r14, UPD
- ;;
-.Lr7: or r14 = r21, r20
- FSH r26 = r19, cnt
- BSH r27 = r16, tnc
- st8 [rp] = r15, UPD
- ;;
-.Lr6: or r15 = r23, r22
- FSH r20 = r16, cnt
- BSH r21 = r17, tnc
- st8 [rp] = r14, UPD
- ;;
-.Lr5: st8 [rp] = r15, UPD
- or r14 = r25, r24
- FSH r22 = r17, cnt
+.Lbot:
+ {.mmi; st8 [rp] = r14, UPD
+ or r15 = r27, r26
+ FSH r24 = r18, cnt
+}{.mib; nop 0
+ BSH r25 = r19, tnc
+ nop 0
+ ;; }
+.Lr7:
+ {.mmi; st8 [rp] = r15, UPD
+ or r14 = r21, r20
+ FSH r26 = r19, cnt
+}{.mib; nop 0
+ BSH r27 = r16, tnc
+ nop 0
+ ;; }
+.Lr6:
+ {.mmi; st8 [rp] = r14, UPD
+ or r15 = r23, r22
+ FSH r20 = r16, cnt
+}{.mib; nop 0
+ BSH r21 = r17, tnc
+ nop 0
+ ;; }
+.Lr5: st8 [rp] = r15, UPD
+ or r14 = r25, r24
+ FSH r22 = r17, cnt
;;
-.Lr4: or r15 = r27, r26
- st8 [rp] = r14, UPD
+.Lr4: st8 [rp] = r14, UPD
+ or r15 = r27, r26
;;
-.Lr3: or r14 = r21, r20
- st8 [rp] = r15, UPD
+.Lr3: st8 [rp] = r15, UPD
+ or r14 = r21, r20
;;
-.Lr2: st8 [rp] = r14, UPD
+.Lr2: st8 [rp] = r14, UPD
;;
-.Lr1: st8 [rp] = r22, UPD C M23
- mov ar.lc = r2 C I0
- br.ret.sptk.many b0 C B
+.Lr1: st8 [rp] = r22, UPD C M23
+ mov ar.lc = r2 C I0
+ br.ret.sptk.many b0 C B
EPILOGUE(func)
ASM_END()
--- /dev/null
+dnl IA-64 mpn_lshiftc.
+
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
+dnl Copyright 2000, 2001, 2002, 2003, 2004, 2005, 2010 Free Software
+dnl Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C Itanium: ?
+C Itanium 2: 1.25
+
+C This code is scheduled deeply since the plain shift instructions shr and shl
+C have a latency of 4 (on Itanium) or 3 (on Itanium 2). Poor scheduling of
+C these instructions causes a 10 cycle replay trap on Itanium.
+
+C The ld8 scheduling should probably be decreased to make the function smaller.
+C Good lfetch will make sure we never stall anyway.
+
+C We should actually issue the first ld8 at cycle 0, and the first BSH/FSH pair
+C at cycle 2. Judicious use of predicates could allow us to issue more ld8's
+C in the prologue.
+
+
+C INPUT PARAMETERS
+define(`rp', `r32')
+define(`up', `r33')
+define(`n', `r34')
+define(`cnt',`r35')
+
+define(`tnc',`r9')
+
+define(`FSH',`shl')
+define(`BSH',`shr.u')
+define(`UPD',`-8')
+define(`POFF',`-512')
+define(`PUPD',`-32')
+define(`func',`mpn_lshiftc')
+
+ASM_START()
+C Entry and dispatch.  Saves ar.lc in r2, forms tnc = 64 - cnt, and advances
+C both up and rp by 8*(n-1) bytes so that they address the last limb; every
+C later access steps by UPD (-8).  r10 receives the first limb read, r11 is
+C set to up + POFF for the lfetch in the main loop, and ar.lc is loaded with
+C (n-1) >> 2.  The low two bits of n pick the feed-in block: 1 -> .Lb01,
+C 2 -> .Lb10, 3 -> .Lb11, and 0 falls through into .Lb00.
+PROLOGUE(mpn_lshiftc)
+ .prologue
+ .save ar.lc, r2
+ .body
+ifdef(`HAVE_ABI_32',
+` addp4 rp = 0, rp C M I
+ addp4 up = 0, up C M I
+ sxt4 n = n C M I
+ zxt4 cnt = cnt C I
+ ;;
+')
+
+ {.mmi; nop 0 C M I
+ and r14 = 3, n C M I
+ mov.i r2 = ar.lc C I0
+}{.mmi; add r15 = -1, n C M I
+ sub tnc = 64, cnt C M I
+ nop 0
+ ;;
+}{.mmi; cmp.eq p6, p0 = 1, r14 C M I
+ cmp.eq p7, p0 = 2, r14 C M I
+ shr.u n = r15, 2 C I0
+}{.mmi; cmp.eq p8, p0 = 3, r14 C M I
+ shladd up = r15, 3, up C M I
+ shladd rp = r15, 3, rp C M I
+ ;;
+}{.mmi; add r11 = POFF, up C M I
+ ld8 r10 = [up], UPD C M01
+ mov.i ar.lc = n C I0
+}{.bbb;
+ (p6) br.dptk .Lb01
+ (p7) br.dptk .Lb10
+ (p8) br.dptk .Lb11
+ ;; }
+
+C Feed-in when the low two bits of n are zero (reached by falling through the
+C dispatch branches).  Three more limbs are loaded and r8 = r10 >> tnc is
+C formed; r8 is not written again in this routine (presumably the return
+C value, as the sibling shift code in this file annotates the same
+C instruction).  If ar.lc is zero (n = 4) the br.cloop falls through: all
+C shifts are done inline, r31 gets the complement of the first combined limb
+C (-1 - x), and control joins the wind-down at .Lr4.
+.Lb00:
+ ld8 r19 = [up], UPD
+ ;;
+ ld8 r16 = [up], UPD
+ ;;
+ ld8 r17 = [up], UPD
+ BSH r8 = r10, tnc
+ br.cloop.dptk L(gt4)
+ ;;
+ FSH r24 = r10, cnt
+ BSH r25 = r19, tnc
+ ;;
+ FSH r26 = r19, cnt
+ BSH r27 = r16, tnc
+ ;;
+ FSH r20 = r16, cnt
+ BSH r21 = r17, tnc
+ ;;
+ or r14 = r25, r24
+ FSH r22 = r17, cnt
+ ;;
+ or r15 = r27, r26
+ sub r31 = -1, r14
+ br .Lr4
+
+C More than 4 limbs: keep shifting while loading the next four limbs.  The
+C second br.cloop falls through when the loop counter has run out, joining
+C the wind-down at .Lr8; otherwise L(gt8) enters the main loop at .LL00.
+L(gt4):
+ {.mmi; nop 0
+ nop 0
+ FSH r24 = r10, cnt
+}{.mmi; ld8 r18 = [up], UPD
+ nop 0
+ BSH r25 = r19, tnc
+ ;; }
+ {.mmi; nop 0
+ nop 0
+ FSH r26 = r19, cnt
+}{.mmi; ld8 r19 = [up], UPD
+ nop 0
+ BSH r27 = r16, tnc
+ ;; }
+ {.mmi; nop 0
+ nop 0
+ FSH r20 = r16, cnt
+}{.mmi; ld8 r16 = [up], UPD
+ nop 0
+ BSH r21 = r17, tnc
+ ;; }
+ {.mmi; nop 0
+ or r14 = r25, r24
+ FSH r22 = r17, cnt
+}{.mib; ld8 r17 = [up], UPD
+ BSH r23 = r18, tnc
+ br.cloop.dptk L(gt8)
+ ;; }
+ {.mmi; nop 0
+ or r15 = r27, r26
+ FSH r24 = r18, cnt
+}{.mib; sub r31 = -1, r14
+ BSH r25 = r19, tnc
+ br .Lr8 }
+
+L(gt8):
+ or r15 = r27, r26
+ FSH r24 = r18, cnt
+ ld8 r18 = [up], UPD
+ sub r31 = -1, r14
+ BSH r25 = r19, tnc
+ br .LL00
+
+C Feed-in when the low two bits of n are 01.  With ar.lc zero (n = 1) the
+C br.cloop falls through: r8 = r10 >> tnc, the single result limb is the
+C complement of r10 << cnt (placed in r31), and control goes straight to the
+C final store at .Lr1.
+.Lb01:
+ br.cloop.dptk L(gt1)
+ ;;
+ BSH r8 = r10, tnc
+ FSH r22 = r10, cnt
+ ;;
+ sub r31 = -1, r22
+ br .Lr1
+ ;;
+C More than one limb: load four further limbs.  If the loop counter is then
+C exhausted the remaining shifts are done inline and the wind-down is joined
+C at .Lr5; otherwise L(gt5) preloads another round and jumps to L(end), the
+C lfetch/br.cloop at the bottom of the main loop.
+L(gt1):
+ ld8 r18 = [up], UPD
+ BSH r8 = r10, tnc
+ FSH r22 = r10, cnt
+ ;;
+ ld8 r19 = [up], UPD
+ ;;
+ ld8 r16 = [up], UPD
+ ;;
+ ld8 r17 = [up], UPD
+ BSH r23 = r18, tnc
+ br.cloop.dptk L(gt5)
+ ;;
+ nop 0
+ FSH r24 = r18, cnt
+ BSH r25 = r19, tnc
+ ;;
+ nop 0
+ FSH r26 = r19, cnt
+ BSH r27 = r16, tnc
+ ;;
+ or r15 = r23, r22
+ FSH r20 = r16, cnt
+ BSH r21 = r17, tnc
+ ;;
+ or r14 = r25, r24
+ FSH r22 = r17, cnt
+ sub r31 = -1, r15
+ br .Lr5
+
+L(gt5):
+ {.mmi; nop 0
+ nop 0
+ FSH r24 = r18, cnt
+}{.mmi; ld8 r18 = [up], UPD
+ nop 0
+ BSH r25 = r19, tnc
+ ;; }
+ {.mmi; nop 0
+ nop 0
+ FSH r26 = r19, cnt
+}{.mmi; ld8 r19 = [up], UPD
+ nop 0
+ BSH r27 = r16, tnc
+ ;; }
+ {.mmi; nop 0
+ or r15 = r23, r22
+ FSH r20 = r16, cnt
+}{.mmi; ld8 r16 = [up], UPD
+ nop 0
+ BSH r21 = r17, tnc
+ ;; }
+ {.mmi; or r14 = r25, r24
+ sub r31 = -1, r15
+ FSH r22 = r17, cnt
+}{.mib; ld8 r17 = [up], UPD
+ BSH r23 = r18, tnc
+ br L(end)
+ ;; }
+
+C Feed-in when the low two bits of n are 10.  One more limb is loaded into
+C r17.  With ar.lc zero (n = 2) the br.cloop falls through: r8 = r10 >> tnc,
+C r14 combines r10 << cnt with r17 >> tnc, r22 = r17 << cnt, and the
+C complement of r14 is handed to the wind-down at .Lr2 in r31.
+.Lb10:
+ ld8 r17 = [up], UPD
+ br.cloop.dptk L(gt2)
+ ;;
+ BSH r8 = r10, tnc
+ FSH r20 = r10, cnt
+ ;;
+ BSH r21 = r17, tnc
+ FSH r22 = r17, cnt
+ ;;
+ or r14 = r21, r20
+ ;;
+ sub r31 = -1, r14
+ br .Lr2
+ ;;
+C More than two limbs: load four further limbs.  If the loop counter is then
+C exhausted the wind-down is joined at .Lr6; otherwise L(gt6) preloads
+C another round and enters the main loop at .LL10.
+L(gt2):
+ ld8 r18 = [up], UPD
+ BSH r8 = r10, tnc
+ FSH r20 = r10, cnt
+ ;;
+ ld8 r19 = [up], UPD
+ ;;
+ ld8 r16 = [up], UPD
+ BSH r21 = r17, tnc
+ FSH r22 = r17, cnt
+ ;;
+ ld8 r17 = [up], UPD
+ BSH r23 = r18, tnc
+ br.cloop.dptk L(gt6)
+ ;;
+ nop 0
+ FSH r24 = r18, cnt
+ BSH r25 = r19, tnc
+ ;;
+ or r14 = r21, r20
+ FSH r26 = r19, cnt
+ BSH r27 = r16, tnc
+ ;;
+ {.mmi; nop 0
+ or r15 = r23, r22
+ FSH r20 = r16, cnt
+}{.mib; sub r31 = -1, r14
+ BSH r21 = r17, tnc
+ br .Lr6
+ ;; }
+L(gt6):
+ {.mmi; nop 0
+ nop 0
+ FSH r24 = r18, cnt
+}{.mmi; ld8 r18 = [up], UPD
+ nop 0
+ BSH r25 = r19, tnc
+ ;; }
+ {.mmi; nop 0
+ or r14 = r21, r20
+ FSH r26 = r19, cnt
+}{.mmi; ld8 r19 = [up], UPD
+ nop 0
+ BSH r27 = r16, tnc
+ ;; }
+ {.mmi; or r15 = r23, r22
+ sub r31 = -1, r14
+ FSH r20 = r16, cnt
+}{.mib; ld8 r16 = [up], UPD
+ BSH r21 = r17, tnc
+ br .LL10
+}
+
+C Feed-in when the low two bits of n are 11.  Two more limbs are loaded into
+C r16 and r17.  With ar.lc zero (n = 3) the br.cloop falls through:
+C r8 = r10 >> tnc, r15 and r14 hold the two combined limbs, r22 = r17 << cnt,
+C and the complement of r15 (-1 - r15) is handed to the wind-down at .Lr3.
+.Lb11:
+ ld8 r16 = [up], UPD
+ ;;
+ ld8 r17 = [up], UPD
+ BSH r8 = r10, tnc
+ FSH r26 = r10, cnt
+ br.cloop.dptk L(gt3)
+ ;;
+ BSH r27 = r16, tnc
+ ;;
+ FSH r20 = r16, cnt
+ BSH r21 = r17, tnc
+ ;;
+ FSH r22 = r17, cnt
+ ;;
+ or r15 = r27, r26
+ ;;
+ or r14 = r21, r20
+ sub r31 = -1, r15
+ br .Lr3
+ ;;
+C More than three limbs: load four further limbs while shifting.  If the loop
+C counter is then exhausted the wind-down is joined at .Lr7; otherwise L(gt7)
+C preloads another round and enters the main loop at .LL11.
+L(gt3):
+ ld8 r18 = [up], UPD
+ ;;
+ ld8 r19 = [up], UPD
+ BSH r27 = r16, tnc
+ ;;
+ {.mmi; nop 0
+ nop 0
+ FSH r20 = r16, cnt
+}{.mmi; ld8 r16 = [up], UPD
+ nop 0
+ BSH r21 = r17, tnc
+ ;; }
+C The template pseudo-op needs the same ; separator as every other bundle
+C before the first instruction on the line.
+ {.mmi; nop 0
+ nop 0
+ FSH r22 = r17, cnt
+}{.mib; ld8 r17 = [up], UPD
+ BSH r23 = r18, tnc
+ br.cloop.dptk L(gt7)
+ ;; }
+ or r15 = r27, r26
+ FSH r24 = r18, cnt
+ BSH r25 = r19, tnc
+ ;;
+ {.mmi; nop 0
+ or r14 = r21, r20
+ FSH r26 = r19, cnt
+}{.mib; sub r31 = -1, r15
+ BSH r27 = r16, tnc
+ br .Lr7
+}
+L(gt7):
+ {.mmi; nop 0
+ or r15 = r27, r26
+ FSH r24 = r18, cnt
+}{.mmi; ld8 r18 = [up], UPD
+ nop 0
+ BSH r25 = r19, tnc
+ ;; }
+ {.mmi; or r14 = r21, r20
+ sub r31 = -1, r15
+ FSH r26 = r19, cnt
+}{.mib; ld8 r19 = [up], UPD
+ BSH r27 = r16, tnc
+ br .LL11
+}
+
+C *** MAIN LOOP START ***
+C Four limbs per iteration.  Each stage stores the complemented limb held in
+C r31, ORs a previously shifted pair into r14 or r15, starts the next FSH/BSH
+C pair, loads the next source limb, and complements the preceding OR result
+C into r31 (sub from -1).  .LL00, .LL11, .LL10 and L(end) are the entry
+C points used by the feed-in blocks.  L(end) prefetches through r11 (stepping
+C by PUPD) and loops while ar.lc is non-zero.
+ ALIGN(32)
+L(top):
+.LL01:
+ {.mmi; st8 [rp] = r31, UPD C M2
+ or r15 = r27, r26 C M3
+ FSH r24 = r18, cnt C I0
+}{.mmi; ld8 r18 = [up], UPD C M0
+ sub r31 = -1, r14 C M1
+ BSH r25 = r19, tnc C I1
+ ;; }
+.LL00:
+ {.mmi; st8 [rp] = r31, UPD
+ or r14 = r21, r20
+ FSH r26 = r19, cnt
+}{.mmi; ld8 r19 = [up], UPD
+ sub r31 = -1, r15
+ BSH r27 = r16, tnc
+ ;; }
+.LL11:
+ {.mmi; st8 [rp] = r31, UPD
+ or r15 = r23, r22
+ FSH r20 = r16, cnt
+}{.mmi; ld8 r16 = [up], UPD
+ sub r31 = -1, r14
+ BSH r21 = r17, tnc
+ ;; }
+.LL10:
+ {.mmi; st8 [rp] = r31, UPD
+ or r14 = r25, r24
+ FSH r22 = r17, cnt
+}{.mmi; ld8 r17 = [up], UPD
+ sub r31 = -1, r15
+ BSH r23 = r18, tnc
+ ;; }
+L(end): lfetch [r11], PUPD
+ br.cloop.dptk L(top)
+C *** MAIN LOOP END ***
+
+C Wind-down.  No further loads are issued: the limbs already in registers are
+C shifted, combined, complemented and stored.  The code falls through from
+C the main loop into the first bundle; .Lr8 down to .Lr1 are entry points
+C for the feed-in blocks.  Each stage stores r31 and prepares the next
+C complemented limb.  The last limb stored is the complement of r22.
+C .Lr1 also restores ar.lc from r2 before returning.
+ {.mmi; st8 [rp] = r31, UPD
+ or r15 = r27, r26
+ FSH r24 = r18, cnt
+}{.mib; sub r31 = -1, r14
+ BSH r25 = r19, tnc
+ nop 0
+ ;; }
+.Lr8:
+ {.mmi; st8 [rp] = r31, UPD
+ or r14 = r21, r20
+ FSH r26 = r19, cnt
+}{.mib; sub r31 = -1, r15
+ BSH r27 = r16, tnc
+ nop 0
+ ;; }
+.Lr7:
+ {.mmi; st8 [rp] = r31, UPD
+ or r15 = r23, r22
+ FSH r20 = r16, cnt
+}{.mib; sub r31 = -1, r14
+ BSH r21 = r17, tnc
+ nop 0
+ ;; }
+.Lr6: st8 [rp] = r31, UPD
+ or r14 = r25, r24
+ FSH r22 = r17, cnt
+ sub r31 = -1, r15
+ ;;
+.Lr5: st8 [rp] = r31, UPD
+ or r15 = r27, r26
+ sub r31 = -1, r14
+ ;;
+.Lr4: st8 [rp] = r31, UPD
+ or r14 = r21, r20
+ sub r31 = -1, r15
+ ;;
+.Lr3: st8 [rp] = r31, UPD
+ sub r31 = -1, r14
+ ;;
+.Lr2: st8 [rp] = r31, UPD
+ sub r31 = -1, r22
+ ;;
+.Lr1: st8 [rp] = r31, UPD C M23
+ mov ar.lc = r2 C I0
+ br.ret.sptk.many b0 C B
+EPILOGUE(func)
+ASM_END()
--- /dev/null
+dnl IA-64 mpn_mod_34lsub1
+
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
+dnl Copyright 2003, 2004, 2005, 2010 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C Itanium: ?
+C Itanium 2: 1
+
+
+C INPUT PARAMETERS
+define(`up', `r32')
+define(`n', `r33')
+
+C Some useful aliases for registers we use
+define(`u0',`r14') define(`u1',`r15') define(`u2',`r16')
+define(`a0',`r17') define(`a1',`r18') define(`a2',`r19')
+define(`c0',`r20') define(`c1',`r21') define(`c2',`r22')
+
+C This is a fairly simple-minded implementation. One could approach 0.67 c/l
+C with a more sophisticated implementation. If we're really crazy, we could
+C super-unroll, storing carries just in predicate registers, then copy them to
+C a general register, and population count them from there. That'd bring us
+C close to 3 insn/limb, for nearly 0.5 c/l.
+
+C Computing n/3 needs 16 cycles, which is a lot of startup overhead.
+C We therefore use a plain while-style loop:
+C add n = -3, n
+C cmp.le p9, p0 = 3, n
+C (p9) br.cond .Loop
+C Alternatively, we could table n/3 for, say, n < 256, and predicate the
+C 16-cycle code.
+
+C The summing-up code at the end was written quickly, and could surely be
+C vastly improved.
+
+ASM_START()
+C Entry.  Loads the first limb into u0 and branches to the general code
+C unless n is 1.  For n = 1 the result in r8 is the sum of the top 16 bits
+C (u0 >> 48) and the low 48 bits of u0.
+C NOTE(review): the .save directive names r2, but no instruction visible in
+C this routine copies ar.lc to r2, and the only write to ar.lc sits in the
+C disabled block below; confirm the unwind annotation is intended.
+PROLOGUE(mpn_mod_34lsub1)
+ .prologue
+ .save ar.lc, r2
+ .body
+ifdef(`HAVE_ABI_32',`
+ addp4 up = 0, up C M I
+ zxt4 n = n C I
+ ;;
+')
+
+C The ifelse block below compares 0 with 1 and so never expands; it keeps the
+C multiply-based n/3 loop count that the notes above describe as too costly.
+ifelse(0,1,`
+ movl r14 = 0xAAAAAAAAAAAAAAAB
+ ;;
+ setf.sig f6 = r14
+ setf.sig f7 = r33
+ ;;
+ xmpy.hu f6 = f6, f7
+ ;;
+ getf.sig r8 = f6
+ ;;
+ shr.u r8 = r8, 1 C Loop count
+ ;;
+ mov.i ar.lc = r8
+')
+
+ ld8 u0 = [up], 8
+ cmp.ne p9, p0 = 1, n
+ (p9) br L(gt1)
+ ;;
+ shr.u r8 = u0, 48
+ dep.z r27 = u0, 0, 48
+ ;;
+ add r8 = r8, r27
+ br.ret.sptk.many b0
+
+
+C Setup for n > 1.  Clears the three accumulators a0..a2 and the three
+C counters c0..c2, subtracts 2 from n (u0 is already loaded and u1 is loaded
+C here), and clears p6 so the first predicated c0 increment does nothing.
+C If fewer than 3 limbs remain (3 > n) the loop is skipped.
+L(gt1):
+.mmi; nop.m 0
+ mov a0 = 0
+ add n = -2, n
+.mmi; mov c0 = 0
+ mov c1 = 0
+ mov c2 = 0
+ ;;
+.mmi; ld8 u1 = [up], 8
+ mov a1 = 0
+ cmp.ltu p6, p0 = r0, r0 C clear p6
+.mmb; cmp.gt p9, p0 = 3, n
+ mov a2 = 0
+ (p9) br.cond.dptk L(end)
+ ;;
+
+C Main loop, three limbs per iteration.  Each group subtracts one limb from
+C its accumulator (a0 -= u0, a1 -= u1, a2 -= u2) and sets a predicate when
+C the accumulator was below the limb, i.e. when the subtraction borrows.
+C The following group uses that predicate to increment a counter: p7 -> c1,
+C p8 -> c2, p6 -> c0.  n drops by 3 per pass and the loop repeats while
+C 3 <= n.
+ ALIGN(32)
+L(top):
+.mmi; ld8 u2 = [up], 8
+ (p6) add c0 = 1, c0
+ cmp.ltu p7, p0 = a0, u0
+.mmb; sub a0 = a0, u0
+ add n = -3, n
+ nop.b 0
+ ;;
+.mmi; ld8 u0 = [up], 8
+ (p7) add c1 = 1, c1
+ cmp.ltu p8, p0 = a1, u1
+.mmb; sub a1 = a1, u1
+ cmp.le p9, p0 = 3, n
+ nop.b 0
+ ;;
+.mmi; ld8 u1 = [up], 8
+ (p8) add c2 = 1, c2
+ cmp.ltu p6, p0 = a2, u2
+.mmb; sub a2 = a2, u2
+ nop.m 0
+dnl br.cloop.dptk L(top)
+ (p9) br.cond.dptk L(top)
+ ;;
+
+C Tail dispatch on the remaining count n.  u0 and u1 are already loaded but
+C not yet subtracted.  n = 0 goes to L(0), n = 1 leaves L(2) after its first
+C bundle for L(1), and any other value runs L(2) to the end.  All three
+C paths also apply the borrow predicates still pending and finish at L(com).
+L(end):
+ cmp.eq p10, p0 = 0, n
+ cmp.eq p11, p0 = 1, n
+ (p10) br L(0)
+
+C Two limbs left to load: u2 here, then a fresh u0 after the branch.
+L(2):
+.mmi; ld8 u2 = [up], 8
+ (p6) add c0 = 1, c0
+ cmp.ltu p7, p0 = a0, u0
+.mmb; sub a0 = a0, u0
+ nop.m 0
+ (p11) br L(1)
+ ;;
+ ld8 u0 = [up], 8
+ (p7) add c1 = 1, c1
+ cmp.ltu p8, p0 = a1, u1
+ sub a1 = a1, u1
+ ;;
+ (p8) add c2 = 1, c2
+ cmp.ltu p6, p0 = a2, u2
+ sub a2 = a2, u2
+ ;;
+ (p6) add c0 = 1, c0
+ cmp.ltu p7, p0 = a0, u0
+ sub a0 = a0, u0
+ ;;
+ (p7) add c1 = 1, c1
+ br L(com)
+
+
+C One limb was left: u2 has been loaded and a0 updated by the first bundle of
+C L(2); finish with u1 and u2.
+L(1):
+ (p7) add c1 = 1, c1
+ cmp.ltu p8, p0 = a1, u1
+ sub a1 = a1, u1
+ ;;
+ (p8) add c2 = 1, c2
+ cmp.ltu p6, p0 = a2, u2
+ sub a2 = a2, u2
+ ;;
+ (p6) add c0 = 1, c0
+ br L(com)
+
+
+C Nothing left to load: subtract the pending u0 and u1, then fall through
+C into L(com).
+L(0):
+ (p6) add c0 = 1, c0
+ cmp.ltu p7, p0 = a0, u0
+ sub a0 = a0, u0
+ ;;
+ (p7) add c1 = 1, c1
+ cmp.ltu p8, p0 = a1, u1
+ sub a1 = a1, u1
+ ;;
+ (p8) add c2 = 1, c2
+
+
+C Final folding.  Each accumulator a0..a2 and each counter c0..c2 is split
+C at the 48-bit boundaries drawn below: the shr.u gives the upper part and
+C the dep.z places the lower part at bit 0, 16 or 32.  The a pieces are
+C summed into r24 and the c pieces into r10.  The value returned in r8 is
+C 0xffffffffffff0 minus the a sum plus the c sum.
+L(com):
+C | a2 | a1 | a0 |
+C | | | | |
+ shr.u r24 = a0, 48 C 16 bits
+ shr.u r25 = a1, 32 C 32 bits
+ shr.u r26 = a2, 16 C 48 bits
+ ;;
+ shr.u r10 = c0, 48 C 16 bits, always zero
+ shr.u r11 = c1, 32 C 32 bits
+ shr.u r30 = c2, 16 C 48 bits
+ ;;
+ dep.z r27 = a0, 0, 48 C 48 bits
+ dep.z r28 = a1, 16, 32 C 48 bits
+ dep.z r29 = a2, 32, 16 C 48 bits
+ dep.z r31 = c0, 0, 48 C 48 bits
+ dep.z r14 = c1, 16, 32 C 48 bits
+ dep.z r15 = c2, 32, 16 C 48 bits
+ ;;
+.mmi; add r24 = r24, r25
+ add r26 = r26, r27
+ add r28 = r28, r29
+.mmi; add r10 = r10, r11
+ add r30 = r30, r31
+ add r14 = r14, r15
+ ;;
+ movl r8 = 0xffffffffffff0
+ add r24 = r24, r26
+ add r10 = r10, r30
+ ;;
+ add r24 = r24, r28
+ add r10 = r10, r14
+ ;;
+ sub r8 = r8, r24
+ ;;
+ add r8 = r8, r10
+ br.ret.sptk.many b0
+EPILOGUE()
+ASM_END()
dnl Itanium-2 mpn_modexact_1c_odd -- mpn by 1 exact remainder.
+dnl Contributed to the GNU project by Kevin Ryde.
+
dnl Copyright 2003, 2004, 2005 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl IA-64 mpn_mul_1, mpn_mul_1c -- Multiply a limb vector with a limb and
dnl store the result in a second limb vector.
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
dnl Copyright 2000, 2001, 2002, 2003, 2004, 2006, 2007 Free Software
dnl Foundation, Inc.
(p6) cmp.leu p8, p9 = r24, r17
(p7) cmp.ltu p8, p9 = r24, r17
;;
- .pred.rel "mutex",p8,p9
(p8) add r8 = 1, r8
mov.i ar.lc = r2
br.ret.sptk.many b0
dnl IA-64 mpn_mul_2 -- Multiply a n-limb number with a 2-limb number and store
dnl store the result to a (n+1)-limb number.
-dnl Copyright 2004 Free Software Foundation, Inc.
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
+dnl Copyright 2004, 2011 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
include(`../config.m4')
C cycles/limb
-C Itanium: 3.15
-C Itanium 2: 1.625
-
-C Note that this is very similar to addmul_2.asm. If you change this file,
-C please change that file too.
+C Itanium: ?
+C Itanium 2: 1.5
C TODO
C * Clean up variable names, and try to decrease the number of distinct
C registers used.
-C * Cleanup feed-in code to not require zeroing several registers.
+C * Clean up feed-in code to not require zeroing several registers.
C * Make sure we don't depend on uninitialized predicate registers.
-C * We currently cross-jump very aggressively, at the expense of a few cycles
-C per operation. Consider changing that.
C * Could perhaps save a few cycles by using 1 c/l carry propagation in
C wind-down code.
C * Ultimately rewrite. The problem with this code is that it first uses a
.save ar.lc, r2
.body
-ifdef(`HAVE_ABI_32',
-` addp4 rp = 0, rp C M I
- addp4 up = 0, up C M I
- addp4 vp = 0, vp C M I
- zxt4 n = n C I
+ifdef(`HAVE_ABI_32',`
+.mmi; addp4 rp = 0, rp C M I
+ addp4 up = 0, up C M I
+ addp4 vp = 0, vp C M I
+.mmi; nop 1
+ nop 1
+ zxt4 n = n C I
;;')
-{.mmi C 00
- ldf8 ux = [up], 8 C M
- ldf8 v0 = [vp], 8 C M
- mov.i r2 = ar.lc C I0
-}{.mmi
- nop 0 C M
- and r14 = 3, n C M I
- add n = -2, n C M I
- ;;
-}{.mmi C 01
- ldf8 uy = [up], 8 C M
- ldf8 v1 = [vp] C M
- shr.u n = n, 2 C I
-}{.mmi
- nop 0 C M
- cmp.eq p10, p0 = 1, r14 C M I
- cmp.eq p11, p0 = 2, r14 C M I
- ;;
-}{.mmi C 02
- nop 0 C M
- cmp.eq p12, p0 = 3, r14 C M I
- mov.i ar.lc = n C I0
-}{.bbb
- (p10) br.dptk .Lb01 C B
- (p11) br.dptk .Lb10 C B
- (p12) br.dptk .Lb11 C B
- ;;
-}
+.mmi; ldf8 ux = [up], 8 C M
+ ldf8 v0 = [vp], 8 C M
+ mov r2 = ar.lc C I0
+.mmi; nop 1 C M
+ and r14 = 3, n C M I
+ add n = -2, n C M I
+ ;;
+.mmi; ldf8 uy = [up], 8 C M
+ ldf8 v1 = [vp] C M
+ shr.u n = n, 2 C I
+.mmi; nop 1 C M
+ cmp.eq p10, p0 = 1, r14 C M I
+ cmp.eq p11, p0 = 2, r14 C M I
+ ;;
+.mmi; nop 1 C M
+ cmp.eq p12, p0 = 3, r14 C M I
+ mov ar.lc = n C I0
+.bbb; (p10) br.dptk L(b01) C B
+ (p11) br.dptk L(b10) C B
+ (p12) br.dptk L(b11) C B
+ ;;
ALIGN(32)
-.Lb00: ldf8 u_1 = [up], 8
- mov acc1_2 = 0
- mov pr1_2 = 0
- mov pr0_3 = 0
- cmp.ne p8, p9 = r0, r0
+L(b00): ldf8 u_1 = [up], 8
+ mov acc1_2 = 0
+ mov pr1_2 = 0
+ mov pr0_3 = 0
+ cmp.ne p8, p9 = r0, r0
;;
- xma.l fp0b_3 = ux, v0, f0
- cmp.ne p12, p13 = r0, r0
- ldf8 u_2 = [up], 8
- xma.hu fp1a_3 = ux, v0, f0
- br.cloop.dptk .grt4
+ xma.l fp0b_3 = ux, v0, f0
+ cmp.ne p12, p13 = r0, r0
+ ldf8 u_2 = [up], 8
+ xma.hu fp1a_3 = ux, v0, f0
+ br.cloop.dptk L(gt4)
- xma.l fp0b_0 = uy, v0, f0
- xma.hu fp1a_0 = uy, v0, f0
+ xma.l fp0b_0 = uy, v0, f0
+ xma.hu fp1a_0 = uy, v0, f0
;;
- getf.sig acc0 = fp0b_3
- xma.l fp1b_3 = ux, v1, fp1a_3
- xma.hu fp2a_3 = ux, v1, fp1a_3
+ getfsig acc0 = fp0b_3
+ xma.l fp1b_3 = ux, v1, fp1a_3
+ xma.hu fp2a_3 = ux, v1, fp1a_3
;;
- xma.l fp0b_1 = u_1, v0, f0
- xma.hu fp1a_1 = u_1, v0, f0
+ xma.l fp0b_1 = u_1, v0, f0
+ xma.hu fp1a_1 = u_1, v0, f0
;;
- getf.sig pr0_0 = fp0b_0
- xma.l fp1b_0 = uy, v1, fp1a_0
- xma.hu fp2a_0 = uy, v1, fp1a_0
+ getfsig pr0_0 = fp0b_0
+ xma.l fp1b_0 = uy, v1, fp1a_0
+ xma.hu fp2a_0 = uy, v1, fp1a_0
;;
- getf.sig pr1_3 = fp1b_3
- getf.sig acc1_3 = fp2a_3
- xma.l fp0b_2 = u_2, v0, f0
- xma.hu fp1a_2 = u_2, v0, f0
- br .Lcj4
+ getfsig pr1_3 = fp1b_3
+ getfsig acc1_3 = fp2a_3
+ xma.l fp0b_2 = u_2, v0, f0
+ xma.hu fp1a_2 = u_2, v0, f0
+ br L(cj4)
-.grt4: xma.l fp0b_0 = uy, v0, f0
- xma.hu fp1a_0 = uy, v0, f0
+L(gt4): xma.l fp0b_0 = uy, v0, f0
+ xma.hu fp1a_0 = uy, v0, f0
;;
- getf.sig acc0 = fp0b_3
- xma.l fp1b_3 = ux, v1, fp1a_3
- ldf8 u_3 = [up], 8
- xma.hu fp2a_3 = ux, v1, fp1a_3
+ getfsig acc0 = fp0b_3
+ xma.l fp1b_3 = ux, v1, fp1a_3
+ ldf8 u_3 = [up], 8
+ xma.hu fp2a_3 = ux, v1, fp1a_3
;;
- xma.l fp0b_1 = u_1, v0, f0
- xma.hu fp1a_1 = u_1, v0, f0
+ xma.l fp0b_1 = u_1, v0, f0
+ xma.hu fp1a_1 = u_1, v0, f0
;;
- getf.sig pr0_0 = fp0b_0
- xma.l fp1b_0 = uy, v1, fp1a_0
- xma.hu fp2a_0 = uy, v1, fp1a_0
+ getfsig pr0_0 = fp0b_0
+ xma.l fp1b_0 = uy, v1, fp1a_0
+ xma.hu fp2a_0 = uy, v1, fp1a_0
;;
- ldf8 u_0 = [up], 8
- getf.sig pr1_3 = fp1b_3
+ ldf8 u_0 = [up], 8
+ getfsig pr1_3 = fp1b_3
+ xma.l fp0b_2 = u_2, v0, f0
;;
- getf.sig acc1_3 = fp2a_3
- xma.l fp0b_2 = u_2, v0, f0
- xma.hu fp1a_2 = u_2, v0, f0
- br .LL00
+ getfsig acc1_3 = fp2a_3
+ xma.hu fp1a_2 = u_2, v0, f0
+ br L(00)
ALIGN(32)
-.Lb01: ldf8 u_0 = [up], 8 C M
- mov acc1_1 = 0 C M I
- mov pr1_1 = 0 C M I
- mov pr0_2 = 0 C M I
- cmp.ne p6, p7 = r0, r0 C M I
+L(b01): ldf8 u_0 = [up], 8 C M
+ mov acc1_1 = 0 C M I
+ mov pr1_1 = 0 C M I
+ mov pr0_2 = 0 C M I
+ cmp.ne p6, p7 = r0, r0 C M I
;;
- xma.l fp0b_2 = ux, v0, f0 C F
- cmp.ne p10, p11 = r0, r0 C M I
- ldf8 u_1 = [up], 8 C M
- xma.hu fp1a_2 = ux, v0, f0 C F
+ xma.l fp0b_2 = ux, v0, f0 C F
+ cmp.ne p10, p11 = r0, r0 C M I
+ ldf8 u_1 = [up], 8 C M
+ xma.hu fp1a_2 = ux, v0, f0 C F
;;
- xma.l fp0b_3 = uy, v0, f0 C F
- xma.hu fp1a_3 = uy, v0, f0 C F
+ xma.l fp0b_3 = uy, v0, f0 C F
+ xma.hu fp1a_3 = uy, v0, f0 C F
;;
- getf.sig acc0 = fp0b_2 C M
- xma.l fp1b_2 = ux, v1,fp1a_2 C F
- xma.hu fp2a_2 = ux, v1,fp1a_2 C F
- ldf8 u_2 = [up], 8 C M
- br.cloop.dptk .grt5
+ getfsig acc0 = fp0b_2 C M
+ xma.l fp1b_2 = ux, v1,fp1a_2 C F
+ ldf8 u_2 = [up], 8 C M
+ xma.hu fp2a_2 = ux, v1,fp1a_2 C F
+ br.cloop.dptk L(gt5)
- xma.l fp0b_0 = u_0, v0, f0 C F
- xma.hu fp1a_0 = u_0, v0, f0 C F
+ xma.l fp0b_0 = u_0, v0, f0 C F
+ xma.hu fp1a_0 = u_0, v0, f0 C F
;;
- getf.sig pr0_3 = fp0b_3 C M
- xma.l fp1b_3 = uy, v1,fp1a_3 C F
- xma.hu fp2a_3 = uy, v1,fp1a_3 C F
+ getfsig pr0_3 = fp0b_3 C M
+ xma.l fp1b_3 = uy, v1,fp1a_3 C F
+ xma.hu fp2a_3 = uy, v1,fp1a_3 C F
;;
- getf.sig pr1_2 = fp1b_2 C M
- getf.sig acc1_2 = fp2a_2 C M
- xma.l fp0b_1 = u_1, v0, f0 C F
- xma.hu fp1a_1 = u_1, v0, f0 C F
- br .Lcj5
+ getfsig pr1_2 = fp1b_2 C M
+ getfsig acc1_2 = fp2a_2 C M
+ xma.l fp0b_1 = u_1, v0, f0 C F
+ xma.hu fp1a_1 = u_1, v0, f0 C F
+ br L(cj5)
-.grt5: xma.l fp0b_0 = u_0, v0, f0
- xma.hu fp1a_0 = u_0, v0, f0
+L(gt5): xma.l fp0b_0 = u_0, v0, f0
+ xma.hu fp1a_0 = u_0, v0, f0
;;
- getf.sig pr0_3 = fp0b_3
- xma.l fp1b_3 = uy, v1, fp1a_3
- xma.hu fp2a_3 = uy, v1, fp1a_3
+ getfsig pr0_3 = fp0b_3
+ xma.l fp1b_3 = uy, v1, fp1a_3
+ xma.hu fp2a_3 = uy, v1, fp1a_3
;;
- ldf8 u_3 = [up], 8
- getf.sig pr1_2 = fp1b_2
+ ldf8 u_3 = [up], 8
+ getfsig pr1_2 = fp1b_2
+ xma.l fp0b_1 = u_1, v0, f0
;;
- getf.sig acc1_2 = fp2a_2
- xma.l fp0b_1 = u_1, v0, f0
- xma.hu fp1a_1 = u_1, v0, f0
- br .LL01
+ getfsig acc1_2 = fp2a_2
+ xma.hu fp1a_1 = u_1, v0, f0
+ br L(01)
-C We have two variants for n = 2. They turn out to run at exactly the same
-C speed. But the first, odd variant might allow one cycle to be trimmed.
ALIGN(32)
-ifdef(`',`
-.Lb10: C 03
- br.cloop.dptk .grt2
- C 04
- C 05
- C 06
- xma.l fp0b_1 = ux, v0, f0 C 0
- xma.hu fp1a_1 = ux, v0, f0 C 1
- ;; C 07
- xma.l fp0b_2 = uy, v0, f0 C 1
- xma.l fp1b_1 = ux, v1, f0 C 1
- ;; C 08
- xma.hu fp1a_2 = uy, v0, f0 C 2
- xma.hu fp2a_1 = ux, v1, f0 C 2
- ;; C 09
- xma.l fp1b_2 = uy, v1, f0 C 2
- xma.hu fp2a_2 = uy, v1, f0 C 3
- ;; C 10
- getf.sig r16 = fp1a_1
- stf8 [rp] = fp0b_1, 8
- ;; C 11
- getf.sig r17 = fp0b_2
- C 12
- getf.sig r18 = fp1b_1
- C 13
- getf.sig r19 = fp1a_2
- C 14
- getf.sig r20 = fp2a_1
- C 15
- getf.sig r21 = fp1b_2
- ;; C 16
- getf.sig r8 = fp2a_2
- add r24 = r16, r17
- ;; C 17
- cmp.ltu p6, p7 = r24, r16
- add r26 = r24, r18
- ;; C 18
- cmp.ltu p8, p9 = r26, r24
- ;; C 19
- st8 [rp] = r26, 8
- (p6) add r25 = r19, r20, 1
- (p7) add r25 = r19, r20
- ;; C 20
- (p8) add r27 = r25, r21, 1
- (p9) add r27 = r25, r21
- (p6) cmp.leu p10, p0 = r25, r19
- (p7) cmp.ltu p10, p0 = r25, r19
- ;; C 21
- (p10) add r8 = 1, r8
- (p8) cmp.leu p12, p0 = r27, r25
- (p9) cmp.ltu p12, p0 = r27, r25
- ;; C 22
- st8 [rp] = r27, 8
- mov.i ar.lc = r2
- (p12) add r8 = 1, r8
- br.ret.sptk.many b0
-')
-
-.Lb10: C 03
- br.cloop.dptk .grt2
- C 04
- C 05
- C 06
- xma.l fp0b_1 = ux, v0, f0
- xma.hu fp1a_1 = ux, v0, f0
- ;; C 07
- xma.l fp0b_2 = uy, v0, f0
- xma.hu fp1a_2 = uy, v0, f0
- ;; C 08
- C 09
- C 10
- stf8 [rp] = fp0b_1, 8
- xma.l fp1b_1 = ux, v1, fp1a_1
- xma.hu fp2a_1 = ux, v1, fp1a_1
- ;; C 11
- getf.sig acc0 = fp0b_2
- xma.l fp1b_2 = uy, v1, fp1a_2
- xma.hu fp2a_2 = uy, v1, fp1a_2
- ;; C 12
- C 13
- C 14
- getf.sig pr1_1 = fp1b_1
- C 15
- getf.sig acc1_1 = fp2a_1
- C 16
- getf.sig pr1_2 = fp1b_2
- C 17
- getf.sig r8 = fp2a_2
- ;; C 18
- C 19
- add s0 = pr1_1, acc0
- ;; C 20
- st8 [rp] = s0, 8
- cmp.ltu p8, p9 = s0, pr1_1
- sub r31 = -1, acc1_1
- ;; C 21
- .pred.rel "mutex", p8, p9
- (p8) add acc0 = pr1_2, acc1_1, 1
- (p9) add acc0 = pr1_2, acc1_1
- (p8) cmp.leu p10, p0 = r31, pr1_2
- (p9) cmp.ltu p10, p0 = r31, pr1_2
- ;; C 22
- st8 [rp] = acc0, 8
- mov.i ar.lc = r2
- (p10) add r8 = 1, r8
- br.ret.sptk.many b0
-
-
-.grt2: ldf8 u_3 = [up], 8
- mov acc1_0 = 0
- mov pr1_0 = 0
- ;;
- mov pr0_1 = 0
- xma.l fp0b_1 = ux, v0, f0
- ldf8 u_0 = [up], 8
- xma.hu fp1a_1 = ux, v0, f0
- ;;
- xma.l fp0b_2 = uy, v0, f0
- xma.hu fp1a_2 = uy, v0, f0
- ;;
- getf.sig acc0 = fp0b_1
- xma.l fp1b_1 = ux, v1, fp1a_1
- xma.hu fp2a_1 = ux, v1, fp1a_1
- ;;
- ldf8 u_1 = [up], 8
- xma.l fp0b_3 = u_3, v0, f0
- xma.hu fp1a_3 = u_3, v0, f0
- ;;
- getf.sig pr0_2 = fp0b_2
- xma.l fp1b_2 = uy, v1, fp1a_2
- xma.hu fp2a_2 = uy, v1, fp1a_2
- ;;
- ldf8 u_2 = [up], 8
- getf.sig pr1_1 = fp1b_1
- ;;
- getf.sig acc1_1 = fp2a_1
- xma.l fp0b_0 = u_0, v0, f0
- cmp.ne p8, p9 = r0, r0
- cmp.ne p12, p13 = r0, r0
- xma.hu fp1a_0 = u_0, v0, f0
- br .LL10
+L(b10): br.cloop.dptk L(gt2)
+ xma.l fp0b_1 = ux, v0, f0
+ xma.hu fp1a_1 = ux, v0, f0
+ ;;
+ xma.l fp0b_2 = uy, v0, f0
+ xma.hu fp1a_2 = uy, v0, f0
+ ;;
+ stf8 [rp] = fp0b_1, 8
+ xma.l fp1b_1 = ux, v1, fp1a_1
+ xma.hu fp2a_1 = ux, v1, fp1a_1
+ ;;
+ getfsig acc0 = fp0b_2
+ xma.l fp1b_2 = uy, v1, fp1a_2
+ xma.hu fp2a_2 = uy, v1, fp1a_2
+ ;;
+ getfsig pr1_1 = fp1b_1
+ getfsig acc1_1 = fp2a_1
+ mov ar.lc = r2
+ getfsig pr1_2 = fp1b_2
+ getfsig r8 = fp2a_2
+ ;;
+ add s0 = pr1_1, acc0
+ ;;
+ st8 [rp] = s0, 8
+ cmp.ltu p8, p9 = s0, pr1_1
+ sub r31 = -1, acc1_1
+ ;;
+ .pred.rel "mutex", p8, p9
+ (p8) add acc0 = pr1_2, acc1_1, 1
+ (p9) add acc0 = pr1_2, acc1_1
+ (p8) cmp.leu p10, p0 = r31, pr1_2
+ (p9) cmp.ltu p10, p0 = r31, pr1_2
+ ;;
+ st8 [rp] = acc0, 8
+ (p10) add r8 = 1, r8
+ br.ret.sptk.many b0
+
+L(gt2): ldf8 u_3 = [up], 8
+ mov acc1_0 = 0
+ mov pr1_0 = 0
+ ;;
+ mov pr0_1 = 0
+ xma.l fp0b_1 = ux, v0, f0
+ ldf8 u_0 = [up], 8
+ xma.hu fp1a_1 = ux, v0, f0
+ ;;
+ xma.l fp0b_2 = uy, v0, f0
+ xma.hu fp1a_2 = uy, v0, f0
+ ;;
+ getfsig acc0 = fp0b_1
+ xma.l fp1b_1 = ux, v1, fp1a_1
+ xma.hu fp2a_1 = ux, v1, fp1a_1
+ ;;
+ ldf8 u_1 = [up], 8
+ xma.l fp0b_3 = u_3, v0, f0
+ xma.hu fp1a_3 = u_3, v0, f0
+ ;;
+ getfsig pr0_2 = fp0b_2
+ xma.l fp1b_2 = uy, v1, fp1a_2
+ xma.hu fp2a_2 = uy, v1, fp1a_2
+ ;;
+ ldf8 u_2 = [up], 8
+ getfsig pr1_1 = fp1b_1
+ ;;
+.mfi; getfsig acc1_1 = fp2a_1
+ xma.l fp0b_0 = u_0, v0, f0
+ cmp.ne p8, p9 = r0, r0
+.mfb; cmp.ne p12, p13 = r0, r0
+ xma.hu fp1a_0 = u_0, v0, f0
+ br L(10)
ALIGN(32)
-.Lb11: mov acc1_3 = 0
- mov pr1_3 = 0
- mov pr0_0 = 0
- cmp.ne p6, p7 = r0, r0
- ;;
- ldf8 u_2 = [up], 8
- br.cloop.dptk .grt3
+L(b11): mov acc1_3 = 0
+ mov pr1_3 = 0
+ mov pr0_0 = 0
+ ldf8 u_2 = [up], 8
+ cmp.ne p6, p7 = r0, r0
+ br.cloop.dptk L(gt3)
;;
- xma.l fp0b_0 = ux, v0, f0
- xma.hu fp1a_0 = ux, v0, f0
+ xma.l fp0b_0 = ux, v0, f0
+ xma.hu fp1a_0 = ux, v0, f0
;;
- cmp.ne p10, p11 = r0, r0
- xma.l fp0b_1 = uy, v0, f0
- xma.hu fp1a_1 = uy, v0, f0
+ cmp.ne p10, p11 = r0, r0
+ xma.l fp0b_1 = uy, v0, f0
+ xma.hu fp1a_1 = uy, v0, f0
;;
- getf.sig acc0 = fp0b_0
- xma.l fp1b_0 = ux, v1, fp1a_0
- xma.hu fp2a_0 = ux, v1, fp1a_0
+ getfsig acc0 = fp0b_0
+ xma.l fp1b_0 = ux, v1, fp1a_0
+ xma.hu fp2a_0 = ux, v1, fp1a_0
;;
- xma.l fp0b_2 = u_2, v0, f0
- xma.hu fp1a_2 = u_2, v0, f0
+ xma.l fp0b_2 = u_2, v0, f0
+ xma.hu fp1a_2 = u_2, v0, f0
;;
- getf.sig pr0_1 = fp0b_1
- xma.l fp1b_1 = uy, v1, fp1a_1
- xma.hu fp2a_1 = uy, v1, fp1a_1
+ getfsig pr0_1 = fp0b_1
+ xma.l fp1b_1 = uy, v1, fp1a_1
+ xma.hu fp2a_1 = uy, v1, fp1a_1
;;
- getf.sig pr1_0 = fp1b_0
- getf.sig acc1_0 = fp2a_0
- br .Lcj3
+ getfsig pr1_0 = fp1b_0
+ getfsig acc1_0 = fp2a_0
+ br L(cj3)
-.grt3: xma.l fp0b_0 = ux, v0, f0
- cmp.ne p10, p11 = r0, r0
- ldf8 u_3 = [up], 8
- xma.hu fp1a_0 = ux, v0, f0
+L(gt3): xma.l fp0b_0 = ux, v0, f0
+ cmp.ne p10, p11 = r0, r0
+ ldf8 u_3 = [up], 8
+ xma.hu fp1a_0 = ux, v0, f0
;;
- xma.l fp0b_1 = uy, v0, f0
- xma.hu fp1a_1 = uy, v0, f0
+ xma.l fp0b_1 = uy, v0, f0
+ xma.hu fp1a_1 = uy, v0, f0
;;
- getf.sig acc0 = fp0b_0
- xma.l fp1b_0 = ux, v1, fp1a_0
- ldf8 u_0 = [up], 8
- xma.hu fp2a_0 = ux, v1, fp1a_0
+ getfsig acc0 = fp0b_0
+ xma.l fp1b_0 = ux, v1, fp1a_0
+ ldf8 u_0 = [up], 8
+ xma.hu fp2a_0 = ux, v1, fp1a_0
;;
- xma.l fp0b_2 = u_2, v0, f0
- xma.hu fp1a_2 = u_2, v0, f0
+ xma.l fp0b_2 = u_2, v0, f0
+ xma.hu fp1a_2 = u_2, v0, f0
;;
- getf.sig pr0_1 = fp0b_1
- xma.l fp1b_1 = uy, v1, fp1a_1
- xma.hu fp2a_1 = uy, v1, fp1a_1
+ getfsig pr0_1 = fp0b_1
+ xma.l fp1b_1 = uy, v1, fp1a_1
+ xma.hu fp2a_1 = uy, v1, fp1a_1
;;
- ldf8 u_1 = [up], 8
- getf.sig pr1_0 = fp1b_0
+ ldf8 u_1 = [up], 8
+ getfsig pr1_0 = fp1b_0
;;
- getf.sig acc1_0 = fp2a_0
- xma.l fp0b_3 = u_3, v0, f0
- xma.hu fp1a_3 = u_3, v0, f0
- br .LL11
+ getfsig acc1_0 = fp2a_0
+ xma.l fp0b_3 = u_3, v0, f0
+ xma.hu fp1a_3 = u_3, v0, f0
+ br L(11)
C *** MAIN LOOP START ***
ALIGN(32)
-.Loop: C 00
- .pred.rel "mutex", p12, p13
- getf.sig pr0_3 = fp0b_3
- xma.l fp1b_3 = u_3, v1, fp1a_3
- (p12) add s0 = pr1_0, acc0, 1
- (p13) add s0 = pr1_0, acc0
- xma.hu fp2a_3 = u_3, v1, fp1a_3
+L(top): C 00
+ .pred.rel "mutex", p8, p9
+ .pred.rel "mutex", p12, p13
+ ldf8 u_3 = [up], 8
+ getfsig pr1_2 = fp1b_2
+ (p8) cmp.leu p6, p7 = acc0, pr0_1
+ (p9) cmp.ltu p6, p7 = acc0, pr0_1
+ (p12) cmp.leu p10, p11 = s0, pr1_0
+ (p13) cmp.ltu p10, p11 = s0, pr1_0
;; C 01
- .pred.rel "mutex", p8, p9
- .pred.rel "mutex", p12, p13
- ldf8 u_3 = [up], 8
- getf.sig pr1_2 = fp1b_2
- (p8) cmp.leu p6, p7 = acc0, pr0_1
- (p9) cmp.ltu p6, p7 = acc0, pr0_1
- (p12) cmp.leu p10, p11 = s0, pr1_0
- (p13) cmp.ltu p10, p11 = s0, pr1_0
+ .pred.rel "mutex", p6, p7
+ getfsig acc1_2 = fp2a_2
+ st8 [rp] = s0, 8
+ xma.l fp0b_1 = u_1, v0, f0
+ (p6) add acc0 = pr0_2, acc1_0, 1
+ (p7) add acc0 = pr0_2, acc1_0
+ xma.hu fp1a_1 = u_1, v0, f0
;; C 02
- .pred.rel "mutex", p6, p7
- getf.sig acc1_2 = fp2a_2
- st8 [rp] = s0, 8
- xma.l fp0b_1 = u_1, v0, f0
- (p6) add acc0 = pr0_2, acc1_0, 1
- (p7) add acc0 = pr0_2, acc1_0
- xma.hu fp1a_1 = u_1, v0, f0
+L(01):
+ .pred.rel "mutex", p10, p11
+ getfsig pr0_0 = fp0b_0
+ xma.l fp1b_0 = u_0, v1, fp1a_0
+ (p10) add s0 = pr1_1, acc0, 1
+ (p11) add s0 = pr1_1, acc0
+ xma.hu fp2a_0 = u_0, v1, fp1a_0
+ nop 1
;; C 03
-.LL01:
- .pred.rel "mutex", p10, p11
- getf.sig pr0_0 = fp0b_0
- xma.l fp1b_0 = u_0, v1, fp1a_0
- (p10) add s0 = pr1_1, acc0, 1
- (p11) add s0 = pr1_1, acc0
- xma.hu fp2a_0 = u_0, v1, fp1a_0
+ .pred.rel "mutex", p6, p7
+ .pred.rel "mutex", p10, p11
+ ldf8 u_0 = [up], 8
+ getfsig pr1_3 = fp1b_3
+ (p6) cmp.leu p8, p9 = acc0, pr0_2
+ (p7) cmp.ltu p8, p9 = acc0, pr0_2
+ (p10) cmp.leu p12, p13 = s0, pr1_1
+ (p11) cmp.ltu p12, p13 = s0, pr1_1
;; C 04
- .pred.rel "mutex", p6, p7
- .pred.rel "mutex", p10, p11
- ldf8 u_0 = [up], 8
- getf.sig pr1_3 = fp1b_3
- (p6) cmp.leu p8, p9 = acc0, pr0_2
- (p7) cmp.ltu p8, p9 = acc0, pr0_2
- (p10) cmp.leu p12, p13 = s0, pr1_1
- (p11) cmp.ltu p12, p13 = s0, pr1_1
+ .pred.rel "mutex", p8, p9
+ getfsig acc1_3 = fp2a_3
+ st8 [rp] = s0, 8
+ xma.l fp0b_2 = u_2, v0, f0
+ (p8) add acc0 = pr0_3, acc1_1, 1
+ (p9) add acc0 = pr0_3, acc1_1
+ xma.hu fp1a_2 = u_2, v0, f0
;; C 05
- .pred.rel "mutex", p8, p9
- getf.sig acc1_3 = fp2a_3
- st8 [rp] = s0, 8
- xma.l fp0b_2 = u_2, v0, f0
- (p8) add acc0 = pr0_3, acc1_1, 1
- (p9) add acc0 = pr0_3, acc1_1
- xma.hu fp1a_2 = u_2, v0, f0
+L(00):
+ .pred.rel "mutex", p12, p13
+ getfsig pr0_1 = fp0b_1
+ xma.l fp1b_1 = u_1, v1, fp1a_1
+ (p12) add s0 = pr1_2, acc0, 1
+ (p13) add s0 = pr1_2, acc0
+ xma.hu fp2a_1 = u_1, v1, fp1a_1
+ nop 1
;; C 06
-.LL00:
- .pred.rel "mutex", p12, p13
- getf.sig pr0_1 = fp0b_1
- xma.l fp1b_1 = u_1, v1, fp1a_1
- (p12) add s0 = pr1_2, acc0, 1
- (p13) add s0 = pr1_2, acc0
- xma.hu fp2a_1 = u_1, v1, fp1a_1
+ .pred.rel "mutex", p8, p9
+ .pred.rel "mutex", p12, p13
+ ldf8 u_1 = [up], 8
+ getfsig pr1_0 = fp1b_0
+ (p8) cmp.leu p6, p7 = acc0, pr0_3
+ (p9) cmp.ltu p6, p7 = acc0, pr0_3
+ (p12) cmp.leu p10, p11 = s0, pr1_2
+ (p13) cmp.ltu p10, p11 = s0, pr1_2
;; C 07
- .pred.rel "mutex", p8, p9
- .pred.rel "mutex", p12, p13
- ldf8 u_1 = [up], 8
- getf.sig pr1_0 = fp1b_0
- (p8) cmp.leu p6, p7 = acc0, pr0_3
- (p9) cmp.ltu p6, p7 = acc0, pr0_3
- (p12) cmp.leu p10, p11 = s0, pr1_2
- (p13) cmp.ltu p10, p11 = s0, pr1_2
+ .pred.rel "mutex", p6, p7
+ getfsig acc1_0 = fp2a_0
+ st8 [rp] = s0, 8
+ xma.l fp0b_3 = u_3, v0, f0
+ (p6) add acc0 = pr0_0, acc1_2, 1
+ (p7) add acc0 = pr0_0, acc1_2
+ xma.hu fp1a_3 = u_3, v0, f0
;; C 08
- .pred.rel "mutex", p6, p7
- getf.sig acc1_0 = fp2a_0
- st8 [rp] = s0, 8
- xma.l fp0b_3 = u_3, v0, f0
- (p6) add acc0 = pr0_0, acc1_2, 1
- (p7) add acc0 = pr0_0, acc1_2
- xma.hu fp1a_3 = u_3, v0, f0
+L(11):
+ .pred.rel "mutex", p10, p11
+ getfsig pr0_2 = fp0b_2
+ xma.l fp1b_2 = u_2, v1, fp1a_2
+ (p10) add s0 = pr1_3, acc0, 1
+ (p11) add s0 = pr1_3, acc0
+ xma.hu fp2a_2 = u_2, v1, fp1a_2
+ nop 1
;; C 09
-.LL11:
- .pred.rel "mutex", p10, p11
- getf.sig pr0_2 = fp0b_2
- xma.l fp1b_2 = u_2, v1, fp1a_2
- (p10) add s0 = pr1_3, acc0, 1
- (p11) add s0 = pr1_3, acc0
- xma.hu fp2a_2 = u_2, v1, fp1a_2
+ .pred.rel "mutex", p6, p7
+ .pred.rel "mutex", p10, p11
+ ldf8 u_2 = [up], 8
+ getfsig pr1_1 = fp1b_1
+ (p6) cmp.leu p8, p9 = acc0, pr0_0
+ (p7) cmp.ltu p8, p9 = acc0, pr0_0
+ (p10) cmp.leu p12, p13 = s0, pr1_3
+ (p11) cmp.ltu p12, p13 = s0, pr1_3
;; C 10
- .pred.rel "mutex", p6, p7
- .pred.rel "mutex", p10, p11
- ldf8 u_2 = [up], 8
- getf.sig pr1_1 = fp1b_1
- (p6) cmp.leu p8, p9 = acc0, pr0_0
- (p7) cmp.ltu p8, p9 = acc0, pr0_0
- (p10) cmp.leu p12, p13 = s0, pr1_3
- (p11) cmp.ltu p12, p13 = s0, pr1_3
+ .pred.rel "mutex", p8, p9
+ getfsig acc1_1 = fp2a_1
+ st8 [rp] = s0, 8
+ xma.l fp0b_0 = u_0, v0, f0
+ (p8) add acc0 = pr0_1, acc1_3, 1
+ (p9) add acc0 = pr0_1, acc1_3
+ xma.hu fp1a_0 = u_0, v0, f0
;; C 11
- .pred.rel "mutex", p8, p9
- getf.sig acc1_1 = fp2a_1
- st8 [rp] = s0, 8
- xma.l fp0b_0 = u_0, v0, f0
- (p8) add acc0 = pr0_1, acc1_3, 1
- (p9) add acc0 = pr0_1, acc1_3
- xma.hu fp1a_0 = u_0, v0, f0
-.LL10: br.cloop.dptk .Loop C 12
+L(10):
+ .pred.rel "mutex", p12, p13
+ getfsig pr0_3 = fp0b_3
+ xma.l fp1b_3 = u_3, v1, fp1a_3
+ (p12) add s0 = pr1_0, acc0, 1
+ (p13) add s0 = pr1_0, acc0
+ xma.hu fp2a_3 = u_3, v1, fp1a_3
+ br.cloop.dptk L(top)
;;
C *** MAIN LOOP END ***
-.Lcj6:
- .pred.rel "mutex", p12, p13
- getf.sig pr0_3 = fp0b_3
- xma.l fp1b_3 = u_3, v1, fp1a_3
- (p12) add s0 = pr1_0, acc0, 1
- (p13) add s0 = pr1_0, acc0
- xma.hu fp2a_3 = u_3, v1, fp1a_3
- ;;
- .pred.rel "mutex", p8, p9
- .pred.rel "mutex", p12, p13
- getf.sig pr1_2 = fp1b_2
- (p8) cmp.leu p6, p7 = acc0, pr0_1
- (p9) cmp.ltu p6, p7 = acc0, pr0_1
- (p12) cmp.leu p10, p11 = s0, pr1_0
- (p13) cmp.ltu p10, p11 = s0, pr1_0
- ;;
- .pred.rel "mutex", p6, p7
- getf.sig acc1_2 = fp2a_2
- st8 [rp] = s0, 8
- xma.l fp0b_1 = u_1, v0, f0
- (p6) add acc0 = pr0_2, acc1_0, 1
- (p7) add acc0 = pr0_2, acc1_0
- xma.hu fp1a_1 = u_1, v0, f0
- ;;
-.Lcj5:
- .pred.rel "mutex", p10, p11
- getf.sig pr0_0 = fp0b_0
- xma.l fp1b_0 = u_0, v1, fp1a_0
- (p10) add s0 = pr1_1, acc0, 1
- (p11) add s0 = pr1_1, acc0
- xma.hu fp2a_0 = u_0, v1, fp1a_0
- ;;
- .pred.rel "mutex", p6, p7
- .pred.rel "mutex", p10, p11
- getf.sig pr1_3 = fp1b_3
- (p6) cmp.leu p8, p9 = acc0, pr0_2
- (p7) cmp.ltu p8, p9 = acc0, pr0_2
- (p10) cmp.leu p12, p13 = s0, pr1_1
- (p11) cmp.ltu p12, p13 = s0, pr1_1
- ;;
- .pred.rel "mutex", p8, p9
- getf.sig acc1_3 = fp2a_3
- st8 [rp] = s0, 8
- xma.l fp0b_2 = u_2, v0, f0
- (p8) add acc0 = pr0_3, acc1_1, 1
- (p9) add acc0 = pr0_3, acc1_1
- xma.hu fp1a_2 = u_2, v0, f0
- ;;
-.Lcj4:
- .pred.rel "mutex", p12, p13
- getf.sig pr0_1 = fp0b_1
- xma.l fp1b_1 = u_1, v1, fp1a_1
- (p12) add s0 = pr1_2, acc0, 1
- (p13) add s0 = pr1_2, acc0
- xma.hu fp2a_1 = u_1, v1, fp1a_1
- ;;
- .pred.rel "mutex", p8, p9
- .pred.rel "mutex", p12, p13
- getf.sig pr1_0 = fp1b_0
- (p8) cmp.leu p6, p7 = acc0, pr0_3
- (p9) cmp.ltu p6, p7 = acc0, pr0_3
- (p12) cmp.leu p10, p11 = s0, pr1_2
- (p13) cmp.ltu p10, p11 = s0, pr1_2
- ;;
- .pred.rel "mutex", p6, p7
- getf.sig acc1_0 = fp2a_0
- st8 [rp] = s0, 8
- (p6) add acc0 = pr0_0, acc1_2, 1
- (p7) add acc0 = pr0_0, acc1_2
- ;;
-.Lcj3:
- .pred.rel "mutex", p10, p11
- getf.sig pr0_2 = fp0b_2
- xma.l fp1b_2 = u_2, v1, fp1a_2
- (p10) add s0 = pr1_3, acc0, 1
- (p11) add s0 = pr1_3, acc0
- xma.hu fp2a_2 = u_2, v1, fp1a_2
- ;;
- .pred.rel "mutex", p6, p7
- .pred.rel "mutex", p10, p11
- getf.sig pr1_1 = fp1b_1
- (p6) cmp.leu p8, p9 = acc0, pr0_0
- (p7) cmp.ltu p8, p9 = acc0, pr0_0
- (p10) cmp.leu p12, p13 = s0, pr1_3
- (p11) cmp.ltu p12, p13 = s0, pr1_3
- ;;
- .pred.rel "mutex", p8, p9
- getf.sig acc1_1 = fp2a_1
- st8 [rp] = s0, 8
- (p8) add acc0 = pr0_1, acc1_3, 1
- (p9) add acc0 = pr0_1, acc1_3
- ;;
- .pred.rel "mutex", p12, p13
- (p12) add s0 = pr1_0, acc0, 1
- (p13) add s0 = pr1_0, acc0
- ;;
- .pred.rel "mutex", p8, p9
- .pred.rel "mutex", p12, p13
- getf.sig pr1_2 = fp1b_2
- (p8) cmp.leu p6, p7 = acc0, pr0_1
- (p9) cmp.ltu p6, p7 = acc0, pr0_1
- (p12) cmp.leu p10, p11 = s0, pr1_0
- (p13) cmp.ltu p10, p11 = s0, pr1_0
- ;;
- .pred.rel "mutex", p6, p7
- getf.sig acc1_2 = fp2a_2
- st8 [rp] = s0, 8
- (p6) add acc0 = pr0_2, acc1_0, 1
- (p7) add acc0 = pr0_2, acc1_0
- ;;
- .pred.rel "mutex", p10, p11
- (p10) add s0 = pr1_1, acc0, 1
- (p11) add s0 = pr1_1, acc0
- ;;
- .pred.rel "mutex", p6, p7
- .pred.rel "mutex", p10, p11
- (p6) cmp.leu p8, p9 = acc0, pr0_2
- (p7) cmp.ltu p8, p9 = acc0, pr0_2
- (p10) cmp.leu p12, p13 = s0, pr1_1
- (p11) cmp.ltu p12, p13 = s0, pr1_1
- ;;
- .pred.rel "mutex", p8, p9
- st8 [rp] = s0, 8
- (p8) add acc0 = pr1_2, acc1_1, 1
- (p9) add acc0 = pr1_2, acc1_1
- ;;
- .pred.rel "mutex", p8, p9
- (p8) cmp.leu p10, p11 = acc0, pr1_2
- (p9) cmp.ltu p10, p11 = acc0, pr1_2
- (p12) add acc0 = 1, acc0
- ;;
- st8 [rp] = acc0, 8
- (p12) cmp.eq.or p10, p0 = 0, acc0
- mov r8 = acc1_2
- ;;
- .pred.rel "mutex", p10, p11
- (p10) add r8 = 1, r8
- mov.i ar.lc = r2
- br.ret.sptk.many b0
+ .pred.rel "mutex", p8, p9
+ .pred.rel "mutex", p12, p13
+.mmi; getfsig pr1_2 = fp1b_2
+ st8 [rp] = s0, 8
+ (p8) cmp.leu p6, p7 = acc0, pr0_1
+.mmi; (p9) cmp.ltu p6, p7 = acc0, pr0_1
+ (p12) cmp.leu p10, p11 = s0, pr1_0
+ (p13) cmp.ltu p10, p11 = s0, pr1_0
+ ;;
+ .pred.rel "mutex", p6, p7
+.mfi; getfsig acc1_2 = fp2a_2
+ xma.l fp0b_1 = u_1, v0, f0
+ nop 1
+.mmf; (p6) add acc0 = pr0_2, acc1_0, 1
+ (p7) add acc0 = pr0_2, acc1_0
+ xma.hu fp1a_1 = u_1, v0, f0
+ ;;
+L(cj5):
+ .pred.rel "mutex", p10, p11
+.mfi; getfsig pr0_0 = fp0b_0
+ xma.l fp1b_0 = u_0, v1, fp1a_0
+ (p10) add s0 = pr1_1, acc0, 1
+.mfi; (p11) add s0 = pr1_1, acc0
+ xma.hu fp2a_0 = u_0, v1, fp1a_0
+ nop 1
+ ;;
+ .pred.rel "mutex", p6, p7
+ .pred.rel "mutex", p10, p11
+.mmi; getfsig pr1_3 = fp1b_3
+ st8 [rp] = s0, 8
+ (p6) cmp.leu p8, p9 = acc0, pr0_2
+.mmi; (p7) cmp.ltu p8, p9 = acc0, pr0_2
+ (p10) cmp.leu p12, p13 = s0, pr1_1
+ (p11) cmp.ltu p12, p13 = s0, pr1_1
+ ;;
+ .pred.rel "mutex", p8, p9
+.mfi; getfsig acc1_3 = fp2a_3
+ xma.l fp0b_2 = u_2, v0, f0
+ nop 1
+.mmf; (p8) add acc0 = pr0_3, acc1_1, 1
+ (p9) add acc0 = pr0_3, acc1_1
+ xma.hu fp1a_2 = u_2, v0, f0
+ ;;
+L(cj4):
+ .pred.rel "mutex", p12, p13
+.mfi; getfsig pr0_1 = fp0b_1
+ xma.l fp1b_1 = u_1, v1, fp1a_1
+ (p12) add s0 = pr1_2, acc0, 1
+.mfi; (p13) add s0 = pr1_2, acc0
+ xma.hu fp2a_1 = u_1, v1, fp1a_1
+ nop 1
+ ;;
+ .pred.rel "mutex", p8, p9
+ .pred.rel "mutex", p12, p13
+.mmi; getfsig pr1_0 = fp1b_0
+ st8 [rp] = s0, 8
+ (p8) cmp.leu p6, p7 = acc0, pr0_3
+.mmi; (p9) cmp.ltu p6, p7 = acc0, pr0_3
+ (p12) cmp.leu p10, p11 = s0, pr1_2
+ (p13) cmp.ltu p10, p11 = s0, pr1_2
+ ;;
+ .pred.rel "mutex", p6, p7
+.mmi; getfsig acc1_0 = fp2a_0
+ (p6) add acc0 = pr0_0, acc1_2, 1
+ (p7) add acc0 = pr0_0, acc1_2
+ ;;
+L(cj3):
+ .pred.rel "mutex", p10, p11
+.mfi; getfsig pr0_2 = fp0b_2
+ xma.l fp1b_2 = u_2, v1, fp1a_2
+ (p10) add s0 = pr1_3, acc0, 1
+.mfi; (p11) add s0 = pr1_3, acc0
+ xma.hu fp2a_2 = u_2, v1, fp1a_2
+ nop 1
+ ;;
+ .pred.rel "mutex", p6, p7
+ .pred.rel "mutex", p10, p11
+.mmi; getfsig pr1_1 = fp1b_1
+ st8 [rp] = s0, 8
+ (p6) cmp.leu p8, p9 = acc0, pr0_0
+.mmi; (p7) cmp.ltu p8, p9 = acc0, pr0_0
+ (p10) cmp.leu p12, p13 = s0, pr1_3
+ (p11) cmp.ltu p12, p13 = s0, pr1_3
+ ;;
+ .pred.rel "mutex", p8, p9
+.mmi; getfsig acc1_1 = fp2a_1
+ (p8) add acc0 = pr0_1, acc1_3, 1
+ (p9) add acc0 = pr0_1, acc1_3
+ ;;
+ .pred.rel "mutex", p12, p13
+.mmi; (p12) add s0 = pr1_0, acc0, 1
+ (p13) add s0 = pr1_0, acc0
+ nop 1
+ ;;
+ .pred.rel "mutex", p8, p9
+ .pred.rel "mutex", p12, p13
+.mmi; getfsig pr1_2 = fp1b_2
+ st8 [rp] = s0, 8
+ (p8) cmp.leu p6, p7 = acc0, pr0_1
+.mmi; (p9) cmp.ltu p6, p7 = acc0, pr0_1
+ (p12) cmp.leu p10, p11 = s0, pr1_0
+ (p13) cmp.ltu p10, p11 = s0, pr1_0
+ ;;
+ .pred.rel "mutex", p6, p7
+.mmi; getfsig r8 = fp2a_2
+ (p6) add acc0 = pr0_2, acc1_0, 1
+ (p7) add acc0 = pr0_2, acc1_0
+ ;;
+ .pred.rel "mutex", p10, p11
+.mmi; (p10) add s0 = pr1_1, acc0, 1
+ (p11) add s0 = pr1_1, acc0
+ (p6) cmp.leu p8, p9 = acc0, pr0_2
+ ;;
+ .pred.rel "mutex", p10, p11
+.mmi; (p7) cmp.ltu p8, p9 = acc0, pr0_2
+ (p10) cmp.leu p12, p13 = s0, pr1_1
+ (p11) cmp.ltu p12, p13 = s0, pr1_1
+ ;;
+ .pred.rel "mutex", p8, p9
+.mmi; st8 [rp] = s0, 8
+ (p8) add acc0 = pr1_2, acc1_1, 1
+ (p9) add acc0 = pr1_2, acc1_1
+ ;;
+ .pred.rel "mutex", p8, p9
+.mmi; (p8) cmp.leu p10, p11 = acc0, pr1_2
+ (p9) cmp.ltu p10, p11 = acc0, pr1_2
+ (p12) add acc0 = 1, acc0
+ ;;
+.mmi; st8 [rp] = acc0, 8
+ (p12) cmpeqor p10, p0 = 0, acc0
+ nop 1
+ ;;
+.mib; (p10) add r8 = 1, r8
+ mov ar.lc = r2
+ br.ret.sptk.many b0
EPILOGUE()
ASM_END()
dnl IA-64 mpn_popcount -- mpn population count.
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
dnl Copyright 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation,
dnl Inc.
dnl IA-64 mpn_rsh1add_n/mpn_rsh1sub_n -- rp[] = (up[] +- vp[]) >> 1.
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
dnl Copyright 2003, 2004, 2005 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
--- /dev/null
+dnl IA-64 mpn_sqr_diag_addlsh1
+
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
+dnl Copyright 2010, 2011 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C Itanium: ?
+C Itanium 2: 2 Unrolling could bring it to 1.5 + epsilon
+
+C Exact performance table. The 2nd line is this code, the 3rd line is ctop-
+C less code. In an assembly sqr_basecase, the ctop-full numbers will become a
+C few cycles better since we can mitigate the many I0 instructions.
+C
+C 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
+C - 20 22 24 26 28 30 32 34 36 38 40 42 44 46 48 50 52 54 56 Needs updating
+C - 13 16 17 18 20 21 23 25 26 30 31 31 33 34 36 38 39 42 43
+
+C We should keep in mind that this code takes linear time in a O(n^2) context
+C and that it will only be used under SQR_TOOM2_THRESHOLD, which might become
+C around 60. Keeping overhead down for smallish operands (< 10) is more
+C important than optimal cycle counts.
+
+C TODO
+C * Make sure we don't depend on uninitialised r-registers, f-registers, or
+C   p-registers.
+C * Optimise by doing first two loop iterations in function header.
+
+C INPUT PARAMETERS
+define(`rp_param', `r32') define(`rp', `r14') C size: 2n
+define(`tp_param', `r33') define(`tp', `r15') C size: 2n - 2
+define(`up_param', `r34') define(`up', `r31') C size: n
+define(`n', `r35')
+
+
+ASM_START()
+C mpn_sqr_diag_addlsh1 (rp, tp, up, n); operand sizes are listed under INPUT
+C PARAMETERS above.  NOTE(review): judging by the name and by the
+C xma.l/xma.hu of each up limb with itself combined with the shrp-by-63 of
+C consecutive tp limbs, this presumably stores the diagonal squares of up plus
+C tp shifted left one bit into rp -- confirm against the C reference code.
+PROLOGUE(mpn_sqr_diag_addlsh1)
+
+ .prologue
+ .save ar.pfs, r2
+ .save ar.lc, r3
+ .body
+
+C Function header.  The alloc requests 4 input, 24 local and 0 output registers
+C with 24 of them rotating, so r32 and up rotate inside the loop; the incoming
+C pointer arguments are therefore copied to the static registers r14, r15 and
+C r31 first.  State that the loop clobbers is saved here and restored after
+C L(end): ar.pfs in r2, ar.lc in r3, ar.ec in r9 and the predicates in r10.
+.mmi; alloc r2 = ar.pfs, 4,24,0,24 C M
+ nop 4711
+ mov r3 = ar.lc C I0
+.mmi; mov tp = tp_param C M I
+ mov up = up_param C M I
+ mov rp = rp_param C M I
+ ;;
+C Load the first two tp limbs ahead of the loop and start building the loop
+C count n-2.
+.mmi; ld8 r36 = [tp], 8 C M
+ add r20 = -2, n C M I
+ mov r9 = ar.ec C I0
+ ;;
+.mmi; ld8 r32 = [tp], 8 C M
+ mov r16 = 0 C M I
+ mov ar.ec = 7 C I0
+ ;;
+.mmi; nop 4711
+ mov r44 = 0 C M I
+ mov ar.lc = r20 C I0
+ ;;
+C 0x30000 has bits 16 and 17 set, so the rotating predicates start out with
+C p16 and p17 true and all others false.
+.mii; mov r33 = 0
+ mov r10 = pr C I0
+ mov pr.rot = 0x30000 C I0
+ ;;
+C NOTE(review): with ar.ec = 7 this br.cexit looks like it is used only to
+C advance the rotation by one step before entering the loop -- confirm.
+ br.cexit.spnt.few.clr L(end)
+
+dnl *** MAIN LOOP START ***
+C Software pipelined loop driven by br.ctop.  The (pNN) qualifying predicates
+C select the pipeline stage of each instruction, and since the rotating
+C registers are renamed on every iteration, a value written under one register
+C name is read under a higher numbered name in a later stage.
+ ALIGN(32)
+L(top):
+.mfi; (p18) ldf8 f33 = [up], 8 C M
+ (p20) xma.l f36 = f35, f35, f42 C F
+ (p41) cmpequc p50, p0 = -1, r44 C M I
+.mfi; setfsig f40 = r16 C M23
+ (p20) xma.hu f38 = f35, f35, f42 C F
+ (p23) add r50 = r41, r49 C M I
+ ;;
+.mmi; (p16) ld8 r36 = [tp], 8 C M
+ (p23) cmpltu p40, p0 = r50, r41 C cyout hi M I
+ (p19) shrp r45 = r38, r35, 63 C non-critical I0
+.mmi; (p21) getfsig r39 = f39 C hi M2
+ (p24) st8 [rp] = r51, 8 C hi M23
+ (p41) add r44 = 1, r44 C M I
+ ;;
+.mmi; (p16) ld8 r32 = [tp], 8 C M
+ (p50) cmpeqor p40, p0 = -1, r50 C cyout hi M I
+ (p17) shrp r16 = r33, r37, 63 C critical I0
+.mmi; (p21) getfsig r42 = f37 C lo M2
+ (p23) st8 [rp] = r44, 8 C lo M23
+ (p50) add r50 = 1, r50 C M I
+ ;;
+ br.ctop.sptk.few.clr L(top) C B
+dnl *** MAIN LOOP END ***
+ ;;
+C Wind-down code: three final stores to rp, with conditional +1 adjustments
+C under p41 and p6 (presumably the last carry propagation), interleaved with
+C restoring ar.lc, ar.ec, pr and ar.pfs from the registers they were saved in.
+L(end):
+.mmi; nop 4711
+ (p41) add r44 = 1, r44 C M I
+ shr.u r48 = r39, 63 C I0
+ ;;
+.mmi; st8 [rp] = r51, 8 C M23
+ (p41) cmpequc p6, p0 = 0, r44 C M I
+ add r50 = r41, r48 C M I
+ ;;
+.mmi; st8 [rp] = r44, 8 C M23
+ (p6) add r50 = 1, r50 C M I
+ mov ar.lc = r3 C I0
+ ;;
+.mii; st8 [rp] = r50 C M23
+ mov ar.ec = r9 C I0
+ mov pr = r10 C I0
+ ;;
+.mib; nop 4711
+ mov ar.pfs = r2 C I0
+ br.ret.sptk.many b0 C B
+EPILOGUE()
+++ /dev/null
-dnl IA-64 mpn_sqr_diagonal. Helper for sqr_basecase.
-
-dnl Copyright 2001, 2002, 2004 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of the GNU Lesser General Public License as published
-dnl by the Free Software Foundation; either version 3 of the License, or (at
-dnl your option) any later version.
-
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-dnl License for more details.
-
-dnl You should have received a copy of the GNU Lesser General Public License
-dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/limb
-C Itanium: 4
-C Itanium 2: 2
-
-C TODO
-C * Perhaps avoid ctop loop. Unfortunately, a cloop loop running at 1 c/l
-C would need prohibitive 8-way unrolling.
-C * Instead of messing too much with this, write a nifty mpn_sqr_basecase.
-
-C INPUT PARAMETERS
-C rp = r32
-C sp = r33
-C n = r34
-
-ASM_START()
-PROLOGUE(mpn_sqr_diagonal)
- .prologue
- .save ar.lc, r2
- .save pr, r15
- .body
-ifdef(`HAVE_ABI_32',
-` addp4 r32 = 0, r32
- addp4 r33 = 0, r33
- zxt4 r34 = r34
- ;;
-')
- ldf8 f32 = [r33], 8 C M load rp[0] early
- mov r2 = ar.lc C I0
- mov r14 = ar.ec C I0
- mov r15 = pr C I0
- add r19 = -1, r34 C M I decr n
- add r18 = 8, r32 C M I rp for high limb
- ;;
- mov ar.lc = r19 C I0
- mov ar.ec = 5 C I0
- mov pr.rot = 1<<16 C I0
- ;;
- br.cexit.spnt .Ldone C B
- ;;
- ALIGN(32)
-.Loop:
- (p16) ldf8 f32 = [r33], 8 C M
- (p19) xma.l f36 = f35, f35, f0 C F
- (p21) stf8 [r32] = f38, 16 C M2 M3
- (p19) xma.hu f40 = f35, f35, f0 C F
- (p21) stf8 [r18] = f42, 16 C M2 M3
- br.ctop.dptk .Loop C B
- ;;
-.Ldone:
- stf8 [r32] = f38 C M2 M3
- stf8 [r18] = f42 C M2 M3
- mov ar.ec = r14 C I0
- ;;
- mov pr = r15, 0x1ffff C I0
- mov ar.lc = r2 C I0
- br.ret.sptk.many b0 C B
-EPILOGUE(mpn_sqr_diagonal)
-ASM_END()
dnl IA-64 mpn_submul_1 -- Multiply a limb vector with a limb and subtract the
dnl result from a second limb vector.
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
dnl Copyright 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
--- /dev/null
+dnl IA-64 mpn_tabselect.
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C Itanium: ?
+C Itanium 2: 2.5
+
+C NOTES
+C * Using software pipelining could trivially yield 2 c/l without unrolling,
+C or 1+epsilon with unrolling. (This code was modelled after the powerpc64
+C code, for simplicity.)
+
+C mpn_tabselect (mp_limb_t *rp, mp_limb_t *tp, mp_size_t n, mp_size_t nents, mp_size_t which)
+define(`rp', `r32')
+define(`tp', `r33')
+define(`n', `r34')
+define(`nents', `r35')
+define(`which', `r36')
+
+define(`mask', `r8')
+
+define(`rp1', `r32')
+define(`tp1', `r33')
+define(`rp2', `r14')
+define(`tp2', `r15')
+
+ASM_START()
+ TEXT
+ ALIGN(16)
+C mpn_tabselect (rp, tp, n, nents, which)
+C
+C Walks all nents table entries of n limbs each, starting at tp, and merges
+C each entry into rp under a mask: all ones for the entry whose index equals
+C which, zero for every other entry.  Every entry is loaded and every rp limb
+C is rewritten on every pass, independently of which; after the last pass rp
+C holds entry number which.
+C
+C Register use: r2 = saved ar.lc, r6 = inner loop count, r8 = mask,
+C r9 = low bit of n, r14/r15 = second rp/tp pointers running one limb ahead,
+C r16-r19 = limb temporaries.  p8 = n is even, p10 = n equals 1.
+PROLOGUE(mpn_tabselect)
+ .prologue
+ .save ar.lc, r2
+ .body
+ifdef(`HAVE_ABI_32',`
+.mmi; addp4 rp = 0, rp C M I
+ addp4 tp = 0, tp C M I
+ zxt4 n = n C I
+.mii; nop 0
+ zxt4 nents = nents C I
+ zxt4 which = which C I
+ ;;
+')
+C ar.lc is a preserved register in the Itanium software conventions and is
+C overwritten at L(outer), so keep the caller value in r2 (as the .save
+C directive above already declares) and put it back before returning.
+.mmi; add rp2 = 8, rp1
+ add tp2 = 8, tp1
+ mov r2 = ar.lc C I0
+.mmi; add r6 = -2, n
+ nop 0
+ nop 0
+ ;;
+.mmi; cmp.eq p10, p0 = 1, n
+ and r9 = 1, n C low bit of n, tested into p8 below
+ shr.u r6 = r6, 1 C inner loop count
+ ;;
+C From here on, which holds nents - which; it equals the down-counting nents
+C exactly on the pass over the selected entry.  n becomes the entry size in
+C bytes.
+.mmi; cmp.eq p8, p0 = 0, r9
+ sub which = nents, which
+ shl n = n, 3
+ ;;
+
+L(outer):
+.mmi; cmp.eq p6, p7 = which, nents C are we at the selected table entry?
+ nop 0
+ mov ar.lc = r6 C I0
+ ;;
+.mmb;
+ (p6) mov mask = -1
+ (p7) mov mask = 0
+ (p8) br.dptk L(top) C branch to loop entry if n even
+ ;;
+
+C n is odd: handle one limb separately so that the two-limb loop below sees
+C an even count.  Both second pointers are bumped to stay one limb ahead.
+.mmi; ld8 r16 = [tp1], 8
+ add tp2 = 8, tp2
+ nop 0
+ ;;
+.mmi; ld8 r18 = [rp1]
+ and r16 = r16, mask
+ nop 0
+ ;;
+.mmi; andcm r18 = r18, mask
+ ;;
+ or r16 = r16, r18
+ nop 0
+ ;;
+.mmb; st8 [rp1] = r16, 8
+ add rp2 = 8, rp2
+ (p10) br.dpnt L(end)
+
+C Two limbs per iteration: rp limb = (tp limb AND mask) OR (rp limb AND NOT mask).
+ ALIGN(32)
+L(top):
+.mmi; ld8 r16 = [tp1], 16
+ ld8 r17 = [tp2], 16
+ nop 0
+ ;;
+.mmi; ld8 r18 = [rp1]
+ and r16 = r16, mask
+ nop 0
+.mmi; ld8 r19 = [rp2]
+ and r17 = r17, mask
+ nop 0
+ ;;
+.mmi; andcm r18 = r18, mask
+ andcm r19 = r19, mask
+ nop 0
+ ;;
+.mmi; or r16 = r16, r18
+ or r17 = r17, r19
+ nop 0
+ ;;
+.mmb; st8 [rp1] = r16, 16
+ st8 [rp2] = r17, 16
+ br.cloop.dptk L(top)
+ ;;
+C End of one table entry: rewind both rp pointers by the entry size; the tp
+C pointers are left alone and so already address the next entry.  Loop until
+C nents has counted down to 1.
+L(end):
+.mmi; sub rp1 = rp1, n C move rp back to beginning
+ sub rp2 = rp2, n C move rp back to beginning
+ cmp.ne p9, p0 = 1, nents
+.mmb; add nents = -1, nents
+ nop 0
+ (p9) br.dptk L(outer)
+ ;;
+
+.mib; nop 0
+ mov ar.lc = r2 C I0 restore caller loop count
+ br.ret.sptk.many b0
+EPILOGUE()
dnl mc68020 mpn_addmul_1, mpn_submul_1 -- add or subtract mpn multiple.
-dnl Copyright 1992, 1994, 1996, 1999, 2000, 2001, 2002 Free Software
+dnl Copyright 1992, 1994, 1996, 1999, 2000, 2001, 2002, 2011 Free Software
dnl Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or
-dnl modify it under the terms of the GNU Lesser General Public License as
-dnl published by the Free Software Foundation; either version 3 of the
-dnl License, or (at your option) any later version.
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful,
-dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
-dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-dnl Lesser General Public License for more details.
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
dnl
dnl You should have received a copy of the GNU Lesser General Public License
dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
define(s1_size, `d2')
define(s2_limb, `d4')
+MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1)
PROLOGUE(M4_function_1)
/* Minimal values gmp-mparam.h -- Compiler/machine parameter header file.
-Copyright 1991, 1993, 1994, 2000, 2006, 2008, 2009, 2010 Free Software
-Foundation, Inc.
+Copyright 1991, 1993, 1994, 2000, 2006, 2008, 2009, 2010, 2012 Free
+Software Foundation, Inc.
This file is part of the GNU MP Library.
#define MOD_1_NORM_THRESHOLD 0 /* always */
#define MOD_1_UNNORM_THRESHOLD 0 /* always */
-#define MOD_1_1_THRESHOLD 2
-#define MOD_1_2_THRESHOLD 3
-#define MOD_1_4_THRESHOLD 4
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 2
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 2
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 3
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD 4
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 1
#define USE_PREINV_DIVREM_1 1 /* native */
-#define USE_PREINV_MOD_1 1
-#define DIVREM_2_THRESHOLD 0 /* always */
#define DIVEXACT_1_THRESHOLD 0 /* always (native) */
-#define MODEXACT_1_ODD_THRESHOLD 0 /* always (native) */
+#define BMOD_1_TO_MOD_1_THRESHOLD 3
#define MUL_TOOM22_THRESHOLD 8
#define MUL_TOOM33_THRESHOLD 20
#define MUL_TOOM44_THRESHOLD 24
-#define MUL_TOOM6H_THRESHOLD 200 /* FIXME */
+#define MUL_TOOM6H_THRESHOLD 70 /* FIXME */
#define MUL_TOOM8H_THRESHOLD 86
#define MUL_TOOM32_TO_TOOM43_THRESHOLD 50 /* FIXME */
#define SQR_TOOM2_THRESHOLD 8
#define SQR_TOOM3_THRESHOLD 20
#define SQR_TOOM4_THRESHOLD 24
-#define SQR_TOOM6H_THRESHOLD 200 /* FIXME */
+#define SQR_TOOM6H_THRESHOLD 70 /* FIXME */
#define SQR_TOOM8H_THRESHOLD 86
#define MULMOD_BNM1_THRESHOLD 10
#define INV_APPR_THRESHOLD 4
#define BINV_NEWTON_THRESHOLD 6
-#define REDC_1_TO_REDC_N_THRESHOLD 4
+#define REDC_1_TO_REDC_N_THRESHOLD 9
#define MU_DIV_QR_THRESHOLD 8
#define MU_DIVAPPR_Q_THRESHOLD 8
#define GET_STR_PRECOMPUTE_THRESHOLD 10
#define SET_STR_THRESHOLD 64
#define SET_STR_PRECOMPUTE_THRESHOLD 100
+
+#define FAC_ODD_THRESHOLD 0 /* always */
+#define FAC_DSC_THRESHOLD 70
dnl MIPS64 mpn_add_n -- Add two limb vectors of the same length > 0 and store
dnl sum in a third limb vector.
-dnl Copyright 1995, 2000, 2001, 2002 Free Software Foundation, Inc.
+dnl Copyright 1995, 2000, 2001, 2002, 2011 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
C size $7
ASM_START()
+PROLOGUE(mpn_add_nc)
+ ld $10,0($5)
+ ld $11,0($6)
+
+ daddiu $7,$7,-1
+ and $9,$7,4-1 C number of limbs in first loop
+ beq $9,$0,.L0 C if multiple of 4 limbs, skip first loop
+ move $2,$8
+ b .Loop0
+ dsubu $7,$7,$9
+EPILOGUE()
PROLOGUE(mpn_add_n)
ld $10,0($5)
ld $11,0($6)
sd $11,0($4)
j $31
or $2,$2,$8
-EPILOGUE(mpn_add_n)
+EPILOGUE()
dnl MIPS64 mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
dnl store difference in a third limb vector.
-dnl Copyright 1995, 2000, 2001, 2002 Free Software Foundation, Inc.
+dnl Copyright 1995, 2000, 2001, 2002, 2011 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
C size $7
ASM_START()
+PROLOGUE(mpn_sub_nc)
+ ld $10,0($5)
+ ld $11,0($6)
+
+ daddiu $7,$7,-1
+ and $9,$7,4-1 C number of limbs in first loop
+ beq $9,$0,.L0 C if multiple of 4 limbs, skip first loop
+ move $2,$8
+ b .Loop0
+ dsubu $7,$7,$9
+EPILOGUE()
PROLOGUE(mpn_sub_n)
ld $10,0($5)
ld $11,0($6)
sd $11,0($4)
j $31
or $2,$2,$8
-EPILOGUE(mpn_sub_n)
+EPILOGUE()
/* 552 MHz PA8600 (gcc61.fsffrance.org) */
#define DIVREM_1_NORM_THRESHOLD 3
-#define DIVREM_1_UNNORM_THRESHOLD 4
+#define DIVREM_1_UNNORM_THRESHOLD 3
#define MOD_1_NORM_THRESHOLD 3
#define MOD_1_UNNORM_THRESHOLD 4
-#define MOD_1N_TO_MOD_1_1_THRESHOLD 14
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 11
#define MOD_1U_TO_MOD_1_1_THRESHOLD 8
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD 0
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD 18
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 22
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 0 /* never mpn_mod_1_1p */
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD 22
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 28
#define USE_PREINV_DIVREM_1 1
-#define DIVREM_2_THRESHOLD 0 /* always */
#define DIVEXACT_1_THRESHOLD 0 /* always */
-#define BMOD_1_TO_MOD_1_THRESHOLD 31
+#define BMOD_1_TO_MOD_1_THRESHOLD 36
-#define MUL_TOOM22_THRESHOLD 15
-#define MUL_TOOM33_THRESHOLD 91
-#define MUL_TOOM44_THRESHOLD 154
-#define MUL_TOOM6H_THRESHOLD 204
-#define MUL_TOOM8H_THRESHOLD 482
+#define MUL_TOOM22_THRESHOLD 18
+#define MUL_TOOM33_THRESHOLD 65
+#define MUL_TOOM44_THRESHOLD 166
+#define MUL_TOOM6H_THRESHOLD 202
+#define MUL_TOOM8H_THRESHOLD 333
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD 97
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD 103
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD 109
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD 103
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD 105
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 138
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 105
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD 102
-#define SQR_BASECASE_THRESHOLD 6
-#define SQR_TOOM2_THRESHOLD 47
+#define SQR_BASECASE_THRESHOLD 7
+#define SQR_TOOM2_THRESHOLD 55
#define SQR_TOOM3_THRESHOLD 93
#define SQR_TOOM4_THRESHOLD 250
-#define SQR_TOOM6_THRESHOLD 278
-#define SQR_TOOM8_THRESHOLD 502
+#define SQR_TOOM6_THRESHOLD 306
+#define SQR_TOOM8_THRESHOLD 527
#define MULMOD_BNM1_THRESHOLD 13
#define SQRMOD_BNM1_THRESHOLD 15
#define SQR_FFT_THRESHOLD 1600
#define MULLO_BASECASE_THRESHOLD 0 /* always */
-#define MULLO_DC_THRESHOLD 90
-#define MULLO_MUL_N_THRESHOLD 4167
+#define MULLO_DC_THRESHOLD 116
+#define MULLO_MUL_N_THRESHOLD 3574
#define DC_DIV_QR_THRESHOLD 100
-#define DC_DIVAPPR_Q_THRESHOLD 342
-#define DC_BDIV_QR_THRESHOLD 119
-#define DC_BDIV_Q_THRESHOLD 246
+#define DC_DIVAPPR_Q_THRESHOLD 348
+#define DC_BDIV_QR_THRESHOLD 109
+#define DC_BDIV_Q_THRESHOLD 254
-#define INV_MULMOD_BNM1_THRESHOLD 12
-#define INV_NEWTON_THRESHOLD 274
-#define INV_APPR_THRESHOLD 268
+#define INV_MULMOD_BNM1_THRESHOLD 34
+#define INV_NEWTON_THRESHOLD 276
+#define INV_APPR_THRESHOLD 276
-#define BINV_NEWTON_THRESHOLD 327
-#define REDC_1_TO_REDC_N_THRESHOLD 70
+#define BINV_NEWTON_THRESHOLD 278
+#define REDC_1_TO_REDC_N_THRESHOLD 78
#define MU_DIV_QR_THRESHOLD 979
-#define MU_DIVAPPR_Q_THRESHOLD 1142
-#define MUPI_DIV_QR_THRESHOLD 100
-#define MU_BDIV_QR_THRESHOLD 667
+#define MU_DIVAPPR_Q_THRESHOLD 263
+#define MUPI_DIV_QR_THRESHOLD 102
+#define MU_BDIV_QR_THRESHOLD 807
#define MU_BDIV_Q_THRESHOLD 1187
-#define MATRIX22_STRASSEN_THRESHOLD 15
-#define HGCD_THRESHOLD 99
-#define GCD_DC_THRESHOLD 372
-#define GCDEXT_DC_THRESHOLD 241
+#define MATRIX22_STRASSEN_THRESHOLD 11
+#define HGCD_THRESHOLD 100
+#define GCD_DC_THRESHOLD 379
+#define GCDEXT_DC_THRESHOLD 249
#define JACOBI_BASE_METHOD 2
#define GET_STR_DC_THRESHOLD 7
-#define GET_STR_PRECOMPUTE_THRESHOLD 14
-#define SET_STR_DC_THRESHOLD 224
-#define SET_STR_PRECOMPUTE_THRESHOLD 788
+#define GET_STR_PRECOMPUTE_THRESHOLD 16
+#define SET_STR_DC_THRESHOLD 270
+#define SET_STR_PRECOMPUTE_THRESHOLD 782
#define DIVREM_1_NORM_THRESHOLD 0 /* always */
#define DIVREM_1_UNNORM_THRESHOLD 0 /* always */
+#define MOD_1_1P_METHOD 2
#define MOD_1_NORM_THRESHOLD 0 /* always */
#define MOD_1_UNNORM_THRESHOLD 0 /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD MP_SIZE_T_MAX /* never */
-#define MOD_1U_TO_MOD_1_1_THRESHOLD MP_SIZE_T_MAX
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD MP_SIZE_T_MAX
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD MP_SIZE_T_MAX
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD MP_SIZE_T_MAX /* never */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 4
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 10
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 0 /* never mpn_mod_1_1p */
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD 14
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 11
#define USE_PREINV_DIVREM_1 1
-#define DIVREM_2_THRESHOLD 0 /* always */
+#define DIV_QR_2_PI2_THRESHOLD 21
#define DIVEXACT_1_THRESHOLD 0 /* always */
#define BMOD_1_TO_MOD_1_THRESHOLD MP_SIZE_T_MAX /* never */
-#define MUL_TOOM22_THRESHOLD 30
-#define MUL_TOOM33_THRESHOLD 113
-#define MUL_TOOM44_THRESHOLD 195
+#define MUL_TOOM22_THRESHOLD 31
+#define MUL_TOOM33_THRESHOLD 114
+#define MUL_TOOM44_THRESHOLD 179
#define MUL_TOOM6H_THRESHOLD 222
-#define MUL_TOOM8H_THRESHOLD 236
+#define MUL_TOOM8H_THRESHOLD 296
#define MUL_TOOM32_TO_TOOM43_THRESHOLD 130
#define MUL_TOOM32_TO_TOOM53_THRESHOLD 229
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD 132
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 129
#define MUL_TOOM42_TO_TOOM63_THRESHOLD 54
-#define SQR_BASECASE_THRESHOLD 4
-#define SQR_TOOM2_THRESHOLD 54
-#define SQR_TOOM3_THRESHOLD 169
-#define SQR_TOOM4_THRESHOLD 280
-#define SQR_TOOM6_THRESHOLD 280
-#define SQR_TOOM8_THRESHOLD 296
+#define SQR_BASECASE_THRESHOLD 5
+#define SQR_TOOM2_THRESHOLD 58
+#define SQR_TOOM3_THRESHOLD 153
+#define SQR_TOOM4_THRESHOLD 278
+#define SQR_TOOM6_THRESHOLD 0 /* always */
+#define SQR_TOOM8_THRESHOLD 0 /* always */
+
+#define MULMID_TOOM42_THRESHOLD 56
#define MULMOD_BNM1_THRESHOLD 15
-#define SQRMOD_BNM1_THRESHOLD 17
+#define SQRMOD_BNM1_THRESHOLD 19
+
+#define POWM_SEC_TABLE 2,23,228,1084
#define MUL_FFT_MODF_THRESHOLD 336 /* k = 5 */
#define MUL_FFT_TABLE3 \
#define SQR_FFT_THRESHOLD 1856
#define MULLO_BASECASE_THRESHOLD 0 /* always */
-#define MULLO_DC_THRESHOLD 125
+#define MULLO_DC_THRESHOLD 113
#define MULLO_MUL_N_THRESHOLD 4658
#define DC_DIV_QR_THRESHOLD 123
#define DC_DIVAPPR_Q_THRESHOLD 372
#define DC_BDIV_QR_THRESHOLD 142
-#define DC_BDIV_Q_THRESHOLD 309
+#define DC_BDIV_Q_THRESHOLD 312
-#define INV_MULMOD_BNM1_THRESHOLD 56
+#define INV_MULMOD_BNM1_THRESHOLD 58
#define INV_NEWTON_THRESHOLD 315
-#define INV_APPR_THRESHOLD 318
+#define INV_APPR_THRESHOLD 315
-#define BINV_NEWTON_THRESHOLD 363
-#define REDC_1_TO_REDC_N_THRESHOLD 102
+#define BINV_NEWTON_THRESHOLD 360
+#define REDC_1_TO_REDC_N_THRESHOLD 101
#define MU_DIV_QR_THRESHOLD 979
-#define MU_DIVAPPR_Q_THRESHOLD 998
-#define MUPI_DIV_QR_THRESHOLD 0 /* always */
-#define MU_BDIV_QR_THRESHOLD 942
-#define MU_BDIV_Q_THRESHOLD 1334
+#define MU_DIVAPPR_Q_THRESHOLD 1142
+#define MUPI_DIV_QR_THRESHOLD 93
+#define MU_BDIV_QR_THRESHOLD 889
+#define MU_BDIV_Q_THRESHOLD 1187
#define MATRIX22_STRASSEN_THRESHOLD 9
-#define HGCD_THRESHOLD 240
-#define GCD_DC_THRESHOLD 689
-#define GCDEXT_DC_THRESHOLD 538
+#define HGCD_THRESHOLD 234
+#define HGCD_APPR_THRESHOLD 300
+#define HGCD_REDUCE_THRESHOLD 1553
+#define GCD_DC_THRESHOLD 684
+#define GCDEXT_DC_THRESHOLD 525
#define JACOBI_BASE_METHOD 2
#define GET_STR_DC_THRESHOLD 21
#define DC_BDIV_QR_THRESHOLD 35
#define DC_BDIV_Q_THRESHOLD 88
-#define INV_MULMOD_BNM1_THRESHOLD 76
+#define INV_MULMOD_BNM1_THRESHOLD 42
#define INV_NEWTON_THRESHOLD 149
#define INV_APPR_THRESHOLD 125
include(`../config.m4')
-C cycles/limb
-C 603e: ?
-C 604e: ? old: 3.25
-C 75x (G3): ? old: 3.5
-C 7400,7410 (G4): 3.25
-C 744x,745x (G4+): 4
-C power4/ppc970: ? old: 2.0
-C power5: ? old: 2.5
+C cycles/limb
+C 603e: ?
+C 604e: ? old: 3.25
+C 75x (G3): ? old: 3.5
+C 7400,7410 (G4): 3.25
+C 744x,745x (G4+): 4
+C POWER3/PPC630 2
+C POWER4/PPC970 2.4
+C POWER5 2.75
+C POWER6 40-140
+C POWER7 3
C INPUT PARAMETERS
define(`rp', `r3')
/* PowerPC-32 gmp-mparam.h -- Compiler/machine parameter header file.
Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2003, 2004, 2008, 2009,
-2010, 2012 Free Software Foundation, Inc.
+2010 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
#define DIVREM_1_NORM_THRESHOLD 0 /* always */
#define DIVREM_1_UNNORM_THRESHOLD 0 /* always */
+#define MOD_1_1P_METHOD 1
#define MOD_1_NORM_THRESHOLD 0 /* always */
#define MOD_1_UNNORM_THRESHOLD 0 /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD 7
-#define MOD_1U_TO_MOD_1_1_THRESHOLD 8
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD 0
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD 36
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 37
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 8
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 6
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 8
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD 49
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 18
#define USE_PREINV_DIVREM_1 1
+#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
#define DIVEXACT_1_THRESHOLD 0 /* always */
-#define BMOD_1_TO_MOD_1_THRESHOLD 69
+#define BMOD_1_TO_MOD_1_THRESHOLD 66
#define MUL_TOOM22_THRESHOLD 14
#define MUL_TOOM33_THRESHOLD 73
#define MUL_TOOM8H_THRESHOLD 236
#define MUL_TOOM32_TO_TOOM43_THRESHOLD 73
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD 71
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 72
#define MUL_TOOM42_TO_TOOM53_THRESHOLD 73
#define MUL_TOOM42_TO_TOOM63_THRESHOLD 72
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD 82
-#define SQR_BASECASE_THRESHOLD 0 /* always */
-#define SQR_TOOM2_THRESHOLD 24
+#define SQR_BASECASE_THRESHOLD 4
+#define SQR_TOOM2_THRESHOLD 26
#define SQR_TOOM3_THRESHOLD 77
-#define SQR_TOOM4_THRESHOLD 130
+#define SQR_TOOM4_THRESHOLD 136
#define SQR_TOOM6_THRESHOLD 189
#define SQR_TOOM8_THRESHOLD 284
-#define MULMOD_BNM1_THRESHOLD 10
-#define SQRMOD_BNM1_THRESHOLD 13
+#define MULMID_TOOM42_THRESHOLD 32
+
+#define MULMOD_BNM1_THRESHOLD 9
+#define SQRMOD_BNM1_THRESHOLD 14
#define MUL_FFT_MODF_THRESHOLD 284 /* k = 5 */
#define MUL_FFT_TABLE3 \
{ 1535,12}, { 3071,13}, { 1919,12}, { 3839,15}, \
{ 32768,16} }
#define MUL_FFT_TABLE3_SIZE 165
-#define MUL_FFT_THRESHOLD 3712
+#define MUL_FFT_THRESHOLD 3392
-#define SQR_FFT_MODF_THRESHOLD 248 /* k = 5 */
+#define SQR_FFT_MODF_THRESHOLD 236 /* k = 5 */
#define SQR_FFT_TABLE3 \
{ { 248, 5}, { 17, 6}, { 9, 5}, { 19, 6}, \
{ 17, 7}, { 9, 6}, { 20, 7}, { 11, 6}, \
{ 767,13}, { 1535,12}, { 3199,13}, { 1919,15}, \
{ 32768,16} }
#define SQR_FFT_TABLE3_SIZE 153
-#define SQR_FFT_THRESHOLD 2688
+#define SQR_FFT_THRESHOLD 2368
#define MULLO_BASECASE_THRESHOLD 0 /* always */
#define MULLO_DC_THRESHOLD 45
#define MULLO_MUL_N_THRESHOLD 6633
#define DC_DIV_QR_THRESHOLD 43
-#define DC_DIVAPPR_Q_THRESHOLD 154
-#define DC_BDIV_QR_THRESHOLD 55
+#define DC_DIVAPPR_Q_THRESHOLD 153
+#define DC_BDIV_QR_THRESHOLD 54
#define DC_BDIV_Q_THRESHOLD 124
#define INV_MULMOD_BNM1_THRESHOLD 42
#define INV_NEWTON_THRESHOLD 179
#define INV_APPR_THRESHOLD 157
-#define BINV_NEWTON_THRESHOLD 232
+#define BINV_NEWTON_THRESHOLD 204
#define REDC_1_TO_REDC_N_THRESHOLD 54
-#define MU_DIV_QR_THRESHOLD 1057
-#define MU_DIVAPPR_Q_THRESHOLD 1142
-#define MUPI_DIV_QR_THRESHOLD 83
-#define MU_BDIV_QR_THRESHOLD 872
-#define MU_BDIV_Q_THRESHOLD 1142
+#define MU_DIV_QR_THRESHOLD 998
+#define MU_DIVAPPR_Q_THRESHOLD 1037
+#define MUPI_DIV_QR_THRESHOLD 84
+#define MU_BDIV_QR_THRESHOLD 748
+#define MU_BDIV_Q_THRESHOLD 942
+
+#define POWM_SEC_TABLE 4,23,164,616,1812
-#define MATRIX22_STRASSEN_THRESHOLD 15
-#define HGCD_THRESHOLD 122
+#define MATRIX22_STRASSEN_THRESHOLD 11
+#define HGCD_THRESHOLD 118
+#define HGCD_APPR_THRESHOLD 167
+#define HGCD_REDUCE_THRESHOLD 1679
#define GCD_DC_THRESHOLD 339
-#define GCDEXT_DC_THRESHOLD 278
-#define JACOBI_BASE_METHOD 1
+#define GCDEXT_DC_THRESHOLD 273
+#define JACOBI_BASE_METHOD 4
-#define GET_STR_DC_THRESHOLD 17
-#define GET_STR_PRECOMPUTE_THRESHOLD 38
+#define GET_STR_DC_THRESHOLD 12
+#define GET_STR_PRECOMPUTE_THRESHOLD 27
#define SET_STR_DC_THRESHOLD 781
#define SET_STR_PRECOMPUTE_THRESHOLD 1505
+
+#define FAC_DSC_THRESHOLD 141
+#define FAC_ODD_THRESHOLD 34
--- /dev/null
+dnl PowerPC-32 mpn_invert_limb -- Invert a normalized limb.
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C 603e: ?
+C 604e: ?
+C 75x (G3): ?
+C 7400,7410 (G4): ?
+C 744x,745x (G4+): 32
+C power4/ppc970: ?
+C power5: ?
+
+EXTERN(approx_tab)
+
+ASM_START()
+PROLOGUE(mpn_invert_limb)
+C Input d in r3, result returned in r3.  A 16-bit table value w2 selected by
+C bits 30..22 of d is refined in two steps (w1, then w0) before the final
+C adjustment.
+ rlwinm r6, r3, 11, 22, 30 C extract bits 30..22 to pos 2^1 (byte offset into the halfword table)
+ srwi r10, r3, 11 C extract bits 31..11
+ LEA( r9, approx_tab) C N.B. clobbers r0 for ELF and Darwin
+ lhzx r9, r9, r6 C w2 (zero-extended halfword from approx_tab)
+ addi r0, r10, 1 C (d >> 11) + 1
+ mullw r11, r9, r9 C w2 * w2
+ slwi r9, r9, 4 C w2 << 4
+ mulhwu r7, r11, r0 C high word of w2 * w2 * ((d >> 11) + 1)
+ rlwinm r11, r3, 0, 31, 31 C extract bit 0
+ addi r0, r9, -1 C (w2 << 4) - 1
+ srwi r9, r3, 1 C d >> 1
+ subf r0, r7, r0 C w1
+ add r9, r9, r11 C d31
+ mullw r9, r0, r9 C w1 * d31
+ srwi r10, r0, 1 C w1 >> 1
+ neg r11, r11 C all ones if bit 0 of d is set, else 0
+ and r11, r10, r11 C w1 >> 1 if bit 0 of d is set, else 0
+ subf r11, r9, r11 C that value minus w1 * d31
+ mulhwu r9, r11, r0 C high word of the difference times w1
+ slwi r0, r0, 15 C w1 << 15
+ srwi r9, r9, 1 C halve the high word
+ add r0, r9, r0 C w0
+ mullw r10, r0, r3 C low word of w0 * d
+ mulhwu r9, r0, r3 C high word of w0 * d
+ addc r11, r10, r3 C low word + d, only the carry out is used
+ adde r3, r9, r3 C high word + d + carry
+ subf r3, r3, r0 C result = w0 - (high word + d + carry)
+ blr
+EPILOGUE()
+
+DEF_OBJECT(approx_tab)
+ .short 0x7fe1,0x7fa1,0x7f61,0x7f22,0x7ee3,0x7ea4,0x7e65,0x7e27
+ .short 0x7de9,0x7dab,0x7d6d,0x7d30,0x7cf3,0x7cb6,0x7c79,0x7c3d
+ .short 0x7c00,0x7bc4,0x7b89,0x7b4d,0x7b12,0x7ad7,0x7a9c,0x7a61
+ .short 0x7a27,0x79ec,0x79b2,0x7979,0x793f,0x7906,0x78cc,0x7894
+ .short 0x785b,0x7822,0x77ea,0x77b2,0x777a,0x7742,0x770b,0x76d3
+ .short 0x769c,0x7665,0x762f,0x75f8,0x75c2,0x758c,0x7556,0x7520
+ .short 0x74ea,0x74b5,0x7480,0x744b,0x7416,0x73e2,0x73ad,0x7379
+ .short 0x7345,0x7311,0x72dd,0x72aa,0x7277,0x7243,0x7210,0x71de
+ .short 0x71ab,0x7179,0x7146,0x7114,0x70e2,0x70b1,0x707f,0x704e
+ .short 0x701c,0x6feb,0x6fba,0x6f8a,0x6f59,0x6f29,0x6ef9,0x6ec8
+ .short 0x6e99,0x6e69,0x6e39,0x6e0a,0x6ddb,0x6dab,0x6d7d,0x6d4e
+ .short 0x6d1f,0x6cf1,0x6cc2,0x6c94,0x6c66,0x6c38,0x6c0a,0x6bdd
+ .short 0x6bb0,0x6b82,0x6b55,0x6b28,0x6afb,0x6acf,0x6aa2,0x6a76
+ .short 0x6a49,0x6a1d,0x69f1,0x69c6,0x699a,0x696e,0x6943,0x6918
+ .short 0x68ed,0x68c2,0x6897,0x686c,0x6842,0x6817,0x67ed,0x67c3
+ .short 0x6799,0x676f,0x6745,0x671b,0x66f2,0x66c8,0x669f,0x6676
+ .short 0x664d,0x6624,0x65fc,0x65d3,0x65aa,0x6582,0x655a,0x6532
+ .short 0x650a,0x64e2,0x64ba,0x6493,0x646b,0x6444,0x641c,0x63f5
+ .short 0x63ce,0x63a7,0x6381,0x635a,0x6333,0x630d,0x62e7,0x62c1
+ .short 0x629a,0x6275,0x624f,0x6229,0x6203,0x61de,0x61b8,0x6193
+ .short 0x616e,0x6149,0x6124,0x60ff,0x60da,0x60b6,0x6091,0x606d
+ .short 0x6049,0x6024,0x6000,0x5fdc,0x5fb8,0x5f95,0x5f71,0x5f4d
+ .short 0x5f2a,0x5f07,0x5ee3,0x5ec0,0x5e9d,0x5e7a,0x5e57,0x5e35
+ .short 0x5e12,0x5def,0x5dcd,0x5dab,0x5d88,0x5d66,0x5d44,0x5d22
+ .short 0x5d00,0x5cde,0x5cbd,0x5c9b,0x5c7a,0x5c58,0x5c37,0x5c16
+ .short 0x5bf5,0x5bd4,0x5bb3,0x5b92,0x5b71,0x5b51,0x5b30,0x5b10
+ .short 0x5aef,0x5acf,0x5aaf,0x5a8f,0x5a6f,0x5a4f,0x5a2f,0x5a0f
+ .short 0x59ef,0x59d0,0x59b0,0x5991,0x5972,0x5952,0x5933,0x5914
+ .short 0x58f5,0x58d6,0x58b7,0x5899,0x587a,0x585b,0x583d,0x581f
+ .short 0x5800,0x57e2,0x57c4,0x57a6,0x5788,0x576a,0x574c,0x572e
+ .short 0x5711,0x56f3,0x56d5,0x56b8,0x569b,0x567d,0x5660,0x5643
+ .short 0x5626,0x5609,0x55ec,0x55cf,0x55b2,0x5596,0x5579,0x555d
+ .short 0x5540,0x5524,0x5507,0x54eb,0x54cf,0x54b3,0x5497,0x547b
+ .short 0x545f,0x5443,0x5428,0x540c,0x53f0,0x53d5,0x53b9,0x539e
+ .short 0x5383,0x5368,0x534c,0x5331,0x5316,0x52fb,0x52e0,0x52c6
+ .short 0x52ab,0x5290,0x5276,0x525b,0x5240,0x5226,0x520c,0x51f1
+ .short 0x51d7,0x51bd,0x51a3,0x5189,0x516f,0x5155,0x513b,0x5121
+ .short 0x5108,0x50ee,0x50d5,0x50bb,0x50a2,0x5088,0x506f,0x5056
+ .short 0x503c,0x5023,0x500a,0x4ff1,0x4fd8,0x4fbf,0x4fa6,0x4f8e
+ .short 0x4f75,0x4f5c,0x4f44,0x4f2b,0x4f13,0x4efa,0x4ee2,0x4eca
+ .short 0x4eb1,0x4e99,0x4e81,0x4e69,0x4e51,0x4e39,0x4e21,0x4e09
+ .short 0x4df1,0x4dda,0x4dc2,0x4daa,0x4d93,0x4d7b,0x4d64,0x4d4d
+ .short 0x4d35,0x4d1e,0x4d07,0x4cf0,0x4cd8,0x4cc1,0x4caa,0x4c93
+ .short 0x4c7d,0x4c66,0x4c4f,0x4c38,0x4c21,0x4c0b,0x4bf4,0x4bde
+ .short 0x4bc7,0x4bb1,0x4b9a,0x4b84,0x4b6e,0x4b58,0x4b41,0x4b2b
+ .short 0x4b15,0x4aff,0x4ae9,0x4ad3,0x4abd,0x4aa8,0x4a92,0x4a7c
+ .short 0x4a66,0x4a51,0x4a3b,0x4a26,0x4a10,0x49fb,0x49e5,0x49d0
+ .short 0x49bb,0x49a6,0x4990,0x497b,0x4966,0x4951,0x493c,0x4927
+ .short 0x4912,0x48fe,0x48e9,0x48d4,0x48bf,0x48ab,0x4896,0x4881
+ .short 0x486d,0x4858,0x4844,0x482f,0x481b,0x4807,0x47f3,0x47de
+ .short 0x47ca,0x47b6,0x47a2,0x478e,0x477a,0x4766,0x4752,0x473e
+ .short 0x472a,0x4717,0x4703,0x46ef,0x46db,0x46c8,0x46b4,0x46a1
+ .short 0x468d,0x467a,0x4666,0x4653,0x4640,0x462c,0x4619,0x4606
+ .short 0x45f3,0x45e0,0x45cd,0x45ba,0x45a7,0x4594,0x4581,0x456e
+ .short 0x455b,0x4548,0x4536,0x4523,0x4510,0x44fe,0x44eb,0x44d8
+ .short 0x44c6,0x44b3,0x44a1,0x448f,0x447c,0x446a,0x4458,0x4445
+ .short 0x4433,0x4421,0x440f,0x43fd,0x43eb,0x43d9,0x43c7,0x43b5
+ .short 0x43a3,0x4391,0x437f,0x436d,0x435c,0x434a,0x4338,0x4327
+ .short 0x4315,0x4303,0x42f2,0x42e0,0x42cf,0x42bd,0x42ac,0x429b
+ .short 0x4289,0x4278,0x4267,0x4256,0x4244,0x4233,0x4222,0x4211
+ .short 0x4200,0x41ef,0x41de,0x41cd,0x41bc,0x41ab,0x419a,0x418a
+ .short 0x4179,0x4168,0x4157,0x4147,0x4136,0x4125,0x4115,0x4104
+ .short 0x40f4,0x40e3,0x40d3,0x40c2,0x40b2,0x40a2,0x4091,0x4081
+ .short 0x4071,0x4061,0x4050,0x4040,0x4030,0x4020,0x4010,0x4000
+END_OBJECT(approx_tab)
+ASM_END()
ASM_START()
PROLOGUE(mpn_lshift)
- cmpwi cr0, r5, 12 C more than 12 limbs?
+ cmpwi cr0, r5, 30 C more than 30 limbs?
slwi r0, r5, 2
add r4, r4, r0 C make r4 point at end of s1
add r7, r3, r0 C make r7 point at end of res
stw r12, -20(r7)
lmw r24, -32(r1) C restore registers
blr
-EPILOGUE(mpn_lshift)
+EPILOGUE()
--- /dev/null
+dnl PowerPC-32 mpn_lshiftc.
+
+dnl Copyright 1995, 1998, 2000, 2002, 2003, 2004, 2005, 2010 Free Software
+dnl Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C 603e: ?
+C 604e: 3.0
+C 75x (G3): 3.0
+C 7400,7410 (G4): 3.0
+C 7445,7455 (G4+): 2.5
+C 7447,7457 (G4+): 2.25
+C power4/ppc970: 2.5
+C power5: 2.5
+
+C INPUT PARAMETERS
+C rp r3
+C up r4
+C n r5
+C cnt r6
+
+ASM_START()
+PROLOGUE(mpn_lshiftc)
+C Store the bitwise complement of {up,n} shifted left by cnt bits at rp,
+C walking from the highest-addressed limb downwards.  The bits shifted out
+C of the first limb loaded are returned in r3, not complemented.
+ cmpwi cr0, r5, 30 C more than 30 limbs?
+ slwi r0, r5, 2 C n * 4 = operand size in bytes
+ add r4, r4, r0 C make r4 point at end of s1
+ add r7, r3, r0 C make r7 point at end of res
+ bgt L(BIG) C branch if more than 30 limbs
+
+ mtctr r5 C copy size into CTR
+ subfic r8, r6, 32 C r8 = 32 - cnt
+ lwzu r11, -4(r4) C load first s1 limb
+ srw r3, r11, r8 C compute function return value
+ bdz L(end1) C single limb: only the tail store remains
+
+L(oop): lwzu r10, -4(r4) C next lower limb
+ slw r9, r11, r6
+ srw r12, r10, r8
+ nor r9, r9, r12 C complement of the combined limb
+ stwu r9, -4(r7)
+ bdz L(end2)
+ lwzu r11, -4(r4) C same step with r10 and r11 swapped
+ slw r9, r10, r6
+ srw r12, r11, r8
+ nor r9, r9, r12 C complement of the combined limb
+ stwu r9, -4(r7)
+ bdnz L(oop)
+
+L(end1):
+ slw r0, r11, r6 C last limb has nothing below it to shift in
+ nor r0, r0, r0 C complement
+ stw r0, -4(r7)
+ blr
+L(end2):
+ slw r0, r10, r6 C last limb has nothing below it to shift in
+ nor r0, r0, r0 C complement
+ stw r0, -4(r7)
+ blr
+
+L(BIG):
+ stmw r24, -32(r1) C save registers we are supposed to preserve
+ lwzu r9, -4(r4)
+ subfic r8, r6, 32 C r8 = 32 - cnt
+ srw r3, r9, r8 C compute function return value
+ slw r0, r9, r6 C r0 carries the pending high part between steps
+ addi r5, r5, -1 C first limb is already loaded
+
+ andi. r10, r5, 3 C count for spill loop
+ beq L(e)
+ mtctr r10
+ lwzu r28, -4(r4)
+ bdz L(xe0)
+
+L(loop0):
+ slw r12, r28, r6
+ srw r24, r28, r8
+ lwzu r28, -4(r4)
+ nor r24, r0, r24 C complement of pending high part combined with new low part
+ stwu r24, -4(r7)
+ mr r0, r12
+ bdnz L(loop0) C taken at most once!
+
+L(xe0): slw r12, r28, r6
+ srw r24, r28, r8
+ nor r24, r0, r24
+ stwu r24, -4(r7)
+ mr r0, r12
+
+L(e): srwi r5, r5, 2 C count for unrolled loop
+ addi r5, r5, -1 C the final group of 4 is finished after the loop
+ mtctr r5
+ lwz r28, -4(r4)
+ lwz r29, -8(r4)
+ lwz r30, -12(r4)
+ lwzu r31, -16(r4)
+
+L(loopU):
+ slw r9, r28, r6
+ srw r24, r28, r8
+ lwz r28, -4(r4)
+ slw r10, r29, r6
+ srw r25, r29, r8
+ lwz r29, -8(r4)
+ slw r11, r30, r6
+ srw r26, r30, r8
+ lwz r30, -12(r4)
+ slw r12, r31, r6
+ srw r27, r31, r8
+ lwzu r31, -16(r4)
+ nor r24, r0, r24 C complement and store four result limbs
+ stw r24, -4(r7)
+ nor r25, r9, r25
+ stw r25, -8(r7)
+ nor r26, r10, r26
+ stw r26, -12(r7)
+ nor r27, r11, r27
+ stwu r27, -16(r7)
+ mr r0, r12 C pending high part for the next group
+ bdnz L(loopU)
+
+ slw r9, r28, r6 C final group: same as the loop body without further loads
+ srw r24, r28, r8
+ slw r10, r29, r6
+ srw r25, r29, r8
+ slw r11, r30, r6
+ srw r26, r30, r8
+ slw r12, r31, r6
+ srw r27, r31, r8
+ nor r24, r0, r24
+ stw r24, -4(r7)
+ nor r25, r9, r25
+ stw r25, -8(r7)
+ nor r26, r10, r26
+ stw r26, -12(r7)
+ nor r27, r11, r27
+ stw r27, -16(r7)
+ nor r12, r12, r12 C last limb has nothing below it to shift in
+ stw r12, -20(r7)
+ lmw r24, -32(r1) C restore registers
+ blr
+EPILOGUE()
--- /dev/null
+dnl PowerPC-32 mpn_add_n/mpn_sub_n -- mpn addition and subtraction.
+
+dnl Copyright 1999, 2000, 2001, 2003, 2004, 2005, 2007, 2011 Free Software
+dnl Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C POWER3/PPC630 1.5
+C POWER4/PPC970 2
+C POWER5 2
+C POWER6 2.78
+C POWER7 2.15-2.87
+
+C This code is based on powerpc64/aors_n.asm.
+
+C INPUT PARAMETERS
+C rp r3
+C up r4
+C vp r5
+C n r6
+
+C Select the add or subtract flavour.  ADDSUBC is the carry-consuming
+C instruction applied to every limb, GENRVAL converts the 0 or -1 left by the
+C final subfe into the 0 or 1 return value, SETCBR initialises CA from a
+C register argument and CLRCB initialises CA for the entry without carry-in.
+C ADDSUB is defined but not referenced by the code below.
+ifdef(`OPERATION_add_n',`
+ define(ADDSUBC, adde)
+ define(ADDSUB, addc)
+ define(func, mpn_add_n)
+ define(func_nc, mpn_add_nc)
+ define(GENRVAL, `addi r3, r3, 1')
+ define(SETCBR, `addic r0, $1, -1')
+ define(CLRCB, `addic r0, r0, 0')
+')
+ifdef(`OPERATION_sub_n',`
+ define(ADDSUBC, subfe)
+ define(ADDSUB, subfc)
+ define(func, mpn_sub_n)
+ define(func_nc, mpn_sub_nc)
+ define(GENRVAL, `neg r3, r3')
+ define(SETCBR, `subfic r0, $1, 0')
+ define(CLRCB, `addic r0, r1, -1')
+')
+
+MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
+
+ASM_START()
+PROLOGUE(func_nc)
+ SETCBR(r7) C initialise CA from the value in r7
+ b L(ent)
+EPILOGUE()
+
+PROLOGUE(func)
+ CLRCB C initialise CA for no incoming carry or borrow
+L(ent): stw r31, -4(r1) C save r28-r31 below the stack pointer
+ stw r30, -8(r1)
+ stw r29, -12(r1)
+ stw r28, -16(r1)
+
+ rlwinm. r0, r6, 0,30,31 C r0 = n & 3, set cr0
+ cmpwi cr6, r0, 2
+ addi r6, r6, 3 C compute count...
+ srwi r6, r6, 2 C ...for ctr
+ mtctr r6 C copy count into ctr
+ beq cr0, L(b00) C n & 3 == 0
+ blt cr6, L(b01) C n & 3 == 1
+ beq cr6, L(b10) C n & 3 == 2, otherwise fall through with n & 3 == 3
+
+L(b11): lwz r8, 0(r4) C load s1 limb
+ lwz r9, 0(r5) C load s2 limb
+ lwz r10, 4(r4) C load s1 limb
+ lwz r11, 4(r5) C load s2 limb
+ lwz r12, 8(r4) C load s1 limb
+ addi r4, r4, 12
+ lwz r0, 8(r5) C load s2 limb
+ addi r5, r5, 12
+ ADDSUBC r29, r9, r8 C add/sub with carry
+ ADDSUBC r30, r11, r10
+ ADDSUBC r31, r0, r12
+ stw r29, 0(r3)
+ stw r30, 4(r3)
+ stw r31, 8(r3)
+ addi r3, r3, 12
+ bdnz L(go) C more limbs remain: continue in groups of 4
+ b L(ret)
+
+L(b01): lwz r12, 0(r4) C load s1 limb
+ addi r4, r4, 4
+ lwz r0, 0(r5) C load s2 limb
+ addi r5, r5, 4
+ ADDSUBC r31, r0, r12 C add/sub with carry
+ stw r31, 0(r3)
+ addi r3, r3, 4
+ bdnz L(go) C more limbs remain: continue in groups of 4
+ b L(ret)
+
+L(b10): lwz r10, 0(r4) C load s1 limb
+ lwz r11, 0(r5) C load s2 limb
+ lwz r12, 4(r4) C load s1 limb
+ addi r4, r4, 8
+ lwz r0, 4(r5) C load s2 limb
+ addi r5, r5, 8
+ ADDSUBC r30, r11, r10 C add/sub with carry
+ ADDSUBC r31, r0, r12 C add/sub with carry
+ stw r30, 0(r3)
+ stw r31, 4(r3)
+ addi r3, r3, 8
+ bdnz L(go) C more limbs remain: continue in groups of 4
+ b L(ret)
+
+L(b00): C n & 3 == 0: CA was initialised at entry, fall into the group-of-4 code
+L(go): lwz r6, 0(r4) C load s1 limb
+ lwz r7, 0(r5) C load s2 limb
+ lwz r8, 4(r4) C load s1 limb
+ lwz r9, 4(r5) C load s2 limb
+ lwz r10, 8(r4) C load s1 limb
+ lwz r11, 8(r5) C load s2 limb
+ lwz r12, 12(r4) C load s1 limb
+ lwz r0, 12(r5) C load s2 limb
+ bdz L(end) C this was the last group: skip the loop
+
+ addi r4, r4, 16
+ addi r5, r5, 16
+
+ ALIGN(16)
+L(top): ADDSUBC r28, r7, r6 C combine the loaded group while loading the next one
+ lwz r6, 0(r4) C load s1 limb
+ lwz r7, 0(r5) C load s2 limb
+ ADDSUBC r29, r9, r8
+ lwz r8, 4(r4) C load s1 limb
+ lwz r9, 4(r5) C load s2 limb
+ ADDSUBC r30, r11, r10
+ lwz r10, 8(r4) C load s1 limb
+ lwz r11, 8(r5) C load s2 limb
+ ADDSUBC r31, r0, r12
+ lwz r12, 12(r4) C load s1 limb
+ lwz r0, 12(r5) C load s2 limb
+ stw r28, 0(r3)
+ addi r4, r4, 16
+ stw r29, 4(r3)
+ addi r5, r5, 16
+ stw r30, 8(r3)
+ stw r31, 12(r3)
+ addi r3, r3, 16
+ bdnz L(top) C decrement ctr and loop back
+
+L(end): ADDSUBC r28, r7, r6 C final group, already loaded
+ ADDSUBC r29, r9, r8
+ ADDSUBC r30, r11, r10
+ ADDSUBC r31, r0, r12
+ stw r28, 0(r3)
+ stw r29, 4(r3)
+ stw r30, 8(r3)
+ stw r31, 12(r3)
+
+L(ret): lwz r31, -4(r1) C restore r28-r31
+ lwz r30, -8(r1)
+ lwz r29, -12(r1)
+ lwz r28, -16(r1)
+
+ subfe r3, r0, r0 C r3 = CA - 1, i.e. 0 or -1
+ GENRVAL C convert to the 0 or 1 return value
+ blr
+EPILOGUE()
--- /dev/null
+/* PowerPC-32 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2003, 2004, 2008, 2009,
+2010, 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#define GMP_LIMB_BITS 32
+#define BYTES_PER_MP_LIMB 4
+
+/* 450 MHz POWER3 */
+
+#define DIVREM_1_NORM_THRESHOLD 0 /* always */
+#define DIVREM_1_UNNORM_THRESHOLD 0 /* always */
+#define MOD_1_1P_METHOD 2
+#define MOD_1_NORM_THRESHOLD 0 /* always */
+#define MOD_1_UNNORM_THRESHOLD 0 /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 3
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 2
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 12
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD 18
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 8
+#define USE_PREINV_DIVREM_1 1
+#define DIVEXACT_1_THRESHOLD 0 /* always */
+#define BMOD_1_TO_MOD_1_THRESHOLD MP_SIZE_T_MAX /* never */
+
+#define MUL_TOOM22_THRESHOLD 10
+#define MUL_TOOM33_THRESHOLD 38
+#define MUL_TOOM44_THRESHOLD 58
+#define MUL_TOOM6H_THRESHOLD 129
+#define MUL_TOOM8H_THRESHOLD 212
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD 65
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 63
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 59
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD 64
+
+#define SQR_BASECASE_THRESHOLD 0 /* always */
+#define SQR_TOOM2_THRESHOLD 14
+#define SQR_TOOM3_THRESHOLD 53
+#define SQR_TOOM4_THRESHOLD 76
+#define SQR_TOOM6_THRESHOLD 106
+#define SQR_TOOM8_THRESHOLD 284
+
+#define MULMOD_BNM1_THRESHOLD 9
+#define SQRMOD_BNM1_THRESHOLD 9
+
+#define MUL_FFT_MODF_THRESHOLD 220 /* k = 5 */
+#define MUL_FFT_TABLE3 \
+ { { 220, 5}, { 13, 6}, { 7, 5}, { 15, 6}, \
+ { 9, 5}, { 19, 6}, { 13, 7}, { 7, 6}, \
+ { 16, 7}, { 13, 8}, { 7, 7}, { 19, 8}, \
+ { 11, 7}, { 23, 9}, { 7, 8}, { 15, 7}, \
+ { 33, 8}, { 23, 9}, { 15, 8}, { 35, 9}, \
+ { 23,10}, { 15, 9}, { 31, 8}, { 67, 9}, \
+ { 39, 8}, { 79, 9}, { 47,10}, { 31, 9}, \
+ { 63, 8}, { 127, 9}, { 71, 8}, { 143, 9}, \
+ { 79,10}, { 47,11}, { 31,10}, { 63, 9}, \
+ { 127, 8}, { 255, 9}, { 143,10}, { 79, 9}, \
+ { 159, 8}, { 319, 9}, { 175, 8}, { 351,10}, \
+ { 95, 9}, { 191, 8}, { 383,10}, { 111,11}, \
+ { 63,10}, { 127, 9}, { 255,10}, { 143, 9}, \
+ { 287, 8}, { 575,10}, { 159, 9}, { 319,10}, \
+ { 175, 9}, { 351,11}, { 95,10}, { 191, 9}, \
+ { 383,10}, { 207, 9}, { 415,12}, { 63,11}, \
+ { 127,10}, { 255, 9}, { 511,10}, { 287, 9}, \
+ { 575,11}, { 159,10}, { 351, 9}, { 703, 8}, \
+ { 1407,11}, { 191,10}, { 415,11}, { 223,10}, \
+ { 447, 9}, { 895,12}, { 4096,13}, { 8192,14}, \
+ { 16384,15}, { 32768,16} }
+#define MUL_FFT_TABLE3_SIZE 82
+#define MUL_FFT_THRESHOLD 2688
+
+#define SQR_FFT_MODF_THRESHOLD 176 /* k = 5 */
+#define SQR_FFT_TABLE3 \
+ { { 176, 5}, { 13, 6}, { 7, 5}, { 15, 6}, \
+ { 13, 7}, { 7, 6}, { 16, 7}, { 9, 6}, \
+ { 19, 7}, { 11, 6}, { 23, 7}, { 13, 8}, \
+ { 7, 7}, { 19, 8}, { 11, 7}, { 23, 9}, \
+ { 7, 8}, { 15, 7}, { 31, 8}, { 23, 9}, \
+ { 15, 8}, { 39, 9}, { 23,10}, { 15, 9}, \
+ { 31, 8}, { 63, 9}, { 39, 8}, { 79, 9}, \
+ { 47, 8}, { 95,10}, { 31, 9}, { 63, 8}, \
+ { 127, 9}, { 71, 8}, { 143, 7}, { 287, 6}, \
+ { 575, 9}, { 79, 8}, { 159,10}, { 47, 9}, \
+ { 95,11}, { 31,10}, { 63, 9}, { 127, 8}, \
+ { 255, 9}, { 143, 8}, { 287, 7}, { 575,10}, \
+ { 79, 9}, { 159, 8}, { 319, 9}, { 175,10}, \
+ { 95, 9}, { 191, 8}, { 383,10}, { 111, 9}, \
+ { 223,11}, { 63,10}, { 127, 9}, { 255,10}, \
+ { 143, 9}, { 287, 8}, { 575,10}, { 159, 9}, \
+ { 319,10}, { 175,11}, { 95,10}, { 191, 9}, \
+ { 383,10}, { 223,12}, { 63,11}, { 127,10}, \
+ { 287, 9}, { 575,11}, { 159,10}, { 351, 9}, \
+ { 703, 8}, { 1407,11}, { 191,10}, { 383,11}, \
+ { 223,10}, { 447, 9}, { 895,12}, { 4096,13}, \
+ { 8192,14}, { 16384,15}, { 32768,16} }
+#define SQR_FFT_TABLE3_SIZE 87
+#define SQR_FFT_THRESHOLD 1728
+
+#define MULLO_BASECASE_THRESHOLD 2
+#define MULLO_DC_THRESHOLD 33
+#define MULLO_MUL_N_THRESHOLD 5240
+
+#define DC_DIV_QR_THRESHOLD 32
+#define DC_DIVAPPR_Q_THRESHOLD 123
+#define DC_BDIV_QR_THRESHOLD 34
+#define DC_BDIV_Q_THRESHOLD 84
+
+#define INV_MULMOD_BNM1_THRESHOLD 42
+#define INV_NEWTON_THRESHOLD 129
+#define INV_APPR_THRESHOLD 124
+
+#define BINV_NEWTON_THRESHOLD 148
+#define REDC_1_TO_REDC_N_THRESHOLD 38
+
+#define MU_DIV_QR_THRESHOLD 748
+#define MU_DIVAPPR_Q_THRESHOLD 748
+#define MUPI_DIV_QR_THRESHOLD 59
+#define MU_BDIV_QR_THRESHOLD 562
+#define MU_BDIV_Q_THRESHOLD 654
+
+#define MATRIX22_STRASSEN_THRESHOLD 11
+#define HGCD_THRESHOLD 76
+#define GCD_DC_THRESHOLD 205
+#define GCDEXT_DC_THRESHOLD 174
+#define JACOBI_BASE_METHOD 1
+
+#define GET_STR_DC_THRESHOLD 14
+#define GET_STR_PRECOMPUTE_THRESHOLD 27
+#define SET_STR_DC_THRESHOLD 181
+#define SET_STR_PRECOMPUTE_THRESHOLD 525
--- /dev/null
+/* PowerPC-32 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2003, 2004, 2008, 2009,
+2010, 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+/* 1800 MHz PowerPC-970 */
+
+#define DIVREM_1_NORM_THRESHOLD 0 /* always */
+#define DIVREM_1_UNNORM_THRESHOLD 0 /* always */
+#define MOD_1_1P_METHOD 1
+#define MOD_1_NORM_THRESHOLD 0 /* always */
+#define MOD_1_UNNORM_THRESHOLD 0 /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 7
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 6
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 9
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD 42
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 14
+#define USE_PREINV_DIVREM_1 1
+#define DIVEXACT_1_THRESHOLD 0 /* always */
+#define BMOD_1_TO_MOD_1_THRESHOLD 46
+
+#define MUL_TOOM22_THRESHOLD 20
+#define MUL_TOOM33_THRESHOLD 73
+#define MUL_TOOM44_THRESHOLD 121
+#define MUL_TOOM6H_THRESHOLD 222
+#define MUL_TOOM8H_THRESHOLD 363
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD 84
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 107
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 81
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD 88
+
+#define SQR_BASECASE_THRESHOLD 0 /* always */
+#define SQR_TOOM2_THRESHOLD 30
+#define SQR_TOOM3_THRESHOLD 74
+#define SQR_TOOM4_THRESHOLD 160
+#define SQR_TOOM6_THRESHOLD 222
+#define SQR_TOOM8_THRESHOLD 357
+
+#define MULMOD_BNM1_THRESHOLD 16
+#define SQRMOD_BNM1_THRESHOLD 18
+
+#define MUL_FFT_MODF_THRESHOLD 444 /* k = 5 */
+#define MUL_FFT_TABLE3 \
+ { { 444, 5}, { 15, 6}, { 8, 5}, { 17, 6}, \
+ { 9, 5}, { 19, 6}, { 10, 5}, { 21, 6}, \
+ { 21, 7}, { 11, 6}, { 24, 7}, { 13, 6}, \
+ { 27, 7}, { 15, 6}, { 31, 7}, { 21, 8}, \
+ { 11, 7}, { 27, 8}, { 15, 7}, { 33, 8}, \
+ { 19, 7}, { 39, 8}, { 23, 7}, { 47, 8}, \
+ { 27, 9}, { 15, 8}, { 39, 9}, { 23, 8}, \
+ { 51,10}, { 15, 9}, { 31, 8}, { 67, 9}, \
+ { 39, 8}, { 79, 9}, { 47, 8}, { 95, 9}, \
+ { 55,10}, { 31, 9}, { 79,10}, { 47, 9}, \
+ { 95,11}, { 31,10}, { 63, 9}, { 135,10}, \
+ { 79, 9}, { 159, 8}, { 319,10}, { 95, 8}, \
+ { 383,10}, { 111,11}, { 63,10}, { 127, 9}, \
+ { 255, 8}, { 511,10}, { 143, 9}, { 287, 8}, \
+ { 575, 9}, { 303,10}, { 159, 9}, { 319,11}, \
+ { 95, 9}, { 383,12}, { 63,11}, { 127,10}, \
+ { 255, 9}, { 511,10}, { 271, 9}, { 543, 8}, \
+ { 1087,10}, { 287, 9}, { 575,10}, { 303,11}, \
+ { 159,10}, { 319, 9}, { 639,10}, { 335, 9}, \
+ { 671,10}, { 351, 9}, { 703, 8}, { 1407,10}, \
+ { 383, 9}, { 767,10}, { 415, 9}, { 831,11}, \
+ { 223,10}, { 447,12}, { 4096,13}, { 8192,14}, \
+ { 16384,15}, { 32768,16} }
+#define MUL_FFT_TABLE3_SIZE 90
+#define MUL_FFT_THRESHOLD 4736
+
+#define SQR_FFT_MODF_THRESHOLD 308 /* k = 5 */
+#define SQR_FFT_TABLE3 \
+ { { 308, 5}, { 15, 6}, { 8, 5}, { 19, 6}, \
+ { 10, 5}, { 21, 6}, { 21, 7}, { 11, 6}, \
+ { 24, 7}, { 13, 6}, { 27, 7}, { 15, 6}, \
+ { 31, 7}, { 21, 8}, { 11, 7}, { 27, 8}, \
+ { 15, 7}, { 33, 8}, { 19, 7}, { 39, 8}, \
+ { 27, 9}, { 15, 8}, { 39, 9}, { 23, 8}, \
+ { 47,10}, { 15, 9}, { 31, 8}, { 63, 9}, \
+ { 39, 8}, { 79, 9}, { 47,10}, { 31, 9}, \
+ { 79,10}, { 47, 9}, { 95,11}, { 31,10}, \
+ { 63, 9}, { 127, 8}, { 255, 9}, { 135,10}, \
+ { 79, 9}, { 159, 8}, { 319, 9}, { 175,10}, \
+ { 95, 9}, { 191, 8}, { 383, 9}, { 207,11}, \
+ { 63,10}, { 127, 9}, { 255, 8}, { 511, 9}, \
+ { 271,10}, { 143, 9}, { 287, 8}, { 575,10}, \
+ { 159, 9}, { 319,10}, { 175,11}, { 95,10}, \
+ { 191, 9}, { 383,10}, { 207,12}, { 63,11}, \
+ { 127,10}, { 255, 9}, { 511,10}, { 271, 9}, \
+ { 543, 8}, { 1087,10}, { 287, 9}, { 575,11}, \
+ { 159,10}, { 319, 9}, { 639,10}, { 351, 9}, \
+ { 703,11}, { 191,10}, { 383, 9}, { 767,10}, \
+ { 415, 9}, { 831,11}, { 223,10}, { 447,12}, \
+ { 4096,13}, { 8192,14}, { 16384,15}, { 32768,16} }
+#define SQR_FFT_TABLE3_SIZE 88
+#define SQR_FFT_THRESHOLD 3520
+
+#define MULLO_BASECASE_THRESHOLD 0 /* always */
+#define MULLO_DC_THRESHOLD 62
+#define MULLO_MUL_N_THRESHOLD 8907
+
+#define DC_DIV_QR_THRESHOLD 53
+#define DC_DIVAPPR_Q_THRESHOLD 216
+#define DC_BDIV_QR_THRESHOLD 67
+#define DC_BDIV_Q_THRESHOLD 180
+
+#define INV_MULMOD_BNM1_THRESHOLD 58
+#define INV_NEWTON_THRESHOLD 226
+#define INV_APPR_THRESHOLD 228
+
+#define BINV_NEWTON_THRESHOLD 252
+#define REDC_1_TO_REDC_N_THRESHOLD 67
+
+#define MU_DIV_QR_THRESHOLD 1187
+#define MU_DIVAPPR_Q_THRESHOLD 1308
+#define MUPI_DIV_QR_THRESHOLD 114
+#define MU_BDIV_QR_THRESHOLD 1017
+#define MU_BDIV_Q_THRESHOLD 1187
+
+#define MATRIX22_STRASSEN_THRESHOLD 15
+#define HGCD_THRESHOLD 97
+#define GCD_DC_THRESHOLD 386
+#define GCDEXT_DC_THRESHOLD 298
+#define JACOBI_BASE_METHOD 4
+
+#define GET_STR_DC_THRESHOLD 11
+#define GET_STR_PRECOMPUTE_THRESHOLD 24
+#define SET_STR_DC_THRESHOLD 318
+#define SET_STR_PRECOMPUTE_THRESHOLD 929
--- /dev/null
+/* PowerPC-32 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2003, 2004, 2008, 2009,
+2010, 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#define GMP_LIMB_BITS 32 /* limb width in bits */
+#define BYTES_PER_MP_LIMB 4 /* 4 bytes * 8 = the 32 bits above */
+
+/* 1650 MHz POWER5 -- NOTE(review): presumably the machine the values below were measured on; confirm before hand-editing them */
+
+#define DIVREM_1_NORM_THRESHOLD 0 /* always */
+#define DIVREM_1_UNNORM_THRESHOLD 0 /* always */
+#define MOD_1_1P_METHOD 1
+#define MOD_1_NORM_THRESHOLD 0 /* always */
+#define MOD_1_UNNORM_THRESHOLD 0 /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 8
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 6
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 9
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD 50
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 18
+#define USE_PREINV_DIVREM_1 1
+#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
+#define DIVEXACT_1_THRESHOLD 0 /* always */
+#define BMOD_1_TO_MOD_1_THRESHOLD 61
+
+#define MUL_TOOM22_THRESHOLD 22
+#define MUL_TOOM33_THRESHOLD 57
+#define MUL_TOOM44_THRESHOLD 130
+#define MUL_TOOM6H_THRESHOLD 189
+#define MUL_TOOM8H_THRESHOLD 309
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD 89
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 99
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 83
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD 88
+
+#define SQR_BASECASE_THRESHOLD 6
+#define SQR_TOOM2_THRESHOLD 40
+#define SQR_TOOM3_THRESHOLD 77
+#define SQR_TOOM4_THRESHOLD 124
+#define SQR_TOOM6_THRESHOLD 140
+#define SQR_TOOM8_THRESHOLD 238
+
+#define MULMID_TOOM42_THRESHOLD 40
+
+#define MULMOD_BNM1_THRESHOLD 15
+#define SQRMOD_BNM1_THRESHOLD 16
+
+#define POWM_SEC_TABLE 4,29,252,840,2080 /* expands to a comma-separated list, not a single value */
+
+#define MUL_FFT_MODF_THRESHOLD 412 /* k = 5; same pair as the first MUL_FFT_TABLE3 entry */
+#define MUL_FFT_TABLE3 \
+ { { 412, 5}, { 21, 6}, { 11, 5}, { 23, 6}, \
+ { 12, 5}, { 25, 6}, { 21, 7}, { 11, 6}, \
+ { 25, 7}, { 13, 6}, { 27, 7}, { 21, 8}, \
+ { 11, 7}, { 27, 8}, { 15, 7}, { 33, 8}, \
+ { 19, 7}, { 39, 8}, { 23, 7}, { 47, 8}, \
+ { 27, 9}, { 15, 8}, { 39, 9}, { 23, 8}, \
+ { 51,10}, { 15, 9}, { 31, 8}, { 67, 9}, \
+ { 39, 8}, { 79, 9}, { 55,10}, { 31, 9}, \
+ { 79,10}, { 47, 9}, { 95,11}, { 31,10}, \
+ { 63, 9}, { 135,10}, { 79, 9}, { 159,10}, \
+ { 95,11}, { 63,10}, { 127, 9}, { 255,10}, \
+ { 143, 9}, { 287,10}, { 159,11}, { 95,10}, \
+ { 191,12}, { 63,11}, { 127,10}, { 255, 9}, \
+ { 511,10}, { 271, 9}, { 543,10}, { 287,11}, \
+ { 159,10}, { 335, 9}, { 671,10}, { 351, 9}, \
+ { 703,11}, { 191,10}, { 383, 9}, { 767,10}, \
+ { 415, 9}, { 831,11}, { 223,12}, { 4096,13}, \
+ { 8192,14}, { 16384,15}, { 32768,16} }
+#define MUL_FFT_TABLE3_SIZE 71 /* number of pairs in MUL_FFT_TABLE3; keep in step with the table */
+#define MUL_FFT_THRESHOLD 4736
+
+#define SQR_FFT_MODF_THRESHOLD 340 /* k = 5; same pair as the first SQR_FFT_TABLE3 entry */
+#define SQR_FFT_TABLE3 \
+ { { 340, 5}, { 21, 6}, { 11, 5}, { 23, 6}, \
+ { 21, 7}, { 11, 6}, { 24, 7}, { 13, 6}, \
+ { 27, 7}, { 21, 8}, { 11, 7}, { 27, 8}, \
+ { 15, 7}, { 33, 8}, { 19, 7}, { 39, 8}, \
+ { 23, 7}, { 47, 8}, { 27, 9}, { 15, 8}, \
+ { 39, 9}, { 23, 8}, { 47,10}, { 15, 9}, \
+ { 31, 8}, { 67, 9}, { 47,10}, { 31, 9}, \
+ { 71,10}, { 47,11}, { 31,10}, { 63, 9}, \
+ { 127, 8}, { 255, 9}, { 135,10}, { 79, 9}, \
+ { 159,10}, { 95, 9}, { 191,11}, { 63,10}, \
+ { 127, 9}, { 255, 8}, { 511, 9}, { 271,10}, \
+ { 143, 9}, { 287, 8}, { 575, 9}, { 303,10}, \
+ { 159,11}, { 95,10}, { 191,12}, { 63,11}, \
+ { 127,10}, { 255, 9}, { 511,10}, { 271, 9}, \
+ { 543,10}, { 287, 9}, { 575,10}, { 303,11}, \
+ { 159,10}, { 319, 9}, { 639,10}, { 335, 9}, \
+ { 671,10}, { 351,11}, { 191,10}, { 383, 9}, \
+ { 767,10}, { 415,11}, { 223,10}, { 447,12}, \
+ { 4096,13}, { 8192,14}, { 16384,15}, { 32768,16} }
+#define SQR_FFT_TABLE3_SIZE 76 /* number of pairs in SQR_FFT_TABLE3; keep in step with the table */
+#define SQR_FFT_THRESHOLD 3712
+
+#define MULLO_BASECASE_THRESHOLD 2
+#define MULLO_DC_THRESHOLD 68
+#define MULLO_MUL_N_THRESHOLD 9236
+
+#define DC_DIV_QR_THRESHOLD 69
+#define DC_DIVAPPR_Q_THRESHOLD 220
+#define DC_BDIV_QR_THRESHOLD 75
+#define DC_BDIV_Q_THRESHOLD 188
+
+#define INV_MULMOD_BNM1_THRESHOLD 54
+#define INV_NEWTON_THRESHOLD 230
+#define INV_APPR_THRESHOLD 230
+
+#define BINV_NEWTON_THRESHOLD 278
+#define REDC_1_TO_REDC_N_THRESHOLD 87
+
+#define MU_DIV_QR_THRESHOLD 1210
+#define MU_DIVAPPR_Q_THRESHOLD 1308
+#define MUPI_DIV_QR_THRESHOLD 106
+#define MU_BDIV_QR_THRESHOLD 1017
+#define MU_BDIV_Q_THRESHOLD 1210
+
+#define MATRIX22_STRASSEN_THRESHOLD 14
+#define HGCD_THRESHOLD 110
+#define HGCD_APPR_THRESHOLD 138
+#define HGCD_REDUCE_THRESHOLD 2578
+#define GCD_DC_THRESHOLD 408
+#define GCDEXT_DC_THRESHOLD 298
+#define JACOBI_BASE_METHOD 4
+
+#define GET_STR_DC_THRESHOLD 13
+#define GET_STR_PRECOMPUTE_THRESHOLD 24
+#define SET_STR_DC_THRESHOLD 527
+#define SET_STR_PRECOMPUTE_THRESHOLD 1090
--- /dev/null
+/* PowerPC-32 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2003, 2004, 2008, 2009,
+2010, 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#define GMP_LIMB_BITS 32 /* limb width in bits */
+#define BYTES_PER_MP_LIMB 4 /* 4 bytes * 8 = the 32 bits above */
+
+/* 3500 MHz POWER6 -- NOTE(review): presumably the machine the values below were measured on; confirm before hand-editing them */
+
+#define DIVREM_1_NORM_THRESHOLD 0 /* always */
+#define DIVREM_1_UNNORM_THRESHOLD 0 /* always */
+#define MOD_1_1P_METHOD 2
+#define MOD_1_NORM_THRESHOLD 3
+#define MOD_1_UNNORM_THRESHOLD 0 /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 3
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 3
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD MP_SIZE_T_MAX /* never */
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD 0 /* never mpn_mod_1s_2p */
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 8
+#define USE_PREINV_DIVREM_1 1
+#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
+#define DIVEXACT_1_THRESHOLD 0 /* always */
+#define BMOD_1_TO_MOD_1_THRESHOLD MP_SIZE_T_MAX /* never */
+
+#define MUL_TOOM22_THRESHOLD 19
+#define MUL_TOOM33_THRESHOLD 55
+#define MUL_TOOM44_THRESHOLD 88
+#define MUL_TOOM6H_THRESHOLD 137
+#define MUL_TOOM8H_THRESHOLD 181
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD 57
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 56
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 57
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD 56
+
+#define SQR_BASECASE_THRESHOLD 0 /* always */
+#define SQR_TOOM2_THRESHOLD 30
+#define SQR_TOOM3_THRESHOLD 56
+#define SQR_TOOM4_THRESHOLD 130
+#define SQR_TOOM6_THRESHOLD 189
+#define SQR_TOOM8_THRESHOLD 296
+
+#define MULMID_TOOM42_THRESHOLD 26
+
+#define MULMOD_BNM1_THRESHOLD 7
+#define SQRMOD_BNM1_THRESHOLD 12
+
+#define POWM_SEC_TABLE 2,26,127,453,1068 /* expands to a comma-separated list, not a single value */
+
+#define MUL_FFT_MODF_THRESHOLD 212 /* k = 5; same pair as the first MUL_FFT_TABLE3 entry */
+#define MUL_FFT_TABLE3 \
+ { { 212, 5}, { 13, 6}, { 7, 5}, { 15, 6}, \
+ { 13, 7}, { 7, 6}, { 16, 7}, { 9, 6}, \
+ { 19, 7}, { 13, 8}, { 7, 7}, { 19, 8}, \
+ { 11, 7}, { 25, 9}, { 7, 8}, { 15, 7}, \
+ { 31, 8}, { 19, 7}, { 39, 8}, { 23, 9}, \
+ { 15, 8}, { 39, 9}, { 23, 8}, { 47,10}, \
+ { 15, 9}, { 31, 8}, { 63, 9}, { 39, 8}, \
+ { 79, 9}, { 47,10}, { 31, 9}, { 63, 8}, \
+ { 127, 9}, { 71, 8}, { 143, 7}, { 287, 9}, \
+ { 79,10}, { 47,11}, { 31,10}, { 63, 9}, \
+ { 127, 8}, { 255, 7}, { 511, 9}, { 143, 8}, \
+ { 287,10}, { 79, 9}, { 159, 8}, { 319, 9}, \
+ { 175, 8}, { 351,10}, { 95, 9}, { 191, 8}, \
+ { 383, 9}, { 207,10}, { 111,11}, { 63,10}, \
+ { 127, 9}, { 255, 8}, { 511,10}, { 143, 9}, \
+ { 287, 8}, { 575,10}, { 159, 9}, { 319,10}, \
+ { 175, 9}, { 351,11}, { 95,10}, { 191, 9}, \
+ { 383,10}, { 207, 9}, { 415,12}, { 63,11}, \
+ { 127,10}, { 255, 9}, { 511,10}, { 287, 9}, \
+ { 575,11}, { 159,10}, { 351, 9}, { 703,11}, \
+ { 191,10}, { 415, 9}, { 831,11}, { 223,10}, \
+ { 447,12}, { 4096,13}, { 8192,14}, { 16384,15}, \
+ { 32768,16} }
+#define MUL_FFT_TABLE3_SIZE 89 /* number of pairs in MUL_FFT_TABLE3; keep in step with the table */
+#define MUL_FFT_THRESHOLD 1728
+
+#define SQR_FFT_MODF_THRESHOLD 184 /* k = 5; same pair as the first SQR_FFT_TABLE3 entry */
+#define SQR_FFT_TABLE3 \
+ { { 184, 5}, { 6, 4}, { 13, 5}, { 13, 6}, \
+ { 7, 5}, { 15, 6}, { 13, 7}, { 7, 6}, \
+ { 16, 7}, { 9, 6}, { 19, 7}, { 11, 6}, \
+ { 23, 7}, { 13, 8}, { 7, 7}, { 19, 8}, \
+ { 11, 7}, { 23, 9}, { 7, 8}, { 23, 9}, \
+ { 15, 8}, { 39, 9}, { 23,10}, { 15, 9}, \
+ { 31, 8}, { 63, 9}, { 39, 8}, { 79, 9}, \
+ { 47,10}, { 31, 9}, { 63, 8}, { 127, 7}, \
+ { 255, 9}, { 71, 8}, { 143, 7}, { 287, 6}, \
+ { 575, 9}, { 79,10}, { 47,11}, { 31,10}, \
+ { 63, 9}, { 127, 8}, { 255, 9}, { 143, 8}, \
+ { 287, 7}, { 575,10}, { 79, 9}, { 159, 8}, \
+ { 319, 9}, { 175, 8}, { 351,10}, { 95, 9}, \
+ { 191, 8}, { 383, 9}, { 207,10}, { 111, 9}, \
+ { 223,11}, { 63,10}, { 127, 9}, { 255,10}, \
+ { 143, 9}, { 287, 8}, { 575,10}, { 159, 9}, \
+ { 319,10}, { 175, 9}, { 351,11}, { 95,10}, \
+ { 191, 9}, { 383,10}, { 207, 9}, { 415,10}, \
+ { 223,12}, { 63,11}, { 127,10}, { 255, 9}, \
+ { 511,10}, { 287, 9}, { 575,11}, { 159,10}, \
+ { 351, 9}, { 703, 8}, { 1407,11}, { 191,10}, \
+ { 415,11}, { 223,10}, { 447, 9}, { 895,12}, \
+ { 4096,13}, { 8192,14}, { 16384,15}, { 32768,16} }
+#define SQR_FFT_TABLE3_SIZE 92 /* number of pairs in SQR_FFT_TABLE3; keep in step with the table */
+#define SQR_FFT_THRESHOLD 1600
+
+#define MULLO_BASECASE_THRESHOLD 2
+#define MULLO_DC_THRESHOLD 57
+#define MULLO_MUL_N_THRESHOLD 3176
+
+#define DC_DIV_QR_THRESHOLD 52
+#define DC_DIVAPPR_Q_THRESHOLD 187
+#define DC_BDIV_QR_THRESHOLD 64
+#define DC_BDIV_Q_THRESHOLD 146
+
+#define INV_MULMOD_BNM1_THRESHOLD 68
+#define INV_NEWTON_THRESHOLD 182
+#define INV_APPR_THRESHOLD 182
+
+#define BINV_NEWTON_THRESHOLD 186
+#define REDC_1_TO_REDC_N_THRESHOLD 60
+
+#define MU_DIV_QR_THRESHOLD 924
+#define MU_DIVAPPR_Q_THRESHOLD 807
+#define MUPI_DIV_QR_THRESHOLD 73
+#define MU_BDIV_QR_THRESHOLD 667
+#define MU_BDIV_Q_THRESHOLD 823
+
+#define MATRIX22_STRASSEN_THRESHOLD 8
+#define HGCD_THRESHOLD 61
+#define HGCD_APPR_THRESHOLD 50
+#define HGCD_REDUCE_THRESHOLD 974
+#define GCD_DC_THRESHOLD 195
+#define GCDEXT_DC_THRESHOLD 134
+#define JACOBI_BASE_METHOD 4
+
+#define GET_STR_DC_THRESHOLD 9
+#define GET_STR_PRECOMPUTE_THRESHOLD 21
+#define SET_STR_DC_THRESHOLD 190
+#define SET_STR_PRECOMPUTE_THRESHOLD 411
--- /dev/null
+/* PowerPC-32 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2003, 2004, 2008, 2009,
+2010, 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#define GMP_LIMB_BITS 32 /* limb width in bits */
+#define BYTES_PER_MP_LIMB 4 /* 4 bytes * 8 = the 32 bits above */
+
+/* 3550 MHz POWER7/T4 -- NOTE(review): presumably the machine the values below were measured on; confirm before hand-editing them */
+
+#define DIVREM_1_NORM_THRESHOLD 0 /* always */
+#define DIVREM_1_UNNORM_THRESHOLD 0 /* always */
+#define MOD_1_1P_METHOD 1
+#define MOD_1_NORM_THRESHOLD 0 /* always */
+#define MOD_1_UNNORM_THRESHOLD 0 /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 7
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 7
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 0 /* never mpn_mod_1_1p */
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD 34
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 15
+#define USE_PREINV_DIVREM_1 1
+#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
+#define DIVEXACT_1_THRESHOLD 0 /* always */
+#define BMOD_1_TO_MOD_1_THRESHOLD 34
+
+#define MUL_TOOM22_THRESHOLD 20
+#define MUL_TOOM33_THRESHOLD 89
+#define MUL_TOOM44_THRESHOLD 130
+#define MUL_TOOM6H_THRESHOLD 286
+#define MUL_TOOM8H_THRESHOLD 363
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD 121
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 114
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 89
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD 113
+
+#define SQR_BASECASE_THRESHOLD 4
+#define SQR_TOOM2_THRESHOLD 50
+#define SQR_TOOM3_THRESHOLD 89
+#define SQR_TOOM4_THRESHOLD 154
+#define SQR_TOOM6_THRESHOLD 222
+#define SQR_TOOM8_THRESHOLD 381
+
+#define MULMID_TOOM42_THRESHOLD 40
+
+#define MULMOD_BNM1_THRESHOLD 18
+#define SQRMOD_BNM1_THRESHOLD 17
+
+#define POWM_SEC_TABLE 4,35,225,780,2212 /* expands to a comma-separated list, not a single value */
+
+#define MUL_FFT_MODF_THRESHOLD 476 /* k = 5; same pair as the first MUL_FFT_TABLE3 entry */
+#define MUL_FFT_TABLE3 \
+ { { 476, 5}, { 21, 6}, { 11, 5}, { 23, 6}, \
+ { 12, 5}, { 25, 6}, { 13, 5}, { 27, 6}, \
+ { 14, 5}, { 29, 6}, { 21, 7}, { 11, 6}, \
+ { 25, 7}, { 13, 6}, { 29, 7}, { 15, 6}, \
+ { 31, 7}, { 17, 6}, { 35, 7}, { 19, 6}, \
+ { 39, 7}, { 21, 8}, { 11, 7}, { 27, 8}, \
+ { 15, 7}, { 35, 8}, { 19, 7}, { 41, 8}, \
+ { 27, 9}, { 15, 8}, { 39, 9}, { 23, 8}, \
+ { 51,10}, { 15, 9}, { 31, 8}, { 67, 9}, \
+ { 39, 8}, { 79, 9}, { 47, 8}, { 95, 9}, \
+ { 55,10}, { 31, 9}, { 79,10}, { 47, 9}, \
+ { 95,11}, { 31,10}, { 63, 9}, { 135,10}, \
+ { 79, 9}, { 159,10}, { 95,11}, { 63,10}, \
+ { 159,11}, { 95,12}, { 63,11}, { 127,10}, \
+ { 255, 9}, { 511,10}, { 271, 9}, { 543, 8}, \
+ { 1087,11}, { 159,10}, { 319, 9}, { 639,10}, \
+ { 335, 9}, { 671, 8}, { 1343,10}, { 351,11}, \
+ { 191,10}, { 415, 9}, { 831,10}, { 431,11}, \
+ { 223,12}, { 4096,13}, { 8192,14}, { 16384,15}, \
+ { 32768,16} }
+#define MUL_FFT_TABLE3_SIZE 77 /* number of pairs in MUL_FFT_TABLE3; keep in step with the table */
+#define MUL_FFT_THRESHOLD 5312
+
+#define SQR_FFT_MODF_THRESHOLD 344 /* k = 5; same pair as the first SQR_FFT_TABLE3 entry */
+#define SQR_FFT_TABLE3 \
+ { { 344, 5}, { 21, 6}, { 11, 5}, { 23, 6}, \
+ { 21, 7}, { 11, 6}, { 24, 7}, { 13, 6}, \
+ { 27, 7}, { 15, 6}, { 31, 7}, { 21, 8}, \
+ { 11, 7}, { 27, 8}, { 15, 7}, { 33, 8}, \
+ { 19, 7}, { 39, 8}, { 27, 9}, { 15, 8}, \
+ { 39, 9}, { 23, 8}, { 47,10}, { 15, 9}, \
+ { 31, 8}, { 63, 9}, { 39, 8}, { 79, 9}, \
+ { 47,10}, { 31, 9}, { 79,10}, { 47,11}, \
+ { 31,10}, { 63, 9}, { 135,10}, { 79, 9}, \
+ { 159,10}, { 95, 9}, { 191,11}, { 63,10}, \
+ { 127, 9}, { 255, 8}, { 511, 9}, { 271,10}, \
+ { 143, 9}, { 287, 8}, { 575, 9}, { 303,10}, \
+ { 159,11}, { 95,10}, { 191,12}, { 63,11}, \
+ { 127,10}, { 255, 9}, { 511,10}, { 271, 9}, \
+ { 543, 8}, { 1087,10}, { 287, 9}, { 575,10}, \
+ { 303,11}, { 159,10}, { 319, 9}, { 639,10}, \
+ { 335, 9}, { 671,10}, { 351, 9}, { 703,11}, \
+ { 191,10}, { 383, 9}, { 767,10}, { 415, 9}, \
+ { 831,11}, { 223,10}, { 447,12}, { 4096,13}, \
+ { 8192,14}, { 16384,15}, { 32768,16} }
+#define SQR_FFT_TABLE3_SIZE 79 /* number of pairs in SQR_FFT_TABLE3; keep in step with the table */
+#define SQR_FFT_THRESHOLD 3712
+
+#define MULLO_BASECASE_THRESHOLD 2
+#define MULLO_DC_THRESHOLD 34
+#define MULLO_MUL_N_THRESHOLD 10323
+
+#define DC_DIV_QR_THRESHOLD 52
+#define DC_DIVAPPR_Q_THRESHOLD 202
+#define DC_BDIV_QR_THRESHOLD 68
+#define DC_BDIV_Q_THRESHOLD 152
+
+#define INV_MULMOD_BNM1_THRESHOLD 66
+#define INV_NEWTON_THRESHOLD 226
+#define INV_APPR_THRESHOLD 189
+
+#define BINV_NEWTON_THRESHOLD 292
+#define REDC_1_TO_REDC_N_THRESHOLD 79
+
+#define MU_DIV_QR_THRESHOLD 1442
+#define MU_DIVAPPR_Q_THRESHOLD 1442
+#define MUPI_DIV_QR_THRESHOLD 91
+#define MU_BDIV_QR_THRESHOLD 1308
+#define MU_BDIV_Q_THRESHOLD 1442
+
+#define MATRIX22_STRASSEN_THRESHOLD 16
+#define HGCD_THRESHOLD 126
+#define HGCD_APPR_THRESHOLD 139
+#define HGCD_REDUCE_THRESHOLD 2681
+#define GCD_DC_THRESHOLD 573
+#define GCDEXT_DC_THRESHOLD 448
+#define JACOBI_BASE_METHOD 4
+
+#define GET_STR_DC_THRESHOLD 9
+#define GET_STR_PRECOMPUTE_THRESHOLD 20
+#define SET_STR_DC_THRESHOLD 834
+#define SET_STR_PRECOMPUTE_THRESHOLD 1888
ASM_START()
PROLOGUE(mpn_rshift)
- cmpwi cr0, r5, 12 C more than 12 limbs?
+ cmpwi cr0, r5, 30 C more than 30 limbs?
addi r7, r3, -4 C dst-4
- bgt L(BIG) C branch if more than 12 limbs
+ bgt L(BIG) C branch if more than 30 limbs
stw r12, 20(r7)
lmw r24, -32(r1) C restore registers
blr
-EPILOGUE(mpn_rshift)
+EPILOGUE()
--- /dev/null
+dnl PowerPC-32 mpn_tabselect.
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C 603e: ?
+C 604e: ?
+C 75x (G3): ?
+C 7400,7410 (G4): ?
+C 744x,745x (G4+): ?
+C power4/ppc970: 3.3
+C power5: ?
+
+C NOTES
+C * This has not been tuned for any specific processor. Its speed should not
+C be too bad, though.
+C * Using VMX could result in significant speedup for certain CPUs.
+
+C mpn_tabselect (mp_limb_t *rp, mp_limb_t *tp, mp_size_t n, mp_size_t nents, mp_size_t which)
+define(`rp', `r3')
+define(`tp', `r4')
+define(`n', `r5')
+define(`nents', `r6')
+define(`which', `r7')
+
+define(`mask', `r8')
+
+ASM_START()
+ TEXT
+ ALIGN(16)
+PROLOGUE(mpn_tabselect)
+C Walks all nents table entries of n limbs each starting at tp, blending
+C each one into rp under a mask that is all ones only for entry number which.
+ addi r0, n, 1
+ srwi r0, r0, 1 C inner loop count = (n + 1) / 2, two limbs per pass
+ andi. r9, n, 1 C set cr0 for use in inner loop
+ subf which, nents, which C bias which so that which + nents is 0 at the selected entry
+ slwi n, n, 2 C n is now the byte size of one entry (4 bytes per limb)
+
+L(outer):
+ mtctr r0 C put inner loop count in ctr
+
+ add r9, which, nents C are we at the selected table entry?
+ addic r9, r9, -1 C set CF iff not selected entry
+ subfe mask, r0, r0 C mask = CF - 1: all ones if selected, else 0
+
+ beq cr0, L(top) C branch to loop entry if n even
+
+C n odd: blend a single limb first, then continue with pairs
+ lwz r9, 0(tp)
+ addi tp, tp, 4
+ and r9, r9, mask C keep the table limb only if selected
+ lwz r11, 0(rp)
+ andc r11, r11, mask C keep the old rp limb only if not selected
+ or r9, r9, r11
+ stw r9, 0(rp)
+ addi rp, rp, 4
+ bdz L(end) C that was the only limb of this entry
+
+ ALIGN(16)
+L(top): lwz r9, 0(tp)
+ lwz r10, 4(tp)
+ addi tp, tp, 8
+ nop C NOTE(review): presumably scheduling padding -- confirm
+ and r9, r9, mask
+ and r10, r10, mask
+ lwz r11, 0(rp)
+ lwz r12, 4(rp)
+ andc r11, r11, mask
+ andc r12, r12, mask
+ or r9, r9, r11 C rp limb = (tp & mask) | (rp & ~mask)
+ or r10, r10, r12
+ stw r9, 0(rp)
+ stw r10, 4(rp)
+ addi rp, rp, 8
+ bdnz L(top)
+
+L(end): subf rp, n, rp C move rp back to beginning
+ cmpwi cr6, nents, 1 C cr6, not cr0: cr0 still holds the parity of n
+ addi nents, nents, -1
+ bne cr6, L(outer) C tp is not rewound, so it now points at the next entry
+
+ blr
+EPILOGUE()
dnl PowerPC-32 mpn_mod_34lsub1 -- mpn remainder mod 2^24-1.
-dnl Copyright 2002, 2003, 2005, 2006, 2007 Free Software Foundation, Inc.
+dnl Copyright 2002, 2003, 2005, 2006, 2007, 2012 Free Software Foundation,
+dnl Inc.
dnl This file is part of the GNU MP Library.
andi. r7, up, 15
vxor a0, v0, v0
- lis r0, 0xaaaa
+ lis r9, 0xaaaa
vxor a1, v0, v0
- ori r0, r0, 0xaaab
+ ori r9, r9, 0xaaab
vxor a2, v0, v0
li r5, 16
vxor c0, v0, v0
li r6, 32
vxor c1, v0, v0
- LEAL( r11, cnsts)
+ LEAL( r11, cnsts) C CAUTION clobbers r0 for elf, darwin
vxor c2, v0, v0
vxor z, v0, v0
vsldoi a2, z, a2, 12
addi n, n, 9
- mulhwu r0, n, r0
+ mulhwu r0, n, r9
srwi r0, r0, 3 C r0 = floor(n/12)
mtctr r0
vsldoi a1, z, a1, 8
addi n, n, 6
- mulhwu r0, n, r0
+ mulhwu r0, n, r9
srwi r0, r0, 3 C r0 = floor(n/12)
mtctr r0
vsldoi a0, z, a0, 4
addi n, n, 3
- mulhwu r0, n, r0
+ mulhwu r0, n, r9
srwi r0, r0, 3 C r0 = floor(n/12)
mtctr r0
b L(0)
L(aligned16):
- mulhwu r0, n, r0
+ mulhwu r0, n, r9
srwi r0, r0, 3 C r0 = floor(n/12)
mtctr r0
MULFUNC_PROLOGUE(mpn_popcount)
include_mpn(`powerpc64/vmx/popcount.asm')
-
-C cycles/limb
-C 7400,7410 (G4): 2.75
-C 744x,745x (G4+): 2.25
-C 970 (G5): 5.3
one store per cycle.
L1 load latency: to gregs 3-4 cycles, to fregs 5-6 cycles.
Operations that modify the address register might be split
- to use also a an integer issue slot.
+ to use also an integer issue slot.
Simple integer: 2 operations every cycle, latency 2.
Integer multiply: 2 operations every 6th cycle, latency 7 cycles.
Integer divide: ?
divert(-1)
dnl m4 macros for AIX 64-bit assembly.
-dnl Copyright 2000, 2001, 2002, 2005, 2006 Free Software Foundation, Inc.
+dnl Copyright 2000, 2001, 2002, 2005, 2006, 2010, 2012 Free Software
+dnl Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
define(`ASM_START',
- `.machine "ppc64"
+ `.machine "any"
.toc')
dnl Called: PROLOGUE_cpu(GSYM_PREFIX`'foo)
.csect [DS], 3
$1:
.llong .$1, TOC[tc0], 0
- .csect [PR]
- .align 4
+ .csect .$1[PR], 6
.$1:')
define(`EPILOGUE_cpu',
include(`../config.m4')
-C cycles/limb
-C POWER3/PPC630: 1?
-C POWER4/PPC970: 1.6
+C cycles/limb
+C POWER3/PPC630 1?
+C POWER4/PPC970 1.6
+C POWER5 ?
+C POWER6 ?
+C POWER7 1.45
C TODO
C * 8-way unrolling brings timing down to about 1.3 cycles/limb.
include(`../config.m4')
-C cycles/limb
-C POWER3/PPC630: 1
-C POWER4/PPC970: 1
+C cycles/limb
+C POWER3/PPC630 1
+C POWER4/PPC970 1
+C POWER5 ?
+C POWER6 ?
+C POWER7 1.4
C INPUT PARAMETERS
C rp r3
include(`../config.m4')
-C cycles/limb
-C POWER3/PPC630: 1
-C POWER4/PPC970: 1
+C cycles/limb
+C POWER3/PPC630 1
+C POWER4/PPC970 1
+C POWER5 ?
+C POWER6 ?
+C POWER7 1.4
C INPUT PARAMETERS
C rp r3
m4_assert_numargs(1)
` .text
.globl $1
- .align 4
+ .align 5
$1:')
define(`EPILOGUE_cpu',
.size $1, 24
.type .$1, @function
.section ".text"
- .align 4
+ .align 5
.$1:')
define(`EPILOGUE_cpu',
include(`../config.m4')
-C cycles/limb
-C POWER3/PPC630: 1.75
-C POWER4/PPC970: 2.10
+C cycles/limb
+C POWER3/PPC630 1.75
+C POWER4/PPC970 2.10
+C POWER5 ?
+C POWER6 ?
+C POWER7 1.75
C n POWER3/PPC630 POWER4/PPC970
C 1 15.00 15.33
dnl PowerPC-64 mpn_lshift -- rp[] = up[] << cnt
-dnl Copyright 2003, 2005 Free Software Foundation, Inc.
+dnl Copyright 2003, 2005, 2010, 2011 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
include(`../config.m4')
-C cycles/limb
-C POWER3/PPC630: 1.5
-C POWER4/PPC970: 3.0
+C cycles/limb
+C POWER3/PPC630 ?
+C POWER4/PPC970 ?
+C POWER5 2.25
+C POWER6 9.75
+C POWER7 2.15
-C INPUT PARAMETERS
-define(`rp',`r3')
-define(`up',`r4')
-define(`n',`r5')
-define(`cnt',`r6')
+C TODO
+C * Try to reduce the number of needed live registers
+C * Micro-optimise header code
+C * Keep in synch with rshift.asm and lshiftc.asm
-define(`tnc',`r5')
-define(`v0',`r0')
-define(`v1',`r7')
-define(`u0',`r8')
-define(`u1',`r9')
-define(`h0',`r10')
-define(`h1',`r11')
+C INPUT PARAMETERS
+define(`rp', `r3')
+define(`up', `r4')
+define(`n', `r5')
+define(`cnt', `r6')
+define(`tnc',`r0')
+define(`u0',`r30')
+define(`u1',`r31')
+define(`retval',`r5')
ASM_START()
PROLOGUE(mpn_lshift)
+ std r31, -8(r1) C save r31 (used as u1); reloaded at L(ret)
+ std r30, -16(r1) C save r30 (used as u0); reloaded at L(ret)
+ subfic tnc, cnt, 64 C tnc = 64 - cnt
+ sldi r7, n, 3 C byte count corresponding to n
+ add up, up, r7 C up = up + n
+ add rp, rp, r7 C rp = rp + n
+ rldicl. r30, n, 0,62 C r30 = n & 3, set cr0
+ cmpdi cr6, r30, 2 C cr6 = (n & 3) versus 2, for the dispatch below
+ addi r31, n, 3 C compute count...
+ ld r10, -8(up) C load 1st limb for b00...b11
+ srd retval, r10, tnc C bits shifted out of the last limb; retval is r5, so n is dead from here
ifdef(`HAVE_ABI_mode32',
-` rldicl r7, r5, 0, 32 C zero extend n
- mtctr r7', C copy n to count register
-` mtctr n') C copy n to count register
-
-ifdef(`HAVE_ABI_mode32',
-` rldic r0, n, 3, 32', C byte count corresponding to n
-` rldicr r0, n, 3, 60') C byte count corresponding to n
-
- add rp, rp, r0 C rp = rp + n
- add up, up, r0 C up = up + n
- addi rp, rp, 8 C rp now points 16 beyond end
- addi up, up, -8 C up now points to last limb
- subfic tnc, cnt, 64 C reverse shift count
-
- ld u0, 0(up)
- sld h0, u0, cnt
- srd r12, u0, tnc C return value
- bdz L(1) C jump for n = 1
-
- ld u1, -8(up)
- bdz L(2) C jump for n = 2
-
- ldu u0, -16(up)
- bdz L(end) C jump for n = 3
-
-L(oop): srd v1, u1, tnc
- sld h1, u1, cnt
- ld u1, -8(up)
- or h0, v1, h0
- stdu h0, -16(rp)
-
- bdz L(exit)
-
- srd v0, u0, tnc
- sld h0, u0, cnt
- ldu u0, -16(up)
- or h1, v0, h1
- std h1, -8(rp)
-
- bdnz L(oop)
-
-L(end): srd v1, u1, tnc
- sld h1, u1, cnt
- or h0, v1, h0
- stdu h0, -16(rp)
- srd v0, u0, tnc
- sld h0, u0, cnt
- or h1, v0, h1
- std h1, -8(rp)
-L(1): std h0, -16(rp)
-ifdef(`HAVE_ABI_mode32',
-` srdi r3, r12, 32
- mr r4, r12
-',` mr r3, r12
-')
- blr
-
-L(exit): srd v0, u0, tnc
- sld h0, u0, cnt
- or h1, v0, h1
- std h1, -8(rp)
-L(2): srd v1, u1, tnc
- sld h1, u1, cnt
- or h0, v1, h0
- stdu h0, -16(rp)
- std h1, -8(rp)
+` rldicl r31, r31, 62,34', C ...branch count
+` srdi r31, r31, 2') C ...for ctr
+ mtctr r31 C copy count into ctr
+ beq cr0, L(b00) C n & 3 == 0
+ blt cr6, L(b01) C n & 3 == 1
+ ld r11, -16(up) C load 2nd limb for b10 and b11
+ beq cr6, L(b10) C n & 3 == 2, otherwise fall into b11
+
+ ALIGN(16)
+L(b11): sld r8, r10, cnt
+ srd r9, r11, tnc
+ ld u1, -24(up)
+ addi up, up, -24
+ sld r12, r11, cnt
+ srd r7, u1, tnc
+ addi rp, rp, 16
+ bdnz L(gt3)
+
+ or r11, r8, r9
+ sld r8, u1, cnt
+ b L(cj3)
+
+ ALIGN(16)
+L(gt3): ld u0, -8(up)
+ or r11, r8, r9
+ sld r8, u1, cnt
+ srd r9, u0, tnc
+ ld u1, -16(up)
+ or r10, r12, r7
+ b L(L11)
+
+ ALIGN(32)
+L(b10): sld r12, r10, cnt
+ addi rp, rp, 24
+ srd r7, r11, tnc
+ bdnz L(gt2)
+
+ sld r8, r11, cnt
+ or r10, r12, r7
+ b L(cj2)
+
+L(gt2): ld u0, -24(up)
+ sld r8, r11, cnt
+ srd r9, u0, tnc
+ ld u1, -32(up)
+ or r10, r12, r7
+ sld r12, u0, cnt
+ srd r7, u1, tnc
+ ld u0, -40(up)
+ or r11, r8, r9
+ addi up, up, -16
+ b L(L10)
+
+ ALIGN(16)
+L(b00): ld u1, -16(up)
+ sld r12, r10, cnt
+ srd r7, u1, tnc
+ ld u0, -24(up)
+ sld r8, u1, cnt
+ srd r9, u0, tnc
+ ld u1, -32(up)
+ or r10, r12, r7
+ sld r12, u0, cnt
+ srd r7, u1, tnc
+ addi rp, rp, 8
+ bdz L(cj4)
+
+L(gt4): addi up, up, -32
+ ld u0, -8(up)
+ or r11, r8, r9
+ b L(L00)
+
+ ALIGN(16)
+L(b01): bdnz L(gt1)
+ sld r8, r10, cnt C n == 1: the single limb, zeros shifted in
+ std r8, -8(rp)
+ b L(ret)
+
+L(gt1): ld u0, -16(up)
+ sld r8, r10, cnt
+ srd r9, u0, tnc
+ ld u1, -24(up)
+ sld r12, u0, cnt
+ srd r7, u1, tnc
+ ld u0, -32(up)
+ or r11, r8, r9
+ sld r8, u1, cnt
+ srd r9, u0, tnc
+ ld u1, -40(up)
+ addi up, up, -40
+ or r10, r12, r7
+ bdz L(end)
+
+ ALIGN(32)
+L(top): sld r12, u0, cnt C main loop: 4 limbs stored per pass, entered at top, L00, L11 or L10
+ srd r7, u1, tnc
+ ld u0, -8(up)
+ std r11, -8(rp)
+ or r11, r8, r9
+L(L00): sld r8, u1, cnt
+ srd r9, u0, tnc
+ ld u1, -16(up)
+ std r10, -16(rp)
+ or r10, r12, r7
+L(L11): sld r12, u0, cnt
+ srd r7, u1, tnc
+ ld u0, -24(up)
+ std r11, -24(rp)
+ or r11, r8, r9
+L(L10): sld r8, u1, cnt
+ srd r9, u0, tnc
+ ld u1, -32(up)
+ addi up, up, -32
+ std r10, -32(rp)
+ addi rp, rp, -32
+ or r10, r12, r7
+ bdnz L(top)
+
+ ALIGN(32)
+L(end): sld r12, u0, cnt C wind down: no more loads, store the limbs still in flight
+ srd r7, u1, tnc
+ std r11, -8(rp)
+L(cj4): or r11, r8, r9
+ sld r8, u1, cnt
+ std r10, -16(rp)
+L(cj3): or r10, r12, r7
+ std r11, -24(rp)
+L(cj2): std r10, -32(rp)
+ std r8, -40(rp) C lowest limb: shifted only, nothing is or-ed in below it
+
+L(ret): ld r31, -8(r1) C restore the registers saved in the prologue
+ ld r30, -16(r1)
ifdef(`HAVE_ABI_mode32',
-` srdi r3, r12, 32
- mr r4, r12
-',` mr r3, r12
-')
+` srdi r3, retval, 32
+ mr r4, retval
+',` mr r3, retval')
blr
EPILOGUE()
--- /dev/null
+dnl PowerPC-64 mpn_lshiftc -- rp[] = ~up[] << cnt
+
+dnl Copyright 2003, 2005, 2010 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C POWER3/PPC630 ?
+C POWER4/PPC970 ?
+C POWER5 2.25
+C POWER6 9.5
+C POWER7 2.15
+
+C TODO
+C * Try to reduce the number of needed live registers
+C * Micro-optimise header code
+C * Keep in synch with lshift.asm and rshift.asm
+
+C INPUT PARAMETERS
+define(`rp', `r3')
+define(`up', `r4')
+define(`n', `r5')
+define(`cnt', `r6')
+
+define(`tnc',`r0')
+define(`u0',`r30')
+define(`u1',`r31')
+define(`retval',`r5')
+
+ASM_START()
+PROLOGUE(mpn_lshiftc)
+C Stores the complement of (up[] << cnt) to rp[], working from the high
+C end downwards; the bits shifted out of the last limb are returned
+C uncomplemented.
+ std r31, -8(r1) C save r31 (used as u1); reloaded at L(ret)
+ std r30, -16(r1) C save r30 (used as u0); reloaded at L(ret)
+ subfic tnc, cnt, 64 C tnc = 64 - cnt
+ sldi r7, n, 3 C byte count corresponding to n
+ add up, up, r7 C up = up + n
+ add rp, rp, r7 C rp = rp + n
+ rldicl. r30, n, 0,62 C r30 = n & 3, set cr0
+ cmpdi cr6, r30, 2 C cr6 = (n & 3) versus 2, for the dispatch below
+ addi r31, n, 3 C compute count...
+ ld r10, -8(up) C load 1st limb for b00...b11
+ srd retval, r10, tnc C bits shifted out of the last limb; retval is r5, so n is dead from here
+C In mode32 only the low 32 bits of n are meaningful, so the count must be
+C formed from those alone, exactly as lshift.asm does.
+ifdef(`HAVE_ABI_mode32',
+` rldicl r31, r31, 62,34', C ...branch count
+` srdi r31, r31, 2') C ...for ctr
+ mtctr r31 C copy count into ctr
+ beq cr0, L(b00) C n & 3 == 0
+ blt cr6, L(b01) C n & 3 == 1
+ ld r11, -16(up) C load 2nd limb for b10 and b11
+ beq cr6, L(b10) C n & 3 == 2, otherwise fall into b11
+
+ ALIGN(16)
+L(b11): sld r8, r10, cnt
+ srd r9, r11, tnc
+ ld u1, -24(up)
+ addi up, up, -24
+ sld r12, r11, cnt
+ srd r7, u1, tnc
+ addi rp, rp, 16
+ bdnz L(gt3)
+
+ nor r11, r8, r9
+ sld r8, u1, cnt
+ nor r8, r8, r8 C lowest limb has nothing or-ed in, so complement it alone
+ b L(cj3)
+
+ ALIGN(16)
+L(gt3): ld u0, -8(up)
+ nor r11, r8, r9
+ sld r8, u1, cnt
+ srd r9, u0, tnc
+ ld u1, -16(up)
+ nor r10, r12, r7
+ b L(L11)
+
+ ALIGN(32)
+L(b10): sld r12, r10, cnt
+ addi rp, rp, 24
+ srd r7, r11, tnc
+ bdnz L(gt2)
+
+ sld r8, r11, cnt
+ nor r10, r12, r7
+ nor r8, r8, r8 C lowest limb has nothing or-ed in, so complement it alone
+ b L(cj2)
+
+L(gt2): ld u0, -24(up)
+ sld r8, r11, cnt
+ srd r9, u0, tnc
+ ld u1, -32(up)
+ nor r10, r12, r7
+ sld r12, u0, cnt
+ srd r7, u1, tnc
+ ld u0, -40(up)
+ nor r11, r8, r9
+ addi up, up, -16
+ b L(L10)
+
+ ALIGN(16)
+L(b00): ld u1, -16(up)
+ sld r12, r10, cnt
+ srd r7, u1, tnc
+ ld u0, -24(up)
+ sld r8, u1, cnt
+ srd r9, u0, tnc
+ ld u1, -32(up)
+ nor r10, r12, r7
+ sld r12, u0, cnt
+ srd r7, u1, tnc
+ addi rp, rp, 8
+ bdz L(cj4)
+
+L(gt4): addi up, up, -32
+ ld u0, -8(up)
+ nor r11, r8, r9
+ b L(L00)
+
+ ALIGN(16)
+L(b01): bdnz L(gt1)
+ sld r8, r10, cnt C n == 1: the single limb, zeros shifted in
+ nor r8, r8, r8
+ std r8, -8(rp)
+ b L(ret)
+
+L(gt1): ld u0, -16(up)
+ sld r8, r10, cnt
+ srd r9, u0, tnc
+ ld u1, -24(up)
+ sld r12, u0, cnt
+ srd r7, u1, tnc
+ ld u0, -32(up)
+ nor r11, r8, r9
+ sld r8, u1, cnt
+ srd r9, u0, tnc
+ ld u1, -40(up)
+ addi up, up, -40
+ nor r10, r12, r7
+ bdz L(end)
+
+ ALIGN(32)
+L(top): sld r12, u0, cnt C main loop: 4 limbs stored per pass, entered at top, L00, L11 or L10
+ srd r7, u1, tnc
+ ld u0, -8(up)
+ std r11, -8(rp)
+ nor r11, r8, r9 C nor merges the two shifted halves and complements in one step
+L(L00): sld r8, u1, cnt
+ srd r9, u0, tnc
+ ld u1, -16(up)
+ std r10, -16(rp)
+ nor r10, r12, r7
+L(L11): sld r12, u0, cnt
+ srd r7, u1, tnc
+ ld u0, -24(up)
+ std r11, -24(rp)
+ nor r11, r8, r9
+L(L10): sld r8, u1, cnt
+ srd r9, u0, tnc
+ ld u1, -32(up)
+ addi up, up, -32
+ std r10, -32(rp)
+ addi rp, rp, -32
+ nor r10, r12, r7
+ bdnz L(top)
+
+ ALIGN(32)
+L(end): sld r12, u0, cnt C wind down: no more loads, store the limbs still in flight
+ srd r7, u1, tnc
+ std r11, -8(rp)
+L(cj4): nor r11, r8, r9
+ sld r8, u1, cnt
+ std r10, -16(rp)
+ nor r8, r8, r8 C lowest limb has nothing or-ed in, so complement it alone
+L(cj3): nor r10, r12, r7
+ std r11, -24(rp)
+L(cj2): std r10, -32(rp)
+ std r8, -40(rp)
+
+L(ret): ld r31, -8(r1) C restore the registers saved in the prologue
+ ld r30, -16(r1)
+ifdef(`HAVE_ABI_mode32',
+` srdi r3, retval, 32
+ mr r4, retval
+',` mr r3, retval')
+ blr
+EPILOGUE()
--- /dev/null
+/* PowerPC-64 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 2008, 2009, 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+/* 1800 MHz PPC970 */
+
+#define DIVREM_1_NORM_THRESHOLD 0 /* always */
+#define DIVREM_1_UNNORM_THRESHOLD 0 /* always */
+#define MOD_1_1P_METHOD 1
+#define MOD_1_NORM_THRESHOLD 0 /* always */
+#define MOD_1_UNNORM_THRESHOLD 0 /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 7
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 5
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 6
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD 46
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 14
+#define USE_PREINV_DIVREM_1 1
+#define DIV_QR_2_PI2_THRESHOLD 12
+#define DIVEXACT_1_THRESHOLD 0 /* always */
+#define BMOD_1_TO_MOD_1_THRESHOLD 90
+
+#define MUL_TOOM22_THRESHOLD 16
+#define MUL_TOOM33_THRESHOLD 57
+#define MUL_TOOM44_THRESHOLD 94
+#define MUL_TOOM6H_THRESHOLD 125
+#define MUL_TOOM8H_THRESHOLD 187
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD 65
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 99
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 61
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD 56
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD 70
+
+#define SQR_BASECASE_THRESHOLD 4
+#define SQR_TOOM2_THRESHOLD 30
+#define SQR_TOOM3_THRESHOLD 98
+#define SQR_TOOM4_THRESHOLD 136
+#define SQR_TOOM6_THRESHOLD 180
+#define SQR_TOOM8_THRESHOLD 272
+
+#define MULMID_TOOM42_THRESHOLD 34
+
+#define MULMOD_BNM1_THRESHOLD 12
+#define SQRMOD_BNM1_THRESHOLD 13
+
+#define MUL_FFT_MODF_THRESHOLD 244 /* k = 5 */
+#define MUL_FFT_TABLE3 \
+ { { 244, 5}, { 13, 6}, { 7, 5}, { 15, 6}, \
+ { 15, 7}, { 8, 6}, { 17, 7}, { 9, 6}, \
+ { 19, 7}, { 13, 8}, { 7, 7}, { 17, 8}, \
+ { 9, 7}, { 20, 8}, { 11, 7}, { 23, 8}, \
+ { 13, 7}, { 29, 8}, { 19, 9}, { 11, 8}, \
+ { 27,10}, { 7, 9}, { 15, 8}, { 33, 9}, \
+ { 19, 8}, { 39, 9}, { 23, 8}, { 47, 9}, \
+ { 27,10}, { 15, 9}, { 39,10}, { 23, 9}, \
+ { 47,11}, { 15,10}, { 31, 9}, { 67,10}, \
+ { 39, 9}, { 83,10}, { 47, 9}, { 95, 8}, \
+ { 191, 9}, { 99,10}, { 55,11}, { 31,10}, \
+ { 63, 9}, { 127, 8}, { 255,10}, { 71, 9}, \
+ { 143, 8}, { 287,10}, { 79, 9}, { 159, 8}, \
+ { 319,11}, { 47,10}, { 95, 9}, { 191, 8}, \
+ { 383,10}, { 103,12}, { 31,11}, { 63,10}, \
+ { 127, 9}, { 255, 8}, { 511,10}, { 143, 9}, \
+ { 287,11}, { 79,10}, { 159, 9}, { 319, 8}, \
+ { 639,10}, { 175, 9}, { 351, 8}, { 703,11}, \
+ { 95,10}, { 191, 9}, { 383, 8}, { 767,10}, \
+ { 207, 9}, { 415,10}, { 223, 9}, { 447,12}, \
+ { 63,11}, { 127,10}, { 255, 9}, { 511,11}, \
+ { 143,10}, { 287, 9}, { 575, 8}, { 1151,11}, \
+ { 159,10}, { 319, 9}, { 639,11}, { 175,10}, \
+ { 351, 9}, { 703,12}, { 95,11}, { 191,10}, \
+ { 383, 9}, { 767,11}, { 207,10}, { 415, 9}, \
+ { 831,11}, { 223,10}, { 447,13}, { 8192,14}, \
+ { 16384,15}, { 32768,16} }
+#define MUL_FFT_TABLE3_SIZE 106
+#define MUL_FFT_THRESHOLD 2688
+
+#define SQR_FFT_MODF_THRESHOLD 212 /* k = 5 */
+#define SQR_FFT_TABLE3 \
+ { { 212, 5}, { 13, 6}, { 15, 7}, { 8, 6}, \
+ { 17, 7}, { 9, 6}, { 19, 7}, { 13, 8}, \
+ { 7, 7}, { 17, 8}, { 9, 7}, { 20, 8}, \
+ { 11, 7}, { 23, 8}, { 13, 7}, { 27, 9}, \
+ { 7, 8}, { 21, 9}, { 11, 8}, { 25,10}, \
+ { 7, 9}, { 15, 8}, { 33, 9}, { 19, 8}, \
+ { 39, 9}, { 23, 8}, { 47, 9}, { 27,10}, \
+ { 15, 9}, { 39,10}, { 23, 9}, { 47,11}, \
+ { 15,10}, { 31, 9}, { 63, 8}, { 127, 9}, \
+ { 67,10}, { 39, 9}, { 79, 8}, { 159,10}, \
+ { 47, 9}, { 95, 8}, { 191,11}, { 31,10}, \
+ { 63, 9}, { 127, 8}, { 255,10}, { 71, 9}, \
+ { 143, 8}, { 287,10}, { 79, 9}, { 159, 8}, \
+ { 319,11}, { 47, 9}, { 191, 8}, { 383,12}, \
+ { 31,11}, { 63,10}, { 127, 9}, { 255, 8}, \
+ { 511,10}, { 143, 9}, { 287, 8}, { 575,11}, \
+ { 79,10}, { 159, 9}, { 319, 8}, { 639,10}, \
+ { 175, 9}, { 351, 8}, { 703,10}, { 191, 9}, \
+ { 383, 8}, { 767,10}, { 207, 9}, { 415,11}, \
+ { 111,10}, { 223,12}, { 63,11}, { 127,10}, \
+ { 255, 9}, { 511,11}, { 143,10}, { 287, 9}, \
+ { 575,11}, { 159,10}, { 319, 9}, { 639,11}, \
+ { 175,10}, { 351, 9}, { 703, 8}, { 1407,11}, \
+ { 191,10}, { 383, 9}, { 767,11}, { 207,10}, \
+ { 415,11}, { 223,10}, { 447,13}, { 8192,14}, \
+ { 16384,15}, { 32768,16} }
+#define SQR_FFT_TABLE3_SIZE 102
+#define SQR_FFT_THRESHOLD 1984
+
+#define MULLO_BASECASE_THRESHOLD 0 /* always */
+#define MULLO_DC_THRESHOLD 55
+#define MULLO_MUL_N_THRESHOLD 5240
+
+#define DC_DIV_QR_THRESHOLD 27
+#define DC_DIVAPPR_Q_THRESHOLD 108
+#define DC_BDIV_QR_THRESHOLD 51
+#define DC_BDIV_Q_THRESHOLD 126
+
+#define INV_MULMOD_BNM1_THRESHOLD 38
+#define INV_NEWTON_THRESHOLD 129
+#define INV_APPR_THRESHOLD 116
+
+#define BINV_NEWTON_THRESHOLD 198
+#define REDC_1_TO_REDC_N_THRESHOLD 51
+
+#define MU_DIV_QR_THRESHOLD 807
+#define MU_DIVAPPR_Q_THRESHOLD 807
+#define MUPI_DIV_QR_THRESHOLD 54
+#define MU_BDIV_QR_THRESHOLD 748
+#define MU_BDIV_Q_THRESHOLD 872
+
+#define POWM_SEC_TABLE 4,35,152,780,2145
+
+#define MATRIX22_STRASSEN_THRESHOLD 11
+#define HGCD_THRESHOLD 104
+#define HGCD_APPR_THRESHOLD 118
+#define HGCD_REDUCE_THRESHOLD 1329
+#define GCD_DC_THRESHOLD 268
+#define GCDEXT_DC_THRESHOLD 241
+#define JACOBI_BASE_METHOD 4
+
+#define GET_STR_DC_THRESHOLD 9
+#define GET_STR_PRECOMPUTE_THRESHOLD 18
+#define SET_STR_DC_THRESHOLD 996
+#define SET_STR_PRECOMPUTE_THRESHOLD 2170
+
+#define FAC_DSC_THRESHOLD 442
+#define FAC_ODD_THRESHOLD 26
--- /dev/null
+dnl PowerPC-64 mpn_sqr_diagonal.
+
+dnl Copyright 2001, 2002, 2003, 2005, 2006, 2010 Free Software Foundation,
+dnl Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C POWER3/PPC630 18
+C POWER4/PPC970 ?
+C POWER5 7.25
+C POWER6 9.5
+
+C INPUT PARAMETERS
+define(`rp', r3)
+define(`up', r4)
+define(`n', r5)
+
+C For every source limb read through up, the low half of its square (mulld)
+C and then the high half (mulhdu) are stored through rp, so two result limbs
+C are written per source limb.  The main loop handles four source limbs per
+C iteration; n mod 4 selects the entry point into the store sequence.
+C NOTE(review): with n = 0 the counter is loaded with 0 and L(top) is still
+C entered; this presumably relies on callers passing n >= 1 -- confirm.
+
+ASM_START()
+PROLOGUE(mpn_sqr_diagonal)
+ifdef(`HAVE_ABI_mode32',
+` rldicl n, n, 0, 32') C zero extend n
+
+ rldicl. r0, n, 0,62 C r0 = n & 3, set cr0
+ addi n, n, 3 C compute count...
+ cmpdi cr6, r0, 2
+ srdi n, n, 2 C ...for ctr
+ mtctr n C copy count into ctr
+ beq cr0, L(b00) C n mod 4 = 0
+ blt cr6, L(b01) C n mod 4 = 1
+ beq cr6, L(b10) C n mod 4 = 2
+
+C n mod 4 = 3: square three limbs, then join the store sequence at L(11)
+L(b11): ld r0, 0(up)
+ ld r10, 8(up)
+ ld r12, 16(up)
+ addi rp, rp, -16 C bias rp, the first store at L(11) uses 16(rp)
+ mulld r7, r0, r0
+ mulhdu r8, r0, r0
+ mulld r9, r10, r10
+ mulhdu r10, r10, r10
+ mulld r11, r12, r12
+ mulhdu r12, r12, r12
+ addi up, up, 24
+ b L(11)
+
+ ALIGN(16)
+C n mod 4 = 1: square one limb, then join the store sequence at L(01)
+L(b01): ld r0, 0(up)
+ addi rp, rp, -48 C bias rp, the first store at L(01) uses 48(rp)
+ addi up, up, 8
+ mulld r11, r0, r0
+ mulhdu r12, r0, r0
+ b L(01)
+
+ ALIGN(16)
+C n mod 4 = 2: square two limbs, then join the store sequence at L(10)
+L(b10): ld r0, 0(up)
+ ld r12, 8(up)
+ addi rp, rp, -32 C bias rp, the first store at L(10) uses 32(rp)
+ addi up, up, 16
+ mulld r9, r0, r0
+ mulhdu r10, r0, r0
+ mulld r11, r12, r12
+ mulhdu r12, r12, r12
+ b L(10)
+
+ ALIGN(32)
+C Main loop: four source limbs in, eight result limbs out per iteration.
+C r5 (the register behind n) is reused as scratch; the count lives in ctr.
+L(b00):
+L(top): ld r0, 0(up)
+ ld r8, 8(up)
+ ld r10, 16(up)
+ ld r12, 24(up)
+ mulld r5, r0, r0
+ mulhdu r6, r0, r0
+ mulld r7, r8, r8
+ mulhdu r8, r8, r8
+ mulld r9, r10, r10
+ mulhdu r10, r10, r10
+ mulld r11, r12, r12
+ mulhdu r12, r12, r12
+ addi up, up, 32
+ std r5, 0(rp)
+ std r6, 8(rp)
+L(11): std r7, 16(rp)
+ std r8, 24(rp)
+L(10): std r9, 32(rp)
+ std r10, 40(rp)
+L(01): std r11, 48(rp)
+ std r12, 56(rp)
+ addi rp, rp, 64
+ bdnz L(top)
+
+ blr
+EPILOGUE()
+++ /dev/null
-dnl PowerPC-64 mpn_addlsh1_n -- rp[] = up[] + (vp[] << 1)
-
-dnl Copyright 2003, 2005 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of the GNU Lesser General Public License as published
-dnl by the Free Software Foundation; either version 3 of the License, or (at
-dnl your option) any later version.
-
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-dnl License for more details.
-
-dnl You should have received a copy of the GNU Lesser General Public License
-dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/limb
-C POWER3/PPC630: 2 (1.5 c/l should be possible)
-C POWER4/PPC970: 4 (2.0 c/l should be possible)
-
-C INPUT PARAMETERS
-C rp r3
-C up r4
-C vp r5
-C n r6
-
-define(`rp',`r3')
-define(`up',`r4')
-define(`vp',`r5')
-
-define(`s0',`r6')
-define(`s1',`r7')
-define(`u0',`r8')
-define(`v0',`r10')
-define(`v1',`r11')
-
-ASM_START()
-PROLOGUE(mpn_addlsh1_n)
- mtctr r6 C copy n in ctr
- addic r31, r31, 0 C clear cy
-
- ld v0, 0(vp) C load v limb
- ld u0, 0(up) C load u limb
- addi up, up, -8 C update up
- addi rp, rp, -8 C update rp
- sldi s1, v0, 1
- bdz L(end) C If done, skip loop
-
-L(oop): ld v1, 8(vp) C load v limb
- adde s1, s1, u0 C add limbs with cy, set cy
- std s1, 8(rp) C store result limb
- srdi s0, v0, 63 C shift down previous v limb
- ldu u0, 16(up) C load u limb and update up
- rldimi s0, v1, 1, 0 C left shift v limb and merge with prev v limb
-
- bdz L(exit) C decrement ctr and exit if done
-
- ldu v0, 16(vp) C load v limb and update vp
- adde s0, s0, u0 C add limbs with cy, set cy
- stdu s0, 16(rp) C store result limb and update rp
- srdi s1, v1, 63 C shift down previous v limb
- ld u0, 8(up) C load u limb
- rldimi s1, v0, 1, 0 C left shift v limb and merge with prev v limb
-
- bdnz L(oop) C decrement ctr and loop back
-
-L(end): adde r7, s1, u0
- std r7, 8(rp) C store last result limb
- srdi r3, v0, 63
- addze r3, r3
- blr
-L(exit): adde r7, s0, u0
- std r7, 16(rp) C store last result limb
- srdi r3, v1, 63
- addze r3, r3
- blr
-EPILOGUE()
+++ /dev/null
-dnl PowerPC-64 mpn_addmul_1 -- Multiply a limb vector with a limb and add
-dnl the result to a second limb vector.
-
-dnl Copyright 1999, 2000, 2001, 2003, 2004, 2005, 2006 Free Software
-dnl Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of the GNU Lesser General Public License as published
-dnl by the Free Software Foundation; either version 3 of the License, or (at
-dnl your option) any later version.
-
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-dnl License for more details.
-
-dnl You should have received a copy of the GNU Lesser General Public License
-dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/limb
-C POWER3/PPC630: 6-18
-C POWER4/PPC970: 8
-C POWER5: 8
-
-C TODO
-C * Reduce the number of registers used. Some mul destination registers could
-C be coalesced.
-C * Delay std for preserving registers, and suppress them for n=1.
-C * Write faster feed-in code. If nothing else, avoid one or two up updates.
-
-C INPUT PARAMETERS
-define(`rp', `r3')
-define(`up', `r4')
-define(`n', `r5')
-define(`vl', `r6')
-
-ASM_START()
-PROLOGUE(mpn_addmul_1)
- std r31, -8(r1)
- std r30, -16(r1)
- std r29, -24(r1)
- std r28, -32(r1)
- std r27, -40(r1)
- std r26, -48(r1)
-
- rldicl. r0, n, 0,62 C r0 = n & 3, set cr0
- cmpdi cr6, r0, 2
- addi n, n, 3 C compute count...
- srdi n, n, 2 C ...for ctr
- mtctr n C copy count into ctr
- beq cr0, L(b00)
- blt cr6, L(b01)
- beq cr6, L(b10)
-
-L(b11): ld r26, 0(up)
- ld r28, 0(rp)
- addi up, up, 8
- nop
- mulld r0, r26, r6
- mulhdu r12, r26, r6
- addc r0, r0, r28
- std r0, 0(rp)
- addi rp, rp, 8
- b L(fic)
-
-L(b00): ld r26, 0(up)
- ld r27, 8(up)
- ld r28, 0(rp)
- ld r29, 8(rp)
- addi up, up, 16
- nop
- mulld r0, r26, r6
- mulhdu r5, r26, r6
- mulld r7, r27, r6
- mulhdu r8, r27, r6
- addc r7, r7, r5
- addze r12, r8
- addc r0, r0, r28
- std r0, 0(rp)
- adde r7, r7, r29
- std r7, 8(rp)
- addi rp, rp, 16
- b L(fic)
-
-L(b01): bdnz L(gt1)
- ld r26, 0(up)
- ld r28, 0(rp)
- mulld r0, r26, r6
- mulhdu r8, r26, r6
- addc r0, r0, r28
- std r0, 0(rp)
- b L(ret)
-L(gt1): ld r26, 0(up)
- ld r27, 8(up)
- mulld r0, r26, r6
- mulhdu r5, r26, r6
- ld r26, 16(up)
- ld r28, 0(rp)
- mulld r7, r27, r6
- mulhdu r8, r27, r6
- ld r29, 8(rp)
- ld r30, 16(rp)
- mulld r9, r26, r6
- mulhdu r10, r26, r6
- addc r7, r7, r5
- adde r9, r9, r8
- addze r12, r10
- addc r0, r0, r28
- std r0, 0(rp)
- adde r7, r7, r29
- std r7, 8(rp)
- adde r9, r9, r30
- std r9, 16(rp)
- addi up, up, 24
- addi rp, rp, 24
- b L(fic)
-
-L(b10): addic r0, r0, 0
- li r12, 0 C cy_limb = 0
-L(fic): ld r26, 0(up)
- ld r27, 8(up)
- addi up, up, 16
- bdz L(end)
- C registers dying
-L(top): mulld r0, r26, r6 C
- mulhdu r5, r26, r6 C 26
- ld r26, 0(up) C
- ld r28, 0(rp) C
- mulld r7, r27, r6 C
- mulhdu r8, r27, r6 C 27
- ld r27, 8(up) C
- ld r29, 8(rp) C
- adde r0, r0, r12 C 0 12
- adde r7, r7, r5 C 5 7
- mulld r9, r26, r6 C
- mulhdu r10, r26, r6 C 26
- ld r26, 16(up) C
- ld r30, 16(rp) C
- mulld r11, r27, r6 C
- mulhdu r12, r27, r6 C 27
- ld r27, 24(up) C
- ld r31, 24(rp) C
- adde r9, r9, r8 C 8 9
- adde r11, r11, r10 C 10 11
- addze r12, r12 C 12
- addc r0, r0, r28 C 0 28
- std r0, 0(rp) C 0
- adde r7, r7, r29 C 7 29
- std r7, 8(rp) C 7
- adde r9, r9, r30 C 9 30
- std r9, 16(rp) C 9
- adde r11, r11, r31 C 11 31
- std r11, 24(rp) C 11
- addi up, up, 32 C
- addi rp, rp, 32 C
- bdnz L(top) C
-
-L(end): mulld r0, r26, r6
- mulhdu r5, r26, r6
- ld r28, 0(rp)
- nop
- mulld r7, r27, r6
- mulhdu r8, r27, r6
- ld r29, 8(rp)
- nop
- adde r0, r0, r12
- adde r7, r7, r5
- addze r8, r8
- addc r0, r0, r28
- std r0, 0(rp)
- adde r7, r7, r29
- std r7, 8(rp)
-L(ret): addze r3, r8
- ld r31, -8(r1)
- ld r30, -16(r1)
- ld r29, -24(r1)
- ld r28, -32(r1)
- ld r27, -40(r1)
- ld r26, -48(r1)
- blr
-EPILOGUE()
dnl PowerPC-64 mpn_add_n/mpn_sub_n -- mpn addition and subtraction.
-dnl Copyright 1999, 2000, 2001, 2003, 2004, 2005, 2007 Free Software
+dnl Copyright 1999, 2000, 2001, 2003, 2004, 2005, 2007, 2011 Free Software
dnl Foundation, Inc.
dnl This file is part of the GNU MP Library.
include(`../config.m4')
-C cycles/limb
-C POWER3/PPC630: 1.5
-C POWER4/PPC970: 2
-
-C n POWER3/PPC630 POWER4/PPC970
-C 1 17.00 19.00
-C 2 9.00 10.49
-C 3 5.33 7.66
-C 4 4.50 5.14
-C 5 4.20 4.80
-C 6 3.83 4.33
-C 7 3.00 3.99
-C 8 2.87 3.55
-C 9 2.89 3.40
-C 10 2.60 3.42
-C 11 2.45 3.15
-C 12 2.41 2.99
-C 13 2.46 3.01
-C 14 2.42 2.97
-C 15 2.20 2.85
-C 50 1.78 2.44
-C 100 1.83 2.20
-C 200 1.55 2.12
-C 400 1.53 2.05
-C 1000 1.98 2.02#
-C 2000 1.50# 2.04
-C 4000 2.55 2.50
-C 8000 2.70 2.45
-C 16000 2.65 5.94
-C 32000 2.62 16.41
-C 64000 2.73 18.94
+C cycles/limb
+C POWER3/PPC630 1.5
+C POWER4/PPC970 2
+C POWER5 2
+C POWER6 2.63
+C POWER7 2.25-2.87
C This code is a little bit slower for POWER3/PPC630 than the simple code used
C previously, but it is much faster for POWER4/PPC970. The reason for the
addi r4, r4, 32
addi r5, r5, 32
-L(oop): ADDSUBC r28, r7, r6
+ ALIGN(16)
+L(top): ADDSUBC r28, r7, r6
ld r6, 0(r4) C load s1 limb
ld r7, 0(r5) C load s2 limb
ADDSUBC r29, r9, r8
std r30, 16(r3)
std r31, 24(r3)
addi r3, r3, 32
- bdnz L(oop) C decrement ctr and loop back
+ bdnz L(top) C decrement ctr and loop back
L(end): ADDSUBC r28, r7, r6
ADDSUBC r29, r9, r8
--- /dev/null
+dnl PowerPC-64 mpn_addcnd_n/mpn_subcnd_n.
+
+dnl Copyright 1999, 2000, 2001, 2003, 2004, 2005, 2007, 2011, 2012 Free
+dnl Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C POWER3/PPC630 ?
+C POWER4/PPC970 2.25
+C POWER5 ?
+C POWER6 3
+C POWER7 ?
+
+C INPUT PARAMETERS
+define(`rp', `r3')
+define(`up', `r4')
+define(`vp', `r5')
+define(`n', `r6')
+define(`cnd', `r7')
+
+C Operation selection.  ADDSUB starts a carry chain and ADDSUBC continues it.
+C GENRVAL turns the value left in r3 by the subfe at L(ret) into the 0/1
+C return value.  CLRCB puts CA into the neutral state before the first
+C ADDSUBC when entering at L(b00); the subtract flavour adds -1 to r1 and so
+C assumes r1 is nonzero.  SETCBR is defined but not referenced in this file.
+ifdef(`OPERATION_addcnd_n',`
+ define(ADDSUBC, adde)
+ define(ADDSUB, addc)
+ define(func, mpn_addcnd_n)
+ define(GENRVAL, `addi r3, r3, 1')
+ define(SETCBR, `addic r0, $1, -1')
+ define(CLRCB, `addic r0, r0, 0')
+')
+ifdef(`OPERATION_subcnd_n',`
+ define(ADDSUBC, subfe)
+ define(ADDSUB, subfc)
+ define(func, mpn_subcnd_n)
+ define(GENRVAL, `neg r3, r3')
+ define(SETCBR, `subfic r0, $1, 0')
+ define(CLRCB, `addic r0, r1, -1')
+')
+
+MULFUNC_PROLOGUE(mpn_addcnd_n mpn_subcnd_n)
+
+C Every vp limb is ANDed with a mask derived from cnd before it is combined
+C with the matching up limb, so the same instruction sequence runs whether
+C cnd is zero or not.  n mod 4 limbs are handled by feed-in code, the rest by
+C a four-limb loop.
+ASM_START()
+PROLOGUE(func)
+ std r31, -8(r1) C save r27-r31 below the stack pointer
+ std r30, -16(r1)
+ std r29, -24(r1)
+ std r28, -32(r1)
+ std r27, -40(r1)
+
+ subfic cnd, cnd, 0 C CA = 1 iff cnd was zero
+ subfe cnd, cnd, cnd C cnd = CA - 1: zero if it was zero, else all ones
+
+ rldicl. r0, r6, 0,62 C r0 = n & 3, set cr0
+ cmpdi cr6, r0, 2
+ addi r6, r6, 3 C compute count...
+ srdi r6, r6, 2 C ...for ctr
+ mtctr r6 C copy count into ctr
+ beq cr0, L(b00) C n mod 4 = 0
+ blt cr6, L(b01) C n mod 4 = 1
+ beq cr6, L(b10) C n mod 4 = 2
+
+C n mod 4 = 3: handle three limbs up front
+L(b11): ld r8, 0(up) C load s1 limb
+ ld r9, 0(vp) C load s2 limb
+ ld r10, 8(up) C load s1 limb
+ ld r11, 8(vp) C load s2 limb
+ ld r12, 16(up) C load s1 limb
+ addi up, up, 24
+ ld r0, 16(vp) C load s2 limb
+ addi vp, vp, 24
+ and r9, r9, cnd C mask s2 limbs
+ and r11, r11, cnd
+ and r0, r0, cnd
+ ADDSUB r29, r9, r8
+ ADDSUBC r30, r11, r10
+ ADDSUBC r31, r0, r12
+ std r29, 0(rp)
+ std r30, 8(rp)
+ std r31, 16(rp)
+ addi rp, rp, 24
+ bdnz L(go) C more limbs remain, continue with CA from above
+ b L(ret)
+
+C n mod 4 = 1: handle one limb up front
+L(b01): ld r12, 0(up) C load s1 limb
+ addi up, up, 8
+ ld r0, 0(vp) C load s2 limb
+ addi vp, vp, 8
+ and r0, r0, cnd C mask s2 limb
+ ADDSUB r31, r0, r12 C add
+ std r31, 0(rp)
+ addi rp, rp, 8
+ bdnz L(go) C more limbs remain, continue with CA from above
+ b L(ret)
+
+C n mod 4 = 2: handle two limbs up front
+L(b10): ld r10, 0(up) C load s1 limb
+ ld r11, 0(vp) C load s2 limb
+ ld r12, 8(up) C load s1 limb
+ addi up, up, 16
+ ld r0, 8(vp) C load s2 limb
+ addi vp, vp, 16
+ and r11, r11, cnd C mask s2 limbs
+ and r0, r0, cnd
+ ADDSUB r30, r11, r10 C add
+ ADDSUBC r31, r0, r12 C add
+ std r30, 0(rp)
+ std r31, 8(rp)
+ addi rp, rp, 16
+ bdnz L(go) C more limbs remain, continue with CA from above
+ b L(ret)
+
+L(b00): CLRCB C clear/set cy
+L(go): ld r6, 0(up) C load s1 limb
+ ld r27, 0(vp) C load s2 limb
+ ld r8, 8(up) C load s1 limb
+ ld r9, 8(vp) C load s2 limb
+ ld r10, 16(up) C load s1 limb
+ ld r11, 16(vp) C load s2 limb
+ ld r12, 24(up) C load s1 limb
+ ld r0, 24(vp) C load s2 limb
+ and r27, r27, cnd C mask s2 limbs
+ and r9, r9, cnd
+ and r11, r11, cnd
+ and r0, r0, cnd
+ bdz L(end) C last group of four: skip the loop
+
+ addi up, up, 32
+ addi vp, vp, 32
+
+C Loop: combine the four limb pairs loaded previously while loading and
+C masking the next four.
+L(top): ADDSUBC r28, r27, r6
+ ld r6, 0(up) C load s1 limb
+ ld r27, 0(vp) C load s2 limb
+ ADDSUBC r29, r9, r8
+ ld r8, 8(up) C load s1 limb
+ ld r9, 8(vp) C load s2 limb
+ ADDSUBC r30, r11, r10
+ ld r10, 16(up) C load s1 limb
+ ld r11, 16(vp) C load s2 limb
+ ADDSUBC r31, r0, r12
+ ld r12, 24(up) C load s1 limb
+ ld r0, 24(vp) C load s2 limb
+ std r28, 0(rp)
+ addi up, up, 32
+ std r29, 8(rp)
+ addi vp, vp, 32
+ std r30, 16(rp)
+ std r31, 24(rp)
+ addi rp, rp, 32
+ and r27, r27, cnd C mask s2 limbs
+ and r9, r9, cnd
+ and r11, r11, cnd
+ and r0, r0, cnd
+ bdnz L(top) C decrement ctr and loop back
+
+C Wind-down: combine and store the final four limb pairs.
+L(end): ADDSUBC r28, r27, r6
+ ADDSUBC r29, r9, r8
+ ADDSUBC r30, r11, r10
+ ADDSUBC r31, r0, r12
+ std r28, 0(rp)
+ std r29, 8(rp)
+ std r30, 16(rp)
+ std r31, 24(rp)
+
+L(ret): ld r31, -8(r1) C reload the registers saved on entry
+ ld r30, -16(r1)
+ ld r29, -24(r1)
+ ld r28, -32(r1)
+ ld r27, -40(r1)
+
+ subfe r3, r0, r0 C r3 = CA - 1, i.e. 0 or -1
+ GENRVAL
+ blr
+EPILOGUE()
--- /dev/null
+dnl PowerPC-64 mpn_addlsh1_n and mpn_sublsh1_n.
+
+dnl Copyright 2003, 2005, 2009, 2010 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+define(LSH, 1)
+define(RSH, 63)
+dnl LSH is the left shift count applied to the vp limbs and RSH = 64 - LSH is
+dnl the right shift that extracts the bits moving into the next limb.  The
+dnl macros below select the add or subtract flavour of the shared loop in
+dnl aorslshC_n.asm: ADDSUBC starts a carry chain, ADDSUBE continues it, INITCY
+dnl sets the initial carry bit using its argument as scratch (the subtract
+dnl flavour adds -1 to r1 and so assumes r1 is nonzero), and RETVAL folds the
+dnl final carry bit into the shifted-out bits passed as its argument, leaving
+dnl the result in r3.
+
+ifdef(`OPERATION_addlsh1_n',`
+ define(ADDSUBC, addc)
+ define(ADDSUBE, adde)
+ define(INITCY, `addic $1, r1, 0')
+ define(RETVAL, `addze r3, $1')
+ define(func, mpn_addlsh1_n)
+')
+ifdef(`OPERATION_sublsh1_n',`
+ define(ADDSUBC, subfc)
+ define(ADDSUBE, subfe)
+ define(INITCY, `addic $1, r1, -1')
+ define(RETVAL, `subfze r3, $1
+ neg r3, r3')
+ define(func, mpn_sublsh1_n)
+')
+
+MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_sublsh1_n)
+
+include_mpn(`powerpc64/mode64/aorslshC_n.asm')
--- /dev/null
+dnl PowerPC-64 mpn_addlsh2_n and mpn_sublsh2_n.
+
+dnl Copyright 2003, 2005, 2009, 2010 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+define(LSH, 2)
+define(RSH, 62)
+dnl LSH is the left shift count applied to the vp limbs and RSH = 64 - LSH is
+dnl the right shift that extracts the bits moving into the next limb.  The
+dnl macros below select the add or subtract flavour of the shared loop in
+dnl aorslshC_n.asm: ADDSUBC starts a carry chain, ADDSUBE continues it, INITCY
+dnl sets the initial carry bit using its argument as scratch (the subtract
+dnl flavour adds -1 to r1 and so assumes r1 is nonzero), and RETVAL folds the
+dnl final carry bit into the shifted-out bits passed as its argument, leaving
+dnl the result in r3.
+
+ifdef(`OPERATION_addlsh2_n',`
+ define(ADDSUBC, addc)
+ define(ADDSUBE, adde)
+ define(INITCY, `addic $1, r1, 0')
+ define(RETVAL, `addze r3, $1')
+ define(func, mpn_addlsh2_n)
+')
+ifdef(`OPERATION_sublsh2_n',`
+ define(ADDSUBC, subfc)
+ define(ADDSUBE, subfe)
+ define(INITCY, `addic $1, r1, -1')
+ define(RETVAL, `subfze r3, $1
+ neg r3, r3')
+ define(func, mpn_sublsh2_n)
+')
+
+MULFUNC_PROLOGUE(mpn_addlsh2_n mpn_sublsh2_n)
+
+include_mpn(`powerpc64/mode64/aorslshC_n.asm')
--- /dev/null
+dnl PowerPC-64 mpn_addlshC_n and mpn_sublshC_n, where C is a small constant.
+
+dnl Copyright 2003, 2005, 2009, 2010 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+C cycles/limb
+C POWER3/PPC630 1.83 (1.5 c/l should be possible)
+C POWER4/PPC970 3 (2.0 c/l should be possible)
+C POWER5 3
+C POWER6 3.5-47
+C POWER7 3
+
+C STATUS
+C * Try combining upx+up, and vpx+vp.
+C * The worst case 47 c/l for POWER6 happens if the 3rd operand for ldx is
+C greater than the 2nd operand. Yes, this addition is non-commutative wrt
+C performance.
+
+C INPUT PARAMETERS
+define(`rp', `r3')
+define(`up', `r4')
+define(`vp', `r5')
+define(`n', `r6')
+
+C rpx shares r6 with n; it is only written after n has been copied into ctr.
+define(`rpx', `r6')
+define(`upx', `r7')
+define(`vpx', `r12')
+
+define(`s0', `r0') define(`s1', `r9')
+define(`u0', `r8')
+define(`v0', `r10') define(`v1', `r11')
+
+C The including file supplies LSH, RSH, ADDSUBC, ADDSUBE, INITCY, RETVAL and
+C func.  Each result limb is the up limb combined (ADDSUBE) with the vp limb
+C shifted left by LSH and merged with the top bits of the previous vp limb.
+C Sizes up to 13 limbs use a simple two-way unrolled loop; larger sizes use
+C the indexed-load loop at L(big).
+
+ASM_START()
+PROLOGUE(func)
+ cmpldi cr0, n, 13
+ bgt L(big)
+
+ mtctr n C copy n in ctr
+ INITCY( r0) C clear cy
+
+ ld v0, 0(vp) C load v limb
+ ld u0, 0(up) C load u limb
+ addi up, up, -8 C update up
+ addi rp, rp, -8 C update rp
+ sldi s1, v0, LSH
+ bdz L(ex1) C If done, skip loop
+
+ ALIGN(16)
+L(lo0): ld v1, 8(vp) C load v limb
+ ADDSUBE s1, s1, u0 C add limbs with cy, set cy
+ ldu u0, 16(up) C load u limb and update up
+ srdi s0, v0, RSH C shift down previous v limb
+ std s1, 8(rp) C store result limb
+ rldimi s0, v1, LSH, 0 C left shift v limb and merge with prev v limb
+ bdz L(ex0) C decrement ctr and exit if done
+ ldu v0, 16(vp) C load v limb and update vp
+ ADDSUBE s0, s0, u0 C add limbs with cy, set cy
+ ld u0, 8(up) C load u limb
+ srdi s1, v1, RSH C shift down previous v limb
+ stdu s0, 16(rp) C store result limb and update rp
+ rldimi s1, v0, LSH, 0 C left shift v limb and merge with prev v limb
+ bdnz L(lo0) C decrement ctr and loop back
+
+L(ex1): ADDSUBE r7, s1, u0
+ std r7, 8(rp) C store last result limb
+ srdi r0, v0, RSH C bits shifted out of the last v limb
+ RETVAL( r0)
+ blr
+L(ex0): ADDSUBE r7, s0, u0
+ std r7, 16(rp) C store last result limb
+ srdi r0, v1, RSH C bits shifted out of the last v limb
+ RETVAL( r0)
+ blr
+
+
+C Large sizes.  up, vp, upx and vpx are converted from pointers into offsets
+C that the ldx instructions in the loop add to rp or rpx, so only rp and rpx
+C (advanced by stdu) are updated per iteration.
+L(big): rldicl. r0, n, 0,63 C r0 = n & 1, set cr0
+ addi r6, n, -1 C compute count...
+ srdi r6, r6, 1 C ...for ctr
+ mtctr r6 C copy count into ctr
+ beq cr0, L(b0) C n even
+
+C n odd: handle the first limb, then enter the loop at L(mid)
+L(b1): ld v1, 0(vp)
+ ld u0, 0(up)
+ sldi s1, v1, LSH
+ srdi s0, v1, RSH
+ ld v0, 8(vp)
+ ADDSUBC s1, s1, u0 C add limbs without cy, set cy
+ addi rpx, rp, -16
+ addi rp, rp, -8
+ sub upx, up, rp
+ sub vpx, vp, rp
+ sub up, up, rpx
+ sub vp, vp, rpx
+ addi up, up, 8
+ addi upx, upx, 16
+ addi vp, vp, 16
+ addi vpx, vpx, 24
+ b L(mid)
+
+C n even: handle the first limb, then fall into the loop at L(top)
+L(b0): ld v0, 0(vp)
+ ld u0, 0(up)
+ sldi s0, v0, LSH
+ srdi s1, v0, RSH
+ ld v1, 8(vp)
+ ADDSUBC s0, s0, u0 C add limbs without cy, set cy
+ addi rpx, rp, -8
+ addi rp, rp, -16
+ sub upx, up, rpx
+ sub vpx, vp, rpx
+ sub up, up, rp
+ sub vp, vp, rp
+ addi up, up, 8
+ addi upx, upx, 16
+ addi vp, vp, 16
+ addi vpx, vpx, 24
+
+ ALIGN(32)
+L(top): ldx u0, rp, up
+ ldx v0, rp, vp
+ rldimi s1, v1, LSH, 0
+ stdu s0, 16(rp)
+ srdi s0, v1, RSH
+ ADDSUBE s1, s1, u0 C add limbs with cy, set cy
+L(mid): ldx u0, rpx, upx
+ ldx v1, rpx, vpx
+ rldimi s0, v0, LSH, 0
+ stdu s1, 16(rpx)
+ srdi s1, v0, RSH
+ ADDSUBE s0, s0, u0 C add limbs with cy, set cy
+ bdnz L(top) C decrement CTR and loop back
+
+ ldx u0, rp, up
+ rldimi s1, v1, LSH, 0
+ std s0, 16(rp)
+ srdi s0, v1, RSH C s0 is r0: bits shifted out of the last v limb
+ ADDSUBE s1, s1, u0 C add limbs with cy, set cy
+ std s1, 24(rp)
+
+ RETVAL( r0)
+ blr
+EPILOGUE()
--- /dev/null
+dnl PowerPC-64 mpn_addmul_1 and mpn_submul_1.
+
+dnl Copyright 1999, 2000, 2001, 2003, 2004, 2005, 2006, 2010, 2011, 2012
+dnl Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C mpn_addmul_1 mpn_submul_1
+C cycles/limb cycles/limb
+C POWER3/PPC630 6-18 6-18
+C POWER4/PPC970 8 8.3
+C POWER5 8 8.25
+C POWER6 16.25 16.75
+C POWER7 3.77 4.9
+
+C TODO
+C * Try to reduce the number of needed live registers
+C * Add support for _1c entry points
+
+C INPUT PARAMETERS
+define(`rp', `r3')
+define(`up', `r4')
+define(`n', `r5')
+define(`vl', `r6')
+C NOTE: the code below names r6 directly instead of vl, and reuses r5 (the
+C register behind n) as scratch once the count has been copied into ctr.
+
+C Operation selection.  ADDSUB starts the carry chain that combines product
+C limbs with rp limbs and ADDSUBC continues it.  SM expands to its argument
+C for mpn_submul_1 and to nothing for mpn_addmul_1.  It wraps the pair
+C subfe r11,r11,r11 / addic r11,r11,1: the subfe yields CA - 1 and the addic
+C then sets CA only if that value was -1, so the pair inverts CA.  That turns
+C the borrow convention left by the subfe chain into the carry convention
+C expected by the adde/addze that follows.
+ifdef(`OPERATION_addmul_1',`
+ define(ADDSUBC, adde)
+ define(ADDSUB, addc)
+ define(func, mpn_addmul_1)
+ define(func_nc, mpn_addmul_1c) C FIXME: not really supported
+ define(SM, `')
+')
+ifdef(`OPERATION_submul_1',`
+ define(ADDSUBC, subfe)
+ define(ADDSUB, subfc)
+ define(func, mpn_submul_1)
+ define(func_nc, mpn_submul_1c) C FIXME: not really supported
+ define(SM, `$1')
+')
+
+MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1)
+
+C Multiplies the n limbs at up by the limb in r6 and adds the products to (or
+C subtracts them from) the limbs at rp in place; the carry-out limb is
+C returned in r3.  n mod 4 selects the feed-in code; the loop handles four
+C limbs per iteration and L(end) always handles the final two.
+ASM_START()
+PROLOGUE(func)
+ std r31, -8(r1) C save r27-r31 below the stack pointer
+ rldicl. r0, n, 0,62 C r0 = n & 3, set cr0
+ std r30, -16(r1)
+ cmpdi cr6, r0, 2
+ std r29, -24(r1)
+ addi n, n, 3 C compute count...
+ std r28, -32(r1)
+ srdi n, n, 2 C ...for ctr
+ std r27, -40(r1)
+ mtctr n C copy count into ctr
+ beq cr0, L(b00) C n mod 4 = 0
+ blt cr6, L(b01) C n mod 4 = 1
+ beq cr6, L(b10) C n mod 4 = 2
+
+C n mod 4 = 3: handle one limb, preload the next two
+L(b11): ld r9, 0(up)
+ ld r28, 0(rp)
+ mulld r0, r9, r6
+ mulhdu r12, r9, r6
+ ADDSUB r0, r0, r28
+ std r0, 0(rp)
+ addi rp, rp, 8
+ ld r9, 8(up)
+ ld r27, 16(up)
+ addi up, up, 24
+SM(` subfe r11, r11, r11 ')
+ b L(bot)
+
+ ALIGN(16)
+C n mod 4 = 0: handle two limbs, preload the next two
+L(b00): ld r9, 0(up)
+ ld r27, 8(up)
+ ld r28, 0(rp)
+ ld r29, 8(rp)
+ mulld r0, r9, r6
+ mulhdu r5, r9, r6
+ mulld r7, r27, r6
+ mulhdu r8, r27, r6
+ addc r7, r7, r5
+ addze r12, r8
+ ADDSUB r0, r0, r28
+ std r0, 0(rp)
+ ADDSUBC r7, r7, r29
+ std r7, 8(rp)
+ addi rp, rp, 16
+ ld r9, 16(up)
+ ld r27, 24(up)
+ addi up, up, 32
+SM(` subfe r11, r11, r11 ')
+ b L(bot)
+
+ ALIGN(16)
+C n mod 4 = 1.  For n = 1 the fall-through path uses only r0, r8, r9 and r11,
+C none of which were saved above, and returns without the register reloads.
+L(b01): bdnz L(gt1)
+ ld r9, 0(up)
+ ld r11, 0(rp)
+ mulld r0, r9, r6
+ mulhdu r8, r9, r6
+ ADDSUB r0, r0, r11
+ std r0, 0(rp)
+SM(` subfe r11, r11, r11 ')
+SM(` addic r11, r11, 1 ')
+ addze r3, r8
+ blr
+C n mod 4 = 1 with n > 1: handle three limbs, preload the next two
+L(gt1): ld r9, 0(up)
+ ld r27, 8(up)
+ mulld r0, r9, r6
+ mulhdu r5, r9, r6
+ mulld r7, r27, r6
+ mulhdu r8, r27, r6
+ ld r9, 16(up)
+ ld r28, 0(rp)
+ ld r29, 8(rp)
+ ld r30, 16(rp)
+ mulld r11, r9, r6
+ mulhdu r10, r9, r6
+ addc r7, r7, r5
+ adde r11, r11, r8
+ addze r12, r10
+ ADDSUB r0, r0, r28
+ std r0, 0(rp)
+ ADDSUBC r7, r7, r29
+ std r7, 8(rp)
+ ADDSUBC r11, r11, r30
+ std r11, 16(rp)
+ addi rp, rp, 24
+ ld r9, 24(up)
+ ld r27, 32(up)
+ addi up, up, 40
+SM(` subfe r11, r11, r11 ')
+ b L(bot)
+
+C n mod 4 = 2: no feed-in arithmetic, start with a clear carry
+L(b10): addic r0, r0, 0
+ li r12, 0 C cy_limb = 0
+ ld r9, 0(up)
+ ld r27, 8(up)
+ bdz L(end)
+ addi up, up, 16
+
+ ALIGN(16)
+C The numbers in the trailing comments below are kept from the original; they
+C appear to list the registers whose values die at that instruction.
+L(top): mulld r0, r9, r6
+ mulhdu r5, r9, r6 C 9
+ mulld r7, r27, r6
+ mulhdu r8, r27, r6 C 27
+ ld r9, 0(up)
+ ld r28, 0(rp)
+ ld r27, 8(up)
+ ld r29, 8(rp)
+ adde r0, r0, r12 C 0 12
+ adde r7, r7, r5 C 5 7
+ mulld r5, r9, r6
+ mulhdu r10, r9, r6 C 9
+ mulld r11, r27, r6
+ mulhdu r12, r27, r6 C 27
+ ld r9, 16(up)
+ ld r30, 16(rp)
+ ld r27, 24(up)
+ ld r31, 24(rp)
+ adde r5, r5, r8 C 8 5
+ adde r11, r11, r10 C 10 11
+ addze r12, r12 C 12
+ ADDSUB r0, r0, r28 C 0 28
+ std r0, 0(rp) C 0
+ ADDSUBC r7, r7, r29 C 7 29
+ std r7, 8(rp) C 7
+ ADDSUBC r5, r5, r30 C 5 30
+ std r5, 16(rp) C 5
+ ADDSUBC r11, r11, r31 C 11 31
+ std r11, 24(rp) C 11
+ addi up, up, 32
+SM(` subfe r11, r11, r11 ')
+ addi rp, rp, 32
+L(bot):
+SM(` addic r11, r11, 1 ')
+ bdnz L(top)
+
+C Wind-down: the final two limbs, then the carry-out limb into r3.
+L(end): mulld r0, r9, r6
+ mulhdu r5, r9, r6
+ mulld r7, r27, r6
+ mulhdu r8, r27, r6
+ ld r28, 0(rp)
+ ld r29, 8(rp)
+ adde r0, r0, r12
+ adde r7, r7, r5
+ addze r8, r8
+ ADDSUB r0, r0, r28
+ std r0, 0(rp)
+ ADDSUBC r7, r7, r29
+ std r7, 8(rp)
+SM(` subfe r11, r11, r11 ')
+SM(` addic r11, r11, 1 ')
+ addze r3, r8
+ ld r31, -8(r1) C reload the registers saved on entry
+ ld r30, -16(r1)
+ ld r29, -24(r1)
+ ld r28, -32(r1)
+ ld r27, -40(r1)
+ blr
+EPILOGUE()
dnl PPC64 mpn_bdiv_dbm1c.
-dnl Copyright 2008 Free Software Foundation, Inc.
+dnl Copyright 2008, 2010 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
include(`../config.m4')
-C cycles/limb
-C POWER3/PPC630: 6-18
-C POWER4/PPC970: 8.5
-C POWER5: ?
+C cycles/limb
+C POWER3/PPC630 6-18
+C POWER4/PPC970 8.5?
+C POWER5 8.5 fluctuating as function of n % 3
+C POWER6 15
+C POWER7 4.75
C TODO
C * Nothing to do...
blt cr6, L(b01)
beq cr6, L(b10)
+ ALIGN(16)
L(b11): mulld r5, r0, r6
mulhdu r12, r0, r6
ld r0, 8(r4)
addi r3, r3, -24
b L(3)
+ ALIGN(16)
L(b00): mulld r9, r0, r6
mulhdu r8, r0, r6
- ld r0, 8(r4)
addi r4, r4, -16
addi r3, r3, -16
b L(0)
+ ALIGN(16)
L(b01): mulld r5, r0, r6
mulhdu r12, r0, r6
addi r3, r3, -8
addi r4, r4, -8
b L(1)
+ ALIGN(16)
L(b10): mulld r9, r0, r6
mulhdu r8, r0, r6
- ld r0, 8(r4)
ble cr7, L(e2)
ALIGN(16)
-L(top): mulld r5, r0, r6
- mulhdu r12, r0, r6
- subfc r11, r9, r7
+L(top): subfc r11, r9, r7
+ ld r10, 8(r4)
ld r0, 16(r4)
subfe r7, r8, r11
std r11, 0(r3)
+ mulld r5, r10, r6
+ mulhdu r12, r10, r6
L(1): mulld r9, r0, r6
mulhdu r8, r0, r6
subfc r11, r5, r7
- ld r0, 24(r4)
subfe r7, r12, r11
std r11, 8(r3)
-L(0): mulld r5, r0, r6
- mulhdu r12, r0, r6
- subfc r11, r9, r7
+L(0): subfc r11, r9, r7
+ ld r10, 24(r4)
ld r0, 32(r4)
subfe r7, r8, r11
std r11, 16(r3)
+ mulld r5, r10, r6
+ mulhdu r12, r10, r6
L(3): mulld r9, r0, r6
mulhdu r8, r0, r6
subfc r11, r5, r7
- ld r0, 40(r4)
subfe r7, r12, r11
std r11, 24(r3)
addi r4, r4, 32
addi r3, r3, 32
bdnz L(top)
-L(e2): mulld r5, r0, r6
- mulhdu r12, r0, r6
+L(e2): ld r10, 8(r4)
+ mulld r5, r10, r6
+ mulhdu r12, r10, r6
subfc r11, r9, r7
subfe r7, r8, r11
std r11, 0(r3)
dnl PowerPC-64 mpn_divexact_1 -- mpn by limb exact division.
-dnl Copyright 2006 Free Software Foundation, Inc.
+dnl Copyright 2006, 2010 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
include(`../config.m4')
-C cycles/limb
-C POWER3/PPC630: 13-19
-C POWER4/PPC970: 16
-C POWER5: 16
+C cycles/limb
+C norm unorm
+C POWER3/PPC630 13-19
+C POWER4/PPC970 16
+C POWER5 16 16
+C POWER6 37 46
+C POWER7 12 12
C TODO
C * Check if n=1 code is really an improvement. It probably isn't.
-C * Perhaps remove L(norm) code, it is currently unreachable.
C * Make more similar to mode1o.asm.
C INPUT PARAMETERS
mtctr n
LEA( r5, binvert_limb_table)
rldicl r11, d, 63, 57
-C cmpdi cr7, r0, 0
lbzx r0, r5, r11
mulld r9, r0, r0
sldi r0, r0, 1
sldi r0, r0, 1
mulld r9, d, r9
subf r7, r9, r0 C r7 = 1/d mod 2^64
-C beq cr7, L(norm)
+ bne cr0, L(norm)
subfic r8, r10, 64 C set carry as side effect
li r5, 0
+ srd r11, r12, r10
ALIGN(16)
L(loop0):
- srd r11, r12, r10
ld r12, 8(up)
+ nop
addi up, up, 8
sld r0, r12, r8
or r11, r11, r0
subfe r9, r5, r11
+ srd r11, r12, r10
mulld r0, r7, r9
+ mulhdu r5, r0, d
std r0, 0(rp)
addi rp, rp, 8
- mulhdu r5, r0, d
bdnz L(loop0)
- srd r0, r12, r10
- subfe r0, r5, r0
+ subfe r0, r5, r11
mulld r0, r7, r0
std r0, 0(rp)
blr
ALIGN(16)
L(norm):
mulld r11, r12, r7
+ mulhdu r5, r11, d
std r11, 0(rp)
ALIGN(16)
L(loop1):
- mulhdu r5, r11, d
ld r9, 8(up)
addi up, up, 8
subfe r5, r5, r9
mulld r11, r7, r5
+ mulhdu r5, r11, d C result not used
std r11, 8(rp)
addi rp, rp, 8
bdnz L(loop1)
dnl PowerPC-64 mpn_divrem_1 -- Divide an mpn number by an unnormalized limb.
-dnl Copyright 2003, 2004, 2005, 2007, 2008 Free Software Foundation, Inc.
+dnl Copyright 2003, 2004, 2005, 2007, 2008, 2010, 2012 Free Software
+dnl Foundation, Inc.
dnl This file is part of the GNU MP Library.
include(`../config.m4')
-C cycles/limb
-C norm unorm frac
-C POWER3/PPC630 16-34 16-34 ~11
-C POWER4/PPC970 29 19
-C POWER5 29 29 ~20
+C cycles/limb
+C norm unorm frac
+C POWER3/PPC630 16-34 16-34 ~11 outdated figures
+C POWER4/PPC970 28 28 19
+C POWER5 29 29 ~19
+C POWER6 49 59 ~42
+C POWER7 24.5 23 ~14
C INPUT PARAMETERS
C qp = r3
sldi r6, r6, 3
ALIGN(16)
L(uloop):
- addi r11, r31, 1
ldx r8, r26, r6
+ nop
mulld r0, r31, r3
mulhdu r10, r31, r3
- addi r6, r6, -8
+ addi r11, r31, 1
srd r9, r8, r5
+ addi r6, r6, -8
or r9, r7, r9
addc r0, r0, r9
adde r10, r10, r11
mulld r31, r10, r30
subf r31, r31, r9
- subfc r0, r0, r31 C r >= ql
- subfe r0, r0, r0 C r0 = -(r >= ql)
- not r7, r0
- add r10, r7, r10 C qh -= (r >= ql)
- andc r0, r30, r0
- add r31, r31, r0
+ subfc r0, r31, r0 C r <= ql
+ subfe r0, r0, r0 C r0 = -(r <= ql)
+ and r9, r30, r0
+ add r31, r31, r9
+ add r10, r0, r10 C qh -= (r >= ql)
cmpld cr7, r31, r30
bge- cr7, L(164)
L(123):
L(ufloop):
addi r11, r31, 1
nop
- mulld r7, r3, r31
+ mulld r0, r3, r31
mulhdu r10, r3, r31
add r10, r10, r11
mulld r31, r9, r10
ifelse(0,1,`
- subfc r0, r7, r31
+ subfc r0, r0, r31
subfe r0, r0, r0 C r0 = -(r >= ql)
not r7, r0
add r10, r7, r10 C qh -= (r >= ql)
andc r0, r30, r0
add r31, r31, r0
',`
- cmpld cr7, r31, r7
+ cmpld cr7, r31, r0
blt cr7, L(29)
add r31, r30, r31
addi r10, r10, -1
and r0, r0, r7
subf r31, r0, r31
L(8):
-L(10):
mr r3, r30
CALL( mpn_invert_limb)
- nop
+ li r27, 0
addic. r6, r28, -1
- blt- cr0, L(150)
+ blt- cr0, L(110)
mtctr r28
sldi r6, r6, 3
ALIGN(16)
addi r11, r31, 1
ldx r8, r26, r6
mulld r0, r31, r3
- addi r6, r6, -8
mulhdu r10, r31, r3
- addc r7, r0, r8
+ addi r6, r6, -8
+ addc r0, r0, r8
adde r10, r10, r11
mulld r31, r10, r30
subf r31, r31, r8 C r = nl - qh * d
- subfc r0, r7, r31 C r >= ql
- subfe r0, r0, r0 C r0 = -(r >= ql)
- not r7, r0
- add r10, r7, r10 C qh -= (r >= ql)
- andc r0, r30, r0
- add r31, r31, r0
+ subfc r0, r31, r0 C r <= ql
+ subfe r0, r0, r0 C r0 = -(r <= ql)
+ and r9, r30, r0
+ add r31, r31, r9
+ add r10, r0, r10 C qh -= (r >= ql)
cmpld cr7, r31, r30
bge- cr7, L(167)
L(51):
std r10, 0(r29)
addi r29, r29, -8
bdnz L(nloop)
+ b L(110)
-L(150):
- addic. r9, r25, -1
- blt- cr0, L(152)
- mtctr r25
- neg r9, r30
- ALIGN(16)
-L(nfloop):
- addi r11, r31, 1
- nop
- mulld r7, r3, r31
- mulhdu r10, r3, r31
- add r10, r10, r11
- mulld r31, r9, r10
-ifelse(0,1,`
- subfc r0, r7, r31
- subfe r0, r0, r0 C r0 = -(r >= ql)
- not r7, r0
- add r10, r7, r10 C qh -= (r >= ql)
- andc r0, r30, r0
- add r31, r31, r0
-',`
- cmpld cr7, r31, r7
- blt cr7, L(28)
- add r31, r30, r31
- addi r10, r10, -1
-L(28):
-')
- std r10, 0(r29)
- addi r29, r29, -8
- bdnz L(nfloop)
-L(152):
- addi r1, r1, 176
- mr r3, r31
- ld r0, 16(r1)
- lwz r12, 8(r1)
- mtlr r0
- ld r25, -56(r1)
- ld r26, -48(r1)
- mtcrf 8, r12
- ld r27, -40(r1)
- ld r28, -32(r1)
- ld r29, -24(r1)
- ld r30, -16(r1)
- ld r31, -8(r1)
- blr
L(164):
subf r31, r30, r31
addi r10, r10, 1
include(`../config.m4')
-C cycles/limb
-C norm frac
+C cycles/limb
+C norm frac
C POWER3/PPC630
-C POWER4/PPC970 39* 39*
-C POWER5 39* 39*
-
-C STATUS
-C * Performace fluctuates like crazy
+C POWER4/PPC970 ? ?
+C POWER5 37 ?
+C POWER6 62 ?
+C POWER7 30.5 ?
C INPUT PARAMETERS
C qp = r3
mulld r6, r29, r3
addc r6, r6, r31
adde r8, r8, r29
+ cmpd cr7, r27, r25
mulld r0, r30, r8
- subf r31, r0, r31
mulhdu r11, r28, r8
mulld r10, r28, r8
+ subf r31, r0, r31
li r7, 0
- cmpd cr7, r27, r25
blt cr7, L(60)
ld r7, 0(r26)
addi r26, r26, -8
dnl PowerPC-64 mpn_invert_limb -- Invert a normalized limb.
-dnl Copyright 2004, 2005, 2006, 2008 Free Software Foundation, Inc.
+dnl Copyright 2004, 2005, 2006, 2008, 2010 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
include(`../config.m4')
-C cycles/limb
-C POWER3/PPC630: ?
-C POWER4/PPC970: 75 (including call+ret)
-
-C TODO:
-C * Pair multiply instructions.
+C cycles/limb (approximate)
+C POWER3/PPC630 80
+C POWER4/PPC970 86
+C POWER5 86
+C POWER6 170
+C POWER7 66
ASM_START()
PROLOGUE(mpn_invert_limb)
LEAL( r12, approx_tab)
-
- srdi r11, r3, 32 C r11 = d >> 32
- rlwinm r9, r11, 10, 23, 30 C r9 = ((d >> 55) & 0xff) << 1
- lhzx r0, r12, r9 C load initial approximation
- rldic r10, r0, 6, 42
- mulld r8, r10, r10
- sldi r9, r10, 17
- mulld r0, r8, r11
- srdi r0, r0, 31
- subf r10, r0, r9
- mulld r8, r10, r10
- sldi r11, r10, 33
- mulhdu r0, r8, r3
- sldi r9, r0, 1
- subf r10, r9, r11
- sldi r11, r10, 2
- mulhdu r0, r10, r10
- mulld r8, r10, r10
- mulhdu r10, r8, r3
- mulld r9, r0, r3
- mulhdu r0, r0, r3
- addc r8, r9, r10
- addze r10, r0
- srdi r0, r8, 62
- rldimi r0, r10, 2, 0
- sldi r9, r8, 2
- subfic r10, r9, 0
- subfe r8, r0, r11
- mulhdu r10, r3, r8
- add r10, r10, r3
- mulld r9, r3, r8
- subf r11, r10, r8
- addi r0, r10, 1
- addi r8, r11, -1
- and r0, r3, r0
- addc r11, r9, r0
- addze r10, r10
- addc r0, r11, r3
- addze r10, r10
- subf r3, r10, r8
+C The limb d arrives in r3.  A 16-bit start value v0 is looked up in
+C approx_tab using bits 55..62 of d, then refined in three steps (v1, v2,
+C v3) as annotated line by line.  The value returned in r3 is
+C v3 - (high(v3 * d) + d + carry), with carry taken from low(v3 * d) + d.
+C NOTE(review): bit 63 of d never enters the table index, which fits the
+C file title saying d is normalized -- confirm with callers.
+ srdi r9, r3, 32
+ rlwinm r9, r9, 10, 23, 30 C ((d >> 55) & 0xff) << 1, byte offset into table
+ srdi r10, r3, 24 C d >> 24
+ lis r11, 0x1000
+ rldicl r8, r3, 0, 63 C d mod 2
+ addi r10, r10, 1 C d40
+ sldi r11, r11, 32 C 2^60
+ srdi r7, r3, 1 C d/2
+ add r7, r7, r8 C d63 = ceil(d/2)
+ neg r8, r8 C mask = -(d mod 2)
+ lhzx r0, r9, r12 C v0 = halfword at r12 + r9 (r12 set by LEAL above)
+ mullw r9, r0, r0 C v0*v0
+ sldi r6, r0, 11 C v0 << 11
+ addi r0, r6, -1 C (v0 << 11) - 1
+ mulld r9, r9, r10 C v0*v0*d40
+ srdi r9, r9, 40 C v0*v0*d40 >> 40
+ subf r9, r9, r0 C v1 = (v0 << 11) - (v0*v0*d40 >> 40) - 1
+ mulld r0, r9, r10 C v1*d40
+ sldi r6, r9, 13 C v1 << 13
+ subf r0, r0, r11 C 2^60 - v1*d40
+ mulld r0, r0, r9 C v1 * (2^60 - v1*d40)
+ srdi r0, r0, 47 C v1 * (2^60 - v1*d40) >> 47
+ add r0, r0, r6 C v2 = (v1 << 13) + (v1 * (2^60 - v1*d40) >> 47)
+ mulld r11, r0, r7 C v2 * d63
+ srdi r10, r0, 1 C v2 >> 1
+ sldi r9, r0, 31 C v2 << 31
+ and r8, r10, r8 C (v2 >> 1) & mask
+ subf r8, r11, r8 C ((v2 >> 1) & mask) - v2 * d63
+ mulhdu r0, r8, r0 C p1 = v2 * (((v2 >> 1) & mask) - v2 * d63)
+ srdi r0, r0, 1 C p1 >> 1
+ add r0, r0, r9 C v3 = (v2 << 31) + (p1 >> 1)
+ nop
+ mulhdu r9, r0, r3 C high(v3 * d)
+ mulld r11, r0, r3 C low(v3 * d)
+ addc r10, r11, r3 C low(v3 * d) + d, only the carry is used
+ adde r3, r9, r3 C high(v3 * d) + d + carry
+ subf r3, r3, r0 C result = v3 - (high(v3 * d) + d + carry)
blr
EPILOGUE()
DEF_OBJECT(approx_tab)
- .short 1023,1020,1016,1012,1008,1004,1000,996
- .short 992,989,985,981,978,974,970,967
- .short 963,960,956,953,949,946,942,939
- .short 936,932,929,926,923,919,916,913
- .short 910,907,903,900,897,894,891,888
- .short 885,882,879,876,873,870,868,865
- .short 862,859,856,853,851,848,845,842
- .short 840,837,834,832,829,826,824,821
- .short 819,816,814,811,809,806,804,801
- .short 799,796,794,791,789,787,784,782
- .short 780,777,775,773,771,768,766,764
- .short 762,759,757,755,753,751,748,746
- .short 744,742,740,738,736,734,732,730
- .short 728,726,724,722,720,718,716,714
- .short 712,710,708,706,704,702,700,699
- .short 697,695,693,691,689,688,686,684
- .short 682,680,679,677,675,673,672,670
- .short 668,667,665,663,661,660,658,657
- .short 655,653,652,650,648,647,645,644
- .short 642,640,639,637,636,634,633,631
- .short 630,628,627,625,624,622,621,619
- .short 618,616,615,613,612,611,609,608
- .short 606,605,604,602,601,599,598,597
- .short 595,594,593,591,590,589,587,586
- .short 585,583,582,581,579,578,577,576
- .short 574,573,572,571,569,568,567,566
- .short 564,563,562,561,560,558,557,556
- .short 555,554,553,551,550,549,548,547
- .short 546,544,543,542,541,540,539,538
- .short 537,536,534,533,532,531,530,529
- .short 528,527,526,525,524,523,522,521
- .short 520,519,518,517,516,515,514,513
+ .short 0x7fd,0x7f5,0x7ed,0x7e5,0x7dd,0x7d5,0x7ce,0x7c6
+ .short 0x7bf,0x7b7,0x7b0,0x7a8,0x7a1,0x79a,0x792,0x78b
+ .short 0x784,0x77d,0x776,0x76f,0x768,0x761,0x75b,0x754
+ .short 0x74d,0x747,0x740,0x739,0x733,0x72c,0x726,0x720
+ .short 0x719,0x713,0x70d,0x707,0x700,0x6fa,0x6f4,0x6ee
+ .short 0x6e8,0x6e2,0x6dc,0x6d6,0x6d1,0x6cb,0x6c5,0x6bf
+ .short 0x6ba,0x6b4,0x6ae,0x6a9,0x6a3,0x69e,0x698,0x693
+ .short 0x68d,0x688,0x683,0x67d,0x678,0x673,0x66e,0x669
+ .short 0x664,0x65e,0x659,0x654,0x64f,0x64a,0x645,0x640
+ .short 0x63c,0x637,0x632,0x62d,0x628,0x624,0x61f,0x61a
+ .short 0x616,0x611,0x60c,0x608,0x603,0x5ff,0x5fa,0x5f6
+ .short 0x5f1,0x5ed,0x5e9,0x5e4,0x5e0,0x5dc,0x5d7,0x5d3
+ .short 0x5cf,0x5cb,0x5c6,0x5c2,0x5be,0x5ba,0x5b6,0x5b2
+ .short 0x5ae,0x5aa,0x5a6,0x5a2,0x59e,0x59a,0x596,0x592
+ .short 0x58e,0x58a,0x586,0x583,0x57f,0x57b,0x577,0x574
+ .short 0x570,0x56c,0x568,0x565,0x561,0x55e,0x55a,0x556
+ .short 0x553,0x54f,0x54c,0x548,0x545,0x541,0x53e,0x53a
+ .short 0x537,0x534,0x530,0x52d,0x52a,0x526,0x523,0x520
+ .short 0x51c,0x519,0x516,0x513,0x50f,0x50c,0x509,0x506
+ .short 0x503,0x500,0x4fc,0x4f9,0x4f6,0x4f3,0x4f0,0x4ed
+ .short 0x4ea,0x4e7,0x4e4,0x4e1,0x4de,0x4db,0x4d8,0x4d5
+ .short 0x4d2,0x4cf,0x4cc,0x4ca,0x4c7,0x4c4,0x4c1,0x4be
+ .short 0x4bb,0x4b9,0x4b6,0x4b3,0x4b0,0x4ad,0x4ab,0x4a8
+ .short 0x4a5,0x4a3,0x4a0,0x49d,0x49b,0x498,0x495,0x493
+ .short 0x490,0x48d,0x48b,0x488,0x486,0x483,0x481,0x47e
+ .short 0x47c,0x479,0x477,0x474,0x472,0x46f,0x46d,0x46a
+ .short 0x468,0x465,0x463,0x461,0x45e,0x45c,0x459,0x457
+ .short 0x455,0x452,0x450,0x44e,0x44b,0x449,0x447,0x444
+ .short 0x442,0x440,0x43e,0x43b,0x439,0x437,0x435,0x432
+ .short 0x430,0x42e,0x42c,0x42a,0x428,0x425,0x423,0x421
+ .short 0x41f,0x41d,0x41b,0x419,0x417,0x414,0x412,0x410
+ .short 0x40e,0x40c,0x40a,0x408,0x406,0x404,0x402,0x400
END_OBJECT(approx_tab)
ASM_END()
--- /dev/null
+dnl PowerPC-64 mpn_mod_1_1p
+
+dnl Copyright 2010, 2011 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C POWER3/PPC630 ?
+C POWER4/PPC970 17
+C POWER5 16
+C POWER6 30
+C POWER7 10.2
+
+C TODO
+C * Optimise, in particular the cps function. This was compiler-generated and
+C then hand optimised.
+
+C INPUT PARAMETERS
+define(`ap', `r3')
+define(`n', `r4')
+define(`d', `r5')
+define(`cps', `r6')
+
+ASM_START()
+
+EXTERN_FUNC(mpn_invert_limb)
+
+C mpn_mod_1_1p: ap = r3, n = r4, d = r5, cps = r6 (see the defines above).
+C Folds the n limbs at ap, most significant first, into a two-limb value
+C rh:rl = r9:r11 using the B1modb and B2modb constants read from cps.  L(end)
+C then reduces rh:rl by d using the value at 0(cps) (stored there by
+C mpn_mod_1_1p_cps from the mpn_invert_limb result) and returns the outcome
+C shifted right by the count cnt read from 12(cps).
+C NOTE(review): the two top limbs are loaded unconditionally, so n >= 2 is
+C assumed here -- confirm against the callers.
+PROLOGUE(mpn_mod_1_1p)
+ sldi r10, r4, 3 C r10 = 8*n, operand size in bytes
+ addi r4, r4, -1 C r4 = n-1, becomes the loop counter
+ add r3, r3, r10 C r3 points just past the last limb
+ ld r0, 16(r6) C B1modb
+ ld r12, 24(r6) C B2modb
+ ld r9, -8(r3) C most significant limb
+ ld r10, -16(r3) C second most significant limb
+ mtctr r4
+ mulhdu r8, r9, r0 C r8:r7 = top limb * B1modb
+ mulld r7, r9, r0
+ addc r11, r7, r10 C rl = low product + second limb
+ addze r9, r8 C rh = high product + carry
+ bdz L(end) C counter reaches 0 when n = 2: nothing left to fold
+
+ ALIGN(16)
+L(top): ld r4, -24(r3) C next lower limb
+ addi r3, r3, -8
+ nop
+ mulld r10, r11, r0 C r11:r10 = rl * B1modb
+ mulld r8, r9, r12 C r9:r8 = rh * B2modb
+ mulhdu r11, r11, r0
+ mulhdu r9, r9, r12
+ addc r7, r10, r4 C low(rl * B1modb) + limb
+ addze r10, r11 C high(rl * B1modb) + carry
+ addc r11, r8, r7 C new rl
+ adde r9, r9, r10 C new rh
+ bdnz L(top)
+
+L(end): lwz r0, 12(r6) C r0 = cnt; 32-bit load, presumably the low word of the big-endian doubleword at 8(cps)
+ ld r3, 0(r6) C r3 = value at 0(cps)
+ cmpdi cr7, r0, 0
+ beq- cr7, L(4) C cnt = 0: rh needs no shifting
+ subfic r10, r0, 64 C r10 = 64 - cnt
+ sld r9, r9, r0
+ srd r10, r11, r10
+ or r9, r10, r9 C rh = (rh << cnt) | (rl >> (64 - cnt))
+L(4): subfc r10, r5, r9 C carry set iff rh >= d (unsigned)
+ subfe r10, r10, r10 C r10 = 0 if rh >= d, else -1
+ nand r10, r10, r10 C r10 = -1 if rh >= d, else 0
+ sld r11, r11, r0 C rl <<= cnt
+ and r10, r10, r5 C r10 = d if rh >= d, else 0
+ subf r9, r10, r9 C rh -= r10, branch-free conditional subtract
+ mulhdu r10, r9, r3 C r10:r3 = rh * value from 0(cps)
+ mulld r3, r9, r3
+ addi r9, r9, 1
+ addc r8, r3, r11 C r8 = low product + rl
+ adde r3, r10, r9 C r3 = high product + rh + 1 + carry
+ mulld r3, r3, r5
+ subf r3, r3, r11 C r3 = rl - r3 * d (mod 2^64)
+ cmpld cr7, r8, r3
+ bge cr7, L(5) C FIXME: Make branch-less
+ add r3, r3, r5 C reached when r3 > r8: add d back
+L(5): cmpld cr7, r3, r5
+ bge- cr7, L(10) C r3 >= d: one more subtraction needed
+ srd r3, r3, r0 C return r3 >> cnt
+ blr
+
+L(10): subf r3, r5, r3 C r3 -= d
+ srd r3, r3, r0 C return r3 >> cnt
+ blr
+EPILOGUE()
+
+C mpn_mod_1_1p_cps: fills four doublewords through the pointer passed in r3,
+C computed from the limb passed in r4:
+C   0: result of mpn_invert_limb on r4 << cnt
+C   8: cnt, the leading zero count of r4
+C  16: the slot mpn_mod_1_1p reads as B1modb
+C  24: the slot mpn_mod_1_1p reads as B2modb
+C Uses r29-r31 as callee-saved temporaries and a 144-byte stack frame.
+PROLOGUE(mpn_mod_1_1p_cps)
+ mflr r0
+ std r29, -24(r1) C save r29-r31 below the stack pointer
+ std r30, -16(r1)
+ std r31, -8(r1)
+ cntlzd r31, r4 C r31 = cnt = number of leading zero bits in r4
+ std r0, 16(r1) C save the return address
+ extsw r31, r31
+ mr r29, r3 C r29 = pointer the results are stored through
+ stdu r1, -144(r1) C allocate a 144-byte frame
+ sld r30, r4, r31 C r30 = r4 << cnt
+ mr r3, r30
+ CALL( mpn_invert_limb)
+ nop
+ cmpdi cr7, r31, 0
+ neg r0, r30 C r0 = -r30, kept as is when cnt = 0
+ beq- cr7, L(13)
+ subfic r11, r31, 64
+ li r0, 1
+ neg r9, r30
+ srd r11, r3, r11 C bi >> (64 - cnt), bi being the call result in r3
+ sld r0, r0, r31 C 1 << cnt
+ or r0, r11, r0
+ mulld r0, r0, r9 C r0 = -r30 * ((bi >> (64 - cnt)) | (1 << cnt))
+L(13): mulhdu r9, r0, r3
+ mulld r11, r0, r3 C r11 = low(r0 * bi)
+ add r9, r0, r9 C r9 = r0 + high(r0 * bi)
+ nor r9, r9, r9 C r9 = ~r9
+ mulld r9, r9, r30 C r9 *= r30
+ cmpld cr7, r11, r9
+ bge cr7, L(14)
+ add r9, r9, r30 C reached when r9 > r11: add r30
+L(14): addi r1, r1, 144 C release the frame
+ srd r0, r0, r31
+ std r31, 8(r29) C offset 8: cnt
+ std r3, 0(r29) C offset 0: bi
+ std r0, 16(r29) C offset 16: r0 >> cnt (B1modb slot)
+ ld r0, 16(r1) C reload the return address
+ srd r9, r9, r31
+ ld r30, -16(r1)
+ ld r31, -8(r1)
+ std r9, 24(r29) C offset 24: r9 >> cnt (B2modb slot)
+ ld r29, -24(r1)
+ mtlr r0
+ blr
+EPILOGUE()
--- /dev/null
+dnl PowerPC-64 mpn_mod_1s_4p
+
+dnl Copyright 2010, 2011 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C POWER3/PPC630 ?
+C POWER4/PPC970 9
+C POWER5 9
+C POWER6 13
+C POWER7 3.5
+
+C TODO
+C * Optimise, in particular the cps function. This was compiler-generated and
+C then hand optimised.
+
+C INPUT PARAMETERS
+define(`ap', `r3')
+define(`n', `r4')
+define(`d', `r5')
+define(`cps', `r6')
+
+ASM_START()
+
+EXTERN_FUNC(mpn_invert_limb)
+
+C mpn_mod_1s_4p: ap = r3, n = r4, d = r5, cps = r6 (see the defines above).
+C The top n mod 4 limbs (four limbs when n mod 4 = 0) are first combined
+C into a two-limb value rh:rl = r9:r0.  L(top) then folds four more limbs
+C per iteration using the multipliers loaded from cps.  L(end) reduces rh:rl
+C with d and the value at 0(cps) and returns the result shifted right by the
+C count cnt read from 12(cps).
+C r23-r31 are saved below the stack pointer on entry and restored on exit.
+PROLOGUE(mpn_mod_1s_4p)
+ std r23, -72(r1)
+ ld r23, 48(cps) C multiplier applied to rh in the loop (B5modb)
+ std r24, -64(r1)
+ std r25, -56(r1)
+ ld r24, 32(cps) C presumably B3modb -- name inferred, confirm
+ ld r25, 24(cps) C presumably B2modb -- name inferred, confirm
+ std r26, -48(r1)
+ std r27, -40(r1)
+ ld r26, 16(cps) C presumably B1modb -- name inferred, confirm
+ std r28, -32(r1)
+ std r29, -24(r1)
+ std r30, -16(r1)
+ std r31, -8(r1)
+ ld r30, 40(cps) C multiplier applied to rl in the loop (B4modb)
+
+ rldicl. r0, n, 0,62 C r0 = n mod 4, cr0 compares it with 0
+ sldi r31, n, 3
+ add ap, ap, r31 C make ap point at end of operand
+
+ cmpdi cr7, r0, 2
+ beq cr0, L(b00) C n mod 4 = 0
+ blt cr7, L(b01) C n mod 4 = 1
+ beq cr7, L(b10) C n mod 4 = 2
+
+L(b11): ld r11, -16(ap) C n mod 4 = 3: start from the top three limbs
+ ld r9, -8(ap)
+ ld r0, -24(ap)
+ mulhdu r27, r11, r26 C r27:r8 = middle limb * r26
+ mulld r8, r11, r26
+ mulhdu r11, r9, r25 C r11:r9 = top limb * r25
+ mulld r9, r9, r25
+ addc r31, r8, r0
+ addze r10, r27
+ addc r0, r9, r31 C rl
+ adde r9, r11, r10 C rh
+ addi ap, ap, -40
+ b L(6)
+
+ ALIGN(16)
+L(b00): ld r11, -24(ap) C n mod 4 = 0: start from the top four limbs
+ ld r10, -16(ap)
+ ld r9, -8(ap)
+ ld r0, -32(ap)
+ mulld r8, r11, r26 C r27:r8 = limb at -24 * r26
+ mulhdu r7, r10, r25 C r7:r10 = limb at -16 * r25
+ mulhdu r27, r11, r26
+ mulhdu r11, r9, r24 C r11:r9 = top limb * r24
+ mulld r10, r10, r25
+ mulld r9, r9, r24
+ addc r31, r8, r0
+ addze r0, r27
+ addc r8, r31, r10
+ adde r10, r0, r7
+ addc r0, r9, r8 C rl
+ adde r9, r11, r10 C rh
+ addi ap, ap, -48
+ b L(6)
+
+ ALIGN(16)
+L(b01): li r9, 0 C n mod 4 = 1: rh = 0, rl = top limb
+ ld r0, -8(ap)
+ addi ap, ap, -24
+ b L(6)
+
+ ALIGN(16)
+L(b10): ld r9, -8(ap) C n mod 4 = 2: rh:rl = top two limbs
+ ld r0, -16(ap)
+ addi ap, ap, -32
+
+ ALIGN(16)
+L(6): addi r10, n, 3
+ srdi r7, r10, 2
+ mtctr r7 C counter = (n + 3) / 4
+ bdz L(end) C loop body runs counter - 1 times
+
+ ALIGN(16)
+L(top): ld r31, -16(ap) C lowest of the next four limbs
+ ld r10, -8(ap)
+ ld r11, 8(ap) C highest of the next four limbs
+ ld r12, 0(ap)
+ mulld r29, r0, r30 C rl * B4modb
+ mulhdu r0, r0, r30 C rl * B4modb
+ mulhdu r27, r10, r26 C r27:r10 = limb at -8(ap) * r26
+ mulld r10, r10, r26
+ mulhdu r7, r9, r23 C rh * B5modb
+ mulld r9, r9, r23 C rh * B5modb
+ mulhdu r28, r11, r24 C r28:r11 = limb at 8(ap) * r24
+ mulld r11, r11, r24
+ mulhdu r4, r12, r25 C r4:r12 = limb at 0(ap) * r25; r4 (n) is scratch from here on
+ mulld r12, r12, r25
+ addc r8, r10, r31 C sum the five two-limb products and the lowest limb
+ addze r10, r27
+ addi ap, ap, -32
+ addc r27, r8, r12
+ adde r12, r10, r4
+ addc r11, r27, r11
+ adde r31, r12, r28
+ addc r12, r11, r29
+ adde r4, r31, r0
+ addc r0, r9, r12 C new rl
+ adde r9, r7, r4 C new rh
+ bdnz L(top)
+
+L(end): lwz r3, 12(cps) C r3 = cnt; 32-bit load, presumably the low word of the big-endian doubleword at 8(cps)
+ mulld r10, r9, r26 C r9:r10 = rh * r26
+ mulhdu r9, r9, r26
+ addc r11, r0, r10 C r11 = rl + low product
+ addze r9, r9 C r9 = high product + carry
+ ld r10, 0(cps) C r10 = value at 0(cps)
+ subfic r8, r3, 64 C r8 = 64 - cnt
+ sld r9, r9, r3
+ srd r8, r11, r8 C not branched around for cnt = 0; srd by 64 yields 0
+ sld r11, r11, r3 C r11 <<= cnt
+ or r9, r8, r9 C r9 = (r9 << cnt) | (unshifted r11 >> (64 - cnt))
+ mulld r0, r9, r10 C r10:r0 = r9 * value from 0(cps)
+ mulhdu r10, r9, r10
+ addi r9, r9, 1
+ addc r8, r0, r11 C r8 = low product + r11
+ adde r0, r10, r9 C r0 = high product + r9 + 1 + carry
+ mulld r0, r0, d
+ subf r0, r0, r11 C r0 = r11 - r0 * d (mod 2^64)
+ cmpld cr7, r8, r0
+ bge cr7, L(9)
+ add r0, r0, d C reached when r0 > r8: add d back
+L(9): cmpld cr7, r0, d
+ bge- cr7, L(16) C r0 >= d: one more subtraction needed
+L(10): srd r3, r0, r3 C return r0 >> cnt
+ ld r23, -72(r1) C restore r23-r31
+ ld r24, -64(r1)
+ ld r25, -56(r1)
+ ld r26, -48(r1)
+ ld r27, -40(r1)
+ ld r28, -32(r1)
+ ld r29, -24(r1)
+ ld r30, -16(r1)
+ ld r31, -8(r1)
+ blr
+
+L(16): subf r0, d, r0 C r0 -= d
+ b L(10)
+EPILOGUE()
+
+C mpn_mod_1s_4p_cps: fills seven doublewords through the pointer passed in
+C r3, computed from the limb passed in r4:
+C   0: result of mpn_invert_limb on r4 << cnt
+C   8: cnt, the leading zero count of r4
+C  16, 24, 32, 40, 48: a chain of five values, each derived from the previous
+C      one by the same multiply/complement/correct step, stored shifted right
+C      by cnt.  mpn_mod_1s_4p reads offsets 40 and 48 as B4modb and B5modb.
+C Uses r29-r31 as callee-saved temporaries and a 144-byte stack frame.
+PROLOGUE(mpn_mod_1s_4p_cps)
+ mflr r0
+ std r29, -24(r1) C save r29-r31 below the stack pointer
+ std r30, -16(r1)
+ mr r29, r3 C r29 = pointer the results are stored through
+ std r0, 16(r1) C save the return address
+ std r31, -8(r1)
+ stdu r1, -144(r1) C allocate a 144-byte frame
+ cntlzd r31, r4 C r31 = cnt = number of leading zero bits in r4
+ sld r30, r4, r31 C r30 = r4 << cnt
+ mr r3, r30
+ CALL( mpn_invert_limb)
+ nop
+ subfic r9, r31, 64
+ li r10, 1
+ sld r10, r10, r31 C 1 << cnt
+ srd r9, r3, r9 C bi >> (64 - cnt), bi being the call result in r3
+ neg r0, r30
+ or r10, r10, r9
+ mulld r10, r10, r0 C first value: r10 = -r30 * ((bi >> (64 - cnt)) | (1 << cnt))
+ mulhdu r11, r10, r3
+ nor r11, r11, r11
+ subf r11, r10, r11 C r11 = ~high(r10 * bi) - r10, same as ~(r10 + high(r10 * bi))
+ mulld r11, r11, r30 C r11 *= r30
+ mulld r0, r10, r3 C r0 = low(r10 * bi)
+ cmpld cr7, r0, r11
+ bge cr7, L(18)
+ add r11, r11, r30 C reached when r11 > r0: add r30; r11 is the second value
+L(18): mulhdu r9, r11, r3 C same step applied to r11, giving r9
+ add r9, r11, r9
+ nor r9, r9, r9
+ mulld r9, r9, r30
+ mulld r0, r11, r3
+ cmpld cr7, r0, r9
+ bge cr7, L(19)
+ add r9, r9, r30 C r9 is the third value
+L(19): mulhdu r0, r9, r3 C same step applied to r9, giving r0
+ add r0, r9, r0
+ nor r0, r0, r0
+ mulld r0, r0, r30
+ mulld r8, r9, r3
+ cmpld cr7, r8, r0
+ bge cr7, L(20)
+ add r0, r0, r30 C r0 is the fourth value
+L(20): mulhdu r8, r0, r3 C same step applied to r0, giving r8
+ add r8, r0, r8
+ nor r8, r8, r8
+ mulld r8, r8, r30
+ mulld r7, r0, r3
+ cmpld cr7, r7, r8
+ bge cr7, L(21)
+ add r8, r8, r30 C r8 is the fifth value
+L(21): srd r0, r0, r31
+ addi r1, r1, 144 C release the frame
+ srd r8, r8, r31
+ srd r10, r10, r31
+ srd r11, r11, r31
+ std r0, 40(r29) C offset 40: fourth value >> cnt (B4modb slot)
+ std r31, 8(r29) C offset 8: cnt
+ srd r9, r9, r31
+ ld r0, 16(r1) C reload the return address
+ ld r30, -16(r1)
+ std r8, 48(r29) C offset 48: fifth value >> cnt (B5modb slot)
+ std r3, 0(r29) C offset 0: bi
+ mtlr r0
+ ld r31, -8(r1)
+ std r10, 16(r29) C offset 16: first value >> cnt
+ std r11, 24(r29) C offset 24: second value >> cnt
+ std r9, 32(r29) C offset 32: third value >> cnt
+ ld r29, -24(r1)
+ blr
+EPILOGUE()
-dnl PowerPC-64 mpn_mod_34lsub1 -- modulo 2^24-1.
+dnl PowerPC-64 mpn_mod_34lsub1 -- modulo 2^48-1.
dnl Copyright 2005 Free Software Foundation, Inc.
include(`../config.m4')
-C cycles/limb
-C POWER3/PPC630: 1.33
-C POWER4/PPC970: 1.5
-C POWER5: 1.57
+C cycles/limb
+C POWER3/PPC630 1.33
+C POWER4/PPC970 1.5
+C POWER5 1.32
+C POWER6 2.35
+C POWER7 1
C INPUT PARAMETERS
define(`up',`r3')
include(`../config.m4')
-C cycles/limb
-C POWER3/PPC630: 13-19
-C POWER4/PPC970: 16
-C POWER5: 16
+C cycles/limb
+C POWER3/PPC630 13-19
+C POWER4/PPC970 16
+C POWER5 16
+C POWER6 ?
+C POWER7 12
C TODO
C * Check if n=1 code is really an improvement. It probably isn't.
dnl PowerPC-64 mpn_mul_1 -- Multiply a limb vector with a limb and store
dnl the result in a second limb vector.
-dnl Copyright 1999, 2000, 2001, 2003, 2004, 2005, 2006 Free Software
+dnl Copyright 1999, 2000, 2001, 2003, 2004, 2005, 2006, 2010 Free Software
dnl Foundation, Inc.
dnl This file is part of the GNU MP Library.
include(`../config.m4')
-C cycles/limb
-C POWER3/PPC630: 6-18
-C POWER4/PPC970: 7.25
-C POWER5: 7.75
+C cycles/limb
+C POWER3/PPC630 6-18
+C POWER4/PPC970 7.25? not updated for last file revision
+C POWER5 7.25
+C POWER6 14
+C POWER7 2.9
C TODO
C * Try to reduce the number of needed live registers (at least r5 and r10
L(top): mulld r0, r26, r6
mulhdu r5, r26, r6
- ld r26, 0(up)
- nop
-
mulld r7, r27, r6
mulhdu r8, r27, r6
+ ld r26, 0(up)
ld r27, 8(up)
- nop
-
adde r0, r0, r12
adde r7, r7, r5
-
mulld r9, r26, r6
mulhdu r10, r26, r6
- ld r26, 16(up)
- nop
-
mulld r11, r27, r6
mulhdu r12, r27, r6
+ ld r26, 16(up)
ld r27, 24(up)
-
std r0, 0(rp)
adde r9, r9, r8
std r7, 8(rp)
L(end): mulld r0, r26, r6
mulhdu r5, r26, r6
-
mulld r7, r27, r6
mulhdu r8, r27, r6
-
adde r0, r0, r12
adde r7, r7, r5
-
std r0, 0(rp)
std r7, 8(rp)
L(ret): addze r3, r8
-dnl PowerPC-64 mpn_basecase.
+dnl PowerPC-64 mpn_mul_basecase.
dnl Copyright 1999, 2000, 2001, 2003, 2004, 2005, 2006, 2008 Free Software
dnl Foundation, Inc.
include(`../config.m4')
-C cycles/limb
-C POWER3/PPC630: 6-18
-C POWER4/PPC970: 8
-C POWER5: 8
-
+C cycles/limb
+C POWER3/PPC630 6-18
+C POWER4/PPC970 8
+C POWER5 8
+C POWER6 24
C INPUT PARAMETERS
define(`rp', `r3')
#define MOD_1_NORM_THRESHOLD 0 /* always */
#define MOD_1_UNNORM_THRESHOLD 0 /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD MP_SIZE_T_MAX /* never */
-#define MOD_1U_TO_MOD_1_1_THRESHOLD MP_SIZE_T_MAX
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD MP_SIZE_T_MAX
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD MP_SIZE_T_MAX
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD MP_SIZE_T_MAX /* never */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 7
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 3
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 18
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD 0 /* never mpn_mod_1s_2p */
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 17
#define USE_PREINV_DIVREM_1 0
+#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
#define DIVEXACT_1_THRESHOLD 0 /* always (native) */
#define BMOD_1_TO_MOD_1_THRESHOLD MP_SIZE_T_MAX /* never */
#define MUL_TOOM33_THRESHOLD 33
#define MUL_TOOM44_THRESHOLD 46
#define MUL_TOOM6H_THRESHOLD 77
-#define MUL_TOOM8H_THRESHOLD 115
+#define MUL_TOOM8H_THRESHOLD 139
#define MUL_TOOM32_TO_TOOM43_THRESHOLD 49
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD 38
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD 33
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD 32
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 47
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 49
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD 49
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD 34
-#define SQR_BASECASE_THRESHOLD 0 /* always */
+#define SQR_BASECASE_THRESHOLD 0 /* always (native) */
#define SQR_TOOM2_THRESHOLD 14
-#define SQR_TOOM3_THRESHOLD 49
+#define SQR_TOOM3_THRESHOLD 45
#define SQR_TOOM4_THRESHOLD 64
-#define SQR_TOOM6_THRESHOLD 84
-#define SQR_TOOM8_THRESHOLD 127
+#define SQR_TOOM6_THRESHOLD 85
+#define SQR_TOOM8_THRESHOLD 139
+
+#define MULMID_TOOM42_THRESHOLD 22
#define MULMOD_BNM1_THRESHOLD 8
-#define SQRMOD_BNM1_THRESHOLD 9
+#define SQRMOD_BNM1_THRESHOLD 10
#define MUL_FFT_MODF_THRESHOLD 220 /* k = 5 */
#define MUL_FFT_TABLE3 \
#define SQR_FFT_TABLE3_SIZE 118
#define SQR_FFT_THRESHOLD 1728
-#define MULLO_BASECASE_THRESHOLD 3
+#define MULLO_BASECASE_THRESHOLD 2
#define MULLO_DC_THRESHOLD 27
-#define MULLO_MUL_N_THRESHOLD 4940
+#define MULLO_MUL_N_THRESHOLD 2511
-#define DC_DIV_QR_THRESHOLD 27
-#define DC_DIVAPPR_Q_THRESHOLD 95
-#define DC_BDIV_QR_THRESHOLD 28
-#define DC_BDIV_Q_THRESHOLD 62
+#define DC_DIV_QR_THRESHOLD 23
+#define DC_DIVAPPR_Q_THRESHOLD 87
+#define DC_BDIV_QR_THRESHOLD 27
+#define DC_BDIV_Q_THRESHOLD 60
-#define INV_MULMOD_BNM1_THRESHOLD 35
-#define INV_NEWTON_THRESHOLD 97
-#define INV_APPR_THRESHOLD 94
+#define INV_MULMOD_BNM1_THRESHOLD 27
+#define INV_NEWTON_THRESHOLD 91
+#define INV_APPR_THRESHOLD 91
#define BINV_NEWTON_THRESHOLD 115
-#define REDC_1_TO_REDC_N_THRESHOLD 30
+#define REDC_1_TO_REDC_N_THRESHOLD 31
#define MU_DIV_QR_THRESHOLD 551
#define MU_DIVAPPR_Q_THRESHOLD 551
-#define MUPI_DIV_QR_THRESHOLD 49
-#define MU_BDIV_QR_THRESHOLD 492
+#define MUPI_DIV_QR_THRESHOLD 42
+#define MU_BDIV_QR_THRESHOLD 483
#define MU_BDIV_Q_THRESHOLD 492
-#define MATRIX22_STRASSEN_THRESHOLD 9
-#define HGCD_THRESHOLD 55
-#define GCD_DC_THRESHOLD 162
-#define GCDEXT_DC_THRESHOLD 124
+#define POWM_SEC_TABLE 2,23,140,556,713,746
+
+#define MATRIX22_STRASSEN_THRESHOLD 8
+#define HGCD_THRESHOLD 56
+#define HGCD_APPR_THRESHOLD 51
+#define HGCD_REDUCE_THRESHOLD 688
+#define GCD_DC_THRESHOLD 333
+#define GCDEXT_DC_THRESHOLD 126
#define JACOBI_BASE_METHOD 1
#define GET_STR_DC_THRESHOLD 17
-#define GET_STR_PRECOMPUTE_THRESHOLD 27
-#define SET_STR_DC_THRESHOLD 354
+#define GET_STR_PRECOMPUTE_THRESHOLD 28
+#define SET_STR_DC_THRESHOLD 375
#define SET_STR_PRECOMPUTE_THRESHOLD 812
+
+#define FAC_DSC_THRESHOLD 351
+#define FAC_ODD_THRESHOLD 0 /* always */
#define MOD_1_NORM_THRESHOLD 0 /* always */
#define MOD_1_UNNORM_THRESHOLD 0 /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD 10
-#define MOD_1U_TO_MOD_1_1_THRESHOLD 6
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD 8
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD 23
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 6
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 5
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 9
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD 20
#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 16
#define USE_PREINV_DIVREM_1 0
+#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
#define DIVEXACT_1_THRESHOLD 0 /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD 43
+#define BMOD_1_TO_MOD_1_THRESHOLD 37
-#define MUL_TOOM22_THRESHOLD 14
-#define MUL_TOOM33_THRESHOLD 54
-#define MUL_TOOM44_THRESHOLD 154
-#define MUL_TOOM6H_THRESHOLD 206
+#define MUL_TOOM22_THRESHOLD 18
+#define MUL_TOOM33_THRESHOLD 53
+#define MUL_TOOM44_THRESHOLD 106
+#define MUL_TOOM6H_THRESHOLD 180
#define MUL_TOOM8H_THRESHOLD 309
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD 89
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD 99
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD 97
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD 97
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD 61
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 91
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 92
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD 51
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD 79
-#define SQR_BASECASE_THRESHOLD 5
-#define SQR_TOOM2_THRESHOLD 36
-#define SQR_TOOM3_THRESHOLD 61
-#define SQR_TOOM4_THRESHOLD 154
-#define SQR_TOOM6_THRESHOLD 206
-#define SQR_TOOM8_THRESHOLD 309
+#define SQR_BASECASE_THRESHOLD 0 /* always (native) */
+#define SQR_TOOM2_THRESHOLD 28
+#define SQR_TOOM3_THRESHOLD 73
+#define SQR_TOOM4_THRESHOLD 136
+#define SQR_TOOM6_THRESHOLD 194
+#define SQR_TOOM8_THRESHOLD 272
-#define MULMOD_BNM1_THRESHOLD 12
-#define SQRMOD_BNM1_THRESHOLD 14
+#define MULMID_TOOM42_THRESHOLD 32
-#define MUL_FFT_MODF_THRESHOLD 380 /* k = 5 */
+#define MULMOD_BNM1_THRESHOLD 11
+#define SQRMOD_BNM1_THRESHOLD 16
+
+#define MUL_FFT_MODF_THRESHOLD 372 /* k = 5 */
#define MUL_FFT_TABLE3 \
- { { 380, 5}, { 17, 6}, { 9, 5}, { 19, 6}, \
- { 10, 5}, { 21, 6}, { 11, 5}, { 23, 6}, \
- { 23, 7}, { 12, 6}, { 25, 7}, { 25, 8}, \
- { 13, 7}, { 30, 6}, { 61, 7}, { 32, 8}, \
- { 17, 7}, { 35, 8}, { 29, 9}, { 15, 8}, \
- { 35, 9}, { 19, 8}, { 41, 9}, { 23, 8}, \
- { 47, 9}, { 27,10}, { 15, 9}, { 39,10}, \
- { 23, 9}, { 55,11}, { 15,10}, { 31, 9}, \
- { 71,10}, { 39, 9}, { 83,10}, { 47, 9}, \
- { 99,10}, { 55,11}, { 31,10}, { 63, 9}, \
- { 127,10}, { 79,11}, { 47,10}, { 103,12}, \
- { 31,11}, { 63,10}, { 127, 9}, { 255,10}, \
- { 135, 9}, { 271,11}, { 79,10}, { 159, 9}, \
- { 319,10}, { 167,11}, { 95,10}, { 191, 9}, \
- { 383, 8}, { 767,10}, { 207,11}, { 111,12}, \
- { 63,11}, { 127,10}, { 255, 9}, { 511,10}, \
- { 271,11}, { 143,10}, { 287, 9}, { 575,10}, \
- { 303, 9}, { 607,11}, { 159,10}, { 319, 9}, \
- { 639,10}, { 335, 9}, { 671,12}, { 95,11}, \
- { 191,10}, { 383, 9}, { 767,11}, { 207,10}, \
- { 415, 9}, { 831,13}, { 63,12}, { 127,11}, \
- { 255,10}, { 511,11}, { 271,10}, { 543, 9}, \
- { 1087,11}, { 287,10}, { 575,11}, { 303,10}, \
- { 607,12}, { 159,11}, { 319,10}, { 639,11}, \
- { 335,10}, { 671,11}, { 351,10}, { 703,11}, \
- { 367,12}, { 191,11}, { 383,10}, { 767,11}, \
- { 415,10}, { 831,12}, { 223,11}, { 447,10}, \
- { 895,13}, { 127,12}, { 255,11}, { 511,10}, \
- { 1023,11}, { 543,10}, { 1087,12}, { 287,11}, \
- { 575,10}, { 1151,11}, { 607,10}, { 1215,12}, \
- { 319,11}, { 639,10}, { 1279,11}, { 671,12}, \
- { 351,11}, { 703,10}, { 1407,13}, { 191,12}, \
- { 383,11}, { 767,12}, { 415,11}, { 831,10}, \
- { 1663,12}, { 447,11}, { 895,12}, { 479,14}, \
- { 127,13}, { 255,12}, { 511,11}, { 1023,12}, \
- { 543,11}, { 1087,10}, { 2175,12}, { 575,11}, \
- { 1151,12}, { 607,11}, { 1215,13}, { 319,12}, \
- { 639,11}, { 1279,12}, { 671,11}, { 1343,10}, \
- { 2687,12}, { 703,11}, { 1407,12}, { 735,13}, \
- { 383,12}, { 767,11}, { 1535,12}, { 799,11}, \
- { 1599,12}, { 831,11}, { 1663,13}, { 447,12}, \
- { 959,14}, { 255,13}, { 511,12}, { 1087,11}, \
- { 2175,13}, { 575,12}, { 1215,11}, { 2431,13}, \
- { 639,12}, { 1343,11}, { 2687,13}, { 703,12}, \
- { 1407,14}, { 383,13}, { 767,12}, { 1599,13}, \
- { 831,12}, { 1663,13}, { 959,15}, { 255,14}, \
- { 511,13}, { 1087,12}, { 2175,13}, { 1215,12}, \
- { 2431,14}, { 639,13}, { 1343,12}, { 2687,13}, \
- { 1471,12}, { 2943,14}, { 767,13}, { 1599,12}, \
- { 3199,13}, { 1663,14}, { 895,13}, { 1855,15}, \
- { 32768,16}, { 65536,17}, { 131072,18}, { 262144,19}, \
- { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \
- {8388608,24} }
-#define MUL_FFT_TABLE3_SIZE 209
-#define MUL_FFT_THRESHOLD 7296
-
-#define SQR_FFT_MODF_THRESHOLD 308 /* k = 5 */
+ { { 372, 5}, { 13, 6}, { 7, 5}, { 15, 6}, \
+ { 8, 5}, { 17, 6}, { 9, 5}, { 19, 6}, \
+ { 10, 5}, { 21, 6}, { 19, 7}, { 10, 6}, \
+ { 21, 7}, { 11, 6}, { 23, 7}, { 21, 8}, \
+ { 11, 7}, { 25, 8}, { 13, 7}, { 31, 8}, \
+ { 17, 7}, { 35, 8}, { 21, 9}, { 11, 8}, \
+ { 27, 9}, { 15, 8}, { 35, 9}, { 19, 8}, \
+ { 41, 9}, { 23, 8}, { 49, 9}, { 27,10}, \
+ { 15, 9}, { 39,10}, { 23, 9}, { 55,11}, \
+ { 15,10}, { 31, 9}, { 71,10}, { 39, 9}, \
+ { 83,10}, { 47, 9}, { 99,10}, { 55,11}, \
+ { 31,10}, { 63, 9}, { 127,10}, { 79,11}, \
+ { 47,10}, { 95, 9}, { 191,10}, { 103, 9}, \
+ { 207,12}, { 31,11}, { 63,10}, { 127, 9}, \
+ { 255,10}, { 135, 9}, { 271,11}, { 79,10}, \
+ { 159, 9}, { 319,10}, { 167,11}, { 95,10}, \
+ { 191, 9}, { 383, 8}, { 767,10}, { 207, 9}, \
+ { 415,11}, { 111,10}, { 223,12}, { 63,11}, \
+ { 127,10}, { 255, 9}, { 511,10}, { 271, 9}, \
+ { 543,11}, { 143,10}, { 287, 9}, { 575, 8}, \
+ { 1151,10}, { 303, 9}, { 607,10}, { 319, 9}, \
+ { 639,10}, { 335,12}, { 95,11}, { 191,10}, \
+ { 383, 9}, { 767,11}, { 207,10}, { 415, 9}, \
+ { 831,11}, { 223,10}, { 447,13}, { 8192,14}, \
+ { 16384,15}, { 32768,16}, { 65536,17}, { 131072,18}, \
+ { 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \
+ {4194304,23}, {8388608,24} }
+#define MUL_FFT_TABLE3_SIZE 106
+#define MUL_FFT_THRESHOLD 3264
+
+#define SQR_FFT_MODF_THRESHOLD 284 /* k = 5 */
#define SQR_FFT_TABLE3 \
- { { 308, 5}, { 17, 6}, { 9, 5}, { 19, 6}, \
- { 19, 7}, { 10, 6}, { 21, 7}, { 11, 6}, \
- { 23, 7}, { 21, 8}, { 11, 7}, { 24, 8}, \
+ { { 280, 5}, { 13, 6}, { 7, 5}, { 15, 6}, \
+ { 8, 5}, { 17, 6}, { 19, 7}, { 10, 6}, \
+ { 21, 7}, { 21, 8}, { 11, 7}, { 24, 8}, \
{ 13, 7}, { 29, 8}, { 15, 7}, { 31, 8}, \
- { 21, 9}, { 11, 8}, { 27, 9}, { 15, 8}, \
- { 33, 9}, { 19, 8}, { 39, 9}, { 23, 8}, \
- { 47, 9}, { 27,10}, { 15, 9}, { 39,10}, \
- { 23, 9}, { 51,11}, { 15,10}, { 31, 9}, \
- { 67,10}, { 39, 9}, { 83,10}, { 47, 9}, \
- { 95,10}, { 55,11}, { 31,10}, { 79,11}, \
- { 47,10}, { 95, 9}, { 191, 8}, { 383,12}, \
- { 31,11}, { 63,10}, { 127, 9}, { 255, 8}, \
- { 511,10}, { 135, 9}, { 271,11}, { 79,10}, \
- { 159, 9}, { 319,10}, { 175, 9}, { 351,11}, \
- { 95,10}, { 191, 9}, { 383,10}, { 207, 9}, \
- { 415,11}, { 111,12}, { 63,11}, { 127,10}, \
- { 255, 9}, { 511, 8}, { 1023,10}, { 271, 9}, \
- { 543,10}, { 287, 9}, { 575, 8}, { 1151,10}, \
- { 303,11}, { 159,10}, { 319, 9}, { 639,11}, \
- { 175,10}, { 351,12}, { 95,11}, { 191,10}, \
- { 383, 9}, { 767,11}, { 207,10}, { 415, 9}, \
- { 831,11}, { 223,13}, { 63,12}, { 127,11}, \
- { 255,10}, { 511, 9}, { 1023,11}, { 271,10}, \
- { 543,11}, { 287,10}, { 575, 9}, { 1151,11}, \
- { 303,12}, { 159,11}, { 319,10}, { 639,11}, \
- { 351,10}, { 703,12}, { 191,11}, { 383,10}, \
- { 767,11}, { 415,10}, { 831,12}, { 223,11}, \
- { 447,10}, { 895,11}, { 479,10}, { 959,13}, \
- { 127,12}, { 255,11}, { 511,10}, { 1023,11}, \
- { 543,12}, { 287,11}, { 575,10}, { 1151,11}, \
- { 607,12}, { 319,11}, { 639,10}, { 1279,12}, \
- { 351,11}, { 703,13}, { 191,12}, { 383,11}, \
- { 767,12}, { 415,11}, { 831,10}, { 1663,12}, \
- { 447,11}, { 895,12}, { 479,11}, { 959,14}, \
- { 127,13}, { 255,12}, { 511,11}, { 1023,12}, \
- { 543,11}, { 1087,10}, { 2175,12}, { 575,11}, \
- { 1151,12}, { 607,13}, { 319,12}, { 639,11}, \
- { 1279,12}, { 671,11}, { 1343,12}, { 703,11}, \
- { 1407,13}, { 383,12}, { 767,11}, { 1535,12}, \
- { 831,11}, { 1663,13}, { 447,12}, { 959,11}, \
- { 1919,14}, { 255,13}, { 511,12}, { 1087,11}, \
- { 2175,13}, { 575,12}, { 1215,11}, { 2431,13}, \
- { 639,12}, { 1343,13}, { 703,12}, { 1407,14}, \
- { 383,13}, { 767,12}, { 1535,13}, { 831,12}, \
- { 1663,13}, { 959,12}, { 1919,15}, { 255,14}, \
- { 511,13}, { 1087,12}, { 2175,13}, { 1215,12}, \
- { 2431,14}, { 639,13}, { 1343,12}, { 2687,13}, \
- { 1407,12}, { 2815,13}, { 1471,14}, { 767,13}, \
- { 1535,12}, { 3071,13}, { 1663,14}, { 895,13}, \
- { 1791,12}, { 3839,15}, { 32768,16}, { 65536,17}, \
+ { 17, 7}, { 35, 8}, { 21, 9}, { 11, 8}, \
+ { 27, 9}, { 15, 8}, { 33, 9}, { 19, 8}, \
+ { 41, 9}, { 23, 8}, { 47, 9}, { 27,10}, \
+ { 15, 9}, { 39,10}, { 23, 9}, { 51,11}, \
+ { 15,10}, { 31, 9}, { 67,10}, { 39, 9}, \
+ { 83,10}, { 47, 9}, { 95,10}, { 55,11}, \
+ { 31,10}, { 71, 9}, { 143,10}, { 79,11}, \
+ { 47,10}, { 95, 9}, { 191, 8}, { 383,10}, \
+ { 103,12}, { 31,11}, { 63,10}, { 127, 9}, \
+ { 255, 8}, { 511,10}, { 135, 9}, { 271,10}, \
+ { 143, 9}, { 287,11}, { 79,10}, { 159, 9}, \
+ { 319, 8}, { 639,10}, { 175, 9}, { 351,11}, \
+ { 95,10}, { 191, 9}, { 383, 8}, { 767,10}, \
+ { 207, 9}, { 415,12}, { 63,11}, { 127,10}, \
+ { 255, 9}, { 511,10}, { 271,11}, { 143,10}, \
+ { 287, 9}, { 575,11}, { 159,10}, { 319, 9}, \
+ { 639,11}, { 175,10}, { 351,12}, { 95,11}, \
+ { 191,10}, { 383, 9}, { 767,11}, { 207,10}, \
+ { 415, 9}, { 831,11}, { 223,10}, { 447,13}, \
+ { 8192,14}, { 16384,15}, { 32768,16}, { 65536,17}, \
{ 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \
{2097152,22}, {4194304,23}, {8388608,24} }
-#define SQR_FFT_TABLE3_SIZE 207
+#define SQR_FFT_TABLE3_SIZE 103
#define SQR_FFT_THRESHOLD 2752
-#define MULLO_BASECASE_THRESHOLD 5
-#define MULLO_DC_THRESHOLD 34
-#define MULLO_MUL_N_THRESHOLD 10950
+#define MULLO_BASECASE_THRESHOLD 3
+#define MULLO_DC_THRESHOLD 40
+#define MULLO_MUL_N_THRESHOLD 6440
-#define DC_DIV_QR_THRESHOLD 30
-#define DC_DIVAPPR_Q_THRESHOLD 103
-#define DC_BDIV_QR_THRESHOLD 48
-#define DC_BDIV_Q_THRESHOLD 120
+#define DC_DIV_QR_THRESHOLD 43
+#define DC_DIVAPPR_Q_THRESHOLD 166
+#define DC_BDIV_QR_THRESHOLD 47
+#define DC_BDIV_Q_THRESHOLD 112
#define INV_MULMOD_BNM1_THRESHOLD 50
-#define INV_NEWTON_THRESHOLD 131
-#define INV_APPR_THRESHOLD 115
+#define INV_NEWTON_THRESHOLD 181
+#define INV_APPR_THRESHOLD 165
-#define BINV_NEWTON_THRESHOLD 204
+#define BINV_NEWTON_THRESHOLD 214
#define REDC_1_TO_REDC_N_THRESHOLD 55
#define MU_DIV_QR_THRESHOLD 998
-#define MU_DIVAPPR_Q_THRESHOLD 998
-#define MUPI_DIV_QR_THRESHOLD 61
-#define MU_BDIV_QR_THRESHOLD 889
-#define MU_BDIV_Q_THRESHOLD 1078
+#define MU_DIVAPPR_Q_THRESHOLD 1017
+#define MUPI_DIV_QR_THRESHOLD 84
+#define MU_BDIV_QR_THRESHOLD 855
+#define MU_BDIV_Q_THRESHOLD 1017
+
+#define POWM_SEC_TABLE 4,32,327,1100,2826
-#define MATRIX22_STRASSEN_THRESHOLD 11
-#define HGCD_THRESHOLD 96
-#define GCD_DC_THRESHOLD 249
-#define GCDEXT_DC_THRESHOLD 209
-#define JACOBI_BASE_METHOD 1
+#define MATRIX22_STRASSEN_THRESHOLD 12
+#define HGCD_THRESHOLD 109
+#define HGCD_APPR_THRESHOLD 107
+#define HGCD_REDUCE_THRESHOLD 2121
+#define GCD_DC_THRESHOLD 348
+#define GCDEXT_DC_THRESHOLD 246
+#define JACOBI_BASE_METHOD 4
#define GET_STR_DC_THRESHOLD 11
#define GET_STR_PRECOMPUTE_THRESHOLD 23
-#define SET_STR_DC_THRESHOLD 532
-#define SET_STR_PRECOMPUTE_THRESHOLD 1781
+#define SET_STR_DC_THRESHOLD 650
+#define SET_STR_PRECOMPUTE_THRESHOLD 1713
+
+#define FAC_DSC_THRESHOLD 562
+#define FAC_ODD_THRESHOLD 23
-/* gmp-mparam.h -- Compiler/machine parameter header file.
+/* POWER5 gmp-mparam.h -- Compiler/machine parameter header file.
Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2003, 2009, 2010 Free
Software Foundation, Inc.
#define MOD_1_NORM_THRESHOLD 0 /* always */
#define MOD_1_UNNORM_THRESHOLD 0 /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD 9
-#define MOD_1U_TO_MOD_1_1_THRESHOLD 10
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD 0 /* never mpn_mod_1_1p */
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD 22
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 16
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 6
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 5
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 15
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD 0 /* never mpn_mod_1s_2p */
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 11
#define USE_PREINV_DIVREM_1 0
+#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
#define DIVEXACT_1_THRESHOLD 0 /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD 59
-
-#define MUL_TOOM22_THRESHOLD 16
-#define MUL_TOOM33_THRESHOLD 56
-#define MUL_TOOM44_THRESHOLD 118
-#define MUL_TOOM6H_THRESHOLD 206
-#define MUL_TOOM8H_THRESHOLD 309
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD 82
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD 91
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD 81
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD 88
-
-#define SQR_BASECASE_THRESHOLD 10
-#define SQR_TOOM2_THRESHOLD 51
-#define SQR_TOOM3_THRESHOLD 78
-#define SQR_TOOM4_THRESHOLD 100
-#define SQR_TOOM6_THRESHOLD 150
-#define SQR_TOOM8_THRESHOLD 309
-
-#define MULMOD_BNM1_THRESHOLD 5
-#define SQRMOD_BNM1_THRESHOLD 7
-
-#define MUL_FFT_MODF_THRESHOLD 348 /* k = 5 */
+#define BMOD_1_TO_MOD_1_THRESHOLD 40
+
+#define MUL_TOOM22_THRESHOLD 21
+#define MUL_TOOM33_THRESHOLD 24
+#define MUL_TOOM44_THRESHOLD 70
+#define MUL_TOOM6H_THRESHOLD 262
+#define MUL_TOOM8H_THRESHOLD 393
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD 49
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 126
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 85
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD 94
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD 70
+
+#define SQR_BASECASE_THRESHOLD 0 /* always (native) */
+#define SQR_TOOM2_THRESHOLD 24
+#define SQR_TOOM3_THRESHOLD 81
+#define SQR_TOOM4_THRESHOLD 142
+#define SQR_TOOM6_THRESHOLD 189
+#define SQR_TOOM8_THRESHOLD 284
+
+#define MULMID_TOOM42_THRESHOLD 36
+
+#define MULMOD_BNM1_THRESHOLD 12
+#define SQRMOD_BNM1_THRESHOLD 15
+
+#define MUL_FFT_MODF_THRESHOLD 304 /* k = 5 */
#define MUL_FFT_TABLE3 \
{ { 348, 5}, { 17, 6}, { 9, 5}, { 19, 6}, \
{ 10, 5}, { 21, 6}, { 21, 7}, { 11, 6}, \
{ 65536,17}, { 131072,18}, { 262144,19}, { 524288,20}, \
{1048576,21}, {2097152,22}, {4194304,23}, {8388608,24} }
#define MUL_FFT_TABLE3_SIZE 208
-#define MUL_FFT_THRESHOLD 3712
+#define MUL_FFT_THRESHOLD 4224
-#define SQR_FFT_MODF_THRESHOLD 272 /* k = 5 */
+#define SQR_FFT_MODF_THRESHOLD 284 /* k = 5 */
#define SQR_FFT_TABLE3 \
{ { 272, 5}, { 15, 6}, { 8, 5}, { 17, 6}, \
{ 19, 7}, { 17, 8}, { 9, 7}, { 21, 8}, \
{ 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \
{4194304,23}, {8388608,24} }
#define SQR_FFT_TABLE3_SIZE 190
-#define SQR_FFT_THRESHOLD 2752
+#define SQR_FFT_THRESHOLD 3264
-#define MULLO_BASECASE_THRESHOLD 5
-#define MULLO_DC_THRESHOLD 25
-#define MULLO_MUL_N_THRESHOLD 6633
+#define MULLO_BASECASE_THRESHOLD 6
+#define MULLO_DC_THRESHOLD 60
+#define MULLO_MUL_N_THRESHOLD 7463
-#define DC_DIV_QR_THRESHOLD 29
-#define DC_DIVAPPR_Q_THRESHOLD 102
-#define DC_BDIV_QR_THRESHOLD 47
-#define DC_BDIV_Q_THRESHOLD 112
+#define DC_DIV_QR_THRESHOLD 58
+#define DC_DIVAPPR_Q_THRESHOLD 232
+#define DC_BDIV_QR_THRESHOLD 78
+#define DC_BDIV_Q_THRESHOLD 238
-#define INV_MULMOD_BNM1_THRESHOLD 76
-#define INV_NEWTON_THRESHOLD 129
-#define INV_APPR_THRESHOLD 109
+#define INV_MULMOD_BNM1_THRESHOLD 92
+#define INV_NEWTON_THRESHOLD 155
+#define INV_APPR_THRESHOLD 157
-#define BINV_NEWTON_THRESHOLD 197
-#define REDC_1_TO_REDC_N_THRESHOLD 54
+#define BINV_NEWTON_THRESHOLD 155
+#define REDC_1_TO_REDC_N_THRESHOLD 61
-#define MU_DIV_QR_THRESHOLD 872
-#define MU_DIVAPPR_Q_THRESHOLD 855
-#define MUPI_DIV_QR_THRESHOLD 53
-#define MU_BDIV_QR_THRESHOLD 792
+#define MU_DIV_QR_THRESHOLD 998
+#define MU_DIVAPPR_Q_THRESHOLD 979
+#define MUPI_DIV_QR_THRESHOLD 79
+#define MU_BDIV_QR_THRESHOLD 823
#define MU_BDIV_Q_THRESHOLD 942
-#define MATRIX22_STRASSEN_THRESHOLD 15
-#define HGCD_THRESHOLD 86
-#define GCD_DC_THRESHOLD 241
-#define GCDEXT_DC_THRESHOLD 229
-#define JACOBI_BASE_METHOD 1
+#define MATRIX22_STRASSEN_THRESHOLD 14
+#define HGCD_THRESHOLD 74
+#define HGCD_APPR_THRESHOLD 155
+#define HGCD_REDUCE_THRESHOLD 2479
+#define GCD_DC_THRESHOLD 351
+#define GCDEXT_DC_THRESHOLD 288
+#define JACOBI_BASE_METHOD 4
#define GET_STR_DC_THRESHOLD 12
#define GET_STR_PRECOMPUTE_THRESHOLD 21
-#define SET_STR_DC_THRESHOLD 532
-#define SET_STR_PRECOMPUTE_THRESHOLD 1655
+#define SET_STR_DC_THRESHOLD 650
+#define SET_STR_PRECOMPUTE_THRESHOLD 1585
+
+#define FAC_DSC_THRESHOLD 662
+#define FAC_ODD_THRESHOLD 28
--- /dev/null
+dnl PowerPC-64 mpn_addmul_1 and mpn_submul_1 optimised for power6.
+
+dnl Copyright 1999, 2000, 2001, 2003, 2004, 2005, 2006, 2008, 2010, 2011
+dnl Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C mpn_addmul_1 mpn_submul_1
+C cycles/limb cycles/limb
+C POWER3/PPC630 ? ?
+C POWER4/PPC970 ? ?
+C POWER5 ? ?
+C POWER6 12.25 12.8
+C POWER7 ? ?
+
+C TODO
+C * Reduce register usage.
+C * Schedule function entry code.
+C * Unroll more. 8-way unrolling would bring us to 10 c/l, 16-way unrolling
+C would bring us to 9 c/l.
+C * Handle n = 1 and perhaps n = 2 separately, without saving any registers.
+
+C INPUT PARAMETERS
+C rp, up, n and v0 are m4 aliases for r3, r4, r5 and r6.  r5 is written as a
+C product register after the loop count has been copied into ctr.
+define(`rp', `r3')
+define(`up', `r4')
+define(`n', `r5')
+define(`v0', `r6')
+
+C Per-operation macro selection:
+C   ADDSUB     starts the carry chain against the limbs loaded from rp
+C              (addc for addmul, subfc for submul).
+C   ADDSUBC    continues that chain (adde for addmul, subfe for submul).
+C   AM(x)      emits x only in the addmul build, SM(x) only in the submul build.
+C   CLRRSC(r)  is used where r0 is 0: it leaves r = 0 with CA = 0 in the addmul
+C              build and CA = 1 in the submul build.
+ifdef(`OPERATION_addmul_1',`
+ define(ADDSUBC, adde)
+ define(ADDSUB, addc)
+ define(func, mpn_addmul_1)
+ define(func_nc, mpn_addmul_1c) C FIXME: not really supported
+ define(AM, `$1')
+ define(SM, `')
+ define(CLRRSC, `addic $1, r0, 0')
+')
+ifdef(`OPERATION_submul_1',`
+ define(ADDSUBC, subfe)
+ define(ADDSUB, subfc)
+ define(func, mpn_submul_1)
+ define(func_nc, mpn_submul_1c) C FIXME: not really supported
+ define(AM, `')
+ define(SM, `$1')
+ define(CLRRSC, `subfc $1, r0, r0')
+')
+
+C func multiplies the limbs read from up by v0 and adds the products to
+C (addmul build) or subtracts them from (submul build) the limbs at rp, storing
+C the results back to rp, four limbs per loop pass.  Before returning, r3 is
+C loaded with the last high limb plus the outstanding carry or borrow.
+ASM_START()
+PROLOGUE(func)
+C Save r27-r31 at negative offsets from r1; they are reloaded before blr.
+ std r31, -8(r1)
+ std r30, -16(r1)
+ std r29, -24(r1)
+ std r28, -32(r1)
+ std r27, -40(r1)
+
+C Set ctr to (n + 3) / 4 and pick the entry point from n mod 4:
+C 0 goes to b0, 1 to b1, 2 to b2, and 3 falls through to b3.
+ rldicl. r0, n, 0,62 C r0 = n & 3, set cr0
+ cmpdi cr6, r0, 2
+ addi n, n, 3 C compute count...
+ srdi n, n, 2 C ...for ctr
+ mtctr n C copy loop count into ctr
+ beq cr0, L(b0)
+ blt cr6, L(b1)
+ beq cr6, L(b2)
+
+C n mod 4 = 3: form three products, link each high half to the next low half
+C with carry, leave the top limb in r12, then join the loop tail at l3.
+L(b3): ld r8, 0(up)
+ ld r7, 8(up)
+ ld r27, 16(up)
+ addi up, up, 16
+ addi rp, rp, 16
+ mulld r5, r8, v0
+ mulhdu r8, r8, v0
+ mulld r9, r7, v0
+ mulhdu r7, r7, v0
+ mulld r11, r27, v0
+ mulhdu r27, r27, v0
+ ld r29, -16(rp)
+ ld r30, -8(rp)
+ ld r31, 0(rp)
+ addc r9, r9, r8
+ adde r11, r11, r7
+ addze r12, r27
+ ADDSUB r5, r5, r29
+ b L(l3)
+
+C n mod 4 = 2: same scheme with two products, joining the loop tail at l2.
+L(b2): ld r7, 0(up)
+ ld r27, 8(up)
+ addi up, up, 8
+ addi rp, rp, 8
+ mulld r9, r7, v0
+ mulhdu r7, r7, v0
+ mulld r11, r27, v0
+ mulhdu r27, r27, v0
+ ld r30, -8(rp)
+ ld r31, 0(rp)
+ addc r11, r11, r7
+ addze r12, r27
+ ADDSUB r9, r9, r30
+ b L(l2)
+
+C n mod 4 = 1: a single product; its high half goes straight into r12 and the
+C loop tail is joined at l1.
+L(b1): ld r27, 0(up)
+ ld r31, 0(rp)
+ mulld r11, r27, v0
+ mulhdu r12, r27, v0
+ ADDSUB r11, r11, r31
+ b L(l1)
+
+C n mod 4 = 0: step up and rp back one limb so that the loop offsets 8..32
+C address the first four limbs, and start with r12 = 0 (r0 is 0 on this path).
+L(b0): addi up, up, -8
+ addi rp, rp, -8
+ CLRRSC( r12) C clear r12 and clr/set cy
+
+C Main loop, four limbs per pass.  r12 holds the high limb handed on from the
+C previous pass.  In the submul build the two SM insns invert CA, so that the
+C state left by the subfe chain is added as a borrow by the adde chain below.
+ ALIGN(32)
+L(top):
+SM(` subfe r11, r0, r0') C complement...
+SM(` addic r11, r11, 1') C ...carry flag
+ ld r10, 8(up)
+ ld r8, 16(up)
+ ld r7, 24(up)
+ ld r27, 32(up)
+ addi up, up, 32
+ addi rp, rp, 32
+ mulld r0, r10, v0
+ mulhdu r10, r10, v0
+ mulld r5, r8, v0
+ mulhdu r8, r8, v0
+ mulld r9, r7, v0
+ mulhdu r7, r7, v0
+ mulld r11, r27, v0
+ mulhdu r27, r27, v0
+ ld r28, -24(rp)
+ adde r0, r0, r12
+ ld r29, -16(rp)
+ adde r5, r5, r10
+ ld r30, -8(rp)
+ ld r31, 0(rp)
+ adde r9, r9, r8
+ adde r11, r11, r7
+ addze r12, r27
+ ADDSUB r0, r0, r28
+ std r0, -24(rp)
+ ADDSUBC r5, r5, r29
+L(l3): std r5, -16(rp)
+ ADDSUBC r9, r9, r30
+L(l2): std r9, -8(rp)
+ ADDSUBC r11, r11, r31
+L(l1): std r11, 0(rp)
+ bdnz L(top)
+
+C Result in r3: r12 plus CA in the addmul build.  In the submul build r11
+C becomes CA - 1 (0 or -1) and r3 = r12 - r11.  The saved registers are
+C reloaded in between.
+AM(` addze r3, r12')
+SM(` subfe r11, r0, r0') C complement...
+ ld r31, -8(r1)
+SM(` subf r3, r11, r12')
+ ld r30, -16(r1)
+ ld r29, -24(r1)
+ ld r28, -32(r1)
+ ld r27, -40(r1)
+ blr
+EPILOGUE()
-/* gmp-mparam.h -- Compiler/machine parameter header file.
+/* POWER6 gmp-mparam.h -- Compiler/machine parameter header file.
-Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2003, 2009, 2010, 2012 Free
-Software Foundation, Inc.
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2003, 2009, 2010, 2011
+Free Software Foundation, Inc.
This file is part of the GNU MP Library.
#define MOD_1_NORM_THRESHOLD 0 /* always */
#define MOD_1_UNNORM_THRESHOLD 0 /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD MP_SIZE_T_MAX /* never */
-#define MOD_1U_TO_MOD_1_1_THRESHOLD MP_SIZE_T_MAX
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD MP_SIZE_T_MAX
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD MP_SIZE_T_MAX
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 55
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 3
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 3
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 12
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD 0 /* never mpn_mod_1s_2p */
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 6
#define USE_PREINV_DIVREM_1 0
+#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
#define DIVEXACT_1_THRESHOLD 0 /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD MP_SIZE_T_MAX /* never */
-
-#define MUL_TOOM22_THRESHOLD 14
-#define MUL_TOOM33_THRESHOLD 37
-#define MUL_TOOM44_THRESHOLD 160
-#define MUL_TOOM6H_THRESHOLD 177
-#define MUL_TOOM8H_THRESHOLD 321
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD 73
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD 86
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD 103
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD 90
-
-#define SQR_BASECASE_THRESHOLD 5
-#define SQR_TOOM2_THRESHOLD 22
-#define SQR_TOOM3_THRESHOLD 43
-#define SQR_TOOM4_THRESHOLD 296
-#define SQR_TOOM6_THRESHOLD 309
-#define SQR_TOOM8_THRESHOLD 562
-
-#define MULMOD_BNM1_THRESHOLD 12
+#define BMOD_1_TO_MOD_1_THRESHOLD 21
+
+#define MUL_TOOM22_THRESHOLD 20
+#define MUL_TOOM33_THRESHOLD 50
+#define MUL_TOOM44_THRESHOLD 106
+#define MUL_TOOM6H_THRESHOLD 274
+#define MUL_TOOM8H_THRESHOLD 339
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD 62
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 76
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 73
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD 66
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD 88
+
+#define SQR_BASECASE_THRESHOLD 0 /* always (native) */
+#define SQR_TOOM2_THRESHOLD 24
+#define SQR_TOOM3_THRESHOLD 49
+#define SQR_TOOM4_THRESHOLD 130
+#define SQR_TOOM6_THRESHOLD 226
+#define SQR_TOOM8_THRESHOLD 272
+
+#define MULMID_TOOM42_THRESHOLD 36
+
+#define MULMOD_BNM1_THRESHOLD 14
#define SQRMOD_BNM1_THRESHOLD 14
-#define MUL_FFT_MODF_THRESHOLD 272 /* k = 5 */
+#define MUL_FFT_MODF_THRESHOLD 380 /* k = 5 */
#define MUL_FFT_TABLE3 \
- { { 272, 5}, { 13, 6}, { 7, 5}, { 15, 6}, \
- { 8, 5}, { 17, 6}, { 9, 5}, { 19, 6}, \
- { 13, 7}, { 7, 6}, { 17, 7}, { 9, 6}, \
- { 19, 7}, { 17, 8}, { 9, 7}, { 20, 8}, \
- { 11, 7}, { 24, 8}, { 21, 9}, { 11, 8}, \
- { 25, 9}, { 15, 8}, { 33, 9}, { 19, 8}, \
- { 39, 9}, { 23, 8}, { 47,10}, { 15, 9}, \
- { 39,10}, { 23, 9}, { 47,11}, { 15,10}, \
- { 31, 9}, { 63,10}, { 47,11}, { 31,10}, \
- { 71,11}, { 47,12}, { 31,11}, { 63,10}, \
- { 127, 9}, { 255, 8}, { 511,10}, { 143,11}, \
- { 79,10}, { 159, 9}, { 319, 8}, { 639,10}, \
- { 175, 9}, { 351,11}, { 95,10}, { 191, 9}, \
- { 383,12}, { 63,11}, { 127,10}, { 255, 9}, \
- { 511,11}, { 143,10}, { 287, 9}, { 575,11}, \
- { 159,10}, { 319, 9}, { 639,11}, { 175,10}, \
- { 351,12}, { 95,11}, { 191,10}, { 383, 9}, \
- { 767,13}, { 63,12}, { 127,11}, { 255,10}, \
- { 511, 9}, { 1023,11}, { 271,10}, { 543,11}, \
- { 287,10}, { 575,12}, { 159,11}, { 319,10}, \
- { 639,11}, { 351,10}, { 703, 9}, { 1407,12}, \
- { 191,11}, { 383,10}, { 767,11}, { 415,10}, \
- { 831, 9}, { 1663,12}, { 223,11}, { 447,10}, \
- { 959, 9}, { 1919,13}, { 127,12}, { 255,11}, \
- { 511,10}, { 1023,12}, { 287,11}, { 575,10}, \
- { 1151,12}, { 319,11}, { 639,12}, { 351,11}, \
- { 703,10}, { 1407,13}, { 191,12}, { 383,11}, \
- { 767,12}, { 415,11}, { 831,10}, { 1663,12}, \
- { 447,11}, { 959,10}, { 1919, 9}, { 3839,13}, \
- { 255,12}, { 511,11}, { 1023,12}, { 543,11}, \
- { 1087,10}, { 2175,12}, { 575,11}, { 1151,13}, \
- { 319,12}, { 639,11}, { 1279,12}, { 703,11}, \
- { 1407,10}, { 2815,13}, { 383,12}, { 831,11}, \
- { 1663,13}, { 447,12}, { 959,11}, { 1919,10}, \
- { 3839,14}, { 255,13}, { 511,12}, { 1087,11}, \
- { 2175,13}, { 575,12}, { 1151,13}, { 639,12}, \
- { 1279,13}, { 703,12}, { 1407,11}, { 2815,14}, \
- { 383,13}, { 831,12}, { 1663,13}, { 959,12}, \
- { 1919,11}, { 3839,15}, { 255,14}, { 511,13}, \
- { 1087,12}, { 2175,13}, { 1151,14}, { 639,13}, \
- { 1407,12}, { 2815,13}, { 1471,14}, { 767,13}, \
- { 1663,14}, { 895,13}, { 1919,12}, { 3839,11}, \
- { 7679,15}, { 32768,16}, { 65536,17}, { 131072,18}, \
- { 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \
- {4194304,23}, {8388608,24} }
-#define MUL_FFT_TABLE3_SIZE 178
-#define MUL_FFT_THRESHOLD 1856
-
-#define SQR_FFT_MODF_THRESHOLD 208 /* k = 5 */
+ { { 340, 5}, { 19, 6}, { 10, 5}, { 21, 6}, \
+ { 11, 5}, { 23, 6}, { 21, 7}, { 11, 6}, \
+ { 23, 7}, { 12, 6}, { 25, 7}, { 21, 8}, \
+ { 11, 7}, { 24, 8}, { 13, 7}, { 27, 8}, \
+ { 21, 9}, { 11, 8}, { 25, 9}, { 15, 8}, \
+ { 33, 9}, { 23, 8}, { 47, 9}, { 27,10}, \
+ { 15, 9}, { 39,10}, { 23, 9}, { 47,11}, \
+ { 15,10}, { 31, 9}, { 63,10}, { 47,11}, \
+ { 31,10}, { 71,11}, { 47,12}, { 31,11}, \
+ { 63,10}, { 127, 9}, { 255, 8}, { 511,10}, \
+ { 135, 9}, { 271,11}, { 79, 9}, { 319, 8}, \
+ { 639,10}, { 175,11}, { 95,10}, { 191, 9}, \
+ { 383,10}, { 207,12}, { 63,10}, { 255, 9}, \
+ { 511,10}, { 271, 9}, { 543,11}, { 143,10}, \
+ { 287, 9}, { 575,10}, { 303, 9}, { 607,10}, \
+ { 319, 9}, { 639,11}, { 175,12}, { 95,11}, \
+ { 191,10}, { 383,11}, { 207,10}, { 415,13}, \
+ { 8192,14}, { 16384,15}, { 32768,16}, { 65536,17}, \
+ { 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \
+ {2097152,22}, {4194304,23}, {8388608,24} }
+#define MUL_FFT_TABLE3_SIZE 79
+#define MUL_FFT_THRESHOLD 3520
+
+#define SQR_FFT_MODF_THRESHOLD 308 /* k = 5 */
#define SQR_FFT_TABLE3 \
- { { 208, 5}, { 7, 4}, { 15, 5}, { 13, 6}, \
- { 7, 5}, { 15, 6}, { 8, 5}, { 17, 6}, \
- { 17, 7}, { 9, 6}, { 19, 7}, { 17, 8}, \
- { 9, 7}, { 20, 8}, { 11, 7}, { 23, 8}, \
- { 19, 9}, { 11, 8}, { 25, 9}, { 15, 8}, \
+ { { 280, 5}, { 17, 6}, { 9, 5}, { 19, 6}, \
+ { 21, 7}, { 11, 6}, { 23, 7}, { 21, 8}, \
+ { 11, 7}, { 24, 8}, { 13, 7}, { 27, 8}, \
+ { 21, 9}, { 11, 8}, { 25, 9}, { 15, 8}, \
{ 33, 9}, { 19, 8}, { 39, 9}, { 23, 8}, \
- { 47,10}, { 15, 9}, { 39,10}, { 23, 9}, \
- { 47,11}, { 15,10}, { 31, 9}, { 63,10}, \
- { 47,11}, { 31,10}, { 63, 9}, { 127, 8}, \
- { 255,10}, { 71, 9}, { 143, 8}, { 287,11}, \
- { 47,12}, { 31,11}, { 63,10}, { 127, 9}, \
- { 255, 8}, { 511,10}, { 143, 9}, { 287,11}, \
- { 79,10}, { 159, 9}, { 319, 8}, { 639,10}, \
- { 175, 9}, { 351, 8}, { 703,11}, { 95,10}, \
- { 191, 9}, { 383, 8}, { 767,10}, { 207, 9}, \
- { 415,12}, { 63,11}, { 127,10}, { 255, 9}, \
- { 511, 8}, { 1023,11}, { 143,10}, { 287, 9}, \
- { 575,11}, { 159,10}, { 319, 9}, { 639,11}, \
- { 175,10}, { 351, 9}, { 703,12}, { 95,11}, \
- { 191,10}, { 383, 9}, { 767,11}, { 207,10}, \
- { 415,13}, { 63,12}, { 127,11}, { 255,10}, \
- { 511, 9}, { 1023,11}, { 287,10}, { 575,12}, \
- { 159,11}, { 319,10}, { 639,11}, { 351,10}, \
- { 703,12}, { 191,11}, { 383,10}, { 767,11}, \
- { 415,10}, { 831, 9}, { 1663,12}, { 223,11}, \
- { 447,10}, { 895,13}, { 127,12}, { 255,11}, \
- { 511,10}, { 1023,12}, { 287,11}, { 575,10}, \
- { 1151,12}, { 319,11}, { 639,12}, { 351,11}, \
- { 703,10}, { 1407,13}, { 191,12}, { 383,11}, \
- { 767,12}, { 415,11}, { 831,10}, { 1663,12}, \
- { 447,11}, { 959,14}, { 127,13}, { 255,12}, \
- { 511,11}, { 1023,12}, { 543,11}, { 1087,10}, \
- { 2175,12}, { 575,11}, { 1151,13}, { 319,12}, \
- { 639,11}, { 1279,12}, { 703,11}, { 1407,13}, \
- { 383,12}, { 831,11}, { 1663,13}, { 447,12}, \
- { 959,14}, { 255,13}, { 511,12}, { 1087,11}, \
- { 2175,13}, { 575,12}, { 1215,13}, { 639,12}, \
- { 1279,13}, { 703,12}, { 1407,14}, { 383,13}, \
- { 831,12}, { 1663,13}, { 959,15}, { 255,14}, \
- { 511,13}, { 1087,12}, { 2303,13}, { 1215,14}, \
- { 639,13}, { 1407,12}, { 2815,14}, { 767,13}, \
- { 1663,14}, { 895,13}, { 1919,12}, { 3839,15}, \
- { 32768,16}, { 65536,17}, { 131072,18}, { 262144,19}, \
- { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \
- {8388608,24} }
-#define SQR_FFT_TABLE3_SIZE 177
-#define SQR_FFT_THRESHOLD 1856
-
-#define MULLO_BASECASE_THRESHOLD 3
-#define MULLO_DC_THRESHOLD 37
-#define MULLO_MUL_N_THRESHOLD 3574
-
-#define DC_DIV_QR_THRESHOLD 23
-#define DC_DIVAPPR_Q_THRESHOLD 95
-#define DC_BDIV_QR_THRESHOLD 41
-#define DC_BDIV_Q_THRESHOLD 90
-
-#define INV_MULMOD_BNM1_THRESHOLD 45
-#define INV_NEWTON_THRESHOLD 85
-#define INV_APPR_THRESHOLD 85
-
-#define BINV_NEWTON_THRESHOLD 151
-#define REDC_1_TO_REDC_N_THRESHOLD 43
-
-#define MU_DIV_QR_THRESHOLD 748
-#define MU_DIVAPPR_Q_THRESHOLD 1210
-#define MUPI_DIV_QR_THRESHOLD 42
-#define MU_BDIV_QR_THRESHOLD 618
-#define MU_BDIV_Q_THRESHOLD 807
-
-#define MATRIX22_STRASSEN_THRESHOLD 10
-#define HGCD_THRESHOLD 77
-#define GCD_DC_THRESHOLD 358
-#define GCDEXT_DC_THRESHOLD 241
-#define JACOBI_BASE_METHOD 3
-
-#define GET_STR_DC_THRESHOLD 12
-#define GET_STR_PRECOMPUTE_THRESHOLD 25
-#define SET_STR_DC_THRESHOLD 552
-#define SET_STR_PRECOMPUTE_THRESHOLD 1416
+ { 47, 9}, { 27,10}, { 15, 9}, { 39,10}, \
+ { 23, 9}, { 47,11}, { 15,10}, { 31, 9}, \
+ { 63,10}, { 47,11}, { 31,10}, { 71, 9}, \
+ { 143,11}, { 47,12}, { 31,11}, { 63, 9}, \
+ { 255, 8}, { 511, 9}, { 271,10}, { 143,11}, \
+ { 79,10}, { 159, 9}, { 319,10}, { 175, 9}, \
+ { 351,11}, { 95,10}, { 191, 9}, { 383,10}, \
+ { 207,12}, { 63,11}, { 127,10}, { 255, 9}, \
+ { 511, 8}, { 1023,10}, { 271, 9}, { 543,11}, \
+ { 143,10}, { 287, 9}, { 575,11}, { 159,10}, \
+ { 319, 9}, { 639,11}, { 175,10}, { 351,12}, \
+ { 95,11}, { 191,10}, { 383,11}, { 207,10}, \
+ { 415,13}, { 8192,14}, { 16384,15}, { 32768,16}, \
+ { 65536,17}, { 131072,18}, { 262144,19}, { 524288,20}, \
+ {1048576,21}, {2097152,22}, {4194304,23}, {8388608,24} }
+#define SQR_FFT_TABLE3_SIZE 80
+#define SQR_FFT_THRESHOLD 2752
+
+#define MULLO_BASECASE_THRESHOLD 5
+#define MULLO_DC_THRESHOLD 62
+#define MULLO_MUL_N_THRESHOLD 2995
+
+#define DC_DIV_QR_THRESHOLD 59
+#define DC_DIVAPPR_Q_THRESHOLD 200
+#define DC_BDIV_QR_THRESHOLD 70
+#define DC_BDIV_Q_THRESHOLD 168
+
+#define INV_MULMOD_BNM1_THRESHOLD 53
+#define INV_NEWTON_THRESHOLD 170
+#define INV_APPR_THRESHOLD 166
+
+#define BINV_NEWTON_THRESHOLD 220
+#define REDC_1_TO_REDC_N_THRESHOLD 67
+
+#define MU_DIV_QR_THRESHOLD 998
+#define MU_DIVAPPR_Q_THRESHOLD 942
+#define MUPI_DIV_QR_THRESHOLD 57
+#define MU_BDIV_QR_THRESHOLD 889
+#define MU_BDIV_Q_THRESHOLD 1078
+
+#define POWM_SEC_TABLE 4,26,216,804,1731
+
+#define MATRIX22_STRASSEN_THRESHOLD 13
+#define HGCD_THRESHOLD 106
+#define HGCD_APPR_THRESHOLD 109
+#define HGCD_REDUCE_THRESHOLD 2205
+#define GCD_DC_THRESHOLD 492
+#define GCDEXT_DC_THRESHOLD 327
+#define JACOBI_BASE_METHOD 4
+
+#define GET_STR_DC_THRESHOLD 16
+#define GET_STR_PRECOMPUTE_THRESHOLD 28
+#define SET_STR_DC_THRESHOLD 537
+#define SET_STR_PRECOMPUTE_THRESHOLD 1576
+
+#define FAC_DSC_THRESHOLD 426
+#define FAC_ODD_THRESHOLD 0 /* always */
--- /dev/null
+dnl PowerPC-64 mpn_mul_basecase.
+
+dnl Copyright 1999, 2000, 2001, 2003, 2004, 2005, 2006, 2008, 2010 Free
+dnl Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C POWER3/PPC630 ?
+C POWER4/PPC970 ?
+C POWER5 ?
+C POWER6 12.25
+
+C TODO
+C * Reduce register usage. At least 4 fewer registers could be used.
+C * Unroll more. 8-way unrolling would bring us to 10 c/l, 16-way unrolling
+C would bring us to 9 c/l.
+C * The bdz insns for b1 and b2 will never branch.
+C * Align things better, perhaps by moving things like pointer updates from
+C before to after loops.
+
+C INPUT PARAMETERS
+define(`rp', `r3')
+define(`up', `r4')
+define(`un', `r5')
+define(`vp', `r6')
+define(`vn', `r7')
+
+define(`v0', `r25')
+define(`outer_rp', `r22')
+define(`outer_up', `r23')
+
+ASM_START()
+PROLOGUE(mpn_mul_basecase)
+
+C Special code for un <= 2, for efficiency of these important cases,
+C and since it simplifies the default code.
+ cmpdi cr0, un, 2
+ bgt cr0, L(un_gt2)
+ cmpdi cr6, vn, 1
+ ld r7, 0(vp)
+ ld r5, 0(up)
+ mulld r8, r5, r7 C weight 0
+ mulhdu r9, r5, r7 C weight 1
+ std r8, 0(rp)
+ beq cr0, L(2x)
+ std r9, 8(rp)
+ blr
+ ALIGN(16)
+L(2x): ld r0, 8(up)
+ mulld r8, r0, r7 C weight 1
+ mulhdu r10, r0, r7 C weight 2
+ addc r9, r9, r8
+ addze r10, r10
+ bne cr6, L(2x2)
+ std r9, 8(rp)
+ std r10, 16(rp)
+ blr
+ ALIGN(16)
+L(2x2): ld r6, 8(vp)
+ nop
+ mulld r8, r5, r6 C weight 1
+ mulhdu r11, r5, r6 C weight 2
+ mulld r12, r0, r6 C weight 2
+ mulhdu r0, r0, r6 C weight 3
+ addc r9, r9, r8
+ std r9, 8(rp)
+ adde r11, r11, r10
+ addze r0, r0
+ addc r11, r11, r12
+ addze r0, r0
+ std r11, 16(rp)
+ std r0, 24(rp)
+ blr
+
+L(un_gt2):
+ std r31, -8(r1)
+ std r30, -16(r1)
+ std r29, -24(r1)
+ std r28, -32(r1)
+ std r27, -40(r1)
+ std r26, -48(r1)
+ std r25, -56(r1)
+ std r24, -64(r1)
+ std r23, -72(r1)
+ std r22, -80(r1)
+ std r21, -88(r1)
+ std r20, -96(r1)
+
+ mr outer_rp, rp
+ mr outer_up, up
+
+ ld v0, 0(vp) C new v limb
+ addi vp, vp, 8
+ ld r26, 0(up)
+
+ rldicl. r0, un, 0,62 C r0 = n & 3, set cr0
+ cmpdi cr6, r0, 2
+ addi un, un, 4 C compute count...
+ srdi un, un, 2 C ...for ctr
+ mtctr un C copy inner loop count into ctr
+ beq cr0, L(b0)
+ blt cr6, L(b1)
+ beq cr6, L(b2)
+
+
+ ALIGN(16)
+L(b3):
+ ld r27, 8(up)
+ ld r20, 16(up)
+ mulld r0, r26, v0
+ mulhdu r31, r26, v0
+ mulld r24, r27, v0
+ mulhdu r8, r27, v0
+ mulld r9, r20, v0
+ mulhdu r10, r20, v0
+ addc r24, r24, r31
+ adde r9, r9, r8
+ addze r12, r10
+ std r0, 0(rp)
+ std r24, 8(rp)
+ std r9, 16(rp)
+ addi up, up, 16
+ addi rp, rp, 16
+ bdz L(end_m_3)
+
+ ALIGN(32)
+L(lo_m_3):
+ ld r26, 8(up)
+ ld r27, 16(up)
+ ld r20, 24(up)
+ ld r21, 32(up)
+ mulld r0, r26, v0
+ mulhdu r31, r26, v0
+ mulld r24, r27, v0
+ mulhdu r8, r27, v0
+ mulld r9, r20, v0
+ mulhdu r27, r20, v0
+ mulld r11, r21, v0
+ mulhdu r26, r21, v0
+ adde r0, r0, r12
+ adde r24, r24, r31
+ std r0, 8(rp)
+ adde r9, r9, r8
+ std r24, 16(rp)
+ adde r11, r11, r27
+ std r9, 24(rp)
+ addi up, up, 32
+ std r11, 32(rp)
+ addi rp, rp, 32
+ mr r12, r26
+ bdnz L(lo_m_3)
+
+ ALIGN(16)
+L(end_m_3):
+ addze r12, r12
+ addic. vn, vn, -1
+ std r12, 8(rp)
+ beq L(ret)
+
+ ALIGN(16)
+L(outer_lo_3):
+ mtctr un C copy inner loop count into ctr
+ addi rp, outer_rp, 24
+ addi up, outer_up, 16
+ addi outer_rp, outer_rp, 8
+ ld v0, 0(vp) C new v limb
+ addi vp, vp, 8
+ ld r26, -16(up)
+ ld r27, -8(up)
+ ld r20, 0(up)
+ mulld r0, r26, v0
+ mulhdu r31, r26, v0
+ mulld r24, r27, v0
+ mulhdu r8, r27, v0
+ mulld r9, r20, v0
+ mulhdu r10, r20, v0
+ ld r28, -16(rp)
+ ld r29, -8(rp)
+ ld r30, 0(rp)
+ addc r24, r24, r31
+ adde r9, r9, r8
+ addze r12, r10
+ addc r0, r0, r28
+ std r0, -16(rp)
+ adde r24, r24, r29
+ std r24, -8(rp)
+ adde r9, r9, r30
+ std r9, 0(rp)
+ bdz L(end_3)
+
+ ALIGN(32) C registers dying
+L(lo_3):
+ ld r26, 8(up)
+ ld r27, 16(up)
+ ld r20, 24(up) C
+ ld r21, 32(up) C
+ addi up, up, 32 C
+ addi rp, rp, 32 C
+ mulld r0, r26, v0 C
+ mulhdu r10, r26, v0 C 26
+ mulld r24, r27, v0 C
+ mulhdu r8, r27, v0 C 27
+ mulld r9, r20, v0 C
+ mulhdu r27, r20, v0 C 26
+ mulld r11, r21, v0 C
+ mulhdu r26, r21, v0 C 27
+ ld r28, -24(rp) C
+ adde r0, r0, r12 C 0 12
+ ld r29, -16(rp) C
+ adde r24, r24, r10 C 24 10
+ ld r30, -8(rp) C
+ ld r31, 0(rp) C
+ adde r9, r9, r8 C 8 9
+ adde r11, r11, r27 C 27 11
+ addze r12, r26 C 26
+ addc r0, r0, r28 C 0 28
+ std r0, -24(rp) C 0
+ adde r24, r24, r29 C 7 29
+ std r24, -16(rp) C 7
+ adde r9, r9, r30 C 9 30
+ std r9, -8(rp) C 9
+ adde r11, r11, r31 C 11 31
+ std r11, 0(rp) C 11
+ bdnz L(lo_3) C
+
+ ALIGN(16)
+L(end_3):
+ addze r12, r12
+ addic. vn, vn, -1
+ std r12, 8(rp)
+ bne L(outer_lo_3)
+ b L(ret)
+
+
+ ALIGN(16)
+L(b1):
+ mulld r0, r26, v0
+ mulhdu r12, r26, v0
+ addic r0, r0, 0
+ std r0, 0(rp)
+ bdz L(end_m_1)
+
+ ALIGN(16)
+L(lo_m_1):
+ ld r26, 8(up)
+ ld r27, 16(up)
+ ld r20, 24(up)
+ ld r21, 32(up)
+ mulld r0, r26, v0
+ mulhdu r31, r26, v0
+ mulld r24, r27, v0
+ mulhdu r8, r27, v0
+ mulld r9, r20, v0
+ mulhdu r27, r20, v0
+ mulld r11, r21, v0
+ mulhdu r26, r21, v0
+ adde r0, r0, r12
+ adde r24, r24, r31
+ std r0, 8(rp)
+ adde r9, r9, r8
+ std r24, 16(rp)
+ adde r11, r11, r27
+ std r9, 24(rp)
+ addi up, up, 32
+ std r11, 32(rp)
+ addi rp, rp, 32
+ mr r12, r26
+ bdnz L(lo_m_1)
+
+ ALIGN(16)
+L(end_m_1):
+ addze r12, r12
+ addic. vn, vn, -1
+ std r12, 8(rp)
+ beq L(ret)
+
+ ALIGN(16)
+L(outer_lo_1):
+ mtctr un C copy inner loop count into ctr
+ addi rp, outer_rp, 8
+ mr up, outer_up
+ addi outer_rp, outer_rp, 8
+ ld v0, 0(vp) C new v limb
+ addi vp, vp, 8
+ ld r26, 0(up)
+ ld r28, 0(rp)
+ mulld r0, r26, v0
+ mulhdu r12, r26, v0
+ addc r0, r0, r28
+ std r0, 0(rp)
+ bdz L(end_1)
+
+ ALIGN(32) C registers dying
+L(lo_1):
+ ld r26, 8(up)
+ ld r27, 16(up)
+ ld r20, 24(up) C
+ ld r21, 32(up) C
+ addi up, up, 32 C
+ addi rp, rp, 32 C
+ mulld r0, r26, v0 C
+ mulhdu r10, r26, v0 C 26
+ mulld r24, r27, v0 C
+ mulhdu r8, r27, v0 C 27
+ mulld r9, r20, v0 C
+ mulhdu r27, r20, v0 C 26
+ mulld r11, r21, v0 C
+ mulhdu r26, r21, v0 C 27
+ ld r28, -24(rp) C
+ adde r0, r0, r12 C 0 12
+ ld r29, -16(rp) C
+ adde r24, r24, r10 C 24 10
+ ld r30, -8(rp) C
+ ld r31, 0(rp) C
+ adde r9, r9, r8 C 8 9
+ adde r11, r11, r27 C 27 11
+ addze r12, r26 C 26
+ addc r0, r0, r28 C 0 28
+ std r0, -24(rp) C 0
+ adde r24, r24, r29 C 7 29
+ std r24, -16(rp) C 7
+ adde r9, r9, r30 C 9 30
+ std r9, -8(rp) C 9
+ adde r11, r11, r31 C 11 31
+ std r11, 0(rp) C 11
+ bdnz L(lo_1) C
+
+ ALIGN(16)
+L(end_1):
+ addze r12, r12
+ addic. vn, vn, -1
+ std r12, 8(rp)
+ bne L(outer_lo_1)
+ b L(ret)
+
+
+ ALIGN(16)
+L(b0):
+ addi up, up, -8
+ addi rp, rp, -8
+ li r12, 0
+ addic r12, r12, 0
+ bdz L(end_m_0)
+
+ ALIGN(16)
+L(lo_m_0):
+ ld r26, 8(up)
+ ld r27, 16(up)
+ ld r20, 24(up)
+ ld r21, 32(up)
+ mulld r0, r26, v0
+ mulhdu r31, r26, v0
+ mulld r24, r27, v0
+ mulhdu r8, r27, v0
+ mulld r9, r20, v0
+ mulhdu r27, r20, v0
+ mulld r11, r21, v0
+ mulhdu r26, r21, v0
+ adde r0, r0, r12
+ adde r24, r24, r31
+ std r0, 8(rp)
+ adde r9, r9, r8
+ std r24, 16(rp)
+ adde r11, r11, r27
+ std r9, 24(rp)
+ addi up, up, 32
+ std r11, 32(rp)
+ addi rp, rp, 32
+ mr r12, r26
+ bdnz L(lo_m_0)
+
+ ALIGN(16)
+L(end_m_0):
+ addze r12, r12
+ addic. vn, vn, -1
+ std r12, 8(rp)
+ beq L(ret)
+
+ ALIGN(16)
+L(outer_lo_0):
+ mtctr un C copy inner loop count into ctr
+ addi rp, outer_rp, 0
+ addi up, outer_up, -8
+ addi outer_rp, outer_rp, 8
+ ld v0, 0(vp) C new v limb
+ addi vp, vp, 8
+ li r12, 0
+ addic r12, r12, 0
+ bdz L(end_0)
+
+ ALIGN(32) C registers dying
+L(lo_0):
+ ld r26, 8(up)
+ ld r27, 16(up)
+ ld r20, 24(up) C
+ ld r21, 32(up) C
+ addi up, up, 32 C
+ addi rp, rp, 32 C
+ mulld r0, r26, v0 C
+ mulhdu r10, r26, v0 C 26
+ mulld r24, r27, v0 C
+ mulhdu r8, r27, v0 C 27
+ mulld r9, r20, v0 C
+ mulhdu r27, r20, v0 C 26
+ mulld r11, r21, v0 C
+ mulhdu r26, r21, v0 C 27
+ ld r28, -24(rp) C
+ adde r0, r0, r12 C 0 12
+ ld r29, -16(rp) C
+ adde r24, r24, r10 C 24 10
+ ld r30, -8(rp) C
+ ld r31, 0(rp) C
+ adde r9, r9, r8 C 8 9
+ adde r11, r11, r27 C 27 11
+ addze r12, r26 C 26
+ addc r0, r0, r28 C 0 28
+ std r0, -24(rp) C 0
+ adde r24, r24, r29 C 7 29
+ std r24, -16(rp) C 7
+ adde r9, r9, r30 C 9 30
+ std r9, -8(rp) C 9
+ adde r11, r11, r31 C 11 31
+ std r11, 0(rp) C 11
+ bdnz L(lo_0) C
+
+ ALIGN(16)
+L(end_0):
+ addze r12, r12
+ addic. vn, vn, -1
+ std r12, 8(rp)
+ bne L(outer_lo_0)
+ b L(ret)
+
+
+ ALIGN(16)
+L(b2): ld r27, 8(up)
+ addi up, up, 8
+ mulld r0, r26, v0
+ mulhdu r10, r26, v0
+ mulld r24, r27, v0
+ mulhdu r8, r27, v0
+ addc r24, r24, r10
+ addze r12, r8
+ std r0, 0(rp)
+ std r24, 8(rp)
+ addi rp, rp, 8
+ bdz L(end_m_2)
+
+ ALIGN(16)
+L(lo_m_2):
+ ld r26, 8(up)
+ ld r27, 16(up)
+ ld r20, 24(up)
+ ld r21, 32(up)
+ mulld r0, r26, v0
+ mulhdu r31, r26, v0
+ mulld r24, r27, v0
+ mulhdu r8, r27, v0
+ mulld r9, r20, v0
+ mulhdu r27, r20, v0
+ mulld r11, r21, v0
+ mulhdu r26, r21, v0
+ adde r0, r0, r12
+ adde r24, r24, r31
+ std r0, 8(rp)
+ adde r9, r9, r8
+ std r24, 16(rp)
+ adde r11, r11, r27
+ std r9, 24(rp)
+ addi up, up, 32
+ std r11, 32(rp)
+ addi rp, rp, 32
+ mr r12, r26
+ bdnz L(lo_m_2)
+
+ ALIGN(16)
+L(end_m_2):
+ addze r12, r12
+ addic. vn, vn, -1
+ std r12, 8(rp)
+ beq L(ret)
+
+ ALIGN(16)
+L(outer_lo_2):
+ mtctr un C copy inner loop count into ctr
+ addi rp, outer_rp, 16
+ addi up, outer_up, 8
+ addi outer_rp, outer_rp, 8
+ ld v0, 0(vp) C new v limb
+ addi vp, vp, 8
+ ld r26, -8(up)
+ ld r27, 0(up)
+ ld r28, -8(rp)
+ ld r29, 0(rp)
+ mulld r0, r26, v0
+ mulhdu r10, r26, v0
+ mulld r24, r27, v0
+ mulhdu r8, r27, v0
+ addc r24, r24, r10
+ addze r12, r8
+ addc r0, r0, r28
+ std r0, -8(rp)
+ adde r24, r24, r29
+ std r24, 0(rp)
+ bdz L(end_2)
+
+ ALIGN(16) C registers dying
+L(lo_2):
+ ld r26, 8(up)
+ ld r27, 16(up)
+ ld r20, 24(up) C
+ ld r21, 32(up) C
+ addi up, up, 32 C
+ addi rp, rp, 32 C
+ mulld r0, r26, v0 C
+ mulhdu r10, r26, v0 C 26
+ mulld r24, r27, v0 C
+ mulhdu r8, r27, v0 C 27
+ mulld r9, r20, v0 C
+ mulhdu r27, r20, v0 C 26
+ mulld r11, r21, v0 C
+ mulhdu r26, r21, v0 C 27
+ ld r28, -24(rp) C
+ adde r0, r0, r12 C 0 12
+ ld r29, -16(rp) C
+ adde r24, r24, r10 C 24 10
+ ld r30, -8(rp) C
+ ld r31, 0(rp) C
+ adde r9, r9, r8 C 8 9
+ adde r11, r11, r27 C 27 11
+ addze r12, r26 C 26
+ addc r0, r0, r28 C 0 28
+ std r0, -24(rp) C 0
+ adde r24, r24, r29 C 7 29
+ std r24, -16(rp) C 7
+ adde r9, r9, r30 C 9 30
+ std r9, -8(rp) C 9
+ adde r11, r11, r31 C 11 31
+ std r11, 0(rp) C 11
+ bdnz L(lo_2) C
+
+ ALIGN(16)
+L(end_2):
+ addze r12, r12
+ addic. vn, vn, -1
+ std r12, 8(rp)
+ bne L(outer_lo_2)
+C b L(ret)
+
+L(ret): ld r31, -8(r1)
+ ld r30, -16(r1)
+ ld r29, -24(r1)
+ ld r28, -32(r1)
+ ld r27, -40(r1)
+ ld r26, -48(r1)
+ ld r25, -56(r1)
+ ld r24, -64(r1)
+ ld r23, -72(r1)
+ ld r22, -80(r1)
+ ld r21, -88(r1)
+ ld r20, -96(r1)
+ blr
+EPILOGUE()
-/* gmp-mparam.h -- Compiler/machine parameter header file.
+/* POWER7 gmp-mparam.h -- Compiler/machine parameter header file.
-Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2003, 2009, 2010, 2012 Free
-Software Foundation, Inc.
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2003, 2009, 2010, 2011
+Free Software Foundation, Inc.
This file is part of the GNU MP Library.
#define MOD_1_NORM_THRESHOLD 0 /* always */
#define MOD_1_UNNORM_THRESHOLD 0 /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD 12
-#define MOD_1U_TO_MOD_1_1_THRESHOLD 6
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD 7
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD 34
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 16
-#define USE_PREINV_DIVREM_1 1
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 8
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 5
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 12
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD 33
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 17
+#define USE_PREINV_DIVREM_1 0
+#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
#define DIVEXACT_1_THRESHOLD 0 /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD 28
+#define BMOD_1_TO_MOD_1_THRESHOLD 38
#define MUL_TOOM22_THRESHOLD 22
#define MUL_TOOM33_THRESHOLD 73
-#define MUL_TOOM44_THRESHOLD 202
-#define MUL_TOOM6H_THRESHOLD 393
-#define MUL_TOOM8H_THRESHOLD 592
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD 137
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD 149
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD 137
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD 149
-
-#define SQR_BASECASE_THRESHOLD 18
-#define SQR_TOOM2_THRESHOLD 64
-#define SQR_TOOM3_THRESHOLD 89
-#define SQR_TOOM4_THRESHOLD 184
-#define SQR_TOOM6_THRESHOLD 294
-#define SQR_TOOM8_THRESHOLD 430
-
-#define MULMOD_BNM1_THRESHOLD 17
-#define SQRMOD_BNM1_THRESHOLD 13
-
-#define MUL_FFT_MODF_THRESHOLD 408 /* k = 5 */
+#define MUL_TOOM44_THRESHOLD 154
+#define MUL_TOOM6H_THRESHOLD 270
+#define MUL_TOOM8H_THRESHOLD 369
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD 81
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 122
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 105
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD 105
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD 112
+
+#define SQR_BASECASE_THRESHOLD 0 /* always (native) */
+#define SQR_TOOM2_THRESHOLD 30
+#define SQR_TOOM3_THRESHOLD 109
+#define SQR_TOOM4_THRESHOLD 178
+#define SQR_TOOM6_THRESHOLD 303
+#define SQR_TOOM8_THRESHOLD 357
+
+#define MULMID_TOOM42_THRESHOLD 62
+
+#define MULMOD_BNM1_THRESHOLD 16
+#define SQRMOD_BNM1_THRESHOLD 18
+
+#define MUL_FFT_MODF_THRESHOLD 444 /* k = 5 */
#define MUL_FFT_TABLE3 \
- { { 408, 5}, { 21, 6}, { 11, 5}, { 23, 6}, \
- { 12, 5}, { 25, 6}, { 21, 7}, { 11, 6}, \
- { 23, 7}, { 12, 6}, { 25, 7}, { 13, 6}, \
- { 27, 7}, { 25, 8}, { 13, 7}, { 28, 8}, \
- { 15, 7}, { 31, 8}, { 17, 7}, { 35, 8}, \
- { 19, 7}, { 39, 8}, { 21, 9}, { 11, 8}, \
- { 27, 9}, { 15, 8}, { 35, 9}, { 19, 8}, \
- { 43, 9}, { 23, 8}, { 49, 9}, { 27,10}, \
- { 15, 9}, { 31, 8}, { 63, 9}, { 43,10}, \
- { 23, 9}, { 51,11}, { 15,10}, { 31, 9}, \
- { 67,10}, { 39, 9}, { 79,10}, { 55,11}, \
+ { { 436, 5}, { 19, 6}, { 10, 5}, { 21, 6}, \
+ { 21, 7}, { 11, 6}, { 23, 7}, { 12, 6}, \
+ { 25, 7}, { 13, 6}, { 27, 7}, { 15, 6}, \
+ { 31, 7}, { 21, 8}, { 11, 7}, { 25, 8}, \
+ { 13, 7}, { 28, 8}, { 15, 7}, { 32, 8}, \
+ { 17, 7}, { 35, 8}, { 19, 7}, { 39, 8}, \
+ { 21, 9}, { 11, 8}, { 29, 9}, { 15, 8}, \
+ { 35, 9}, { 19, 8}, { 41, 9}, { 23, 8}, \
+ { 47, 9}, { 27,10}, { 15, 9}, { 31, 8}, \
+ { 63, 9}, { 43,10}, { 23, 9}, { 51,11}, \
+ { 15,10}, { 31, 9}, { 67,10}, { 39, 9}, \
+ { 79,10}, { 47, 9}, { 95,10}, { 55,11}, \
{ 31,10}, { 79,11}, { 47,10}, { 95,12}, \
{ 31,11}, { 63,10}, { 135,11}, { 79,10}, \
- { 167,11}, { 95,10}, { 191,11}, { 111,12}, \
- { 63,11}, { 127,10}, { 255, 9}, { 511,11}, \
- { 143, 7}, { 2303,10}, { 303,11}, { 159,10}, \
- { 319, 9}, { 639,12}, { 95,11}, { 191,10}, \
- { 383,13}, { 63,12}, { 127,11}, { 255,10}, \
- { 511,11}, { 271,10}, { 543,11}, { 287,10}, \
- { 575,12}, { 159,11}, { 319,10}, { 639,11}, \
- { 335,10}, { 671, 9}, { 1343,11}, { 351,10}, \
- { 703,12}, { 191,11}, { 383,10}, { 799,11}, \
- { 415,10}, { 831,12}, { 223,11}, { 447,13}, \
- { 127,12}, { 255,11}, { 511,10}, { 1023,11}, \
- { 543,10}, { 1087,12}, { 287,11}, { 575,10}, \
- { 1151,11}, { 607,10}, { 1215,12}, { 319,11}, \
- { 639,10}, { 1279,11}, { 671,10}, { 1343,12}, \
- { 351,11}, { 703,13}, { 191,12}, { 383,11}, \
- { 799,10}, { 1599,12}, { 415,11}, { 831,10}, \
- { 1663,12}, { 447,11}, { 895,14}, { 127,13}, \
- { 255,12}, { 543,13}, { 319,12}, { 671,11}, \
- { 1343,12}, { 703,11}, { 1407,12}, { 735,13}, \
- { 447,12}, { 959,11}, { 1919,14}, { 255,12}, \
- { 1087,13}, { 575,12}, { 1215,13}, { 639,12}, \
- { 1343,11}, { 2687,12}, { 1471,14}, { 383,13}, \
- { 767,12}, { 1599,13}, { 831,10}, { 6655,12}, \
- { 1727,13}, { 959,12}, { 1919,11}, { 3839,14}, \
- { 511,11}, { 4095,13}, { 1087,12}, { 2303,13}, \
- { 1215,12}, { 2431,14}, { 639,13}, { 1343,12}, \
- { 2687,13}, { 1471,12}, { 2943,14}, { 767,13}, \
- { 1599,12}, { 3199,13}, { 1663,14}, { 895,13}, \
- { 1919,12}, { 3839,15}, { 511,14}, { 1023,13}, \
- { 2175,14}, { 1151,13}, { 2431,12}, { 4863,14}, \
- { 1407,13}, { 2943,15}, { 767,14}, { 1663,13}, \
- { 3327,12}, { 6655,14}, { 1919,13}, { 3839,16}, \
- { 511,15}, { 1023,14}, { 2175,13}, { 4351,14}, \
- { 2303,12}, { 9215,13}, { 4863,15}, { 1279,13}, \
- { 5119,14}, { 2815,13}, { 5887,15}, { 1535,14}, \
+ { 159,11}, { 95,10}, { 191,11}, { 111,12}, \
+ { 63,11}, { 127,10}, { 255,11}, { 143,10}, \
+ { 287, 9}, { 575,10}, { 303,11}, { 159,12}, \
+ { 95,11}, { 191,10}, { 383,13}, { 63,12}, \
+ { 127,11}, { 255,10}, { 511,11}, { 271,10}, \
+ { 543, 9}, { 1087,11}, { 287,10}, { 575,11}, \
+ { 303,12}, { 159,11}, { 319,10}, { 639,11}, \
+ { 335,10}, { 671,11}, { 351,10}, { 703,12}, \
+ { 191,11}, { 383,10}, { 767,11}, { 415,10}, \
+ { 831,12}, { 223,11}, { 447,13}, { 8192,14}, \
{ 16384,15}, { 32768,16}, { 65536,17}, { 131072,18}, \
{ 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \
{4194304,23}, {8388608,24} }
-#define MUL_FFT_TABLE3_SIZE 202
-#define MUL_FFT_THRESHOLD 3712
+#define MUL_FFT_TABLE3_SIZE 106
+#define MUL_FFT_THRESHOLD 5248
-#define SQR_FFT_MODF_THRESHOLD 332 /* k = 5 */
+#define SQR_FFT_MODF_THRESHOLD 380 /* k = 5 */
#define SQR_FFT_TABLE3 \
- { { 332, 5}, { 19, 6}, { 10, 5}, { 21, 6}, \
+ { { 308, 5}, { 17, 6}, { 9, 5}, { 19, 6}, \
{ 21, 7}, { 11, 6}, { 23, 7}, { 21, 8}, \
{ 11, 7}, { 24, 8}, { 13, 7}, { 27, 8}, \
{ 15, 7}, { 31, 8}, { 21, 9}, { 11, 8}, \
{ 27, 9}, { 15, 8}, { 33, 9}, { 19, 8}, \
- { 39, 9}, { 23, 8}, { 47, 9}, { 27,10}, \
+ { 41, 9}, { 23, 8}, { 47, 9}, { 27,10}, \
{ 15, 9}, { 39,10}, { 23, 9}, { 47,11}, \
{ 15,10}, { 31, 9}, { 67,10}, { 39, 9}, \
- { 79,10}, { 55,11}, { 31,10}, { 79,11}, \
+ { 79,10}, { 47,11}, { 31,10}, { 79,11}, \
{ 47,10}, { 95,12}, { 31,11}, { 63,10}, \
- { 127, 9}, { 255,11}, { 79,10}, { 159, 9}, \
- { 319,11}, { 95,10}, { 191, 9}, { 383,12}, \
- { 63,11}, { 127,10}, { 255, 9}, { 511,10}, \
- { 271, 9}, { 543,11}, { 143,10}, { 287, 9}, \
- { 575,10}, { 303, 9}, { 607,10}, { 319, 9}, \
- { 639,12}, { 95,11}, { 191,10}, { 383,13}, \
- { 63,12}, { 127,11}, { 255,10}, { 511,11}, \
- { 271,10}, { 543, 9}, { 1087,11}, { 287,10}, \
- { 575,11}, { 303,10}, { 607, 9}, { 1215,11}, \
- { 319,10}, { 639,11}, { 335,10}, { 671,11}, \
- { 351,10}, { 703,12}, { 191,11}, { 383,10}, \
- { 767,11}, { 415,10}, { 831,12}, { 223,11}, \
- { 447,10}, { 895,11}, { 479,10}, { 959,12}, \
- { 255,11}, { 511,10}, { 1023,11}, { 543,10}, \
- { 1087,12}, { 287,11}, { 575,10}, { 1151,11}, \
- { 607,10}, { 1215,12}, { 319,11}, { 639,10}, \
- { 1279,11}, { 671,12}, { 351,11}, { 703,13}, \
- { 191,12}, { 383,11}, { 767,10}, { 1535,11}, \
- { 831,10}, { 1663,12}, { 447,11}, { 895,12}, \
- { 479,11}, { 959,14}, { 127,13}, { 255,12}, \
- { 511,11}, { 1023,12}, { 543,11}, { 1087,12}, \
- { 575,11}, { 1151,12}, { 607,11}, { 1215,13}, \
- { 319,12}, { 639,11}, { 1279,12}, { 671,11}, \
- { 1343,12}, { 703,11}, { 1407,13}, { 383,12}, \
- { 767,11}, { 1535,12}, { 831,11}, { 1663,13}, \
- { 447,12}, { 959,11}, { 1919,14}, { 255,13}, \
- { 511,12}, { 1087,13}, { 639,12}, { 1343,13}, \
- { 703,12}, { 1407,14}, { 383,13}, { 767,12}, \
- { 1535,13}, { 831,12}, { 1663,13}, { 959,12}, \
- { 1919,15}, { 255,13}, { 1151,12}, { 2303,13}, \
- { 1215,12}, { 2431,14}, { 639,13}, { 1343,12}, \
- { 2687,13}, { 1407,12}, { 2815,13}, { 1471,11}, \
- { 5887,13}, { 1663,14}, { 895,13}, { 1919,15}, \
- { 511,14}, { 1023,13}, { 2175,14}, { 1151,13}, \
- { 2431,12}, { 4863,13}, { 2687,14}, { 1407,13}, \
- { 2815,15}, { 767,14}, { 1663,13}, { 3455,14}, \
- { 1791,13}, { 3583,14}, { 1919,13}, { 3839,16}, \
- { 511,15}, { 1023,14}, { 2175,13}, { 4351,15}, \
- { 1279,14}, { 2943,13}, { 5887,15}, { 1535,14}, \
- { 16384,15}, { 32768,16}, { 65536,17}, { 131072,18}, \
- { 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \
- {4194304,23}, {8388608,24} }
-#define SQR_FFT_TABLE3_SIZE 206
-#define SQR_FFT_THRESHOLD 2752
+ { 127, 9}, { 255,10}, { 135,11}, { 79,10}, \
+ { 159, 9}, { 319,11}, { 95,10}, { 191, 9}, \
+ { 383,12}, { 63,11}, { 127,10}, { 255, 9}, \
+ { 511,10}, { 271, 9}, { 543,11}, { 143,10}, \
+ { 287, 9}, { 575,11}, { 159,10}, { 319, 9}, \
+ { 639,11}, { 175,12}, { 95,11}, { 191,10}, \
+ { 383, 9}, { 767,11}, { 207,13}, { 63,12}, \
+ { 127,11}, { 255,10}, { 511,11}, { 271,10}, \
+ { 543,11}, { 287,10}, { 575,11}, { 303,12}, \
+ { 159,11}, { 319,10}, { 639, 9}, { 1279,10}, \
+ { 671,11}, { 351,10}, { 703,12}, { 191,11}, \
+ { 383,10}, { 767,11}, { 415,10}, { 831,12}, \
+ { 223,11}, { 447,10}, { 895,11}, { 479,13}, \
+ { 8192,14}, { 16384,15}, { 32768,16}, { 65536,17}, \
+ { 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \
+ {2097152,22}, {4194304,23}, {8388608,24} }
+#define SQR_FFT_TABLE3_SIZE 103
+#define SQR_FFT_THRESHOLD 3712
#define MULLO_BASECASE_THRESHOLD 5
-#define MULLO_DC_THRESHOLD 23
-#define MULLO_MUL_N_THRESHOLD 7246
+#define MULLO_DC_THRESHOLD 33
+#define MULLO_MUL_N_THRESHOLD 10323
+
+#define DC_DIV_QR_THRESHOLD 57
+#define DC_DIVAPPR_Q_THRESHOLD 185
+#define DC_BDIV_QR_THRESHOLD 63
+#define DC_BDIV_Q_THRESHOLD 158
-#define DC_DIV_QR_THRESHOLD 16
-#define DC_DIVAPPR_Q_THRESHOLD 64
-#define DC_BDIV_QR_THRESHOLD 62
-#define DC_BDIV_Q_THRESHOLD 156
+#define INV_MULMOD_BNM1_THRESHOLD 58
+#define INV_NEWTON_THRESHOLD 212
+#define INV_APPR_THRESHOLD 187
-#define INV_MULMOD_BNM1_THRESHOLD 62
-#define INV_NEWTON_THRESHOLD 93
-#define INV_APPR_THRESHOLD 66
+#define BINV_NEWTON_THRESHOLD 276
+#define REDC_1_TO_REDC_N_THRESHOLD 63
-#define BINV_NEWTON_THRESHOLD 294
-#define REDC_1_TO_REDC_N_THRESHOLD 74
+#define MU_DIV_QR_THRESHOLD 1442
+#define MU_DIVAPPR_Q_THRESHOLD 1442
+#define MUPI_DIV_QR_THRESHOLD 91
+#define MU_BDIV_QR_THRESHOLD 1142
+#define MU_BDIV_Q_THRESHOLD 1442
-#define MU_DIV_QR_THRESHOLD 1387
-#define MU_DIVAPPR_Q_THRESHOLD 1414
-#define MUPI_DIV_QR_THRESHOLD 31
-#define MU_BDIV_QR_THRESHOLD 1210
-#define MU_BDIV_Q_THRESHOLD 1558
+#define POWM_SEC_TABLE 3,38,270,1487
#define MATRIX22_STRASSEN_THRESHOLD 14
-#define HGCD_THRESHOLD 108
-#define GCD_DC_THRESHOLD 333
-#define GCDEXT_DC_THRESHOLD 333
-#define JACOBI_BASE_METHOD 1
-
-#define GET_STR_DC_THRESHOLD 10
-#define GET_STR_PRECOMPUTE_THRESHOLD 22
-#define SET_STR_DC_THRESHOLD 1532
-#define SET_STR_PRECOMPUTE_THRESHOLD 3850
+#define HGCD_THRESHOLD 138
+#define HGCD_APPR_THRESHOLD 157
+#define HGCD_REDUCE_THRESHOLD 2578
+#define GCD_DC_THRESHOLD 573
+#define GCDEXT_DC_THRESHOLD 440
+#define JACOBI_BASE_METHOD 4
+
+#define GET_STR_DC_THRESHOLD 14
+#define GET_STR_PRECOMPUTE_THRESHOLD 32
+#define SET_STR_DC_THRESHOLD 1517
+#define SET_STR_PRECOMPUTE_THRESHOLD 3007
+
+#define FAC_DSC_THRESHOLD 680
+#define FAC_ODD_THRESHOLD 24
dnl PowerPC-64 mpn_rsh1add_n -- rp[] = (up[] + vp[]) >> 1
-dnl Copyright 2003, 2005 Free Software Foundation, Inc.
+dnl Copyright 2003, 2005, 2010 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
include(`../config.m4')
-C cycles/limb
-C POWER3/PPC630: 2 (1.5 c/l should be possible)
-C POWER4/PPC970: 4 (2.0 c/l should be possible)
-
-C INPUT PARAMETERS
-C rp r3
-C up r4
-C vp r5
-C n r6
+C cycles/limb
+C POWER3/PPC630 2 (1.5 c/l should be possible)
+C POWER4/PPC970 4 (2.0 c/l should be possible)
+C POWER5 3.5 (2.0 c/l should be possible)
+C POWER6 4.5
+C POWER7 3.5
define(`rp',`r3')
define(`up',`r4')
bdz L(end)
-L(oop): ldu u1, 16(up)
+ ALIGN(32)
+L(top): ldu u1, 16(up)
ldu v1, 16(vp)
adde x, v0, u0
srdi s0, x, 1
rldimi s1, x, 63, 0
std s1, 8(rp)
- bdz L(exit)
+ bdz L(exi)
ld u0, 8(up)
ld v0, 8(vp)
rldimi s0, x, 63, 0
stdu s0, 16(rp)
- bdnz L(oop)
+ bdnz L(top)
L(end): adde x, v0, u0
srdi s0, x, 1
mr r3, r12
blr
-L(exit): adde x, v1, u1
+L(exi): adde x, v1, u1
srdi s1, x, 1
rldimi s0, x, 63, 0
stdu s0, 16(rp)
dnl PowerPC-64 mpn_rsh1sub_n -- rp[] = (up[] - vp[]) >> 1
-dnl Copyright 2003, 2005 Free Software Foundation, Inc.
+dnl Copyright 2003, 2005, 2010 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
include(`../config.m4')
-C cycles/limb
-C POWER3/PPC630: 2 (1.5 c/l should be possible)
-C POWER4/PPC970: 4 (2.0 c/l should be possible)
-
-C INPUT PARAMETERS
-C rp r3
-C up r4
-C vp r5
-C n r6
+C cycles/limb
+C POWER3/PPC630 2 (1.5 c/l should be possible)
+C POWER4/PPC970 4 (2.0 c/l should be possible)
+C POWER5 3.5 (2.0 c/l should be possible)
+C POWER6 4.5
+C POWER7 3.5
define(`rp',`r3')
define(`up',`r4')
bdz L(end)
-L(oop): ldu u1, 16(up)
+ ALIGN(32)
+L(top): ldu u1, 16(up)
ldu v1, 16(vp)
subfe x, v0, u0
srdi s0, x, 1
rldimi s1, x, 63, 0
std s1, 8(rp)
- bdz L(exit)
+ bdz L(exi)
ld u0, 8(up)
ld v0, 8(vp)
rldimi s0, x, 63, 0
stdu s0, 16(rp)
- bdnz L(oop)
+ bdnz L(top)
L(end): subfe x, v0, u0
srdi s0, x, 1
mr r3, r12
blr
-L(exit): subfe x, v1, u1
+L(exi): subfe x, v1, u1
srdi s1, x, 1
rldimi s0, x, 63, 0
stdu s0, 16(rp)
--- /dev/null
+dnl PowerPC-64 mpn_sqr_basecase.
+
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
+dnl Copyright 1999, 2000, 2001, 2003, 2004, 2005, 2006, 2008, 2010, 2011 Free
+dnl Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C POWER3/PPC630 6-18
+C POWER4/PPC970 8
+C POWER5 8
+C POWER6 16.25
+C POWER7 3.77
+
+C NOTES
+C * This is very crude; clean it up!
+C * Try to reduce the number of needed live registers.
+C * Rewrite for POWER6 to use 8 consecutive muls, not 2 groups of 4. The
+C cost will be more live registers.
+C * Rewrite for POWER7 to use addmul_2 building blocks; this will reduce code
+C size a lot and speed things up perhaps 25%.
+C * Use computed goto in order to compress the code.
+C * Implement a larger final corner.
+C * Schedule callee-saves register saves into other insns. This could save
+C about 5 cycles/call. (We cannot analogously optimise the restores, since
+C the sqr_diag_addlsh1 loop has no wind-down code as currently written.)
+C * Should the alternating std/adde sequences be split? Some pipelines handle
+C adde poorly, and might sequentialise all these instructions.
+C * The sqr_diag_addlsh1 loop was written for POWER6 and its preferences for
+C adjacent integer multiply insns. Except for the multiply insns, the code
+C was not carefully optimised for POWER6 or any other CPU.
+C * Perform cross-jumping in sqr_diag_addlsh1's feed-in code, into the loop.
+
+C INPUT PARAMETERS
+define(`rp', `r3')
+define(`up', `r4')
+define(`n', `r5')
+
+define(`rp_outer', `r25')
+define(`up_outer', `r21')
+define(`rp_saved', `r22')
+define(`up_saved', `r23')
+define(`n_saved', `r24')
+
+ASM_START()
+C Entry point.  Per the defines above, rp is r3, up is r4 and n is r5; limbs
+C are loaded from up and results are stored through rp.
+C Dispatch on n: n < 2 falls through to the one-limb code, n = 2 is handled at
+C L(ge2), and anything larger goes to L(gt2).
+PROLOGUE(mpn_sqr_basecase)
+ cmpdi cr0, n, 2
+ bge cr0, L(ge2)
+C One limb: store the double-limb square of up[0] as rp[0] (low half) and
+C rp[1] (high half).  r5 is reused for the limb, so n is dead from here on.
+ ld r5, 0(up) C n = 1
+ nop
+ mulld r8, r5, r5 C weight 0
+ mulhdu r9, r5, r5 C weight 1
+ std r8, 0(rp)
+ std r9, 8(rp)
+ blr
+ ALIGN(16)
+C Two limbs (cr0 still holds the compare against 2).  With B = 2^64 the result
+C rp[0..3] is u0^2 + 2*u0*u1*B + u1^2*B^2.
+C r9:r8 = u0^2, r11:r10 = u1^2, r5:r4 = u1*u0 (r4 and r5 are reused once both
+C limbs have been loaded).
+L(ge2): bgt cr0, L(gt2)
+ ld r0, 0(up) C n = 2
+ nop
+ mulld r8, r0, r0 C u0 * u0
+ mulhdu r9, r0, r0 C u0 * u0
+ ld r6, 8(up)
+ mulld r10, r6, r6 C u1 * u1
+ mulhdu r11, r6, r6 C u1 * u1
+ mulld r4, r6, r0 C u1 * u0
+ mulhdu r5, r6, r0 C u1 * u0
+C Double the cross product r5:r4; the bit shifted out is added to the top limb.
+ addc r4, r4, r4
+ adde r5, r5, r5
+ addze r11, r11
+C Add the doubled cross product at weights 1 and 2 and propagate the carry
+C into the top limb.
+ addc r9, r9, r4
+ adde r10, r10, r5
+ addze r11, r11
+ std r8, 0(rp)
+ std r9, 8(rp)
+ std r10, 16(rp)
+ std r11, 24(rp)
+ blr
+
+ ALIGN(16)
+C n > 2.  Store r21-r31 at negative offsets from r1 (r1 itself is not adjusted
+C in this block) so that they can be used as scratch registers.
+L(gt2): std r31, -8(r1)
+ std r30, -16(r1)
+ std r29, -24(r1)
+ std r28, -32(r1)
+ std r27, -40(r1)
+ std r26, -48(r1)
+ std r25, -56(r1)
+ std r24, -64(r1)
+ std r23, -72(r1)
+ std r22, -80(r1)
+ std r21, -88(r1)
+
+C Keep copies of the incoming arguments (rp_saved, up_saved, n_saved) and
+C initialise the per-row pointers rp_outer and up_outer.
+ mr rp_saved, rp
+ mr up_saved, up
+ mr n_saved, n
+ mr rp_outer, rp
+ mr up_outer, up
+
+C Select the feed-in code from n mod 4 and load ctr with (n + 2) / 4.
+C addic, unlike addi, also writes the carry bit CA.  NOTE(review): the adde
+C chains in the feed-in code that follows are entered without another
+C CA-setting insn on some paths, so this presumably serves to clear CA (n + 2
+C does not carry out for any sane limb count) -- confirm.
+ rldicl. r0, n, 0,62 C r0 = n & 3, set cr0
+ cmpdi cr6, r0, 2
+ addic r7, n, 2 C compute count...
+ srdi r7, r7, 2 C ...for ctr
+ mtctr r7 C copy count into ctr
+C n mod 4 = 0 -> L(b0), 1 -> L(b1), 2 -> L(b2); 3 falls through.
+ beq- cr0, L(b0)
+ blt- cr6, L(b1)
+ beq- cr6, L(b2)
+
+C First row, n mod 4 = 3 (reached by falling through the dispatch branches).
+C r6 = up[0] is the multiplier for the whole row, r9 and r27 hold the next two
+C limbs, r12 is the running carry limb.  No product is formed before the loop.
+L(b3): ld r6, 0(up)
+ ld r9, 8(up)
+ ld r27, 16(up)
+ addi up, up, 24
+ li r12, 0 C carry limb
+ bdz L(em3)
+
+ ALIGN(16)
+C Four limbs per iteration.  Each limb is multiplied by r6; the low half of a
+C product is added (adde) to the high half of the product before it and the
+C sums are stored at 8..32(rp).  r12 leaves the iteration holding the last
+C high half, and CA holds the last carry bit for the next adde.  r9 and r27
+C are reloaded ahead of use with the limbs for the next pair of products.
+L(tm3): mulld r0, r9, r6
+ mulhdu r26, r9, r6
+ mulld r7, r27, r6
+ mulhdu r8, r27, r6
+ ld r9, 0(up)
+ ld r27, 8(up)
+ adde r0, r0, r12
+ adde r7, r7, r26
+ mulld r26, r9, r6
+ mulhdu r10, r9, r6
+ mulld r11, r27, r6
+ mulhdu r12, r27, r6
+ ld r9, 16(up)
+ ld r27, 24(up)
+ std r0, 8(rp)
+ adde r26, r26, r8
+ std r7, 16(rp)
+ adde r11, r11, r10
+ std r26, 24(rp)
+ addi up, up, 32
+ std r11, 32(rp)
+ addi rp, rp, 32
+ bdnz L(tm3)
+
+C Wind-down: the last two products of the row.  The final high half plus the
+C pending carry bit is stored at 24(rp).
+L(em3): mulld r0, r9, r6
+ mulhdu r26, r9, r6
+ mulld r7, r27, r6
+ mulhdu r8, r27, r6
+ adde r0, r0, r12
+ adde r7, r7, r26
+ std r0, 8(rp)
+ std r7, 16(rp)
+ addze r8, r8
+ std r8, 24(rp)
+C n is bumped by 2 before entering the accumulating rows; each row section
+C begins by decrementing it by 1.
+ addi n, n, 2
+ b L(outer_loop)
+
+C First row, n mod 4 = 0.  r6 = up[0] is the multiplier for the whole row.
+C Feed-in: one product, up[1] * up[0]; its low half is stored at 8(rp) and its
+C high half becomes the carry limb r12.  rp is then advanced by one limb so
+C that the shared loop layout (stores at 8..32(rp)) lines up.
+L(b0): ld r6, 0(up)
+ ld r27, 8(up)
+ mulld r7, r27, r6
+ mulhdu r12, r27, r6
+ std r7, 8(rp)
+ addi rp, rp, 8
+ ld r9, 16(up)
+ ld r27, 24(up)
+ addi up, up, 32
+ bdz L(em0)
+
+ ALIGN(16)
+C Four limbs per iteration.  Each limb is multiplied by r6; the low half of a
+C product is added (adde) to the high half of the product before it and the
+C sums are stored at 8..32(rp).  r12 leaves the iteration holding the last
+C high half, and CA holds the last carry bit for the next adde.  r9 and r27
+C are reloaded ahead of use with the limbs for the next pair of products.
+L(tm0): mulld r0, r9, r6
+ mulhdu r26, r9, r6
+ mulld r7, r27, r6
+ mulhdu r8, r27, r6
+ ld r9, 0(up)
+ ld r27, 8(up)
+ adde r0, r0, r12
+ adde r7, r7, r26
+ mulld r26, r9, r6
+ mulhdu r10, r9, r6
+ mulld r11, r27, r6
+ mulhdu r12, r27, r6
+ ld r9, 16(up)
+ ld r27, 24(up)
+ std r0, 8(rp)
+ adde r26, r26, r8
+ std r7, 16(rp)
+ adde r11, r11, r10
+ std r26, 24(rp)
+ addi up, up, 32
+ std r11, 32(rp)
+ addi rp, rp, 32
+ bdnz L(tm0)
+
+C Wind-down: the last two products of the row.  The final high half plus the
+C pending carry bit is stored at 24(rp).
+L(em0): mulld r0, r9, r6
+ mulhdu r26, r9, r6
+ mulld r7, r27, r6
+ mulhdu r8, r27, r6
+ adde r0, r0, r12
+ adde r7, r7, r26
+ std r0, 8(rp)
+ std r7, 16(rp)
+ addze r8, r8
+ std r8, 24(rp)
+C n is bumped by 2 before entering the accumulating rows; each row section
+C begins by decrementing it by 1.
+ addi n, n, 2
+ b L(outer_loop_ent_2)
+
+C First row, n mod 4 = 1.  r6 = up[0] is the multiplier for the whole row.
+C Feed-in: two products, up[1] * up[0] and up[2] * up[0].  The low half of the
+C first is stored at 8(rp); the low half of the second plus the high half of
+C the first (addc) is stored at 16(rp).  r12 = high half of the second
+C product, and the carry bit from the addc is left in CA for the next adde.
+C rp is advanced by two limbs to match the shared loop layout.
+L(b1): ld r6, 0(up)
+ ld r9, 8(up)
+ ld r27, 16(up)
+ mulld r0, r9, r6
+ mulhdu r26, r9, r6
+ mulld r7, r27, r6
+ mulhdu r12, r27, r6
+ addc r7, r7, r26
+ std r0, 8(rp)
+ std r7, 16(rp)
+ addi rp, rp, 16
+ ld r9, 24(up)
+ ld r27, 32(up)
+ addi up, up, 40
+ bdz L(em1)
+
+ ALIGN(16)
+C Four limbs per iteration.  Each limb is multiplied by r6; the low half of a
+C product is added (adde) to the high half of the product before it and the
+C sums are stored at 8..32(rp).  r12 leaves the iteration holding the last
+C high half, and CA holds the last carry bit for the next adde.  r9 and r27
+C are reloaded ahead of use with the limbs for the next pair of products.
+L(tm1): mulld r0, r9, r6
+ mulhdu r26, r9, r6
+ mulld r7, r27, r6
+ mulhdu r8, r27, r6
+ ld r9, 0(up)
+ ld r27, 8(up)
+ adde r0, r0, r12
+ adde r7, r7, r26
+ mulld r26, r9, r6
+ mulhdu r10, r9, r6
+ mulld r11, r27, r6
+ mulhdu r12, r27, r6
+ ld r9, 16(up)
+ ld r27, 24(up)
+ std r0, 8(rp)
+ adde r26, r26, r8
+ std r7, 16(rp)
+ adde r11, r11, r10
+ std r26, 24(rp)
+ addi up, up, 32
+ std r11, 32(rp)
+ addi rp, rp, 32
+ bdnz L(tm1)
+
+C Wind-down: the last two products of the row.  The final high half plus the
+C pending carry bit is stored at 24(rp).
+L(em1): mulld r0, r9, r6
+ mulhdu r26, r9, r6
+ mulld r7, r27, r6
+ mulhdu r8, r27, r6
+ adde r0, r0, r12
+ adde r7, r7, r26
+ std r0, 8(rp)
+ std r7, 16(rp)
+ addze r8, r8
+ std r8, 24(rp)
+C n is bumped by 2 before entering the accumulating rows; each row section
+C begins by decrementing it by 1.
+ addi n, n, 2
+ b L(outer_loop_ent_3)
+
+C First row, n mod 4 = 2.  r7 still holds the (n + 2) / 4 count computed by the
+C dispatch code; this case uses one less, so the count is decremented and ctr
+C reloaded (marked FIXME by the author).
+C r6 = up[0] is the multiplier for the whole row.
+C Feed-in: three products (up[1], up[2], up[3] times up[0]), chained with
+C addc/adde and stored at 8..24(rp).  r12 = high half of the third product
+C plus the carry bit.  rp is advanced by three limbs to match the shared loop
+C layout.
+L(b2): addi r7, r7, -1 C FIXME
+ mtctr r7 C FIXME
+ ld r6, 0(up)
+ ld r9, 8(up)
+ ld r27, 16(up)
+ mulld r0, r9, r6
+ mulhdu r26, r9, r6
+ mulld r7, r27, r6
+ mulhdu r8, r27, r6
+ ld r9, 24(up)
+ mulld r11, r9, r6
+ mulhdu r10, r9, r6
+ addc r7, r7, r26
+ adde r11, r11, r8
+ addze r12, r10
+ std r0, 8(rp)
+ std r7, 16(rp)
+ std r11, 24(rp)
+ addi rp, rp, 24
+ ld r9, 32(up)
+ ld r27, 40(up)
+ addi up, up, 48
+ bdz L(em2)
+
+ ALIGN(16)
+C Four limbs per iteration.  Each limb is multiplied by r6; the low half of a
+C product is added (adde) to the high half of the product before it and the
+C sums are stored at 8..32(rp).  r12 leaves the iteration holding the last
+C high half, and CA holds the last carry bit for the next adde.  r9 and r27
+C are reloaded ahead of use with the limbs for the next pair of products.
+L(tm2): mulld r0, r9, r6
+ mulhdu r26, r9, r6
+ mulld r7, r27, r6
+ mulhdu r8, r27, r6
+ ld r9, 0(up)
+ ld r27, 8(up)
+ adde r0, r0, r12
+ adde r7, r7, r26
+ mulld r26, r9, r6
+ mulhdu r10, r9, r6
+ mulld r11, r27, r6
+ mulhdu r12, r27, r6
+ ld r9, 16(up)
+ ld r27, 24(up)
+ std r0, 8(rp)
+ adde r26, r26, r8
+ std r7, 16(rp)
+ adde r11, r11, r10
+ std r26, 24(rp)
+ addi up, up, 32
+ std r11, 32(rp)
+ addi rp, rp, 32
+ bdnz L(tm2)
+
+C Wind-down: the last two products of the row.  The final high half plus the
+C pending carry bit is stored at 24(rp).
+L(em2): mulld r0, r9, r6
+ mulhdu r26, r9, r6
+ mulld r7, r27, r6
+ mulhdu r8, r27, r6
+ adde r0, r0, r12
+ adde r7, r7, r26
+ std r0, 8(rp)
+ std r7, 16(rp)
+ addze r8, r8
+ std r8, 24(rp)
+C n is bumped by 2 before entering the accumulating rows; each row section
+C begins by decrementing it by 1.
+ addi n, n, 2
+ b L(outer_loop_ent_0)
+
+
+C Accumulating row with a three-product feed-in.  Entered by branch from
+C L(em3) and from the end of the L(outer_loop_ent_2) section.
+C Row setup: n is decremented, up_outer advances one limb and rp_outer two
+C limbs; the row reads from up_outer and accumulates into rp_outer + 8.
+L(outer_loop):
+ addi n, n, -1
+ addi up_outer, up_outer, 8
+ addi rp_outer, rp_outer, 16
+
+ mr up, up_outer
+ addi rp, rp_outer, 8
+
+ srdi r0, n, 2
+ mtctr r0
+
+C ctr = n >> 2.  bdz decrements ctr and leaves the row loop for L(outer_end)
+C once it reaches zero; this is the only exit from the cycle of row sections.
+ bdz L(outer_end)
+
+C Feed-in: r6 = up[0] is the row multiplier.  Three products (up[1], up[2],
+C up[3] times r6) are chained with addc/adde, r12 receives the last high half
+C plus carry, and a second addc/adde chain adds the existing rp[0..2] limbs
+C (r28-r30) before the sums are stored back.
+ ld r6, 0(up)
+ ld r9, 8(up)
+ ld r27, 16(up)
+ mulld r0, r9, r6
+ mulhdu r26, r9, r6
+ mulld r7, r27, r6
+ mulhdu r8, r27, r6
+ ld r9, 24(up)
+ ld r28, 0(rp)
+ ld r29, 8(rp)
+ ld r30, 16(rp)
+ mulld r11, r9, r6
+ mulhdu r10, r9, r6
+ addc r7, r7, r26
+ adde r11, r11, r8
+ addze r12, r10
+ addc r0, r0, r28
+ std r0, 0(rp)
+ adde r7, r7, r29
+ std r7, 8(rp)
+ adde r11, r11, r30
+ std r11, 16(rp)
+ addi rp, rp, 24
+ ld r9, 32(up)
+ ld r27, 40(up)
+ addi up, up, 48
+ bdz L(ea1)
+
+ ALIGN(16)
+C Four limbs per iteration, multiply-accumulate.  The first adde chain joins
+C the products (low half plus previous high half, carry limb in r12), addze
+C folds that chain's carry into r12, and a second chain starting with addc
+C adds the existing rp[0..3] limbs (r28-r31).  CA from the last adde is
+C consumed by the first adde of the next iteration or of the wind-down.
+C NOTE(review): the trailing numbers look like register notes; the entries
+C reading 5 sit on lines that use r26 -- presumably stale, confirm.
+L(ta1): mulld r0, r9, r6
+ mulhdu r26, r9, r6 C 9
+ mulld r7, r27, r6
+ mulhdu r8, r27, r6 C 27
+ ld r9, 0(up)
+ ld r28, 0(rp)
+ ld r27, 8(up)
+ ld r29, 8(rp)
+ adde r0, r0, r12 C 0 12
+ adde r7, r7, r26 C 5 7
+ mulld r26, r9, r6
+ mulhdu r10, r9, r6 C 9
+ mulld r11, r27, r6
+ mulhdu r12, r27, r6 C 27
+ ld r9, 16(up)
+ ld r30, 16(rp)
+ ld r27, 24(up)
+ ld r31, 24(rp)
+ adde r26, r26, r8 C 8 5
+ adde r11, r11, r10 C 10 11
+ addze r12, r12 C 12
+ addc r0, r0, r28 C 0 28
+ std r0, 0(rp) C 0
+ adde r7, r7, r29 C 7 29
+ std r7, 8(rp) C 7
+ adde r26, r26, r30 C 5 30
+ std r26, 16(rp) C 5
+ adde r11, r11, r31 C 11 31
+ std r11, 24(rp) C 11
+ addi up, up, 32
+ addi rp, rp, 32
+ bdnz L(ta1)
+
+C Wind-down: the last two products of the row, added into rp[0..1].  The top
+C limb r8 collects the carry of each chain (two addze) and is stored at
+C 16(rp).  Execution then falls through into the next row section.
+L(ea1): mulld r0, r9, r6
+ mulhdu r26, r9, r6
+ mulld r7, r27, r6
+ mulhdu r8, r27, r6
+ ld r28, 0(rp)
+ ld r29, 8(rp)
+ adde r0, r0, r12
+ adde r7, r7, r26
+ addze r8, r8
+ addc r0, r0, r28
+ std r0, 0(rp)
+ adde r7, r7, r29
+ std r7, 8(rp)
+ addze r8, r8
+ std r8, 16(rp)
+
+C Accumulating row with a two-product feed-in.  Entered by branch from L(em2)
+C or by falling through from the section above.
+C Row setup: n is decremented, up_outer advances one limb and rp_outer two
+C limbs; the row reads from up_outer and accumulates into rp_outer + 8.
+C ctr = n >> 2.
+L(outer_loop_ent_0):
+ addi n, n, -1
+ addi up_outer, up_outer, 8
+ addi rp_outer, rp_outer, 16
+
+ mr up, up_outer
+ addi rp, rp_outer, 8
+
+ srdi r0, n, 2
+ mtctr r0
+
+C Feed-in: r6 = up[0] is the row multiplier.  Two products (up[1] and up[2]
+C times r6) are added into rp[0..1]; r12 = high half of the second product
+C plus carry.
+ ld r6, 0(up)
+ ld r9, 8(up)
+ ld r27, 16(up)
+ ld r28, 0(rp)
+ ld r29, 8(rp)
+ mulld r0, r9, r6
+ mulhdu r26, r9, r6
+ mulld r7, r27, r6
+ mulhdu r8, r27, r6
+ addc r0, r0, r28
+ adde r7, r7, r26
+ addze r12, r8
+ std r0, 0(rp)
+ adde r7, r7, r29
+ std r7, 8(rp)
+ addi rp, rp, 16
+ ld r9, 24(up)
+ ld r27, 32(up)
+ addi up, up, 40
+ bdz L(ea0)
+
+ ALIGN(16)
+C Four limbs per iteration, multiply-accumulate.  The first adde chain joins
+C the products (low half plus previous high half, carry limb in r12), addze
+C folds that chain's carry into r12, and a second chain starting with addc
+C adds the existing rp[0..3] limbs (r28-r31).  CA from the last adde is
+C consumed by the first adde of the next iteration or of the wind-down.
+C NOTE(review): the trailing numbers look like register notes; the entries
+C reading 5 sit on lines that use r26 -- presumably stale, confirm.
+L(ta0): mulld r0, r9, r6
+ mulhdu r26, r9, r6 C 9
+ mulld r7, r27, r6
+ mulhdu r8, r27, r6 C 27
+ ld r9, 0(up)
+ ld r28, 0(rp)
+ ld r27, 8(up)
+ ld r29, 8(rp)
+ adde r0, r0, r12 C 0 12
+ adde r7, r7, r26 C 5 7
+ mulld r26, r9, r6
+ mulhdu r10, r9, r6 C 9
+ mulld r11, r27, r6
+ mulhdu r12, r27, r6 C 27
+ ld r9, 16(up)
+ ld r30, 16(rp)
+ ld r27, 24(up)
+ ld r31, 24(rp)
+ adde r26, r26, r8 C 8 5
+ adde r11, r11, r10 C 10 11
+ addze r12, r12 C 12
+ addc r0, r0, r28 C 0 28
+ std r0, 0(rp) C 0
+ adde r7, r7, r29 C 7 29
+ std r7, 8(rp) C 7
+ adde r26, r26, r30 C 5 30
+ std r26, 16(rp) C 5
+ adde r11, r11, r31 C 11 31
+ std r11, 24(rp) C 11
+ addi up, up, 32
+ addi rp, rp, 32
+ bdnz L(ta0)
+
+C Wind-down: the last two products of the row, added into rp[0..1].  The top
+C limb r8 collects the carry of each chain (two addze) and is stored at
+C 16(rp).  Execution then falls through into the next row section.
+L(ea0): mulld r0, r9, r6
+ mulhdu r26, r9, r6
+ mulld r7, r27, r6
+ mulhdu r8, r27, r6
+ ld r28, 0(rp)
+ ld r29, 8(rp)
+ adde r0, r0, r12
+ adde r7, r7, r26
+ addze r8, r8
+ addc r0, r0, r28
+ std r0, 0(rp)
+ adde r7, r7, r29
+ std r7, 8(rp)
+ addze r8, r8
+ std r8, 16(rp)
+
+C Accumulating row with a one-product feed-in.  Entered by branch from L(em1)
+C or by falling through from the section above.
+C Row setup: n is decremented, up_outer advances one limb and rp_outer two
+C limbs; the row reads from up_outer and accumulates into rp_outer + 8.
+C ctr = n >> 2.
+L(outer_loop_ent_3):
+ addi n, n, -1
+ addi up_outer, up_outer, 8
+ addi rp_outer, rp_outer, 16
+
+ mr up, up_outer
+ addi rp, rp_outer, 8
+
+ srdi r0, n, 2
+ mtctr r0
+
+C Feed-in: r6 = up[0] is the row multiplier.  One product, up[1] * r6; its low
+C half is added into rp[0], its high half becomes the carry limb r12, and the
+C carry bit of the addc stays in CA for the next adde.
+ ld r6, 0(up)
+ ld r9, 8(up)
+ ld r28, 0(rp)
+ mulld r0, r9, r6
+ mulhdu r12, r9, r6
+ addc r0, r0, r28
+ std r0, 0(rp)
+ addi rp, rp, 8
+ ld r9, 16(up)
+ ld r27, 24(up)
+ addi up, up, 32
+ bdz L(ea3)
+
+ ALIGN(16)
+C Four limbs per iteration, multiply-accumulate.  The first adde chain joins
+C the products (low half plus previous high half, carry limb in r12), addze
+C folds that chain's carry into r12, and a second chain starting with addc
+C adds the existing rp[0..3] limbs (r28-r31).  CA from the last adde is
+C consumed by the first adde of the next iteration or of the wind-down.
+C NOTE(review): the trailing numbers look like register notes; the entries
+C reading 5 sit on lines that use r26 -- presumably stale, confirm.
+L(ta3): mulld r0, r9, r6
+ mulhdu r26, r9, r6 C 9
+ mulld r7, r27, r6
+ mulhdu r8, r27, r6 C 27
+ ld r9, 0(up)
+ ld r28, 0(rp)
+ ld r27, 8(up)
+ ld r29, 8(rp)
+ adde r0, r0, r12 C 0 12
+ adde r7, r7, r26 C 5 7
+ mulld r26, r9, r6
+ mulhdu r10, r9, r6 C 9
+ mulld r11, r27, r6
+ mulhdu r12, r27, r6 C 27
+ ld r9, 16(up)
+ ld r30, 16(rp)
+ ld r27, 24(up)
+ ld r31, 24(rp)
+ adde r26, r26, r8 C 8 5
+ adde r11, r11, r10 C 10 11
+ addze r12, r12 C 12
+ addc r0, r0, r28 C 0 28
+ std r0, 0(rp) C 0
+ adde r7, r7, r29 C 7 29
+ std r7, 8(rp) C 7
+ adde r26, r26, r30 C 5 30
+ std r26, 16(rp) C 5
+ adde r11, r11, r31 C 11 31
+ std r11, 24(rp) C 11
+ addi up, up, 32
+ addi rp, rp, 32
+ bdnz L(ta3)
+
+C Wind-down: the last two products of the row, added into rp[0..1].  The top
+C limb r8 collects the carry of each chain (two addze) and is stored at
+C 16(rp).  Execution then falls through into the next row section.
+L(ea3): mulld r0, r9, r6
+ mulhdu r26, r9, r6
+ mulld r7, r27, r6
+ mulhdu r8, r27, r6
+ ld r28, 0(rp)
+ ld r29, 8(rp)
+ adde r0, r0, r12
+ adde r7, r7, r26
+ addze r8, r8
+ addc r0, r0, r28
+ std r0, 0(rp)
+ adde r7, r7, r29
+ std r7, 8(rp)
+ addze r8, r8
+ std r8, 16(rp)
+
+
+C Accumulating row with no feed-in products.  Entered by branch from L(em0) or
+C by falling through from the section above.
+C Row setup: n is decremented, up_outer advances one limb and rp_outer two
+C limbs; the row reads from up_outer and accumulates into rp_outer + 8.
+C ctr = n >> 2.
+L(outer_loop_ent_2):
+ addi n, n, -1
+ addi up_outer, up_outer, 8
+ addi rp_outer, rp_outer, 16
+
+ mr up, up_outer
+ addi rp, rp_outer, 8
+
+ srdi r0, n, 2
+ mtctr r0
+
+C Adding 0 with addic cannot carry out, so this clears CA for the adde chain;
+C the value left in r0 is overwritten by the next mulld.
+C r6 = up[0] is the row multiplier; r9 and r27 are the first two limbs to be
+C multiplied.  up is only advanced when the main loop will actually run.
+ addic r0, r0, 0
+ li r12, 0 C cy_limb = 0
+ ld r6, 0(up)
+ ld r9, 8(up)
+ ld r27, 16(up)
+ bdz L(ea2)
+ addi up, up, 24
+
+ ALIGN(16)
+C Four limbs per iteration, multiply-accumulate.  The first adde chain joins
+C the products (low half plus previous high half, carry limb in r12), addze
+C folds that chain's carry into r12, and a second chain starting with addc
+C adds the existing rp[0..3] limbs (r28-r31).  CA from the last adde is
+C consumed by the first adde of the next iteration or of the wind-down.
+C NOTE(review): the trailing numbers look like register notes; the entries
+C reading 5 sit on lines that use r26 -- presumably stale, confirm.
+L(ta2): mulld r0, r9, r6
+ mulhdu r26, r9, r6 C 9
+ mulld r7, r27, r6
+ mulhdu r8, r27, r6 C 27
+ ld r9, 0(up)
+ ld r28, 0(rp)
+ ld r27, 8(up)
+ ld r29, 8(rp)
+ adde r0, r0, r12 C 0 12
+ adde r7, r7, r26 C 5 7
+ mulld r26, r9, r6
+ mulhdu r10, r9, r6 C 9
+ mulld r11, r27, r6
+ mulhdu r12, r27, r6 C 27
+ ld r9, 16(up)
+ ld r30, 16(rp)
+ ld r27, 24(up)
+ ld r31, 24(rp)
+ adde r26, r26, r8 C 8 5
+ adde r11, r11, r10 C 10 11
+ addze r12, r12 C 12
+ addc r0, r0, r28 C 0 28
+ std r0, 0(rp) C 0
+ adde r7, r7, r29 C 7 29
+ std r7, 8(rp) C 7
+ adde r26, r26, r30 C 5 30
+ std r26, 16(rp) C 5
+ adde r11, r11, r31 C 11 31
+ std r11, 24(rp) C 11
+ addi up, up, 32
+ addi rp, rp, 32
+ bdnz L(ta2)
+
+C Wind-down: the last two products of the row, added into rp[0..1].  The top
+C limb r8 collects the carry of each chain (two addze) and is stored at
+C 16(rp).
+L(ea2): mulld r0, r9, r6
+ mulhdu r26, r9, r6
+ mulld r7, r27, r6
+ mulhdu r8, r27, r6
+ ld r28, 0(rp)
+ ld r29, 8(rp)
+ adde r0, r0, r12
+ adde r7, r7, r26
+ addze r8, r8
+ addc r0, r0, r28
+ std r0, 0(rp)
+ adde r7, r7, r29
+ std r7, 8(rp)
+ addze r8, r8
+ std r8, 16(rp)
+
+C Back to the three-product row section, which also holds the exit test.
+ b L(outer_loop)
+
+C Final row, reached from the bdz in L(outer_loop) with up and rp already set
+C for this row.  A single product up[1] * up[0] is added into rp[0]; its high
+C half plus the carry bit is stored at rp[1].
+L(outer_end):
+ ld r6, 0(up)
+ ld r9, 8(up)
+ ld r11, 0(rp)
+ mulld r0, r9, r6
+ mulhdu r8, r9, r6
+ addc r0, r0, r11
+ std r0, 0(rp)
+ addze r8, r8
+ std r8, 8(rp)
+
+define(`rp', `rp_saved')
+define(`up', `r5')
+define(`n', `r6')
+define(`climb', `r0')
+
+ addi r4, rp_saved, 8
+ mr r5, up_saved
+ mr r6, n_saved
+
+ rldicl. r0, n, 0,62 C r0 = n & 3, set cr0
+ cmpdi cr6, r0, 2
+ addi n, n, 2 C compute count...
+ srdi n, n, 2 C ...for ctr
+ mtctr n C put loop count into ctr
+ beq cr0, L(xb0)
+ blt cr6, L(xb1)
+ beq cr6, L(xb2)
+
+L(xb3): ld r6, 0(up)
+ ld r7, 8(up)
+ ld r12, 16(up)
+ addi up, up, 24
+ mulld r24, r6, r6
+ mulhdu r25, r6, r6
+ mulld r26, r7, r7
+ mulhdu r27, r7, r7
+ mulld r28, r12, r12
+ mulhdu r29, r12, r12
+ ld r10, 8(rp)
+ ld r11, 16(rp)
+ ld r6, 24(rp)
+ ld r7, 32(rp)
+ addc r10, r10, r10
+ adde r11, r11, r11
+ adde r6, r6, r6
+ adde r7, r7, r7
+ addze climb, r29
+ addc r10, r10, r25
+ adde r11, r11, r26
+ adde r6, r6, r27
+ adde r7, r7, r28
+ std r24, 0(rp)
+ std r10, 8(rp)
+ std r11, 16(rp)
+ std r6, 24(rp)
+ std r7, 32(rp)
+ addi rp, rp, 40
+ bdnz L(top)
+ b L(end)
+
+L(xb2): ld r6, 0(up)
+ ld r7, 8(up)
+ addi up, up, 16
+ mulld r24, r6, r6
+ mulhdu r25, r6, r6
+ mulld r26, r7, r7
+ mulhdu r27, r7, r7
+ ld r10, 8(rp)
+ ld r11, 16(rp)
+ addc r10, r10, r10
+ adde r11, r11, r11
+ addze climb, r27
+ addc r10, r10, r25
+ adde r11, r11, r26
+ std r24, 0(rp)
+ std r10, 8(rp)
+ std r11, 16(rp)
+ addi rp, rp, 24
+ bdnz L(top)
+ b L(end)
+
+L(xb0): ld r6, 0(up)
+ ld r7, 8(up)
+ ld r12, 16(up)
+ ld r23, 24(up)
+ addi up, up, 32
+ mulld r24, r6, r6
+ mulhdu r25, r6, r6
+ mulld r26, r7, r7
+ mulhdu r27, r7, r7
+ mulld r28, r12, r12
+ mulhdu r29, r12, r12
+ mulld r30, r23, r23
+ mulhdu r31, r23, r23
+ ld r10, 8(rp)
+ ld r11, 16(rp)
+ ld r6, 24(rp)
+ ld r7, 32(rp)
+ ld r12, 40(rp)
+ ld r23, 48(rp)
+ addc r10, r10, r10
+ adde r11, r11, r11
+ adde r6, r6, r6
+ adde r7, r7, r7
+ adde r12, r12, r12
+ adde r23, r23, r23
+ addze climb, r31
+ std r24, 0(rp)
+ addc r10, r10, r25
+ std r10, 8(rp)
+ adde r11, r11, r26
+ std r11, 16(rp)
+ adde r6, r6, r27
+ std r6, 24(rp)
+ adde r7, r7, r28
+ std r7, 32(rp)
+ adde r12, r12, r29
+ std r12, 40(rp)
+ adde r23, r23, r30
+ std r23, 48(rp)
+ addi rp, rp, 56
+ bdnz L(top)
+ b L(end)
+
+L(xb1): ld r6, 0(up)
+ addi up, up, 8
+ mulld r24, r6, r6
+ mulhdu climb, r6, r6
+ std r24, 0(rp)
+ addic rp, rp, 8 C clear carry as side-effect
+
+ ALIGN(32)
+L(top): ld r6, 0(up)
+ ld r7, 8(up)
+ ld r12, 16(up)
+ ld r23, 24(up)
+ addi up, up, 32
+ mulld r24, r6, r6
+ mulhdu r25, r6, r6
+ mulld r26, r7, r7
+ mulhdu r27, r7, r7
+ mulld r28, r12, r12
+ mulhdu r29, r12, r12
+ mulld r30, r23, r23
+ mulhdu r31, r23, r23
+ ld r8, 0(rp)
+ ld r9, 8(rp)
+ adde r8, r8, r8
+ adde r9, r9, r9
+ ld r10, 16(rp)
+ ld r11, 24(rp)
+ adde r10, r10, r10
+ adde r11, r11, r11
+ ld r6, 32(rp)
+ ld r7, 40(rp)
+ adde r6, r6, r6
+ adde r7, r7, r7
+ ld r12, 48(rp)
+ ld r23, 56(rp)
+ adde r12, r12, r12
+ adde r23, r23, r23
+ addze r31, r31
+ addc r8, r8, climb
+ std r8, 0(rp)
+ adde r9, r9, r24
+ std r9, 8(rp)
+ adde r10, r10, r25
+ std r10, 16(rp)
+ adde r11, r11, r26
+ std r11, 24(rp)
+ adde r6, r6, r27
+ std r6, 32(rp)
+ adde r7, r7, r28
+ std r7, 40(rp)
+ adde r12, r12, r29
+ std r12, 48(rp)
+ adde r23, r23, r30
+ std r23, 56(rp)
+ mr climb, r31
+ addi rp, rp, 64
+ bdnz L(top)
+
+L(end): addze climb, climb
+ std climb, 0(rp)
+
+ ld r31, -8(r1)
+ ld r30, -16(r1)
+ ld r29, -24(r1)
+ ld r28, -32(r1)
+ ld r27, -40(r1)
+ ld r26, -48(r1)
+ ld r25, -56(r1)
+ ld r24, -64(r1)
+ ld r23, -72(r1)
+ ld r22, -80(r1)
+ ld r21, -88(r1)
+ blr
+EPILOGUE()
+++ /dev/null
-dnl PowerPC-64 mpn_sublsh1_n -- rp[] = up[] - (vp[] << 1)
-
-dnl Copyright 2003, 2005 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of the GNU Lesser General Public License as published
-dnl by the Free Software Foundation; either version 3 of the License, or (at
-dnl your option) any later version.
-
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-dnl License for more details.
-
-dnl You should have received a copy of the GNU Lesser General Public License
-dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/limb
-C POWER3/PPC630: 2 (1.5 c/l should be possible)
-C POWER4/PPC970: 4 (2.0 c/l should be possible)
-
-C INPUT PARAMETERS
-C rp r3
-C up r4
-C vp r5
-C n r6
-
-define(`rp',`r3')
-define(`up',`r4')
-define(`vp',`r5')
-
-define(`s0',`r6')
-define(`s1',`r7')
-define(`u0',`r8')
-define(`v0',`r10')
-define(`v1',`r11')
-
-ASM_START()
-PROLOGUE(mpn_sublsh1_n)
- mtctr r6 C put n in ctr
-
- ld v0, 0(vp) C load v limb
- ld u0, 0(up) C load u limb
- addic up, up, -8 C update up; set cy
- addi rp, rp, -8 C update rp
- sldi s1, v0, 1
- bdz L(end) C If done, skip loop
-
-L(oop): ld v1, 8(vp) C load v limb
- subfe s1, s1, u0 C add limbs with cy, set cy
- std s1, 8(rp) C store result limb
- srdi s0, v0, 63 C shift down previous v limb
- ldu u0, 16(up) C load u limb and update up
- rldimi s0, v1, 1, 0 C left shift v limb and merge with prev v limb
-
- bdz L(exit) C decrement ctr and exit if done
-
- ldu v0, 16(vp) C load v limb and update vp
- subfe s0, s0, u0 C add limbs with cy, set cy
- stdu s0, 16(rp) C store result limb and update rp
- srdi s1, v1, 63 C shift down previous v limb
- ld u0, 8(up) C load u limb
- rldimi s1, v0, 1, 0 C left shift v limb and merge with prev v limb
-
- bdnz L(oop) C decrement ctr and loop back
-
-L(end): subfe r7, s1, u0
- std r7, 8(rp) C store last result limb
- srdi r3, v0, 63
- subfze r3, r3
- neg r3, r3
- blr
-L(exit): subfe r7, s0, u0
- std r7, 16(rp) C store last result limb
- srdi r3, v1, 63
- subfze r3, r3
- neg r3, r3
- blr
-EPILOGUE()
+++ /dev/null
-dnl PowerPC-64 mpn_submul_1 -- Multiply a limb vector with a limb and subtract
-dnl the result from a second limb vector.
-
-dnl Copyright 1999, 2000, 2001, 2003, 2004, 2005, 2006 Free Software
-dnl Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of the GNU Lesser General Public License as published
-dnl by the Free Software Foundation; either version 3 of the License, or (at
-dnl your option) any later version.
-
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-dnl License for more details.
-
-dnl You should have received a copy of the GNU Lesser General Public License
-dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/limb
-C POWER3/PPC630: 6-18
-C POWER4/PPC970: 10
-C POWER5: 10.5
-
-C INPUT PARAMETERS
-define(`rp', `r3')
-define(`up', `r4')
-define(`n', `r5')
-define(`vl', `r6')
-define(`cy', `r7')
-
-ASM_START()
-PROLOGUE(mpn_submul_1)
- li cy, 0 C cy_limb = 0
-
-PROLOGUE(mpn_submul_1c)
- mtctr n
- addic r0, r0, 0
- addi rp, rp, -8
- ALIGN(16)
-L(top):
- ld r0, 0(up)
- ld r10, 8(rp)
- mulld r9, r0, vl
- mulhdu r5, r0, vl
- adde r9, r9, cy
- addi up, up, 8
- addze cy, r5
- subf r12, r9, r10
- not r0, r10
- addc r11, r9, r0 C inverted carry from subf
- stdu r12, 8(rp)
- bdnz L(top)
-
- addze r3, cy
- blr
-EPILOGUE(mpn_submul_1)
-EPILOGUE(mpn_submul_1c)
--- /dev/null
+dnl PowerPC-64 mpn_lshift -- rp[] = up[] << cnt
+
+dnl Copyright 2003, 2005, 2010, 2013 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C POWER3/PPC630 ?
+C POWER4/PPC970 ?
+C POWER5 2.25
+C POWER6 4
+
+C TODO
+C * Micro-optimise header code
+C * Perhaps do 4-way unrolling, for 2.5 c/l on POWER6. The code is 4236
+C bytes, 4-way code would become about 50% larger.
+
+C INPUT PARAMETERS
+define(`rp_param', `r3')
+define(`up', `r4')
+define(`n', `r5')
+define(`cnt', `r6')
+
+define(`tnc',`r0')
+define(`retval',`r3')
+define(`rp', `r7')
+
+ASM_START()
+PROLOGUE(mpn_lshift)
+C Shift the n-limb operand at up[] left by cnt bits and store the result at
+C rp_param[]. The bits shifted out of the most significant limb are returned
+C in r3. Each shift count 1..63 has its own 64-byte SHIFT block with the count
+C as an immediate; the header computes the address of the block for cnt and
+C enters it through lr, at L(oN) for odd n or at L(eN) for even n.
+
+ifdef(`HAVE_ABI_mode32',`
+ rldicl n, n, 0,32 C FIXME: avoid this zero extend
+')
+ mflr r12 C save return address, lr is reused for the computed branch
+ sldi r8, n, 3 C r8 = 8n, byte size of the operands
+ sldi r10, cnt, 6 C multiply cnt by size of a SHIFT block
+ LEAL( r11, L(e1)) C address of L(e1) label in SHIFT(1)
+ add up, up, r8 C make up point at end of up[]
+ add r11, r11, r10 C L(e1) + 64*cnt, adjusted below to L(oN) or L(eN), N = cnt
+ srdi r10, n, 1 C r10 = n/2, loop count (two limbs per iteration)
+ add rp, rp_param, r8 C make rp point at end of rp[]
+ subfic tnc, cnt, 64 C tnc = 64 - cnt
+ rlwinm. r8, n, 0,31,31 C extract bit 0
+ mtctr r10
+ beq L(evn)
+
+L(odd): ld r9, -8(up)
+ cmpdi cr0, n, 1 C n = 1?
+ beq L(1)
+ ld r8, -16(up)
+ addi r11, r11, -84 C L(o1) - L(e1) - 64
+ mtlr r11 C lr = L(oN)
+ srd r3, r9, tnc C retval
+ addi up, up, 8
+ addi rp, rp, -8
+ blr C branch to L(oN)
+
+L(evn): ld r8, -8(up)
+ ld r9, -16(up)
+ addi r11, r11, -64 C step back one SHIFT block, giving L(eN)
+ mtlr r11 C lr = L(eN)
+ srd r3, r8, tnc C retval
+ blr C branch to L(eN)
+
+L(1): srd r3, r9, tnc C retval
+ sld r8, r9, cnt C n = 1: the only result limb
+ std r8, -8(rp)
+ mtlr r12 C restore return address
+ifdef(`HAVE_ABI_mode32',
+` mr r4, r3
+ srdi r3, r3, 32
+')
+ blr
+
+
+define(SHIFT,`
+L(lo$1):ld r8, -24(up)
+ std r11, -8(rp)
+ addi rp, rp, -16
+L(o$1): srdi r10, r8, eval(64-$1)
+ rldimi r10, r9, $1, 0
+ ld r9, -32(up)
+ addi up, up, -16
+ std r10, 0(rp)
+L(e$1): srdi r11, r9, eval(64-$1)
+ rldimi r11, r8, $1, 0
+ bdnz L(lo$1)
+ std r11, -8(rp)
+ sldi r10, r9, $1
+ b L(com)
+ nop
+ nop
+')
+
+ ALIGN(64)
+forloop(`i',1,63,`SHIFT(i)')
+
+L(com): std r10, -16(rp) C store the last limb computed by the SHIFT block tail
+ mtlr r12 C restore return address
+ifdef(`HAVE_ABI_mode32',
+` mr r4, r3
+ srdi r3, r3, 32
+')
+ blr
+EPILOGUE()
+ASM_END()
--- /dev/null
+dnl PowerPC-64 mpn_lshiftc -- rp[] = ~up[] << cnt
+
+dnl Copyright 2003, 2005, 2010, 2013 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C POWER3/PPC630 ?
+C POWER4/PPC970 ?
+C POWER5 2.25
+C POWER6 4
+
+C TODO
+C * Micro-optimise header code
+C * Perhaps do 4-way unrolling, for 2.5 c/l on POWER6. The code is 4236
+C bytes, 4-way code would become about 50% larger.
+
+C INPUT PARAMETERS
+define(`rp_param', `r3')
+define(`up', `r4')
+define(`n', `r5')
+define(`cnt', `r6')
+
+define(`tnc',`r0')
+define(`retval',`r3')
+define(`rp', `r7')
+
+ASM_START()
+PROLOGUE(mpn_lshiftc)
+C Shift the n-limb operand at up[] left by cnt bits, complement every result
+C limb (nor x,x), and store at rp_param[]. The bits shifted out of the most
+C significant limb are returned uncomplemented in r3. Each shift count 1..63
+C has its own 64-byte SHIFT block; the header computes the address of the
+C block for cnt and enters it through lr, at L(oN) for odd n or L(eN) for
+C even n.
+
+ifdef(`HAVE_ABI_mode32',`
+ rldicl n, n, 0,32 C FIXME: avoid this zero extend
+')
+ mflr r12 C save return address, lr is reused for the computed branch
+ sldi r8, n, 3 C r8 = 8n, byte size of the operands
+ sldi r10, cnt, 6 C multiply cnt by size of a SHIFT block
+ LEAL( r11, L(e1)) C address of L(e1) label in SHIFT(1)
+ add up, up, r8 C make up point at end of up[]
+ add r11, r11, r10 C L(e1) + 64*cnt, adjusted below to L(oN) or L(eN), N = cnt
+ srdi r10, n, 1 C r10 = n/2, loop count (two limbs per iteration)
+ add rp, rp_param, r8 C make rp point at end of rp[]
+ subfic tnc, cnt, 64 C tnc = 64 - cnt
+ rlwinm. r8, n, 0,31,31 C extract bit 0
+ mtctr r10
+ beq L(evn)
+
+L(odd): ld r9, -8(up)
+ cmpdi cr0, n, 1 C n = 1?
+ beq L(1)
+ ld r8, -16(up)
+ addi r11, r11, -88 C L(o1) - L(e1) - 64
+ mtlr r11 C lr = L(oN)
+ srd r3, r9, tnc C retval
+ addi up, up, 8
+ addi rp, rp, -8
+ blr C branch to L(oN)
+
+L(evn): ld r8, -8(up)
+ ld r9, -16(up)
+ addi r11, r11, -64 C step back one SHIFT block, giving L(eN)
+ mtlr r11 C lr = L(eN)
+ srd r3, r8, tnc C retval
+ blr C branch to L(eN)
+
+L(1): srd r3, r9, tnc C retval
+ sld r8, r9, cnt C n = 1: the only result limb...
+ nor r8, r8, r8 C ...complemented
+ std r8, -8(rp)
+ mtlr r12 C restore return address
+ifdef(`HAVE_ABI_mode32',
+` mr r4, r3
+ srdi r3, r3, 32
+')
+ blr
+
+
+define(SHIFT,`
+L(lo$1):ld r8, -24(up)
+ nor r11, r11, r11
+ std r11, -8(rp)
+ addi rp, rp, -16
+L(o$1): srdi r10, r8, eval(64-$1)
+ rldimi r10, r9, $1, 0
+ ld r9, -32(up)
+ addi up, up, -16
+ nor r10, r10, r10
+ std r10, 0(rp)
+L(e$1): srdi r11, r9, eval(64-$1)
+ rldimi r11, r8, $1, 0
+ bdnz L(lo$1)
+ sldi r10, r9, $1
+ b L(com)
+ nop
+')
+
+ ALIGN(64)
+forloop(`i',1,63,`SHIFT(i)')
+
+L(com): nor r11, r11, r11 C complement the two limbs left pending by the SHIFT tail
+ nor r10, r10, r10
+ std r11, -8(rp)
+ std r10, -16(rp)
+ mtlr r12 C restore return address
+ifdef(`HAVE_ABI_mode32',
+` mr r4, r3
+ srdi r3, r3, 32
+')
+ blr
+EPILOGUE()
+ASM_END()
--- /dev/null
+dnl PowerPC-64 mpn_rshift -- rp[] = up[] >> cnt
+
+dnl Copyright 2003, 2005, 2010, 2013 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C POWER3/PPC630 ?
+C POWER4/PPC970 ?
+C POWER5 2
+C POWER6 3.5 (mysteriously 3.0 for cnt=1)
+
+C TODO
+C * Micro-optimise header code
+C * Perhaps do 4-way unrolling, for 2.5 c/l on POWER6. The code is 4248
+C bytes, 4-way code would become about 50% larger.
+
+C INPUT PARAMETERS
+define(`rp_param', `r3')
+define(`up', `r4')
+define(`n', `r5')
+define(`cnt', `r6')
+
+define(`tnc',`r0')
+define(`retval',`r3')
+define(`rp', `r7')
+
+ASM_START()
+PROLOGUE(mpn_rshift)
+C Shift the n-limb operand at up[] right by cnt bits and store the result at
+C rp_param[]. The bits shifted out of the least significant limb are returned
+C in the high end of r3 (lowest limb << (64 - cnt)). Each shift count 1..63
+C has its own 64-byte SHIFT block; the header computes the address of the
+C block for cnt and enters it through lr, at L(oN) for odd n or L(eN) for
+C even n.
+
+ifdef(`HAVE_ABI_mode32',`
+ rldicl n, n, 0,32 C FIXME: avoid this zero extend
+')
+ mflr r12 C save return address, lr is reused for the computed branch
+ LEAL( r11, L(e1)) C address of L(e1) label in SHIFT(1)
+ sldi r10, cnt, 6 C multiply cnt by size of a SHIFT block
+ add r11, r11, r10 C L(e1) + 64*cnt, adjusted below to L(oN) or L(eN), N = cnt
+ srdi r10, n, 1 C r10 = n/2, loop count (two limbs per iteration)
+ mr rp, rp_param C work on a copy, r3 is overwritten by the return value
+ subfic tnc, cnt, 64 C tnc = 64 - cnt
+ rlwinm. r8, n, 0,31,31 C extract bit 0
+ mtctr r10
+ beq L(evn)
+
+L(odd): ld r9, 0(up)
+ cmpdi cr0, n, 1 C n = 1?
+ beq L(1)
+ ld r8, 8(up)
+ addi r11, r11, -84 C L(o1) - L(e1) - 64
+ mtlr r11 C lr = L(oN)
+ sld r3, r9, tnc C retval
+ addi up, up, 8
+ addi rp, rp, 8
+ blr C branch to L(oN)
+
+L(evn): ld r8, 0(up)
+ ld r9, 8(up)
+ addi r11, r11, -64 C step back one SHIFT block, giving L(eN)
+ mtlr r11 C lr = L(eN)
+ sld r3, r8, tnc C retval
+ addi up, up, 16
+ blr C branch to L(eN)
+
+L(1): sld r3, r9, tnc C retval
+ srd r8, r9, cnt C n = 1: the only result limb
+ std r8, 0(rp)
+ mtlr r12 C restore return address
+ifdef(`HAVE_ABI_mode32',
+` mr r4, r3
+ srdi r3, r3, 32
+')
+ blr
+
+
+define(SHIFT,`
+L(lo$1):ld r8, 0(up)
+ std r11, 0(rp)
+ addi rp, rp, 16
+L(o$1): srdi r10, r9, $1
+ rldimi r10, r8, eval(64-$1), 0
+ ld r9, 8(up)
+ addi up, up, 16
+ std r10, -8(rp)
+L(e$1): srdi r11, r8, $1
+ rldimi r11, r9, eval(64-$1), 0
+ bdnz L(lo$1)
+ std r11, 0(rp)
+ srdi r10, r9, $1
+ b L(com)
+ nop
+ nop
+')
+
+ ALIGN(64)
+forloop(`i',1,63,`SHIFT(i)')
+
+L(com): std r10, 8(rp) C store the last limb computed by the SHIFT block tail
+ mtlr r12 C restore return address
+ifdef(`HAVE_ABI_mode32',
+` mr r4, r3
+ srdi r3, r3, 32
+')
+ blr
+EPILOGUE()
+ASM_END()
--- /dev/null
+dnl PowerPC-64 mpn_hamdist.
+
+dnl Copyright 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C POWER3/PPC630 -
+C POWER4/PPC970 -
+C POWER5 -
+C POWER6 -
+C POWER7 2.87
+
+define(`up', r3)
+define(`vp', r4)
+define(`n', r5)
+
+ASM_START()
+PROLOGUE(mpn_hamdist)
+C Return in r3 the sum of popcntd(up[i] xor vp[i]) over the n limbs at up[]
+C and vp[]. Two limbs are handled per loop iteration, with two independent
+C running sums in r0 and r12. r30 and r31 hold the popcounts from the previous
+C round that have not yet been added.
+ std r30, -16(r1) C save r30 below sp, restored before returning
+ std r31, -8(r1) C save r31 likewise
+
+ addi r0, n, 1 C compute (n+1)/2...
+ifdef(`HAVE_ABI_mode32',
+` rldicl r0, r0, 63,33', C ...branch count
+` srdi r0, r0, 1') C ...for ctr
+ mtctr r0
+
+ andi. r0, n, 1 C cr0.eq set iff n is even
+
+ li r0, 0 C first running sum
+ li r12, 0 C second running sum
+
+ beq L(evn)
+
+L(odd): ld r6, 0(up)
+ addi up, up, 8
+ ld r8, 0(vp)
+ addi vp, vp, 8
+ xor r10, r6, r8
+ popcntd r0, r10 C count the odd limb directly into r0
+ bdz L(e1) C done if n = 1
+
+L(evn): ld r6, 0(up)
+ ld r8, 0(vp)
+ ld r7, 8(up)
+ ld r9, 8(vp)
+ xor r10, r6, r8
+ addi up, up, 16
+ addi vp, vp, 16
+ li r30, 0 C no pending counts yet
+ li r31, 0
+ bdz L(end) C only one limb pair left, skip the loop
+
+ nop
+ nop
+C ALIGN(16)
+L(top): add r0, r0, r30 C fold in the pending count from the previous round
+ ld r6, 0(up)
+ ld r8, 0(vp)
+ xor r11, r7, r9
+ popcntd r30, r10
+ add r12, r12, r31 C fold in the other pending count
+ ld r7, 8(up)
+ ld r9, 8(vp)
+ xor r10, r6, r8
+ popcntd r31, r11
+ addi up, up, 16
+ addi vp, vp, 16
+ bdnz L(top)
+
+L(end): add r0, r0, r30
+ xor r11, r7, r9
+ popcntd r30, r10 C counts for the final limb pair
+ add r12, r12, r31
+ popcntd r31, r11
+
+ add r0, r0, r30
+ add r12, r12, r31
+L(e1): add r3, r0, r12 C return the sum of both running sums
+ ld r30, -16(r1) C restore saved registers
+ ld r31, -8(r1)
+ blr
+EPILOGUE()
--- /dev/null
+dnl PowerPC-64 mpn_popcount.
+
+dnl Copyright 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C POWER3/PPC630 -
+C POWER4/PPC970 -
+C POWER5 -
+C POWER6 -
+C POWER7 2
+
+define(`up', r3)
+define(`n', r4)
+
+ASM_START()
+PROLOGUE(mpn_popcount)
+C Return in r3 the sum of popcntd over the n limbs at up[]. Two limbs are
+C handled per loop iteration, with two independent running sums in r0 and r12.
+C r8 and r9 hold the popcounts that have not yet been added.
+ addi r0, n, 1 C compute (n+1)/2...
+ifdef(`HAVE_ABI_mode32',
+` rldicl r0, r0, 63,33', C ...branch count
+` srdi r0, r0, 1') C ...for ctr
+ mtctr r0
+
+ andi. r0, n, 1 C cr0.eq set iff n is even
+
+ li r0, 0 C first running sum
+ li r12, 0 C second running sum
+ beq L(evn)
+
+L(odd): ld r4, 0(up) C n (r4) is no longer needed, reuse it for limb data
+ addi up, up, 8
+ popcntd r0, r4 C count the odd limb directly into r0
+ bdz L(e1) C done if n = 1
+
+L(evn): ld r4, 0(up)
+ ld r5, 8(up)
+ popcntd r8, r4
+ popcntd r9, r5
+ bdz L(e2) C only one limb pair remained
+
+ ld r4, 16(up)
+ ld r5, 24(up)
+ bdz L(e4) C only two limb pairs remained
+ addi up, up, 32
+
+L(top): add r0, r0, r8 C fold in the pending count
+ popcntd r8, r4
+ ld r4, 0(up)
+ add r12, r12, r9 C fold in the other pending count
+ popcntd r9, r5
+ ld r5, 8(up)
+ addi up, up, 16
+ bdnz L(top)
+
+L(e4): add r0, r0, r8
+ popcntd r8, r4 C counts for the last loaded limb pair
+ add r12, r12, r9
+ popcntd r9, r5
+L(e2): add r0, r0, r8
+ add r12, r12, r9
+L(e1): add r3, r0, r12 C return the sum of both running sums
+ blr
+EPILOGUE()
dnl PowerPC-64 mpn_rshift -- rp[] = up[] >> cnt
-dnl Copyright 2003, 2005 Free Software Foundation, Inc.
+dnl Copyright 2003, 2005, 2010, 2011 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
include(`../config.m4')
-C cycles/limb
-C POWER3/PPC630: 1.5
-C POWER4/PPC970: 3.0
+C cycles/limb
+C POWER3/PPC630 ?
+C POWER4/PPC970 ?
+C POWER5 2.25
+C POWER6 9.75
+C POWER7 2.15
-C INPUT PARAMETERS
-define(`rp',`r3')
-define(`up',`r4')
-define(`n',`r5')
-define(`cnt',`r6')
+C TODO
+C * Try to reduce the number of needed live registers
+C * Micro-optimise header code
+C * Keep in synch with lshift.asm and lshiftc.asm
-define(`tnc',`r5')
-define(`v0',`r0')
-define(`v1',`r7')
-define(`u0',`r8')
-define(`u1',`r9')
-define(`h0',`r10')
-define(`h1',`r11')
+C INPUT PARAMETERS
+define(`rp', `r3')
+define(`up', `r4')
+define(`n', `r5')
+define(`cnt', `r6')
+define(`tnc',`r0')
+define(`u0',`r30')
+define(`u1',`r31')
+define(`retval',`r5')
ASM_START()
 PROLOGUE(mpn_rshift)
+ std r31, -8(r1) C save r31 below sp, restored at L(ret)
+ std r30, -16(r1) C save r30 likewise
+ subfic tnc, cnt, 64 C tnc = 64 - cnt
+C sldi r30, n, 3 C byte count corresponding to n
+C add rp, rp, r30 C rp = rp + n
+C add up, up, r30 C up = up + n
+ rldicl. r30, n, 0,62 C r30 = n & 3, set cr0
+ cmpdi cr6, r30, 2
+ addi r31, n, 3 C compute count...
+ ld r10, 0(up) C load 1st limb for b00...b11
+ sld retval, r10, tnc C bits shifted out of the lowest limb (overwrites n)
 ifdef(`HAVE_ABI_mode32',
-` rldicl n, n, 0, 32') C zero extend n
- mtctr n C copy n to count register
+` rldicl r31, r31, 62,34', C ...branch count
+` srdi r31, r31, 2') C ...for ctr
+ mtctr r31 C copy count into ctr
+ beq cr0, L(b00) C n mod 4 = 0
+ blt cr6, L(b01) C n mod 4 = 1
+ ld r11, 8(up) C load 2nd limb for b10 and b11
+ beq cr6, L(b10) C n mod 4 = 2, else fall through for n mod 4 = 3
+
+ ALIGN(16)
+L(b11): srd r8, r10, cnt
+ sld r9, r11, tnc
+ ld u1, 16(up)
+ addi up, up, 24
+ srd r12, r11, cnt
+ sld r7, u1, tnc
 addi rp, rp, -16
- subfic tnc, cnt, 64 C reverse shift count
+ bdnz L(gt3)
- ld u0, 0(up)
- srd h0, u0, cnt
- sld r12, u0, tnc C return value
- bdz L(1) C jump for n = 1
+ or r11, r8, r9
+ srd r8, u1, cnt
+ b L(cj3)
+ ALIGN(16)
+L(gt3): ld u0, 0(up)
+ or r11, r8, r9
+ srd r8, u1, cnt
+ sld r9, u0, tnc
 ld u1, 8(up)
- bdz L(2) C jump for n = 2
-
- ldu u0, 16(up)
- bdz L(end) C jump for n = 3
-
-L(oop): sld v1, u1, tnc
- srd h1, u1, cnt
+ or r10, r12, r7
+ b L(L11)
+
+ ALIGN(32)
+L(b10): srd r12, r10, cnt
+ addi rp, rp, -24
+ sld r7, r11, tnc
+ bdnz L(gt2)
+
+ srd r8, r11, cnt
+ or r10, r12, r7
+ b L(cj2)
+
+L(gt2): ld u0, 16(up)
+ srd r8, r11, cnt
+ sld r9, u0, tnc
+ ld u1, 24(up)
+ or r10, r12, r7
+ srd r12, u0, cnt
+ sld r7, u1, tnc
+ ld u0, 32(up)
+ or r11, r8, r9
+ addi up, up, 16
+ b L(L10)
+
+ ALIGN(16)
+L(b00): ld u1, 8(up)
+ srd r12, r10, cnt
+ sld r7, u1, tnc
+ ld u0, 16(up)
+ srd r8, u1, cnt
+ sld r9, u0, tnc
+ ld u1, 24(up)
+ or r10, r12, r7
+ srd r12, u0, cnt
+ sld r7, u1, tnc
+ addi rp, rp, -8
+ bdz L(cj4)
+
+L(gt4): addi up, up, 32
+ ld u0, 0(up)
+ or r11, r8, r9
+ b L(L00)
+
+ ALIGN(16)
+L(b01): bdnz L(gt1)
+ srd r8, r10, cnt C n = 1: the only result limb
+ std r8, 0(rp)
+ b L(ret)
+
+L(gt1): ld u0, 8(up)
+ srd r8, r10, cnt
+ sld r9, u0, tnc
+ ld u1, 16(up)
+ srd r12, u0, cnt
+ sld r7, u1, tnc
+ ld u0, 24(up)
+ or r11, r8, r9
+ srd r8, u1, cnt
+ sld r9, u0, tnc
+ ld u1, 32(up)
+ addi up, up, 40
+ or r10, r12, r7
+ bdz L(end)
+
+ ALIGN(32)
+L(top): srd r12, u0, cnt
+ sld r7, u1, tnc
+ ld u0, 0(up)
+ std r11, 0(rp)
+ or r11, r8, r9
+L(L00): srd r8, u1, cnt
+ sld r9, u0, tnc
 ld u1, 8(up)
- or h0, v1, h0
- stdu h0, 16(rp)
-
- bdz L(exit)
-
- sld v0, u0, tnc
- srd h0, u0, cnt
- ldu u0, 16(up)
- or h1, v0, h1
- std h1, 8(rp)
-
- bdnz L(oop)
-
-L(end): sld v1, u1, tnc
- srd h1, u1, cnt
- or h0, v1, h0
- stdu h0, 16(rp)
- sld v0, u0, tnc
- srd h0, u0, cnt
- or h1, v0, h1
- std h1, 8(rp)
-L(1): std h0, 16(rp)
-ifdef(`HAVE_ABI_mode32',
-` srdi r3, r12, 32
- mr r4, r12
-',` mr r3, r12
-')
- blr
-
-L(exit): sld v0, u0, tnc
- srd h0, u0, cnt
- or h1, v0, h1
- std h1, 8(rp)
-L(2): sld v1, u1, tnc
- srd h1, u1, cnt
- or h0, v1, h0
- stdu h0, 16(rp)
- std h1, 8(rp)
+ std r10, 8(rp)
+ or r10, r12, r7
+L(L11): srd r12, u0, cnt
+ sld r7, u1, tnc
+ ld u0, 16(up)
+ std r11, 16(rp)
+ or r11, r8, r9
+L(L10): srd r8, u1, cnt
+ sld r9, u0, tnc
+ ld u1, 24(up)
+ addi up, up, 32
+ std r10, 24(rp)
+ addi rp, rp, 32
+ or r10, r12, r7
+ bdnz L(top)
+
+ ALIGN(32)
+L(end): srd r12, u0, cnt
+ sld r7, u1, tnc
+ std r11, 0(rp)
+L(cj4): or r11, r8, r9
+ srd r8, u1, cnt
+ std r10, 8(rp)
+L(cj3): or r10, r12, r7
+ std r11, 16(rp)
+L(cj2): std r10, 24(rp)
+ std r8, 32(rp)
+
+L(ret): ld r31, -8(r1) C restore saved registers
+ ld r30, -16(r1)
 ifdef(`HAVE_ABI_mode32',
-` srdi r3, r12, 32
- mr r4, r12
-',` mr r3, r12
-')
+` srdi r3, retval, 32
+ mr r4, retval
+',` mr r3, retval')
 blr
 EPILOGUE()
+++ /dev/null
-dnl PowerPC-64 mpn_sqr_diagonal.
-
-dnl Copyright 2001, 2002, 2003, 2005, 2006 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of the GNU Lesser General Public License as published
-dnl by the Free Software Foundation; either version 3 of the License, or (at
-dnl your option) any later version.
-
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-dnl License for more details.
-
-dnl You should have received a copy of the GNU Lesser General Public License
-dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/limb
-C POWER3/PPC630: 18
-C POWER4/PPC970: 8
-
-C INPUT PARAMETERS
-C rp r3
-C up r4
-C n r5
-
-ASM_START()
-PROLOGUE(mpn_sqr_diagonal)
-ifdef(`HAVE_ABI_mode32',
-` rldicl r5, r5, 0, 32') C zero extend n
- mtctr r5
- ld r0, 0(r4)
- bdz L(end)
- ALIGN(16)
-
-L(top): mulld r5, r0, r0
- mulhdu r6, r0, r0
- ld r0, 8(r4)
- addi r4, r4, 8
- std r5, 0(r3)
- std r6, 8(r3)
- addi r3, r3, 16
- bdnz L(top)
-
-L(end): mulld r5, r0, r0
- mulhdu r6, r0, r0
- std r5, 0(r3)
- std r6, 8(r3)
-
- blr
-EPILOGUE()
--- /dev/null
+dnl PowerPC-64 mpn_tabselect.
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C POWER3/PPC630 ?
+C POWER4/PPC970 3.3
+C POWER5 ?
+C POWER6 ?
+C POWER7 2.5
+
+C NOTES
+C * This has not been tuned for any specific processor. Its speed should not
+C be too bad, though.
+C * Using VMX could result in significant speedup for certain CPUs.
+
+C mpn_tabselect (mp_limb_t *rp, mp_limb_t *tp, mp_size_t n, mp_size_t nents, mp_size_t which)
+define(`rp', `r3')
+define(`tp', `r4')
+define(`n', `r5')
+define(`nents', `r6')
+define(`which', `r7')
+
+define(`mask', `r8')
+
+ASM_START()
+ TEXT
+ ALIGN(16)
+PROLOGUE(mpn_tabselect)
+C Walk all nents table entries of n limbs each, starting at tp, and merge each
+C into rp[] under a mask: rp[i] = (tp[i] & mask) | (rp[i] & ~mask). The mask is
+C all ones for the entry with index `which' (counting from 0) and zero for all
+C others, so every entry is read and rp[] is rewritten on every pass whichever
+C entry is selected.
+ addi r0, n, 1
+ srdi r0, r0, 1 C inner loop count
+ andi. r9, n, 1 C set cr0 for use in inner loop
+ subf which, nents, which C which -= nents, so which + nents reaches 0 at the selected entry
+ sldi n, n, 3 C n = byte size of one entry, used to rewind rp
+
+L(outer):
+ mtctr r0 C put inner loop count in ctr
+
+ add r9, which, nents C are we at the selected table entry?
+ addic r9, r9, -1 C set CF iff not selected entry
+ subfe mask, r0, r0 C mask = CF - 1: all ones if selected, else 0
+
+ beq cr0, L(top) C branch to loop entry if n even
+
+ ld r9, 0(tp) C n odd: handle one limb before the 2-way loop
+ addi tp, tp, 8
+ and r9, r9, mask
+ ld r11, 0(rp)
+ andc r11, r11, mask
+ or r9, r9, r11
+ std r9, 0(rp)
+ addi rp, rp, 8
+ bdz L(end) C n = 1, nothing left for the 2-way loop
+
+ ALIGN(16)
+L(top): ld r9, 0(tp)
+ ld r10, 8(tp)
+ addi tp, tp, 16
+ nop
+ and r9, r9, mask C keep table limbs only if selected
+ and r10, r10, mask
+ ld r11, 0(rp)
+ ld r12, 8(rp)
+ andc r11, r11, mask C keep old rp limbs only if not selected
+ andc r12, r12, mask
+ or r9, r9, r11
+ or r10, r10, r12
+ std r9, 0(rp)
+ std r10, 8(rp)
+ addi rp, rp, 16
+ bdnz L(top)
+
+L(end): subf rp, n, rp C move rp back to beginning
+ cmpdi cr6, nents, 1 C was this the last entry?
+ addi nents, nents, -1
+ bne cr6, L(outer)
+
+ blr
+EPILOGUE()
dnl PowerPC-32/VMX and PowerPC-64/VMX mpn_popcount.
-dnl Copyright 2006 Free Software Foundation, Inc.
+dnl Copyright 2006, 2010 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
include(`../config.m4')
C cycles/limb
-C 7400,7410 (G4): 2.75
-C 744x,745x (G4+): 2.25
-C 970 (G5): 5.3
-
-C STATUS
-C * Works for all sizes and alignments.
+C 7400,7410 (G4): ?
+C 744x,745x (G4+): 1.125
+C 970 (G5): 2.25
C TODO
-C * Tune the awkward huge n outer loop code.
+C * Rewrite the awkward huge n outer loop code.
C * Two lvx, two vperm, and two vxor could make us a similar hamdist.
-C * For the 970, a combined VMX+intop approach might be best.
C * Compress cnsts table in 64-bit mode, only half the values are needed.
define(`GMP_LIMB_BYTES', eval(GMP_LIMB_BITS/8))
define(`OPERATION_popcount')
-ifdef(`OPERATION_popcount',`
- define(`func',`mpn_popcount')
- define(`up', `r3')
- define(`n', `r4')
- define(`HAM', `dnl')
-')
-ifdef(`OPERATION_hamdist',`
- define(`func',`mpn_hamdist')
- define(`up', `r3')
- define(`vp', `r4')
- define(`n', `r5')
- define(`HAM', `$1')
-')
+define(`ap', `r3')
+define(`n', `r4')
-define(`x01010101',`v2')
-define(`x00110011',`v7')
-define(`x00001111',`v10')
-define(`cnt1',`v11')
-define(`cnt2',`v12')
-define(`cnt4',`v13')
+define(`rtab', `v10')
+define(`cnt4', `v11')
ifelse(GMP_LIMB_BITS,32,`
define(`LIMB32',` $1')
C Load various constants into vector registers
LEAL( r11, cnsts)
li r12, 16
- vspltisb cnt1, 1 C 0x0101...01 used as shift count
- vspltisb cnt2, 2 C 0x0202...02 used as shift count
vspltisb cnt4, 4 C 0x0404...04 used as shift count
- lvx x01010101, 0, r11 C 0x3333...33
- lvx x00110011, r12, r11 C 0x5555...55
- vspltisb x00001111, 15 C 0x0f0f...0f
+
+ li r7, 160
+ lvx rtab, 0, r11
LIMB64(`lis r0, LIMBS_CHUNK_THRES ')
LIMB64(`cmpd cr7, n, r0 ')
- lvx v0, 0, up
- addi r7, r11, 96
- rlwinm r6, up, 2,26,29
+ lvx v0, 0, ap
+ addi r7, r11, 80
+ rlwinm r6, ap, 2,26,29
lvx v8, r7, r6
vand v0, v0, v8
-LIMB32(`rlwinm r8, up, 30,30,31 ')
-LIMB64(`rlwinm r8, up, 29,31,31 ')
- add n, n, r8 C compensate n for rounded down `up'
+LIMB32(`rlwinm r8, ap, 30,30,31 ')
+LIMB64(`rlwinm r8, ap, 29,31,31 ')
+ add n, n, r8 C compensate n for rounded down `ap'
vxor v1, v1, v1
li r8, 0 C grand total count
- vxor v3, v3, v3 C zero total count
+ vxor v12, v12, v12 C zero total count
+ vxor v13, v13, v13 C zero total count
addic. n, n, -LIMBS_PER_VR
ble L(sum)
LIMB64(`ble cr7, L(small) ')
LIMB64(`addis r9, n, -LIMBS_PER_CHUNK ') C remaining n
LIMB64(`lis n, LIMBS_PER_CHUNK ')
-L(small):
-
+ ALIGN(16)
+L(small):
LIMB32(`srwi r7, n, 3 ') C loop count corresponding to n
LIMB64(`srdi r7, n, 2 ') C loop count corresponding to n
addi r7, r7, 1
mtctr r7 C copy n to count register
b L(ent)
- ALIGN(8)
-L(top): lvx v0, 0, up
- li r7, 128 C prefetch distance
-L(ent): lvx v1, r12, up
- addi up, up, 32
- vsr v4, v0, cnt1
- vsr v5, v1, cnt1
- dcbt up, r7 C prefetch
- vand v8, v4, x01010101
- vand v9, v5, x01010101
- vsububm v0, v0, v8 C 64 2-bit accumulators (0..2)
- vsububm v1, v1, v9 C 64 2-bit accumulators (0..2)
- vsr v4, v0, cnt2
- vsr v5, v1, cnt2
- vand v8, v0, x00110011
- vand v9, v1, x00110011
- vand v4, v4, x00110011
- vand v5, v5, x00110011
- vaddubm v0, v4, v8 C 32 4-bit accumulators (0..4)
- vaddubm v1, v5, v9 C 32 4-bit accumulators (0..4)
- vaddubm v8, v0, v1 C 32 4-bit accumulators (0..8)
- vsr v9, v8, cnt4
- vand v6, v8, x00001111
- vand v9, v9, x00001111
- vaddubm v6, v9, v6 C 16 8-bit accumulators (0..16)
- vsum4ubs v3, v6, v3 C sum 4 x 4 bytes into 4 32-bit fields
+ ALIGN(16)
+L(top):
+ lvx v0, 0, ap
+L(ent): lvx v1, r12, ap
+ addi ap, ap, 32
+ vsrb v8, v0, cnt4
+ vsrb v9, v1, cnt4
+ vperm v2, rtab, rtab, v0
+ vperm v3, rtab, rtab, v8
+ vperm v4, rtab, rtab, v1
+ vperm v5, rtab, rtab, v9
+ vaddubm v6, v2, v3
+ vaddubm v7, v4, v5
+ vsum4ubs v12, v6, v12
+ vsum4ubs v13, v7, v13
bdnz L(top)
andi. n, n, eval(LIMBS_PER_2VR-1)
beq L(rt)
- lvx v0, 0, up
+ lvx v0, 0, ap
vxor v1, v1, v1
cmpwi n, LIMBS_PER_VR
ble L(sum)
L(lsum):
vor v1, v0, v0
- lvx v0, r12, up
+ lvx v0, r12, ap
L(sum):
LIMB32(`rlwinm r6, n, 4,26,27 ')
LIMB64(`rlwinm r6, n, 5,26,26 ')
- addi r7, r11, 32
+ addi r7, r11, 16
lvx v8, r7, r6
vand v0, v0, v8
-
- vsr v4, v0, cnt1
- vsr v5, v1, cnt1
- vand v8, v4, x01010101
- vand v9, v5, x01010101
- vsububm v0, v0, v8 C 64 2-bit accumulators (0..2)
- vsububm v1, v1, v9 C 64 2-bit accumulators (0..2)
- vsr v4, v0, cnt2
- vsr v5, v1, cnt2
- vand v8, v0, x00110011
- vand v9, v1, x00110011
- vand v4, v4, x00110011
- vand v5, v5, x00110011
- vaddubm v0, v4, v8 C 32 4-bit accumulators (0..4)
- vaddubm v1, v5, v9 C 32 4-bit accumulators (0..4)
- vaddubm v8, v0, v1 C 32 4-bit accumulators (0..8)
- vsr v9, v8, cnt4
- vand v6, v8, x00001111
- vand v9, v9, x00001111
- vaddubm v6, v9, v6 C 16 8-bit accumulators (0..16)
- vsum4ubs v3, v6, v3 C sum 4 x 4 bytes into 4 32-bit fields
-
-L(rt):
+ vsrb v8, v0, cnt4
+ vsrb v9, v1, cnt4
+ vperm v2, rtab, rtab, v0
+ vperm v3, rtab, rtab, v8
+ vperm v4, rtab, rtab, v1
+ vperm v5, rtab, rtab, v9
+ vaddubm v6, v2, v3
+ vaddubm v7, v4, v5
+ vsum4ubs v12, v6, v12
+ vsum4ubs v13, v7, v13
+
+ ALIGN(16)
+L(rt): vadduwm v3, v12, v13
li r7, -16 C FIXME: does all ppc32 and ppc64 ABIs
stvx v3, r7, r1 C FIXME: ...support storing below sp?
C Handle outer loop for huge n. We inherit cr7 and r0 from above.
LIMB64(`ble cr7, L(ret)
- vxor v3, v3, v3 C zero total count
+ vxor v12, v12, v12 C zero total count
+ vxor v13, v13, v13 C zero total count
mr n, r9
cmpd cr7, n, r0
ble cr7, L(2)
b L(top)
')
+ ALIGN(16)
L(ret): mr r3, r8
mtspr 256, r10
blr
EPILOGUE()
DEF_OBJECT(cnsts,16)
- .byte 0x55,0x55,0x55,0x55,0x55,0x55,0x55,0x55
- .byte 0x55,0x55,0x55,0x55,0x55,0x55,0x55,0x55
-
- .byte 0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33
- .byte 0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33
+C Counts for vperm
+ .byte 0x00,0x01,0x01,0x02,0x01,0x02,0x02,0x03
+ .byte 0x01,0x02,0x02,0x03,0x02,0x03,0x03,0x04
C Masks for high end of number
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
--- /dev/null
+dnl S/390-32 mpn_copyd
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+C cycles/limb
+C z900 1.65
+C z990 1.125
+C z9 ?
+C z10 ?
+C z196 ?
+
+C FIXME:
+C * Avoid saving/restoring callee-saves registers for n < 3. This could be
+C done by setting rp=r1, up=r2, i=r0 and r3,r4,r5 for clock regs.
+C We could then use r3...r10 in main loop.
+
+C INPUT PARAMETERS
+define(`rp_param', `%r2')
+define(`up_param', `%r3')
+define(`n', `%r4')
+
+define(`rp', `%r8')
+define(`up', `%r9')
+
+ASM_START()
+C Copy n limbs from up_param to rp_param, working from the highest limb down
+C to the lowest.  The n mod 8 topmost limbs are moved by one of the L(bK)
+C entry blocks; L(top) then moves 8 limbs (32 bytes) per iteration.
+PROLOGUE(mpn_copyd)
+ stm %r6, %r11, 24(%r15) C save r6-r11, all clobbered below
+
+ lr %r1, n
+ sll %r1, 2 C r1 = 4n (byte count)
+ la %r10, 8(n) C r10 = n + 8
+ ahi %r1, -32 C r1 = 4n - 32, offset of the top 8 limbs
+ srl %r10, 3 C r10 = (n + 8) / 8, brct counter
+ lhi %r11, -32 C per-iteration pointer step for L(top)
+
+ la rp, 0(%r1,rp_param) C FIXME use lay on z990 and later
+ la up, 0(%r1,up_param) C FIXME use lay on z990 and later
+
+ lhi %r7, 7
+ nr %r7, n C n mod 8
+ chi %r7, 2 C dispatch on n mod 8
+ jh L(b34567)
+ chi %r7, 1
+ je L(b1)
+ jh L(b2)
+
+L(b0): brct %r10, L(top) C no odd limbs, enter main loop if any work
+ j L(end)
+
+L(b1): l %r0, 28(up) C copy the top limb
+ ahi up, -4
+ st %r0, 28(rp)
+ ahi rp, -4
+ brct %r10, L(top)
+ j L(end)
+
+L(b2): lm %r0, %r1, 24(up) C copy the top 2 limbs
+ ahi up, -8
+ stm %r0, %r1, 24(rp)
+ ahi rp, -8
+ brct %r10, L(top)
+ j L(end)
+
+L(b34567):
+ chi %r7, 4
+ jl L(b3)
+ je L(b4)
+ chi %r7, 6
+ je L(b6)
+ jh L(b7)
+
+L(b5): lm %r0, %r4, 12(up) C copy the top 5 limbs (n in r4 is dead by now)
+ ahi up, -20
+ stm %r0, %r4, 12(rp)
+ ahi rp, -20
+ brct %r10, L(top)
+ j L(end)
+
+L(b3): lm %r0, %r2, 20(up) C copy the top 3 limbs
+ ahi up, -12
+ stm %r0, %r2, 20(rp)
+ ahi rp, -12
+ brct %r10, L(top)
+ j L(end)
+
+L(b4): lm %r0, %r3, 16(up) C copy the top 4 limbs
+ ahi up, -16
+ stm %r0, %r3, 16(rp)
+ ahi rp, -16
+ brct %r10, L(top)
+ j L(end)
+
+L(b6): lm %r0, %r5, 8(up) C copy the top 6 limbs
+ ahi up, -24
+ stm %r0, %r5, 8(rp)
+ ahi rp, -24
+ brct %r10, L(top)
+ j L(end)
+
+L(b7): lm %r0, %r6, 4(up) C copy the top 7 limbs
+ ahi up, -28
+ stm %r0, %r6, 4(rp)
+ ahi rp, -28
+ brct %r10, L(top)
+ j L(end)
+
+L(top): lm %r0, %r7, 0(up) C main loop: load 8 limbs
+ la up, 0(%r11,up) C up -= 32
+ stm %r0, %r7, 0(rp) C store 8 limbs
+ la rp, 0(%r11,rp) C rp -= 32
+ brct %r10, L(top)
+
+L(end): lm %r6, %r11, 24(%r15) C restore r6-r11
+ br %r14
+EPILOGUE()
--- /dev/null
+dnl S/390-32 mpn_copyi
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+C cycles/limb
+C z900 0.75
+C z990 0.375
+C z9 ?
+C z10 ?
+C z196 ?
+
+C NOTE
+C * This is based on GNU libc memcpy which was written by Martin Schwidefsky.
+
+C INPUT PARAMETERS
+define(`rp', `%r2')
+define(`up', `%r3')
+define(`n', `%r4')
+
+ASM_START()
+C Copy n limbs from up to rp in increasing address order, using mvc on
+C blocks of up to 256 bytes.
+PROLOGUE(mpn_copyi)
+ ltr %r4, %r4 C test n
+ sll %r4, 2 C r4 = 4n (byte count)
+ je L(rtn) C n = 0: nothing to do (tests the cc set by ltr)
+ ahi %r4, -1 C r4 = 4n - 1
+ lr %r5, %r4
+ srl %r5, 8 C r5 = (4n - 1) / 256, count of full 256-byte blocks
+ ltr %r5, %r5 C < 256 bytes to copy?
+ je L(1)
+
+L(top): mvc 0(256, rp), 0(up) C copy one 256-byte block
+ la rp, 256(rp)
+ la up, 256(up)
+ brct %r5, L(top)
+
+L(1): bras %r5, L(2) C make r5 point to mvc insn
+ mvc 0(1, rp), 0(up) C template only, run solely through ex below
+L(2): ex %r4, 0(%r5) C execute mvc with length ((4n-1) mod 256)+1 bytes
+L(rtn): br %r14
+EPILOGUE()
--- /dev/null
+dnl S/390-32 mpn_addmul_1 for systems with MLR instruction
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C z900 18.5
+C z990 10
+C z9 ?
+C z10 ?
+C z196 ?
+
+C INPUT PARAMETERS
+define(`rp', `%r2')
+define(`up', `%r3')
+define(`n', `%r4')
+define(`v0', `%r5')
+
+define(`z', `%r9')
+
+ASM_START()
+C Multiply the n limbs at up by the limb v0, add the products into the n
+C limbs at rp, and return the carry-out limb in r2.
+PROLOGUE(mpn_addmul_1)
+ stm %r9, %r12, 36(%r15) C save r9-r12
+ lhi %r12, 0 C zero index register
+ ahi %r12, 0 C clear carry flag
+ lhi %r11, 0 C clear carry limb
+ lhi z, 0 C constant zero operand for alcr below
+
+L(top): l %r1, 0(%r12,up) C r1 = up[i]
+ l %r10, 0(%r12,rp) C r10 = rp[i]
+ mlr %r0, v0 C r0:r1 = up[i] * v0
+ alcr %r1, %r10 C low += rp[i] + carry flag left by previous pass
+ alcr %r0, z C high += carry flag
+ alr %r1, %r11 C low += carry limb, carry flag kept for next pass
+ lr %r11, %r0 C high part becomes the next carry limb
+ st %r1, 0(%r12,rp) C rp[i] = low
+ la %r12, 4(%r12) C advance index by one limb
+ brct n, L(top)
+
+ lhi %r2, 0
+ alcr %r2, %r11 C return value = carry limb + pending carry flag
+
+ lm %r9, %r12, 36(%r15) C restore r9-r12
+ br %r14
+EPILOGUE()
--- /dev/null
+dnl S/390-32 mpn_add_n and mpn_sub_n.
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C z900 ?
+C z990 2.75-3 (fast for even n, slow for odd n)
+C z9 ?
+C z10 ?
+C z196 ?
+
+C TODO
+C * Optimise for small n
+C * Use r0 and save/restore one less register
+C * Using logops_n's v1 inner loop operand order makes the loop about 20%
+C faster, at the expense of highly alignment-dependent performance.
+
+C INPUT PARAMETERS
+define(`rp', `%r2')
+define(`up', `%r3')
+define(`vp', `%r4')
+define(`n', `%r5')
+
+ifdef(`OPERATION_add_n', `
+ define(ADSB, al)
+ define(ADSBCR, alcr)
+ define(ADSBC, alc)
+ define(RETVAL,`dnl
+ lhi %r2, 0
+ alcr %r2, %r2')
+ define(func, mpn_add_n)
+ define(func_nc, mpn_add_nc)')
+ifdef(`OPERATION_sub_n', `
+ define(ADSB, sl)
+ define(ADSBCR, slbr)
+ define(ADSBC, slb)
+ define(RETVAL,`dnl
+ slbr %r2, %r2
+ lcr %r2, %r2')
+ define(func, mpn_sub_n)
+ define(func_nc, mpn_sub_nc)')
+
+MULFUNC_PROLOGUE(mpn_add_n mpn_sub_n)
+
+ASM_START()
+C Add (mpn_add_n) or subtract (mpn_sub_n) the n-limb operands at up and vp,
+C store the n-limb result at rp and return the carry or borrow as formed by
+C RETVAL.  An entry block handles the n mod 4 lowest limbs (or the first
+C half of a 4-limb group when n = 0 mod 4), then L(top) handles 4 limbs per
+C iteration.
+PROLOGUE(func)
+ stm %r6, %r8, 24(%r15) C save r6-r8
+
+ ahi n, 3 C n += 3
+ lhi %r7, 3
+ lr %r1, n
+ srl %r1, 2 C r1 = (n + 3) / 4 for the original n, brct counter
+ nr %r7, n C (n + 3) mod 4 for the original n
+ je L(b1) C 0 means n = 1 (mod 4)
+ chi %r7, 2
+ jl L(b2) C 1 means n = 2 (mod 4)
+ jne L(b0) C 3 means n = 0 (mod 4); 2 falls through to L(b3)
+
+L(b3): lm %r5, %r7, 0(up) C n = 3 (mod 4): 3 limbs, n in r5 is dead by now
+ la up, 12(up)
+ ADSB %r5, 0(vp)
+ ADSBC %r6, 4(vp)
+ ADSBC %r7, 8(vp)
+ la vp, 12(vp)
+ stm %r5, %r7, 0(rp)
+ la rp, 12(rp)
+ brct %r1, L(top)
+ j L(end)
+
+L(b0): lm %r5, %r8, 0(up) C These redundant insns are no mistake,
+ la up, 16(up) C they are needed to make the main loop
+ ADSB %r5, 0(vp) C run fast for n = 0 (mod 4).
+ ADSBC %r6, 4(vp)
+ j L(m0) C join the main loop after its first two limbs
+
+L(b1): l %r5, 0(up) C n = 1 (mod 4): 1 limb
+ la up, 4(up)
+ ADSB %r5, 0(vp)
+ la vp, 4(vp)
+ st %r5, 0(rp)
+ la rp, 4(rp)
+ brct %r1, L(top)
+ j L(end)
+
+L(b2): lm %r5, %r6, 0(up) C n = 2 (mod 4): 2 limbs
+ la up, 8(up)
+ ADSB %r5, 0(vp)
+ ADSBC %r6, 4(vp)
+ la vp, 8(vp)
+ stm %r5, %r6, 0(rp)
+ la rp, 8(rp)
+ brct %r1, L(top)
+ j L(end)
+
+L(top): lm %r5, %r8, 0(up) C main loop: 4 limbs, carry chained from before
+ la up, 16(up)
+ ADSBC %r5, 0(vp)
+ ADSBC %r6, 4(vp)
+L(m0): ADSBC %r7, 8(vp)
+ ADSBC %r8, 12(vp)
+ la vp, 16(vp)
+ stm %r5, %r8, 0(rp)
+ la rp, 16(rp)
+ brct %r1, L(top)
+
+L(end): RETVAL
+ lm %r6, %r8, 24(%r15) C restore r6-r8
+ br %r14
+EPILOGUE()
--- /dev/null
+dnl S/390-32 mpn_addlsh1_n
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C z900 9.25
+C z990 5
+C z9 ?
+C z10 ?
+C z196 ?
+
+C TODO
+C * Optimise for small n
+C * Compute RETVAL for sublsh1_n less stupidly
+
+C INPUT PARAMETERS
+define(`rp', `%r2')
+define(`up', `%r3')
+define(`vp', `%r4')
+define(`n', `%r5')
+
+ifdef(`OPERATION_addlsh1_n',`
+ define(ADDSUBC, alr)
+ define(ADDSUBE, alcr)
+ define(INITCY, `lhi %r13, -1')
+ define(RETVAL, `alr %r1, %r13
+ lhi %r2, 2
+ alr %r2, %r1')
+ define(func, mpn_addlsh1_n)
+')
+ifdef(`OPERATION_sublsh1_n',`
+ define(ADDSUBC, slr)
+ define(ADDSUBE, slbr)
+ define(INITCY, `lhi %r13, 0')
+ define(RETVAL, `slr %r1, %r13
+ lhi %r2, 1
+ alr %r2, %r1')
+ define(func, mpn_sublsh1_n)
+')
+
+MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_sublsh1_n)
+
+ASM_START()
+C Compute up[] + 2*vp[] (mpn_addlsh1_n) or up[] - 2*vp[] (mpn_sublsh1_n) over
+C n limbs and store the n low result limbs at rp; the return value is formed
+C by RETVAL.  Two carry chains are interleaved: r1 keeps the carry of the
+C doubling of vp[], r13 the carry of the add/subtract.  Each is parked as 0
+C or -1 by slbr and turned back into a condition code by ahi ..., 1.
+PROLOGUE(func)
+ stm %r6, %r13, 24(%r15) C save r6-r13
+
+ la %r0, 3(n)
+ lhi %r7, 3
+ srl %r0, 2 C r0 = (n + 3) / 4, brct counter
+ nr %r7, n C n mod 4
+ je L(b0)
+ chi %r7, 2
+ jl L(b1)
+ je L(b2)
+
+L(b3): lm %r5, %r7, 0(up) C n = 3 (mod 4): 3 limbs of up
+ la up, 12(up)
+ lm %r9, %r11, 0(vp) C and 3 limbs of vp
+ la vp, 12(vp)
+
+ alr %r9, %r9 C double the vp limbs
+ alcr %r10, %r10
+ alcr %r11, %r11
+ slbr %r1, %r1 C save carry of the doubling
+
+ ADDSUBC %r5, %r9 C combine with the up limbs
+ ADDSUBE %r6, %r10
+ ADDSUBE %r7, %r11
+ slbr %r13, %r13 C save carry of the add/subtract
+
+ stm %r5, %r7, 0(rp)
+ la rp, 12(rp)
+ brct %r0, L(top)
+ j L(end)
+
+L(b0): lhi %r1, -1 C n = 0 (mod 4): start with no doubling carry
+ INITCY
+ j L(top)
+
+L(b1): l %r5, 0(up) C n = 1 (mod 4): 1 limb of up
+ la up, 4(up)
+ l %r9, 0(vp) C and 1 limb of vp
+ la vp, 4(vp)
+
+ alr %r9, %r9 C double the vp limb
+ slbr %r1, %r1 C save carry of the doubling
+ ADDSUBC %r5, %r9
+ slbr %r13, %r13 C save carry of the add/subtract
+
+ st %r5, 0(rp)
+ la rp, 4(rp)
+ brct %r0, L(top)
+ j L(end)
+
+L(b2): lm %r5, %r6, 0(up) C n = 2 (mod 4): 2 limbs of up
+ la up, 8(up)
+ lm %r9, %r10, 0(vp) C and 2 limbs of vp
+ la vp, 8(vp)
+
+ alr %r9, %r9 C double the vp limbs
+ alcr %r10, %r10
+ slbr %r1, %r1 C save carry of the doubling
+
+ ADDSUBC %r5, %r9
+ ADDSUBE %r6, %r10
+ slbr %r13, %r13 C save carry of the add/subtract
+
+ stm %r5, %r6, 0(rp)
+ la rp, 8(rp)
+ brct %r0, L(top)
+ j L(end)
+
+L(top): lm %r9, %r12, 0(vp) C main loop: 4 limbs of vp
+ la vp, 16(vp)
+
+ ahi %r1, 1 C restore carry
+
+ alcr %r9, %r9 C double the vp limbs, chained with saved carry
+ alcr %r10, %r10
+ alcr %r11, %r11
+ alcr %r12, %r12
+
+ slbr %r1, %r1 C save carry
+
+ lm %r5, %r8, 0(up) C 4 limbs of up
+ la up, 16(up)
+
+ ahi %r13, 1 C restore carry
+
+ ADDSUBE %r5, %r9
+ ADDSUBE %r6, %r10
+ ADDSUBE %r7, %r11
+ ADDSUBE %r8, %r12
+
+ slbr %r13, %r13 C save carry of the add/subtract
+
+ stm %r5, %r8, 0(rp)
+ la rp, 16(rp)
+ brct %r0, L(top)
+
+L(end):
+ RETVAL
+ lm %r6, %r13, 24(%r15) C restore r6-r13
+ br %r14
+EPILOGUE()
--- /dev/null
+dnl S/390-32 mpn_bdiv_dbm1c for systems with MLR instruction.
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C z900 14
+C z990 10
+C z9 ?
+C z10 ?
+C z196 ?
+
+C INPUT PARAMETERS
+define(`qp', `%r2')
+define(`up', `%r3')
+define(`n', `%r4')
+define(`bd', `%r5')
+define(`cy', `%r6')
+
+ASM_START()
+ TEXT
+ ALIGN(16)
+C For each of the n limbs at up: multiply it by bd, subtract the low product
+C limb from the running value cy, store cy at qp, then subtract the high
+C product limb together with the borrow.  The final cy is returned in r2.
+PROLOGUE(mpn_bdiv_dbm1c)
+ stm %r6, %r7, 24(%r15) C save r6 (cy) and r7
+ lhi %r7, 0 C zero index register
+
+L(top): l %r1, 0(%r7,up) C r1 = up[i]
+ mlr %r0, bd C r0:r1 = up[i] * bd
+ slr %r6, %r1 C cy -= low product limb
+ st %r6, 0(%r7,qp) C qp[i] = cy
+ slbr %r6, %r0 C cy -= high product limb + borrow
+ la %r7, 4(%r7) C advance index by one limb
+ brct n, L(top)
+
+ lr %r2, %r6 C return the final cy
+ lm %r6, %r7, 24(%r15) C restore r6-r7
+ br %r14
+EPILOGUE()
--- /dev/null
+/* S/390-32 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007,
+2008, 2009, 2010, 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#define GMP_LIMB_BITS 32
+#define BYTES_PER_MP_LIMB 4
+
+/* 1200 MHz IBM z990 running in 32-bit mode */
+
+#define DIVREM_1_NORM_THRESHOLD 0 /* always */
+#define DIVREM_1_UNNORM_THRESHOLD 4
+#define MOD_1_1P_METHOD 2
+#define MOD_1_NORM_THRESHOLD 0 /* always */
+#define MOD_1_UNNORM_THRESHOLD 3
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 17
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 8
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 0 /* never mpn_mod_1_1p */
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD 34
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 42
+#define USE_PREINV_DIVREM_1 1
+#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
+#define DIVEXACT_1_THRESHOLD 0 /* always */
+#define BMOD_1_TO_MOD_1_THRESHOLD 30
+
+#define MUL_TOOM22_THRESHOLD 16
+#define MUL_TOOM33_THRESHOLD 57
+#define MUL_TOOM44_THRESHOLD 147
+#define MUL_TOOM6H_THRESHOLD 226
+#define MUL_TOOM8H_THRESHOLD 333
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD 65
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 100
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 97
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD 102
+
+#define SQR_BASECASE_THRESHOLD 0 /* always (native) */
+#define SQR_TOOM2_THRESHOLD 26
+#define SQR_TOOM3_THRESHOLD 81
+#define SQR_TOOM4_THRESHOLD 154
+#define SQR_TOOM6_THRESHOLD 318
+#define SQR_TOOM8_THRESHOLD 478
+
+#define MULMID_TOOM42_THRESHOLD 38
+
+#define MULMOD_BNM1_THRESHOLD 13
+#define SQRMOD_BNM1_THRESHOLD 15
+
+#define POWM_SEC_TABLE 4,23,262,892,2500
+
+#define MUL_FFT_MODF_THRESHOLD 336 /* k = 5 */
+#define MUL_FFT_TABLE3 \
+ { { 336, 5}, { 19, 6}, { 11, 5}, { 23, 6}, \
+ { 17, 7}, { 9, 6}, { 21, 7}, { 11, 6}, \
+ { 24, 7}, { 13, 6}, { 27, 7}, { 15, 6}, \
+ { 31, 7}, { 27, 8}, { 15, 7}, { 33, 8}, \
+ { 19, 7}, { 39, 8}, { 23, 7}, { 47, 8}, \
+ { 27, 9}, { 15, 8}, { 31, 7}, { 63, 8}, \
+ { 39, 9}, { 23, 8}, { 51,10}, { 15, 9}, \
+ { 31, 8}, { 63, 9}, { 39, 8}, { 79, 9}, \
+ { 47,10}, { 31, 9}, { 79,10}, { 47,11}, \
+ { 2048,12}, { 4096,13}, { 8192,14}, { 16384,15}, \
+ { 32768,16} }
+#define MUL_FFT_TABLE3_SIZE 41
+#define MUL_FFT_THRESHOLD 2752
+
+#define SQR_FFT_MODF_THRESHOLD 308 /* k = 5 */
+#define SQR_FFT_TABLE3 \
+ { { 308, 5}, { 20, 6}, { 11, 5}, { 23, 6}, \
+ { 21, 7}, { 11, 6}, { 24, 7}, { 15, 6}, \
+ { 31, 7}, { 21, 8}, { 11, 7}, { 27, 8}, \
+ { 15, 7}, { 33, 8}, { 19, 7}, { 39, 8}, \
+ { 23, 7}, { 47, 8}, { 27, 9}, { 15, 8}, \
+ { 39, 9}, { 23, 8}, { 47,10}, { 15, 9}, \
+ { 31, 8}, { 63, 9}, { 47,10}, { 31, 9}, \
+ { 79,10}, { 47,11}, { 2048,12}, { 4096,13}, \
+ { 8192,14}, { 16384,15}, { 32768,16} }
+#define SQR_FFT_TABLE3_SIZE 35
+#define SQR_FFT_THRESHOLD 2368
+
+#define MULLO_BASECASE_THRESHOLD 5
+#define MULLO_DC_THRESHOLD 49
+#define MULLO_MUL_N_THRESHOLD 5397
+
+#define DC_DIV_QR_THRESHOLD 42
+#define DC_DIVAPPR_Q_THRESHOLD 146
+#define DC_BDIV_QR_THRESHOLD 51
+#define DC_BDIV_Q_THRESHOLD 124
+
+#define INV_MULMOD_BNM1_THRESHOLD 46
+#define INV_NEWTON_THRESHOLD 179
+#define INV_APPR_THRESHOLD 153
+
+#define BINV_NEWTON_THRESHOLD 214
+#define REDC_1_TO_REDC_N_THRESHOLD 55
+
+#define MU_DIV_QR_THRESHOLD 1078
+#define MU_DIVAPPR_Q_THRESHOLD 1078
+#define MUPI_DIV_QR_THRESHOLD 74
+#define MU_BDIV_QR_THRESHOLD 872
+#define MU_BDIV_Q_THRESHOLD 1078
+
+#define MATRIX22_STRASSEN_THRESHOLD 14
+#define HGCD_THRESHOLD 90
+#define HGCD_APPR_THRESHOLD 111
+#define HGCD_REDUCE_THRESHOLD 1962
+#define GCD_DC_THRESHOLD 225
+#define GCDEXT_DC_THRESHOLD 217
+#define JACOBI_BASE_METHOD 2
+
+#define GET_STR_DC_THRESHOLD 13
+#define GET_STR_PRECOMPUTE_THRESHOLD 27
+#define SET_STR_DC_THRESHOLD 274
+#define SET_STR_PRECOMPUTE_THRESHOLD 824
--- /dev/null
+dnl S/390-32 mpn_mul_1 for systems with MLR instruction
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C z900 14
+C z990 9
+C z9 ?
+C z10 ?
+C z196 ?
+
+C INPUT PARAMETERS
+define(`rp', `%r2')
+define(`up', `%r3')
+define(`n', `%r4')
+define(`v0', `%r5')
+
+ASM_START()
+C Multiply the n limbs at up by the limb v0, store the n low product limbs
+C at rp, and return the carry-out limb in r2.
+PROLOGUE(mpn_mul_1)
+ stm %r11, %r12, 44(%r15) C save r11-r12
+ lhi %r12, 0 C zero index register
+ ahi %r12, 0 C clear carry flag
+ lhi %r11, 0 C clear carry limb
+
+L(top): l %r1, 0(%r12,up) C r1 = up[i]
+ mlr %r0, v0 C r0:r1 = up[i] * v0
+ alcr %r1, %r11 C low += carry limb + carry flag of previous pass
+ lr %r11, %r0 C copy high part to carry limb
+ st %r1, 0(%r12,rp) C rp[i] = low
+ la %r12, 4(%r12) C advance index by one limb
+ brct n, L(top)
+
+ lhi %r2, 0
+ alcr %r2, %r11 C return value = carry limb + pending carry flag
+
+ lm %r11, %r12, 44(%r15) C restore r11-r12
+ br %r14
+EPILOGUE()
--- /dev/null
+dnl S/390-32/esame mpn_mul_basecase.
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C z900 ?
+C z990 ?
+C z9 ?
+C z10 ?
+C z196 ?
+
+C TODO
+C * Perhaps add special case for un <= 2.
+C * Replace loops by faster code. The mul_1 and addmul_1 loops could be sped
+C up by about 10%.
+
+C INPUT PARAMETERS
+define(`rp', `%r2')
+define(`up', `%r3')
+define(`un', `%r4')
+define(`vp', `%r5')
+define(`vn', `%r6')
+
+define(`zero', `%r8')
+
+ASM_START()
+C Multiply the un limbs at up by the vn limbs at vp and store the un + vn
+C limb product at rp.  The first limb of vp is handled by a mul_1 style loop,
+C every further limb of vp by an addmul_1 style loop.
+PROLOGUE(mpn_mul_basecase)
+ chi un, 2
+ jhe L(ge2)
+
+C un = vn = 1
+ l %r1, 0(vp)
+ ml %r0, 0(up) C r0:r1 = vp[0] * up[0]
+ st %r1, 0(rp) C low limb
+ st %r0, 4(rp) C high limb
+ br %r14
+
+L(ge2): C jne L(gen)
+
+
+L(gen):
+C mul_1 =======================================================================
+
+ stm %r6, %r12, 24(%r15) C save r6-r12
+ lhi zero, 0
+ ahi un, -1 C un - 1 limbs remain after the first one
+
+ l %r7, 0(vp) C r7 = vp[0]
+ l %r11, 0(up)
+ lhi %r12, 4 C init index register
+ mlr %r10, %r7 C r10:r11 = up[0] * vp[0]
+ lr %r9, un C inner loop counter
+ st %r11, 0(rp)
+ cr %r15, %r15 C clear carry flag
+
+L(tm): l %r1, 0(%r12,up)
+ mlr %r0, %r7 C r0:r1 = up[i] * vp[0]
+ alcr %r1, %r10
+ lr %r10, %r0 C copy high part to carry limb
+ st %r1, 0(%r12,rp)
+ la %r12, 4(%r12)
+ brct %r9, L(tm)
+
+ alcr %r0, zero C fold pending carry flag into top limb
+ st %r0, 0(%r12,rp)
+
+C addmul_1 loop ===============================================================
+
+ ahi vn, -1
+ je L(outer_end) C vn = 1: the mul_1 pass was everything
+L(outer_loop):
+
+ la rp, 4(rp) C rp += 1
+ la vp, 4(vp) C vp += 1
+ l %r7, 0(vp) C r7 = next limb of vp
+ l %r11, 0(up)
+ lhi %r12, 4 C init index register
+ mlr %r10, %r7 C r10:r11 = up[0] * r7
+ lr %r9, un C inner loop counter
+ al %r11, 0(rp) C add lowest product limb into rp[0], sets carry
+ st %r11, 0(rp)
+
+L(tam): l %r1, 0(%r12,up)
+ l %r11, 0(%r12,rp)
+ mlr %r0, %r7 C r0:r1 = up[i] * r7
+ alcr %r1, %r11 C low += rp[i] + carry flag
+ alcr %r0, zero C high += carry flag
+ alr %r1, %r10 C low += carry limb
+ lr %r10, %r0 C high part becomes the next carry limb
+ st %r1, 0(%r12,rp)
+ la %r12, 4(%r12)
+ brct %r9, L(tam)
+
+ alcr %r0, zero C fold pending carry flag into top limb
+ st %r0, 0(%r12,rp)
+
+ brct vn, L(outer_loop)
+L(outer_end):
+
+ lm %r6, %r12, 24(%r15) C restore r6-r12
+ br %r14
+EPILOGUE()
--- /dev/null
+dnl S/390-32 mpn_sqr_basecase.
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C z900 ?
+C z990 23
+C z9 ?
+C z10 ?
+C z196 ?
+
+C TODO
+C * Clean up.
+C * Stop iterating addmul_1 loop at latest for n = 2, implement longer tail.
+C This will ask for basecase handling of n = 3.
+C * Update counters and pointers more straightforwardly, possibly lowering
+C register usage.
+C * Should we use this allocation-free style for more sqr_basecase asm
+C implementations? The only disadvantage is that it requires R != U.
+C * Replace loops by faster code. The mul_1 and addmul_1 loops could be sped
+C up by about 10%. The sqr_diag_addlsh1 loop could probably be sped up even
+C more.
+
+C INPUT PARAMETERS
+define(`rp', `%r2')
+define(`up', `%r3')
+define(`n', `%r4')
+
+define(`zero', `%r8')
+define(`rp_saved', `%r9')
+define(`up_saved', `%r13')
+define(`n_saved', `%r14')
+
+ASM_START()
+C Square the n limbs at up and store the 2n limb result at rp.  n = 1 and
+C n = 2 are handled by straight-line code.  The general case first
+C accumulates the off-diagonal products u[i] * u[j], i < j, with mul_1 and
+C addmul_1 style loops, then L(top) doubles that sum and adds in the squares
+C of the individual limbs.
+PROLOGUE(mpn_sqr_basecase)
+ ahi n, -2 C n -= 2, sets the cc used by both branches below
+ jhe L(ge2)
+
+C n = 1
+ l %r5, 0(up)
+ mlr %r4, %r5 C r4:r5 = u0 * u0
+ st %r5, 0(rp)
+ st %r4, 4(rp)
+ br %r14
+
+L(ge2): jne L(gen) C original n > 2
+
+C n = 2
+ stm %r6, %r8, 24(%r15) C save r6-r8
+ lhi zero, 0
+
+ l %r5, 0(up)
+ mlr %r4, %r5 C u0 * u0
+ l %r1, 4(up)
+ mlr %r0, %r1 C u1 * u1
+ st %r5, 0(rp)
+
+ l %r7, 0(up)
+ ml %r6, 4(up) C u0 * u1
+ alr %r7, %r7 C double the cross product
+ alcr %r6, %r6
+ alcr %r0, zero C carry of the doubling goes into the top limb
+
+ alr %r4, %r7 C add doubled cross product into the middle limbs
+ alcr %r1, %r6
+ alcr %r0, zero
+ st %r4, 4(rp)
+ st %r1, 8(rp)
+ st %r0, 12(rp)
+
+ lm %r6, %r8, 24(%r15) C restore r6-r8
+ br %r14
+
+L(gen):
+C mul_1 =======================================================================
+
+ stm %r6, %r14, 24(%r15) C save r6-r14 (r14 is reused as n_saved)
+ lhi zero, 0
+ lr up_saved, up
+ lr rp_saved, rp
+ lr n_saved, n C n here is the original n minus 2
+
+ l %r6, 0(up) C r6 = u0
+ l %r11, 4(up)
+ lhi %r12, 8 C init index register
+ mlr %r10, %r6 C r10:r11 = u0 * u1
+ lr %r5, n C inner loop counter
+ st %r11, 4(rp)
+ cr %r15, %r15 C clear carry flag
+
+L(tm): l %r1, 0(%r12,up)
+ mlr %r0, %r6
+ alcr %r1, %r10
+ lr %r10, %r0 C copy high part to carry limb
+ st %r1, 0(%r12,rp)
+ la %r12, 4(%r12)
+ brct %r5, L(tm)
+
+ alcr %r0, zero C fold pending carry flag into top limb
+ st %r0, 0(%r12,rp)
+
+C addmul_1 loop ===============================================================
+
+ ahi n, -1
+ je L(outer_end)
+L(outer_loop):
+
+ la rp, 8(rp) C rp += 2
+ la up, 4(up) C up += 1
+ l %r6, 0(up) C r6 = multiplier limb for this pass
+ l %r11, 4(up)
+ lhi %r12, 8 C init index register
+ mlr %r10, %r6
+ lr %r5, n C inner loop counter
+ al %r11, 4(rp) C add lowest product limb into rp[1], sets carry
+ st %r11, 4(rp)
+
+L(tam): l %r1, 0(%r12,up)
+ l %r7, 0(%r12,rp)
+ mlr %r0, %r6
+ alcr %r1, %r7 C low += rp[i] + carry flag
+ alcr %r0, zero C high += carry flag
+ alr %r1, %r10 C low += carry limb
+ lr %r10, %r0 C high part becomes the next carry limb
+ st %r1, 0(%r12,rp)
+ la %r12, 4(%r12)
+ brct %r5, L(tam)
+
+ alcr %r0, zero C fold pending carry flag into top limb
+ st %r0, 0(%r12,rp)
+
+ brct n, L(outer_loop)
+L(outer_end):
+
+ l %r6, 4(up) C last off-diagonal product, done outside the loops
+ l %r1, 8(up)
+ lr %r7, %r0 C Same as: l %r7, 12(,rp)
+ mlr %r0, %r6
+ alr %r1, %r7
+ alcr %r0, zero
+ st %r1, 12(rp)
+ st %r0, 16(rp)
+
+C sqr_diag_addlsh1 ===========================================================
+
+C From here on up and rp name the saved copies of the original pointers.
+define(`up', `up_saved')
+define(`rp', `rp_saved')
+ la n, 1(n_saved) C loop counter = original n minus 1
+
+ l %r1, 0(up)
+ mlr %r0, %r1 C r0:r1 = u0 * u0
+ st %r1, 0(rp)
+C clr %r15, %r15 C clear carry (already clear per above)
+
+L(top): l %r11, 4(up)
+ la up, 4(up)
+ l %r6, 4(rp)
+ l %r7, 8(rp)
+ mlr %r10, %r11 C r10:r11 = square of the next limb
+ alcr %r6, %r6 C double two limbs of the off-diagonal sum
+ alcr %r7, %r7
+ alcr %r10, zero C propagate carry to high product limb
+ alr %r6, %r0 C add high limb of the previous square
+ alcr %r7, %r11 C add low limb of this square
+ stm %r6, %r7, 4(rp)
+ la rp, 8(rp)
+ lr %r0, %r10 C copy carry limb
+ brct n, L(top)
+
+ alcr %r0, zero
+ st %r0, 4(rp) C top limb of the result
+
+ lm %r6, %r14, 24(%r15) C restore r6-r14, including the return address
+ br %r14
+EPILOGUE()
--- /dev/null
+dnl S/390-32 mpn_submul_1 for systems with MLR instruction.
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C z900 20
+C z990 11
+C z9 ?
+C z10 ?
+C z196 ?
+
+C INPUT PARAMETERS
+define(`rp', `%r2')
+define(`up', `%r3')
+define(`n', `%r4')
+define(`v0', `%r5')
+
+ASM_START()
+C Multiply the n limbs at up by the limb v0, subtract the products from the
+C n limbs at rp, and return the borrow-out limb in r2.
+PROLOGUE(mpn_submul_1)
+ stm %r9, %r12, 36(%r15) C save r9-r12
+ lhi %r12, 0 C zero index register
+ slr %r11, %r11 C clear carry limb, leaves no borrow pending
+
+L(top): l %r1, 0(%r12, up) C r1 = up[i]
+ l %r10, 0(%r12, rp) C r10 = rp[i]
+ mlr %r0, v0 C r0:r1 = up[i] * v0
+ slbr %r10, %r1 C rp[i] - low - borrow left by the previous slr
+ slbr %r9, %r9 C r9 = 0 or -1, capturing that borrow
+ slr %r0, %r9 C conditional incr
+ slr %r10, %r11 C subtract carry limb, borrow kept for next pass
+ lr %r11, %r0 C high part becomes the next carry limb
+ st %r10, 0(%r12, rp) C rp[i] = result limb
+ la %r12, 4(%r12) C advance index by one limb
+ brct %r4, L(top)
+
+ lr %r2, %r11
+ slbr %r9, %r9 C capture the borrow of the last slr
+ slr %r2, %r9 C return value = carry limb plus that borrow
+
+ lm %r9, %r12, 36(%r15) C restore r9-r12
+ br %r14
+EPILOGUE()
You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
-
#define GMP_LIMB_BITS 32
#define BYTES_PER_MP_LIMB 4
+/* 770 MHz IBM z900 running in 32-bit mode, using just traditional insns */
+
#define DIVREM_1_NORM_THRESHOLD 0 /* always */
#define DIVREM_1_UNNORM_THRESHOLD 5
+#define MOD_1_1P_METHOD 2
#define MOD_1_NORM_THRESHOLD 0 /* always */
-#define MOD_1_UNNORM_THRESHOLD 3
-#define MOD_1N_TO_MOD_1_1_THRESHOLD 13
-#define MOD_1U_TO_MOD_1_1_THRESHOLD 6
+#define MOD_1_UNNORM_THRESHOLD 5
+#define MOD_1N_TO_MOD_1_1_THRESHOLD MP_SIZE_T_MAX /* never */
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 15
#define MOD_1_1_TO_MOD_1_2_THRESHOLD 0 /* never mpn_mod_1_1p */
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD 35
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 21
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD 30
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD MP_SIZE_T_MAX /* never */
#define USE_PREINV_DIVREM_1 1
-#define DIVREM_2_THRESHOLD 0 /* always */
+#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
#define DIVEXACT_1_THRESHOLD 0 /* always */
-#define BMOD_1_TO_MOD_1_THRESHOLD 30
+#define BMOD_1_TO_MOD_1_THRESHOLD MP_SIZE_T_MAX /* never */
-#define MUL_TOOM22_THRESHOLD 22
-#define MUL_TOOM33_THRESHOLD 89
-#define MUL_TOOM44_THRESHOLD 202
-#define MUL_TOOM6H_THRESHOLD 270
-#define MUL_TOOM8H_THRESHOLD 406
+#define MUL_TOOM22_THRESHOLD 19
+#define MUL_TOOM33_THRESHOLD 114
+#define MUL_TOOM44_THRESHOLD 166
+#define MUL_TOOM6H_THRESHOLD 226
+#define MUL_TOOM8H_THRESHOLD 333
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD 129
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD 139
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD 127
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD 106
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 122
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 105
#define MUL_TOOM42_TO_TOOM63_THRESHOLD 113
-#define SQR_BASECASE_THRESHOLD 8
-#define SQR_TOOM2_THRESHOLD 52
-#define SQR_TOOM3_THRESHOLD 125
-#define SQR_TOOM4_THRESHOLD 226
-#define SQR_TOOM6_THRESHOLD 306
-#define SQR_TOOM8_THRESHOLD 430
+#define SQR_BASECASE_THRESHOLD 7
+#define SQR_TOOM2_THRESHOLD 40
+#define SQR_TOOM3_THRESHOLD 126
+#define SQR_TOOM4_THRESHOLD 192
+#define SQR_TOOM6_THRESHOLD 246
+#define SQR_TOOM8_THRESHOLD 357
+
+#define MULMID_TOOM42_THRESHOLD 28
-#define MULMOD_BNM1_THRESHOLD 13
-#define SQRMOD_BNM1_THRESHOLD 17
+#define MULMOD_BNM1_THRESHOLD 12
+#define SQRMOD_BNM1_THRESHOLD 18
-#define MUL_FFT_MODF_THRESHOLD 308 /* k = 5 */
+#define MUL_FFT_MODF_THRESHOLD 244 /* k = 5 */
#define MUL_FFT_TABLE3 \
- { { 308, 5}, { 17, 6}, { 9, 5}, { 19, 6}, \
- { 17, 7}, { 9, 6}, { 20, 7}, { 11, 6}, \
- { 23, 7}, { 13, 8}, { 7, 7}, { 15, 6}, \
- { 31, 7}, { 19, 8}, { 11, 7}, { 27, 9}, \
- { 7, 8}, { 15, 7}, { 33, 8}, { 19, 7}, \
- { 39, 8}, { 23, 7}, { 47, 8}, { 27, 9}, \
- { 15, 8}, { 39, 9}, { 23, 8}, { 47,10}, \
- { 15, 9}, { 31, 8}, { 63, 9}, { 39, 8}, \
- { 83, 9}, { 47,10}, { 31, 9}, { 79,10}, \
- { 47,11}, { 2048,12}, { 4096,13}, { 8192,14}, \
- { 16384,15}, { 32768,16} }
-#define MUL_FFT_TABLE3_SIZE 42
-#define MUL_FFT_THRESHOLD 3520
-
-#define SQR_FFT_MODF_THRESHOLD 276 /* k = 5 */
+ { { 244, 5}, { 13, 6}, { 7, 5}, { 15, 6}, \
+ { 8, 5}, { 17, 6}, { 13, 7}, { 7, 6}, \
+ { 16, 7}, { 9, 6}, { 19, 7}, { 11, 6}, \
+ { 23, 7}, { 13, 8}, { 7, 7}, { 19, 8}, \
+ { 11, 7}, { 25, 9}, { 7, 8}, { 15, 7}, \
+ { 33, 8}, { 19, 7}, { 39, 8}, { 23, 7}, \
+ { 47, 8}, { 27, 9}, { 15, 8}, { 39, 9}, \
+ { 23, 8}, { 47,10}, { 15, 9}, { 31, 8}, \
+ { 63, 9}, { 39, 8}, { 79, 9}, { 47,10}, \
+ { 31, 9}, { 63, 8}, { 127, 9}, { 71, 8}, \
+ { 143, 9}, { 79,10}, { 47,11}, { 2048,12}, \
+ { 4096,13}, { 8192,14}, { 16384,15}, { 32768,16} }
+#define MUL_FFT_TABLE3_SIZE 48
+#define MUL_FFT_THRESHOLD 2688
+
+#define SQR_FFT_MODF_THRESHOLD 216 /* k = 5 */
#define SQR_FFT_TABLE3 \
- { { 276, 5}, { 19, 6}, { 17, 7}, { 9, 6}, \
- { 20, 7}, { 11, 6}, { 23, 7}, { 19, 8}, \
- { 11, 7}, { 27, 8}, { 15, 7}, { 33, 8}, \
- { 19, 7}, { 39, 8}, { 23, 7}, { 47, 8}, \
- { 27, 9}, { 15, 8}, { 39, 9}, { 23, 8}, \
- { 47,10}, { 15, 9}, { 31, 8}, { 63, 9}, \
- { 39, 8}, { 79, 9}, { 47,10}, { 31, 9}, \
- { 79,10}, { 47,11}, { 2048,12}, { 4096,13}, \
- { 8192,14}, { 16384,15}, { 32768,16} }
-#define SQR_FFT_TABLE3_SIZE 35
-#define SQR_FFT_THRESHOLD 2688
+ { { 216, 5}, { 7, 4}, { 15, 5}, { 17, 6}, \
+ { 13, 7}, { 7, 6}, { 17, 7}, { 9, 6}, \
+ { 20, 7}, { 11, 6}, { 23, 7}, { 13, 8}, \
+ { 7, 7}, { 19, 8}, { 11, 7}, { 25, 9}, \
+ { 7, 8}, { 15, 7}, { 33, 8}, { 19, 7}, \
+ { 39, 8}, { 23, 9}, { 15, 8}, { 39, 9}, \
+ { 23, 8}, { 47,10}, { 15, 9}, { 31, 8}, \
+ { 63, 9}, { 39, 8}, { 79, 9}, { 47,10}, \
+ { 31, 9}, { 63, 8}, { 127, 9}, { 71, 8}, \
+ { 143, 9}, { 79,10}, { 47,11}, { 2048,12}, \
+ { 4096,13}, { 8192,14}, { 16384,15}, { 32768,16} }
+#define SQR_FFT_TABLE3_SIZE 44
+#define SQR_FFT_THRESHOLD 1856
#define MULLO_BASECASE_THRESHOLD 0 /* always */
-#define MULLO_DC_THRESHOLD 54
-#define MULLO_MUL_N_THRESHOLD 6633
-
-#define DC_DIV_QR_THRESHOLD 52
-#define DC_DIVAPPR_Q_THRESHOLD 185
-#define DC_BDIV_QR_THRESHOLD 53
-#define DC_BDIV_Q_THRESHOLD 122
-
-#define INV_MULMOD_BNM1_THRESHOLD 29
-#define INV_NEWTON_THRESHOLD 260
-#define INV_APPR_THRESHOLD 220
-
-#define BINV_NEWTON_THRESHOLD 230
-#define REDC_1_TO_REDC_N_THRESHOLD 56
-
-#define MU_DIV_QR_THRESHOLD 1142
-#define MU_DIVAPPR_Q_THRESHOLD 1234
-#define MUPI_DIV_QR_THRESHOLD 114
-#define MU_BDIV_QR_THRESHOLD 792
-#define MU_BDIV_Q_THRESHOLD 1099
-
-#define MATRIX22_STRASSEN_THRESHOLD 15
-#define HGCD_THRESHOLD 151
-#define GCD_DC_THRESHOLD 599
-#define GCDEXT_DC_THRESHOLD 460
+#define MULLO_DC_THRESHOLD 61
+#define MULLO_MUL_N_THRESHOLD 5240
+
+#define DC_DIV_QR_THRESHOLD 70
+#define DC_DIVAPPR_Q_THRESHOLD 234
+#define DC_BDIV_QR_THRESHOLD 59
+#define DC_BDIV_Q_THRESHOLD 137
+
+#define INV_MULMOD_BNM1_THRESHOLD 36
+#define INV_NEWTON_THRESHOLD 327
+#define INV_APPR_THRESHOLD 268
+
+#define BINV_NEWTON_THRESHOLD 324
+#define REDC_1_TO_REDC_N_THRESHOLD 63
+
+#define MU_DIV_QR_THRESHOLD 1099
+#define MU_DIVAPPR_Q_THRESHOLD 1360
+#define MUPI_DIV_QR_THRESHOLD 138
+#define MU_BDIV_QR_THRESHOLD 889
+#define MU_BDIV_Q_THRESHOLD 1234
+
+#define MATRIX22_STRASSEN_THRESHOLD 18
+#define HGCD_THRESHOLD 167
+#define GCD_DC_THRESHOLD 518
+#define GCDEXT_DC_THRESHOLD 378
#define JACOBI_BASE_METHOD 2
-#define GET_STR_DC_THRESHOLD 15
-#define GET_STR_PRECOMPUTE_THRESHOLD 35
-#define SET_STR_DC_THRESHOLD 915
-#define SET_STR_PRECOMPUTE_THRESHOLD 1670
+#define GET_STR_DC_THRESHOLD 14
+#define GET_STR_PRECOMPUTE_THRESHOLD 25
+#define SET_STR_DC_THRESHOLD 577
+#define SET_STR_PRECOMPUTE_THRESHOLD 1217
--- /dev/null
+dnl S/390-32 logops.
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb variant 1 variant 2 variant 3
+C rp!=up rp=up
+C z900 ? ? ? ?
+C z990 2.5 1 2.75 2.75
+C z9 ? ? ?
+C z10 ? ? ?
+C z196 ? ? ?
+
+C INPUT PARAMETERS
+define(`rp', `%r2')
+define(`up', `%r3')
+define(`vp', `%r4')
+define(`nn', `%r5')
+
+C Pick the entry point name (func), the code variant and the opcodes from
+C whichever OPERATION_* symbol is defined when this file is processed.
+C   VARIANT_1   rp[i] = up[i] OP vp[i]       and_n, ior_n, xor_n
+C   VARIANT_2   rp[i] = up[i] OP ~vp[i]      andn_n, iorn_n, xnor_n
+C   VARIANT_3   rp[i] = ~(up[i] OP vp[i])    nand_n, nior_n
+C LOGOP is the register-from-memory opcode used by all variants.  LOGOPC is
+C the memory-to-memory opcode, only defined for VARIANT_1, which uses it for
+C the in-place case rp = up.
+ifdef(`OPERATION_and_n',`
+ define(`func',`mpn_and_n')
+ define(`VARIANT_1')
+ define(`LOGOPC',`nc')
+ define(`LOGOP',`n')')
+ifdef(`OPERATION_andn_n',`
+ define(`func',`mpn_andn_n')
+ define(`VARIANT_2')
+ define(`LOGOP',`n')')
+ifdef(`OPERATION_nand_n',`
+ define(`func',`mpn_nand_n')
+ define(`VARIANT_3')
+ define(`LOGOP',`n')')
+ifdef(`OPERATION_ior_n',`
+ define(`func',`mpn_ior_n')
+ define(`VARIANT_1')
+ define(`LOGOPC',`oc')
+ define(`LOGOP',`o')')
+ifdef(`OPERATION_iorn_n',`
+ define(`func',`mpn_iorn_n')
+ define(`VARIANT_2')
+ define(`LOGOP',`o')')
+ifdef(`OPERATION_nior_n',`
+ define(`func',`mpn_nior_n')
+ define(`VARIANT_3')
+ define(`LOGOP',`o')')
+ifdef(`OPERATION_xor_n',`
+ define(`func',`mpn_xor_n')
+ define(`VARIANT_1')
+ define(`LOGOPC',`xc')
+ define(`LOGOP',`x')')
+ifdef(`OPERATION_xnor_n',`
+ define(`func',`mpn_xnor_n')
+ define(`VARIANT_2')
+ define(`LOGOP',`x')')
+
+MULFUNC_PROLOGUE(mpn_and_n mpn_andn_n mpn_nand_n mpn_ior_n mpn_iorn_n mpn_nior_n mpn_xor_n mpn_xnor_n)
+
+ASM_START()
+PROLOGUE(func)
+C Variant 1: rp[i] = up[i] LOGOP vp[i].
+C Two code paths: when rp = up the operation is done in place with the
+C memory-to-memory insn LOGOPC, otherwise a 4-way unrolled register loop with
+C feed-in code for the leftover limbs is used.
+ifdef(`VARIANT_1',`
+ cr rp, up
+ jne L(normal)
+
+C In-place path, rp = up.  nn becomes the byte count minus 1, and r1 the
+C number of full 256-byte chunks.
+ sll nn, 2
+ ahi nn, -1
+ lr %r1, nn
+ srl %r1, 8
+ ltr %r1, %r1 C < 256 bytes to process?
+ je L(1)
+
+L(tp): LOGOPC 0(256, rp), 0(vp)
+ la rp, 256(rp)
+ la vp, 256(vp)
+ brct %r1, L(tp)
+
+C The LOGOPC below is never run directly, bras jumps over it.  It only
+C serves as the target of ex, which supplies the length from nn.
+L(1): bras %r1, L(2) C make r1 point to LOGOPC insn
+ LOGOPC 0(1, rp), 0(vp)
+L(2): ex nn, 0(%r1) C execute LOGOPC with length ((bytes-1) mod 256)+1
+L(rtn): br %r14
+
+
+C General path.  r0 = (nn+3)/4 is the number of passes through brct at
+C L(mid), counting the feed-in block.  r7 = (nn+3) mod 4 selects the feed-in
+C block, the labels b1, b2, b3 are named after nn mod 4.
+C NOTE(review): r6-r8 are saved at 12(%r15) here while the shift routines
+C for this CPU save r6 at 24(%r15) -- confirm the offset is intended.
+L(normal):
+ stm %r6, %r8, 12(%r15)
+ ahi nn, 3
+ lhi %r7, 3
+ lr %r0, nn
+ srl %r0, 2
+ nr %r7, nn C (nn+3) mod 4
+ je L(b1)
+ chi %r7, 2
+ jl L(b2)
+ jne L(top)
+
+C Feed-in for 3 leftover limbs.  The lm overwrites r5 = nn, the loop count
+C is already in r0.
+L(b3): lm %r5, %r7, 0(up)
+ la up, 12(up)
+ LOGOP %r5, 0(vp)
+ LOGOP %r6, 4(vp)
+ LOGOP %r7, 8(vp)
+ stm %r5, %r7, 0(rp)
+ la rp, 12(rp)
+ la vp, 12(vp)
+ j L(mid)
+
+C Feed-in for 1 leftover limb.
+L(b1): l %r5, 0(up)
+ la up, 4(up)
+ LOGOP %r5, 0(vp)
+ st %r5, 0(rp)
+ la rp, 4(rp)
+ la vp, 4(vp)
+ j L(mid)
+
+C Feed-in for 2 leftover limbs.
+L(b2): lm %r5, %r6, 0(up)
+ la up, 8(up)
+ LOGOP %r5, 0(vp)
+ LOGOP %r6, 4(vp)
+ stm %r5, %r6, 0(rp)
+ la rp, 8(rp)
+ la vp, 8(vp)
+ j L(mid)
+
+C Main loop, 4 limbs per iteration.
+L(top): lm %r5, %r8, 0(up)
+ la up, 16(up)
+ LOGOP %r5, 0(vp)
+ LOGOP %r6, 4(vp)
+ LOGOP %r7, 8(vp)
+ LOGOP %r8, 12(vp)
+ stm %r5, %r8, 0(rp)
+ la rp, 16(rp)
+ la vp, 16(vp)
+L(mid): brct %r0, L(top)
+
+ lm %r6, %r8, 12(%r15)
+ br %r14
+')
+
+C Variant 2: rp[i] = up[i] LOGOP ~vp[i].
+C The vp limbs are loaded into registers and complemented by xor with
+C r1 = -1, then combined with the up limbs from memory.  The loop is 4-way
+C unrolled with feed-in code for the leftover limbs.
+ifdef(`VARIANT_2',`
+ stm %r6, %r8, 12(%r15)
+ lhi %r1, -1
+
+C r0 = (nn+3)/4 is the number of passes through brct at L(mid), counting
+C the feed-in block.  The labels b1, b2, b3 are named after nn mod 4.
+ ahi nn, 3
+ lhi %r7, 3
+ lr %r0, nn
+ srl %r0, 2
+ nr %r7, nn C (nn+3) mod 4
+ je L(b1)
+ chi %r7, 2
+ jl L(b2)
+ jne L(top)
+
+C Feed-in for 3 leftover limbs.  The lm overwrites r5 = nn, the loop count
+C is already in r0.
+L(b3): lm %r5, %r7, 0(vp)
+ la vp, 12(vp)
+ xr %r5, %r1
+ xr %r6, %r1
+ xr %r7, %r1
+ LOGOP %r5, 0(up)
+ LOGOP %r6, 4(up)
+ LOGOP %r7, 8(up)
+ stm %r5, %r7, 0(rp)
+ la rp, 12(rp)
+ la up, 12(up)
+ j L(mid)
+
+C Feed-in for 1 leftover limb.
+L(b1): l %r5, 0(vp)
+ la vp, 4(vp)
+ xr %r5, %r1
+ LOGOP %r5, 0(up)
+ st %r5, 0(rp)
+ la rp, 4(rp)
+ la up, 4(up)
+ j L(mid)
+
+C Feed-in for 2 leftover limbs.
+L(b2): lm %r5, %r6, 0(vp)
+ la vp, 8(vp)
+ xr %r5, %r1
+ xr %r6, %r1
+ LOGOP %r5, 0(up)
+ LOGOP %r6, 4(up)
+ stm %r5, %r6, 0(rp)
+ la rp, 8(rp)
+ la up, 8(up)
+ j L(mid)
+
+C Main loop, 4 limbs per iteration.
+L(top): lm %r5, %r8, 0(vp)
+ la vp, 16(vp)
+ xr %r5, %r1
+ xr %r6, %r1
+ xr %r7, %r1
+ xr %r8, %r1
+ LOGOP %r5, 0(up)
+ LOGOP %r6, 4(up)
+ LOGOP %r7, 8(up)
+ LOGOP %r8, 12(up)
+ la up, 16(up)
+ stm %r5, %r8, 0(rp)
+ la rp, 16(rp)
+L(mid): brct %r0, L(top)
+
+ lm %r6, %r8, 12(%r15)
+ br %r14
+')
+
+C Variant 3: rp[i] = ~(up[i] LOGOP vp[i]).
+C The vp limbs are loaded into registers, combined with the up limbs from
+C memory, and the result is complemented by xor with r1 = -1.  The loop is
+C 4-way unrolled with feed-in code for the leftover limbs.
+ifdef(`VARIANT_3',`
+ stm %r6, %r8, 12(%r15)
+ lhi %r1, -1
+
+C r0 = (nn+3)/4 is the number of passes through brct at L(mid), counting
+C the feed-in block.  The labels b1, b2, b3 are named after nn mod 4.
+ ahi nn, 3
+ lhi %r7, 3
+ lr %r0, nn
+ srl %r0, 2
+ nr %r7, nn C (nn+3) mod 4
+ je L(b1)
+ chi %r7, 2
+ jl L(b2)
+ jne L(top)
+
+C Feed-in for 3 leftover limbs.  The lm overwrites r5 = nn, the loop count
+C is already in r0.
+L(b3): lm %r5, %r7, 0(vp)
+ la vp, 12(vp)
+ LOGOP %r5, 0(up)
+ LOGOP %r6, 4(up)
+ xr %r5, %r1
+ xr %r6, %r1
+ LOGOP %r7, 8(up)
+ xr %r7, %r1
+ stm %r5, %r7, 0(rp)
+ la rp, 12(rp)
+ la up, 12(up)
+ j L(mid)
+
+C Feed-in for 1 leftover limb.
+L(b1): l %r5, 0(vp)
+ la vp, 4(vp)
+ LOGOP %r5, 0(up)
+ xr %r5, %r1
+ st %r5, 0(rp)
+ la rp, 4(rp)
+ la up, 4(up)
+ j L(mid)
+
+C Feed-in for 2 leftover limbs.
+L(b2): lm %r5, %r6, 0(vp)
+ la vp, 8(vp)
+ LOGOP %r5, 0(up)
+ LOGOP %r6, 4(up)
+ xr %r5, %r1
+ xr %r6, %r1
+ stm %r5, %r6, 0(rp)
+ la rp, 8(rp)
+ la up, 8(up)
+ j L(mid)
+
+C Main loop, 4 limbs per iteration.
+L(top): lm %r5, %r8, 0(vp)
+ la vp, 16(vp)
+ LOGOP %r5, 0(up)
+ LOGOP %r6, 4(up)
+ xr %r5, %r1
+ xr %r6, %r1
+ LOGOP %r7, 8(up)
+ LOGOP %r8, 12(up)
+ xr %r7, %r1
+ xr %r8, %r1
+ stm %r5, %r8, 0(rp)
+ la up, 16(up)
+ la rp, 16(rp)
+L(mid): brct %r0, L(top)
+
+ lm %r6, %r8, 12(%r15)
+ br %r14
+')
+
+EPILOGUE()
--- /dev/null
+dnl S/390-32 mpn_lshift.
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C z900 6
+C z990 3
+C z9 ?
+C z10 ?
+C z196 ?
+
+C TODO
+C *
+
+C INPUT PARAMETERS
+define(`rp', `%r2')
+define(`up', `%r3')
+define(`n', `%r4')
+define(`cnt', `%r5')
+
+ASM_START()
+C mpn_lshift: shift the n-limb operand at up left by cnt bits and store the
+C n result limbs at rp.  Limbs are processed from the most significant end
+C downwards.  The value returned in r2 is the most significant limb shifted
+C right by 32-cnt, i.e. the bits shifted out at the top.
+C Each result limb is formed by loading two adjacent source limbs into an
+C even/odd register pair and shifting the pair left with sldl.
+PROLOGUE(mpn_lshift)
+ lr %r1, n
+ sll %r1, 2
+ stm %r6, %r12, 24(%r15)
+ la up, 0(%r1,up) C put up near end of U
+ la rp, 0(%r1,rp) C put rp near end of R
+ ahi up, -20
+ ahi rp, -16
+ lhi %r8, 32
+ sr %r8, cnt
+ l %r12, 16(up) C most significant limb
+ srl %r12, 0(%r8) C return value
+ lhi %r7, 3
+ nr %r7, n C n mod 4
+ srl n, 2 C n = loop count, branches below test the nr result
+ je L(b0)
+ chi %r7, 2
+ jl L(b1)
+ je L(b2)
+
+C Feed-in for n mod 4 = 3: produce 2 result limbs here, L(end) does the last.
+L(b3): l %r10, 16(up)
+ l %r11, 12(up)
+ l %r9, 8(up)
+ ahi up, -8
+ lr %r8, %r11
+ sldl %r10, 0(cnt)
+ sldl %r8, 0(cnt)
+ st %r10, 12(rp)
+ st %r8, 8(rp)
+ ahi rp, -8
+ ltr n, n
+ je L(end)
+ j L(top)
+
+C Feed-in for n mod 4 = 2: produce 1 result limb here, L(end) does the last.
+L(b2): l %r10, 16(up)
+ l %r11, 12(up)
+ ahi up, -4
+ sldl %r10, 0(cnt)
+ st %r10, 12(rp)
+ ahi rp, -4
+ ltr n, n
+ je L(end)
+ j L(top)
+
+C Feed-in for n mod 4 = 1: nothing to do before the loop.
+L(b1): ltr n, n
+ je L(end)
+ j L(top)
+
+C Feed-in for n mod 4 = 0: produce 3 result limbs here and leave the 4th
+C for L(end), hence one loop iteration less.
+L(b0): l %r10,16(up)
+ l %r8, 12(up)
+ l %r6, 8(up)
+ l %r0, 4(up)
+ ahi up, -12
+ lr %r11, %r8
+ lr %r9, %r6
+ lr %r7, %r0
+ sldl %r10,0(cnt)
+ sldl %r8, 0(cnt)
+ sldl %r6, 0(cnt)
+ st %r10, 12(rp)
+ st %r8, 8(rp)
+ st %r6, 4(rp)
+ ahi rp, -12
+ ahi n, -1
+ je L(end)
+
+C Main loop, 4 result limbs per iteration.  Five source limbs are loaded,
+C the lowest one (r1) only supplies the bits shifted into r0.
+ ALIGN(8)
+L(top): l %r10, 16(up)
+ l %r8, 12(up)
+ l %r6, 8(up)
+ l %r0, 4(up)
+ l %r1, 0(up)
+ lr %r11, %r8
+ lr %r9, %r6
+ lr %r7, %r0
+ ahi up, -16
+ sldl %r10, 0(cnt)
+ sldl %r8, 0(cnt)
+ sldl %r6, 0(cnt)
+ sldl %r0, 0(cnt)
+ st %r10, 12(rp)
+ st %r8, 8(rp)
+ st %r6, 4(rp)
+ st %r0, 0(rp)
+ ahi rp, -16
+ brct n, L(top)
+
+C Least significant limb: plain single-register shift, zeros enter at the
+C bottom.
+L(end): l %r10, 16(up)
+ sll %r10, 0(cnt)
+ st %r10, 12(rp)
+
+ lr %r2, %r12
+ lm %r6, %r12, 24(%r15)
+ br %r14
+EPILOGUE()
--- /dev/null
+dnl S/390-32 mpn_lshiftc.
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C z900 7
+C z990 3.375
+C z9 ?
+C z10 ?
+C z196 ?
+
+C TODO
+C *
+
+C INPUT PARAMETERS
+define(`rp', `%r2')
+define(`up', `%r3')
+define(`n', `%r4')
+define(`cnt', `%r5')
+
+ASM_START()
+C mpn_lshiftc: shift the n-limb operand at up left by cnt bits, complement
+C every result limb, and store the n result limbs at rp.  Limbs are processed
+C from the most significant end downwards.  The value returned in r2 is the
+C most significant limb shifted right by 32-cnt (not complemented).
+C The structure is the same as the plain left shift, with an extra xor
+C against r13 = -1 before each store.
+PROLOGUE(mpn_lshiftc)
+ lr %r1, n
+ sll %r1, 2
+ stm %r6, %r13, 24(%r15)
+ la up, 0(%r1,up) C put up near end of U
+ la rp, 0(%r1,rp) C put rp near end of R
+ ahi up, -20
+ ahi rp, -16
+ lhi %r8, 32
+ sr %r8, cnt
+ l %r12, 16(up) C most significant limb
+ srl %r12, 0(%r8) C return value
+ lhi %r13, -1 C all ones, used to complement results
+ lhi %r7, 3
+ nr %r7, n C n mod 4
+ srl n, 2 C n = loop count, branches below test the nr result
+ je L(b0)
+ chi %r7, 2
+ jl L(b1)
+ je L(b2)
+
+C Feed-in for n mod 4 = 3: produce 2 result limbs here, L(end) does the last.
+L(b3): l %r10, 16(up)
+ l %r11, 12(up)
+ l %r9, 8(up)
+ ahi up, -8
+ lr %r8, %r11
+ sldl %r10, 0(cnt)
+ sldl %r8, 0(cnt)
+ xr %r10, %r13
+ xr %r8, %r13
+ st %r10, 12(rp)
+ st %r8, 8(rp)
+ ahi rp, -8
+ ltr n, n
+ je L(end)
+ j L(top)
+
+C Feed-in for n mod 4 = 2: produce 1 result limb here, L(end) does the last.
+L(b2): l %r10, 16(up)
+ l %r11, 12(up)
+ ahi up, -4
+ sldl %r10, 0(cnt)
+ xr %r10, %r13
+ st %r10, 12(rp)
+ ahi rp, -4
+ ltr n, n
+ je L(end)
+ j L(top)
+
+C Feed-in for n mod 4 = 1: nothing to do before the loop.
+L(b1): ltr n, n
+ je L(end)
+ j L(top)
+
+C Feed-in for n mod 4 = 0: produce 3 result limbs here and leave the 4th
+C for L(end), hence one loop iteration less.
+L(b0): l %r10,16(up)
+ l %r8, 12(up)
+ l %r6, 8(up)
+ l %r0, 4(up)
+ ahi up, -12
+ lr %r11, %r8
+ lr %r9, %r6
+ lr %r7, %r0
+ sldl %r10,0(cnt)
+ sldl %r8, 0(cnt)
+ sldl %r6, 0(cnt)
+ xr %r10, %r13
+ xr %r8, %r13
+ xr %r6, %r13
+ st %r10, 12(rp)
+ st %r8, 8(rp)
+ st %r6, 4(rp)
+ ahi rp, -12
+ ahi n, -1
+ je L(end)
+
+C Main loop, 4 result limbs per iteration.  Five source limbs are loaded,
+C the lowest one (r1) only supplies the bits shifted into r0.
+ ALIGN(8)
+L(top): l %r10, 16(up)
+ l %r8, 12(up)
+ l %r6, 8(up)
+ l %r0, 4(up)
+ l %r1, 0(up)
+ lr %r11, %r8
+ lr %r9, %r6
+ lr %r7, %r0
+ ahi up, -16
+ sldl %r10, 0(cnt)
+ sldl %r8, 0(cnt)
+ sldl %r6, 0(cnt)
+ sldl %r0, 0(cnt)
+ xr %r10, %r13
+ xr %r8, %r13
+ xr %r6, %r13
+ xr %r0, %r13
+ st %r10, 12(rp)
+ st %r8, 8(rp)
+ st %r6, 4(rp)
+ st %r0, 0(rp)
+ ahi rp, -16
+ brct n, L(top)
+
+C Least significant limb: single-register shift, then complement, so the
+C cnt low bits of the result are ones.
+L(end): l %r10, 16(up)
+ sll %r10, 0(cnt)
+ xr %r10, %r13
+ st %r10, 12(rp)
+
+ lr %r2, %r12
+ lm %r6, %r13, 24(%r15)
+ br %r14
+EPILOGUE()
--- /dev/null
+dnl S/390-32 mpn_rshift.
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C z900 6
+C z990 3
+C z9 ?
+C z10 ?
+C z196 ?
+
+C TODO
+C *
+
+C INPUT PARAMETERS
+define(`rp', `%r2')
+define(`up', `%r3')
+define(`n', `%r4')
+define(`cnt', `%r5')
+
+ASM_START()
+C mpn_rshift: shift the n-limb operand at up right by cnt bits and store the
+C n result limbs at rp.  Limbs are processed from the least significant end
+C upwards.  The value returned in r2 is the least significant limb shifted
+C left by 32-cnt, i.e. the bits shifted out at the bottom.
+C Each result limb is formed by loading two adjacent source limbs into an
+C even/odd register pair and shifting the pair right with srdl, the result
+C limb ends up in the odd register.
+PROLOGUE(mpn_rshift)
+ stm %r6, %r12, 24(%r15)
+ lhi %r8, 32
+ sr %r8, cnt
+ l %r12, 0(up) C least significant limb
+ sll %r12, 0(%r8) C return value
+ lhi %r7, 3
+ nr %r7, n C n mod 4
+ srl n, 2 C n = loop count, branches below test the nr result
+ je L(b0)
+ chi %r7, 2
+ jl L(b1)
+ je L(b2)
+
+C Feed-in for n mod 4 = 3: produce 2 result limbs here, L(end) does the last.
+L(b3): l %r11, 0(up)
+ l %r10, 4(up)
+ l %r8, 8(up)
+ ahi up, 8
+ lr %r9, %r10
+ srdl %r10, 0(cnt)
+ srdl %r8, 0(cnt)
+ st %r11, 0(rp)
+ st %r9, 4(rp)
+ ahi rp, 8
+ ltr n, n
+ je L(end)
+ j L(top)
+
+C Feed-in for n mod 4 = 2: produce 1 result limb here, L(end) does the last.
+L(b2): l %r11, 0(up)
+ l %r10, 4(up)
+ ahi up, 4
+ srdl %r10, 0(cnt)
+ st %r11, 0(rp)
+ ahi rp, 4
+ ltr n, n
+ je L(end)
+ j L(top)
+
+C Feed-in for n mod 4 = 1: nothing to do before the loop.
+L(b1): ltr n, n
+ je L(end)
+ j L(top)
+
+C Feed-in for n mod 4 = 0: produce 3 result limbs here and leave the 4th
+C for L(end), hence one loop iteration less.
+L(b0): l %r11, 0(up)
+ l %r9, 4(up)
+ l %r7, 8(up)
+ l %r1, 12(up)
+ ahi up, 12
+ lr %r10, %r9
+ lr %r8, %r7
+ lr %r6, %r1
+ srdl %r10, 0(cnt)
+ srdl %r8, 0(cnt)
+ srdl %r6, 0(cnt)
+ st %r11, 0(rp)
+ st %r9, 4(rp)
+ st %r7, 8(rp)
+ ahi rp, 12
+ ahi n, -1
+ je L(end)
+
+C Main loop, 4 result limbs per iteration.  Five source limbs are loaded,
+C the highest one (r0) only supplies the bits shifted into r1.
+ ALIGN(8)
+L(top): l %r11, 0(up)
+ l %r9, 4(up)
+ l %r7, 8(up)
+ l %r1, 12(up)
+ l %r0, 16(up)
+ lr %r10, %r9
+ lr %r8, %r7
+ lr %r6, %r1
+ ahi up, 16
+ srdl %r10, 0(cnt)
+ srdl %r8, 0(cnt)
+ srdl %r6, 0(cnt)
+ srdl %r0, 0(cnt)
+ st %r11, 0(rp)
+ st %r9, 4(rp)
+ st %r7, 8(rp)
+ st %r1, 12(rp)
+ ahi rp, 16
+ brct n, L(top)
+
+C Most significant limb: plain single-register shift, zeros enter at the
+C top.
+L(end): l %r11, 0(up)
+ srl %r11, 0(cnt)
+ st %r11, 0(rp)
+
+ lr %r2, %r12
+ lm %r6, %r12, 24(%r15)
+ br %r14
+EPILOGUE()
--- /dev/null
+Copyright 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+
+
+There are 5 generations of 64-bit s390 processors, z900, z990, z9,
+z10, and z196. The current GMP code was optimised for the two oldest,
+z900 and z990.
+
+
+mpn_copyi
+
+This code makes use of a loop around MVC. It almost surely runs very
+close to optimally. A small improvement could be done by using one
+MVC for size 256 bytes, now we use two (we use an extra MVC when
+copying any multiple of 256 bytes).
+
+
+mpn_copyd
+
+We have tried several feed-in variants here, branch tree, jump table
+and computed goto. The fastest (on z990) turned out to be computed
+goto.
+
+An approach not tried is EX of LMG and STMG, modifying the register set
+on-the-fly. Using that trick, we could completely avoid using
+separate feed-in paths.
+
+
+mpn_lshift, mpn_rshift
+
+The current code runs at pipeline decode bandwidth on z990.
+
+
+mpn_add_n, mpn_sub_n
+
+The current code is 4-way unrolled. It should be unrolled more, at
+least 8x, in order to reach 2.5 c/l.
+
+
+mpn_mul_1, mpn_addmul_1, mpn_submul_1
+
+The current code is very naive, but due to the non-pipelined nature of
+MLGR on z900 and z990, more sophisticated code would not gain much.
+
+On z10 one would need to cluster at least 4 MLGR together, in order to
+reduce stalling.
+
+On z196, one surely wants to use unrolling and pipelining, to perhaps
+reach around 12 c/l. A major issue here and on z10 is ALCGR's 3 cycle
+stalling.
+
+
+mpn_mul_2, mpn_addmul_2
+
+At least for older machines (z900, z990) with very slow MLGR, we
+should use Karatsuba's algorithm on 2-limb units, making mul_2 and
+addmul_2 the main multiplication primitives. The newer machines might
+benefit less from this approach, perhaps in particular z10, where MLGR
+clustering is more important.
+
+With Karatsuba, one could hope for around 16 cycles per accumulated
+128-bit cross product, on z990.
--- /dev/null
+dnl S/390-64 mpn_addmul_1
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C z900 34
+C z990 23
+C z9 ?
+C z10 ?
+C z196 ?
+
+C INPUT PARAMETERS
+define(`rp', `%r2')
+define(`up', `%r3')
+define(`n', `%r4')
+define(`v0', `%r5')
+
+define(`z', `%r9')
+
+ASM_START()
+C mpn_addmul_1: multiply the n-limb operand at up by the limb v0, add the
+C product to the n limbs at rp in place, and return the carry-out limb in r2.
+C Register use in the loop:
+C   r12      byte index into up and rp
+C   r0:r1    128-bit product up[i] * v0 (high:low)
+C   r10      rp[i]
+C   r11      carry limb, i.e. high product word of the previous iteration
+C   z        constant zero, used to fold a carry bit into r0
+C The carry bit left by the algr near the end of one iteration is consumed by
+C the first alcgr of the next iteration, so no insn in between may change the
+C condition code.
+PROLOGUE(mpn_addmul_1)
+ stmg %r9, %r12, 72(%r15)
+ lghi %r12, 0 C zero index register
+ aghi %r12, 0 C clear carry flag
+ lghi %r11, 0 C clear carry limb
+ lghi z, 0 C keep register zero
+
+L(top): lg %r1, 0(%r12,up)
+ lg %r10, 0(%r12,rp)
+ mlgr %r0, v0 C r0:r1 = r1 * v0
+ alcgr %r1, %r10 C low += rp[i] + pending carry bit
+ alcgr %r0, z C high += carry bit
+ algr %r1, %r11 C low += carry limb, carry bit kept for next round
+ lgr %r11, %r0 C new carry limb
+ stg %r1, 0(%r12,rp)
+ la %r12, 8(%r12)
+ brctg n, L(top)
+
+ lghi %r2, 0
+ alcgr %r2, %r11 C return carry limb + last carry bit
+
+ lmg %r9, %r12, 72(%r15)
+ br %r14
+EPILOGUE()
--- /dev/null
+dnl S/390-64 mpn_addlsh1_n and mpn_rsblsh1_n.
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C z900 9
+C z990 4.75
+C z9 ?
+C z10 ?
+C z196 ?
+
+C TODO
+C * Optimise for small n, avoid 'la' like in aors_n.asm.
+C * Tune to reach 3.5 c/l. For addlsh1, we could let the main alcgr propagate
+C carry to the lsh1 alcgr.
+C * Compute RETVAL for rsblsh1_n less stupidly.
+
+C INPUT PARAMETERS
+define(`rp', `%r2')
+define(`up', `%r3')
+define(`vp', `%r4')
+define(`n', `%r5')
+
+C Pick opcodes and helper sequences from the OPERATION_* symbol defined when
+C this file is processed.
+C   ADSB     first add/subtract of a group, no carry/borrow in
+C   ADSBC    add/subtract with carry/borrow in
+C   INITCY   initial value of the saved add/subtract carry word r9, used on
+C            the path that enters the main loop directly
+C   RETVAL   combine the two saved carry words r1 (from the doubling of the
+C            vp limbs) and r9 (from the add/subtract) into the return value
+C            in r2
+ifdef(`OPERATION_addlsh1_n',`
+ define(ADSB, alg)
+ define(ADSBC, alcg)
+ define(INITCY, `lghi %r9, -1')
+ define(RETVAL, `la %r2, 2(%r1,%r9)')
+ define(func, mpn_addlsh1_n)
+')
+ifdef(`OPERATION_rsblsh1_n',`
+ define(ADSB, slg)
+ define(ADSBC, slbg)
+ define(INITCY, `lghi %r9, 0')
+ define(RETVAL,`dnl
+ algr %r1, %r9
+ lghi %r2, 1
+ algr %r2, %r1')
+ define(func, mpn_rsblsh1_n)
+')
+
+MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_rsblsh1_n)
+
+ASM_START()
+C Common body of mpn_addlsh1_n and mpn_rsblsh1_n.  The vp limbs are loaded
+C into registers and doubled (shifted left one bit), then the up limbs are
+C applied from memory with ADSB/ADSBC, and the result is stored at rp.
+C Two independent carry chains are kept: the doubling carry and the
+C add/subtract carry.  Between groups each one is parked in a register
+C (r1 and r9) with slbgr reg,reg and brought back into the condition code
+C with aghi reg,1.
+C The loop is 4-way unrolled, with feed-in code for the leftover limbs.
+PROLOGUE(func)
+ stmg %r6, %r9, 48(%r15)
+
+C r0 = (n+3)/4 is the loop count, the labels are named after n mod 4.
+ aghi n, 3
+ lghi %r7, 3
+ srlg %r0, n, 2
+ ngr %r7, n C (n+3) mod 4
+ je L(b1)
+ cghi %r7, 2
+ jl L(b2)
+ jne L(b0)
+
+C Feed-in for 3 leftover limbs.  The lmg overwrites r5 = n, the loop count
+C is already in r0.
+L(b3): lmg %r5, %r7, 0(vp)
+ la vp, 24(vp)
+
+ algr %r5, %r5
+ alcgr %r6, %r6
+ alcgr %r7, %r7
+ slbgr %r1, %r1 C save doubling carry
+
+ ADSB %r5, 0(up)
+ ADSBC %r6, 8(up)
+ ADSBC %r7, 16(up)
+ la up, 24(up)
+ slbgr %r9, %r9 C save add/subtract carry
+
+ stmg %r5, %r7, 0(rp)
+ la rp, 24(rp)
+ brctg %r0, L(top)
+ j L(end)
+
+C n is a multiple of 4: no feed-in limbs, just set up both saved carry
+C words for the first loop iteration.
+L(b0): lghi %r1, -1
+ INITCY
+ j L(top)
+
+C Feed-in for 1 leftover limb.
+L(b1): lg %r5, 0(vp)
+ la vp, 8(vp)
+
+ algr %r5, %r5
+ slbgr %r1, %r1 C save doubling carry
+ ADSB %r5, 0(up)
+ la up, 8(up)
+ slbgr %r9, %r9 C save add/subtract carry
+
+ stg %r5, 0(rp)
+ la rp, 8(rp)
+ brctg %r0, L(top)
+ j L(end)
+
+C Feed-in for 2 leftover limbs.
+L(b2): lmg %r5, %r6, 0(vp)
+ la vp, 16(vp)
+
+ algr %r5, %r5
+ alcgr %r6, %r6
+ slbgr %r1, %r1 C save doubling carry
+
+ ADSB %r5, 0(up)
+ ADSBC %r6, 8(up)
+ la up, 16(up)
+ slbgr %r9, %r9 C save add/subtract carry
+
+ stmg %r5, %r6, 0(rp)
+ la rp, 16(rp)
+ brctg %r0, L(top)
+ j L(end)
+
+C Main loop, 4 limbs per iteration.
+L(top): lmg %r5, %r8, 0(vp)
+ la vp, 32(vp)
+
+ aghi %r1, 1 C restore carry
+
+ alcgr %r5, %r5
+ alcgr %r6, %r6
+ alcgr %r7, %r7
+ alcgr %r8, %r8
+
+ slbgr %r1, %r1 C save carry
+
+ aghi %r9, 1 C restore carry
+
+ ADSBC %r5, 0(up)
+ ADSBC %r6, 8(up)
+ ADSBC %r7, 16(up)
+ ADSBC %r8, 24(up)
+ la up, 32(up)
+
+ slbgr %r9, %r9 C save carry
+
+ stmg %r5, %r8, 0(rp)
+ la rp, 32(rp)
+ brctg %r0, L(top)
+
+C Build the return value in r2 from the two saved carry words.
+L(end): RETVAL
+ lmg %r6, %r9, 48(%r15)
+ br %r14
+EPILOGUE()
--- /dev/null
+dnl S/390-64 mpn_add_n and mpn_sub_n.
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C z900 5.5
+C z990 3
+C z9 ?
+C z10 ?
+C z196 ?
+
+C TODO
+C * Optimise for small n
+C * Use r0 and save/restore one less register
+C * Using logops_n's v1 inner loop operand order makes the loop about 20%
+C faster, at the expense of highly alignment-dependent performance.
+
+C INPUT PARAMETERS
+define(`rp', `%r2')
+define(`up', `%r3')
+define(`vp', `%r4')
+define(`n', `%r5')
+
+C Pick opcodes and helper sequences from the OPERATION_* symbol defined when
+C this file is processed.
+C   ADSB     first add/subtract of a chain, no carry/borrow in
+C   ADSBC    add/subtract from memory with carry/borrow in
+C   ADSBCR   register form of ADSBC (not used by the code in this file)
+C   RETVAL   turn the final carry/borrow state into 0 or 1 in r2
+C   func_nc  name of a carry-in entry point (not used by the code in this
+C            file)
+ifdef(`OPERATION_add_n', `
+ define(ADSB, alg)
+ define(ADSBCR, alcgr)
+ define(ADSBC, alcg)
+ define(RETVAL,`dnl
+ lghi %r2, 0
+ alcgr %r2, %r2')
+ define(func, mpn_add_n)
+ define(func_nc, mpn_add_nc)')
+ifdef(`OPERATION_sub_n', `
+ define(ADSB, slg)
+ define(ADSBCR, slbgr)
+ define(ADSBC, slbg)
+ define(RETVAL,`dnl
+ slbgr %r2, %r2
+ lcgr %r2, %r2')
+ define(func, mpn_sub_n)
+ define(func_nc, mpn_sub_nc)')
+
+MULFUNC_PROLOGUE(mpn_add_n mpn_sub_n)
+
+ASM_START()
+C Common body of mpn_add_n and mpn_sub_n: rp[] = up[] +/- vp[] over n limbs,
+C returning the carry/borrow out in r2.
+C The up limbs are loaded into registers, the vp limbs are applied from
+C memory with ADSB/ADSBC, and the result is stored at rp.  The carry stays in
+C the condition code across the whole operation, so no insn between two
+C ADSBC may change the condition code.
+C The loop is 4-way unrolled, with feed-in code for the leftover limbs.
+PROLOGUE(func)
+ stmg %r6, %r8, 48(%r15)
+
+C r1 = (n+3)/4 is the loop count, the labels are named after n mod 4.
+ aghi n, 3
+ lghi %r7, 3
+ srlg %r1, n, 2
+ ngr %r7, n C (n+3) mod 4
+ je L(b1)
+ cghi %r7, 2
+ jl L(b2)
+ jne L(b0)
+
+C Feed-in for 3 leftover limbs.  The lmg overwrites r5 = n, the loop count
+C is already in r1.
+L(b3): lmg %r5, %r7, 0(up)
+ la up, 24(up)
+ ADSB %r5, 0(vp)
+ ADSBC %r6, 8(vp)
+ ADSBC %r7, 16(vp)
+ la vp, 24(vp)
+ stmg %r5, %r7, 0(rp)
+ la rp, 24(rp)
+ brctg %r1, L(top)
+ j L(end)
+
+C n is a multiple of 4: do the first two limbs of a loop iteration here,
+C starting the carry chain with ADSB, then join the loop in the middle.
+L(b0): lmg %r5, %r8, 0(up) C These duplicated insns are no mistake,
+ la up, 32(up) C they are needed to make the main loop run
+ ADSB %r5, 0(vp) C fast for n = 0 (mod 4).
+ ADSBC %r6, 8(vp)
+ j L(m0)
+
+C Feed-in for 1 leftover limb.
+L(b1): lg %r5, 0(up)
+ la up, 8(up)
+ ADSB %r5, 0(vp)
+ la vp, 8(vp)
+ stg %r5, 0(rp)
+ la rp, 8(rp)
+ brctg %r1, L(top)
+ j L(end)
+
+C Feed-in for 2 leftover limbs.
+L(b2): lmg %r5, %r6, 0(up)
+ la up, 16(up)
+ ADSB %r5, 0(vp)
+ ADSBC %r6, 8(vp)
+ la vp, 16(vp)
+ stmg %r5, %r6, 0(rp)
+ la rp, 16(rp)
+ brctg %r1, L(top)
+ j L(end)
+
+C Main loop, 4 limbs per iteration.
+L(top): lmg %r5, %r8, 0(up)
+ la up, 32(up)
+ ADSBC %r5, 0(vp)
+ ADSBC %r6, 8(vp)
+L(m0): ADSBC %r7, 16(vp)
+ ADSBC %r8, 24(vp)
+ la vp, 32(vp)
+ stmg %r5, %r8, 0(rp)
+ la rp, 32(rp)
+ brctg %r1, L(top)
+
+C Convert the final carry/borrow state into the return value.
+L(end): RETVAL
+ lmg %r6, %r8, 48(%r15)
+ br %r14
+EPILOGUE()
--- /dev/null
+dnl S/390-64 mpn_bdiv_dbm1c
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C z900 29
+C z990 22
+C z9 ?
+C z10 ?
+C z196 ?
+
+C INPUT PARAMETERS
+define(`qp', `%r2')
+define(`up', `%r3')
+define(`n', `%r4')
+define(`bd', `%r5')
+define(`cy', `%r6')
+
+ASM_START()
+ TEXT
+ ALIGN(16)
+C mpn_bdiv_dbm1c: for each of the n limbs at up, form the 128-bit product
+C up[i] * bd, subtract its low word from the running value cy, store the
+C result at qp[i], then subtract the high word and the borrow from cy.  The
+C final cy is returned in r2.
+C cy arrives in r6 and is updated in place, r7 is the byte index into up and
+C qp.  Both are saved on entry and restored before return.
+PROLOGUE(mpn_bdiv_dbm1c)
+ stmg %r6, %r7, 48(%r15)
+ lghi %r7, 0 C zero index register
+
+L(top): lg %r1, 0(%r7,up)
+ mlgr %r0, bd C r0:r1 = up[i] * bd
+ slgr %r6, %r1 C cy -= low word
+ stg %r6, 0(%r7,qp)
+ la %r7, 8(%r7)
+ slbgr %r6, %r0 C cy -= high word + borrow
+ brctg n, L(top)
+
+ lgr %r2, %r6
+ lmg %r6, %r7, 48(%r15)
+ br %r14
+EPILOGUE()
--- /dev/null
+dnl S/390-64 mpn_copyd
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+C cycles/limb
+C z900 2.67
+C z990 1.5
+C z9 ?
+C z10 ?
+C z196 ?
+
+C FIXME:
+C * Avoid saving/restoring callee-saves registers for n < 3. This could be
+C done by setting rp=r1, up=r2, i=r0 and r3,r4,r5 for clock regs.
+C We could then use r3...r10 in main loop.
+C * Could we use some EX trick, modifying lmg/stmg, for the feed-in code?
+
+C INPUT PARAMETERS
+define(`rp_param', `%r2')
+define(`up_param', `%r3')
+define(`n', `%r4')
+
+define(`rp', `%r8')
+define(`up', `%r9')
+
+ASM_START()
+C mpn_copyd: copy n limbs from up_param to rp_param, starting with the most
+C significant limb and working downwards.
+C The main loop moves 8 limbs per iteration with lmg/stmg.  A feed-in block
+C selected by n mod 8 first copies the leftover limbs at the high end.
+C Register use:
+C   rp, up   working pointers, initially 64 bytes below the end of each
+C            operand
+C   r10      loop counter, (n+8)/8, decremented once before the first loop
+C            iteration
+C   r11      constant -64, the pointer step of the main loop
+C   r0-r7    data registers.  The feed-in blocks overwrite the incoming
+C            parameter registers r2-r4, which are no longer needed by then.
+PROLOGUE(mpn_copyd)
+ stmg %r6, %r11, 48(%r15)
+
+ sllg %r1, n, 3
+ la %r10, 8(n)
+ aghi %r1, -64
+ srlg %r10, %r10, 3
+ lghi %r11, -64
+
+ la rp, 0(%r1,rp_param) C FIXME use lay on z990 and later
+ la up, 0(%r1,up_param) C FIXME use lay on z990 and later
+
+C Dispatch on n mod 8 with a small branch tree.
+ lghi %r7, 7
+ ngr %r7, n C n mod 8
+ cghi %r7, 2
+ jh L(b34567)
+ cghi %r7, 1
+ je L(b1)
+ jh L(b2)
+
+C n mod 8 = 0: no leftover limbs.
+L(b0): brctg %r10, L(top)
+ j L(end)
+
+C n mod 8 = 1: copy the top limb.
+L(b1): lg %r0, 56(up)
+ aghi up, -8
+ stg %r0, 56(rp)
+ aghi rp, -8
+ brctg %r10, L(top)
+ j L(end)
+
+C n mod 8 = 2: copy the top 2 limbs.
+L(b2): lmg %r0, %r1, 48(up)
+ aghi up, -16
+ stmg %r0, %r1, 48(rp)
+ aghi rp, -16
+ brctg %r10, L(top)
+ j L(end)
+
+L(b34567):
+ cghi %r7, 4
+ jl L(b3)
+ je L(b4)
+ cghi %r7, 6
+ je L(b6)
+ jh L(b7)
+
+C n mod 8 = 5: copy the top 5 limbs.
+L(b5): lmg %r0, %r4, 24(up)
+ aghi up, -40
+ stmg %r0, %r4, 24(rp)
+ aghi rp, -40
+ brctg %r10, L(top)
+ j L(end)
+
+C n mod 8 = 3: copy the top 3 limbs.
+L(b3): lmg %r0, %r2, 40(up)
+ aghi up, -24
+ stmg %r0, %r2, 40(rp)
+ aghi rp, -24
+ brctg %r10, L(top)
+ j L(end)
+
+C n mod 8 = 4: copy the top 4 limbs.
+L(b4): lmg %r0, %r3, 32(up)
+ aghi up, -32
+ stmg %r0, %r3, 32(rp)
+ aghi rp, -32
+ brctg %r10, L(top)
+ j L(end)
+
+C n mod 8 = 6: copy the top 6 limbs.
+L(b6): lmg %r0, %r5, 16(up)
+ aghi up, -48
+ stmg %r0, %r5, 16(rp)
+ aghi rp, -48
+ brctg %r10, L(top)
+ j L(end)
+
+C n mod 8 = 7: copy the top 7 limbs.  r6 is overwritten here, it is restored
+C at L(end).
+L(b7): lmg %r0, %r6, 8(up)
+ aghi up, -56
+ stmg %r0, %r6, 8(rp)
+ aghi rp, -56
+ brctg %r10, L(top)
+ j L(end)
+
+C Main loop, 8 limbs per iteration, stepping both pointers down by 64 bytes.
+L(top): lmg %r0, %r7, 0(up)
+ la up, 0(%r11,up)
+ stmg %r0, %r7, 0(rp)
+ la rp, 0(%r11,rp)
+ brctg %r10, L(top)
+
+L(end): lmg %r6, %r11, 48(%r15)
+ br %r14
+EPILOGUE()
--- /dev/null
+dnl S/390-64 mpn_copyi
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+C cycles/limb
+C z900 1.25
+C z990 0.75
+C z9 ?
+C z10 ?
+C z196 ?
+
+C NOTE
+C * This is based on GNU libc memcpy which was written by Martin Schwidefsky.
+
+C INPUT PARAMETERS
+define(`rp', `%r2')
+define(`up', `%r3')
+define(`n', `%r4')
+
+ASM_START()
+C mpn_copyi: copy n limbs from up to rp in increasing address order, using
+C mvc.  Full 256-byte chunks are moved in a loop, the remaining 1 to 256
+C bytes are moved by a single mvc run through ex, which supplies the length
+C from the low bits of r4.  Returns at once when n is zero.
+C Register use: r4 = n, then byte count, then byte count - 1.  r5 = number
+C of 256-byte chunks, later the address of the mvc used by ex.
+PROLOGUE(mpn_copyi)
+ ltgr %r4, %r4 C test n
+ sllg %r4, %r4, 3 C bytes to copy, the je below still tests n
+ je L(rtn)
+ aghi %r4, -1
+ srlg %r5, %r4, 8
+ ltgr %r5, %r5 C < 256 bytes to copy?
+ je L(1)
+
+L(top): mvc 0(256, rp), 0(up)
+ la rp, 256(rp)
+ la up, 256(up)
+ brctg %r5, L(top)
+
+C The mvc below is never run directly, bras jumps over it.
+L(1): bras %r5, L(2) C make r5 point to mvc insn
+ mvc 0(1, rp), 0(up)
+L(2): ex %r4, 0(%r5) C execute mvc with length ((n-1) mod 256)+1
+L(rtn): br %r14
+EPILOGUE()
You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
-
#define GMP_LIMB_BITS 64
#define BYTES_PER_MP_LIMB 8
+/* 1200 MHz z990 */
+
#define DIVREM_1_NORM_THRESHOLD 0 /* always */
#define DIVREM_1_UNNORM_THRESHOLD 0 /* always */
+#define MOD_1_1P_METHOD 2
#define MOD_1_NORM_THRESHOLD 0 /* always */
#define MOD_1_UNNORM_THRESHOLD 0 /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD MP_SIZE_T_MAX /* never */
-#define MOD_1U_TO_MOD_1_1_THRESHOLD 9
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD 0 /* never mpn_mod_1_1p */
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD 19
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD MP_SIZE_T_MAX /* never */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 8
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 5
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 38
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD 0
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 19
#define USE_PREINV_DIVREM_1 1
-#define DIVREM_2_THRESHOLD 0 /* always */
+#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
#define DIVEXACT_1_THRESHOLD 0 /* always */
-#define BMOD_1_TO_MOD_1_THRESHOLD 101
-
-#define MUL_TOOM22_THRESHOLD 14
-#define MUL_TOOM33_THRESHOLD 74
-#define MUL_TOOM44_THRESHOLD 118
-#define MUL_TOOM6H_THRESHOLD 157
-#define MUL_TOOM8H_THRESHOLD 236
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD 73
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD 84
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD 81
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD 72
-
-#define SQR_BASECASE_THRESHOLD 4
-#define SQR_TOOM2_THRESHOLD 26
-#define SQR_TOOM3_THRESHOLD 87
-#define SQR_TOOM4_THRESHOLD 136
-#define SQR_TOOM6_THRESHOLD 171
-#define SQR_TOOM8_THRESHOLD 246
+#define BMOD_1_TO_MOD_1_THRESHOLD 88
+
+#define MUL_TOOM22_THRESHOLD 10
+#define MUL_TOOM33_THRESHOLD 41
+#define MUL_TOOM44_THRESHOLD 104
+#define MUL_TOOM6H_THRESHOLD 149
+#define MUL_TOOM8H_THRESHOLD 212
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD 65
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 69
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 73
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD 66
+
+#define SQR_BASECASE_THRESHOLD 0
+#define SQR_TOOM2_THRESHOLD 16
+#define SQR_TOOM3_THRESHOLD 57
+#define SQR_TOOM4_THRESHOLD 154
+#define SQR_TOOM6_THRESHOLD 206
+#define SQR_TOOM8_THRESHOLD 309
+
+#define MULMID_TOOM42_THRESHOLD 20
#define MULMOD_BNM1_THRESHOLD 9
#define SQRMOD_BNM1_THRESHOLD 11
-#define MUL_FFT_MODF_THRESHOLD 212 /* k = 5 */
+#define POWM_SEC_TABLE 4,23,128,598
+
+#define MUL_FFT_MODF_THRESHOLD 220 /* k = 5 */
#define MUL_FFT_TABLE3 \
- { { 212, 5}, { 9, 6}, { 5, 5}, { 11, 6}, \
- { 6, 5}, { 13, 6}, { 13, 7}, { 7, 6}, \
- { 17, 7}, { 13, 8}, { 7, 7}, { 17, 8}, \
+ { { 220, 5}, { 7, 4}, { 15, 5}, { 8, 4}, \
+ { 17, 5}, { 11, 6}, { 6, 5}, { 13, 6}, \
+ { 7, 5}, { 15, 6}, { 13, 7}, { 7, 6}, \
+ { 15, 7}, { 8, 6}, { 17, 7}, { 9, 6}, \
+ { 19, 7}, { 13, 8}, { 7, 7}, { 17, 8}, \
{ 9, 7}, { 19, 8}, { 11, 7}, { 23, 8}, \
{ 13, 9}, { 7, 8}, { 19, 9}, { 11, 8}, \
- { 23,10}, { 7, 9}, { 15, 8}, { 31, 9}, \
- { 19, 8}, { 41, 9}, { 23,10}, { 15, 9}, \
- { 39,10}, { 23, 9}, { 47,11}, { 15,10}, \
- { 31, 9}, { 67,10}, { 39, 9}, { 79,10}, \
- { 47,11}, { 2048,12}, { 4096,13}, { 8192,14}, \
- { 16384,15}, { 32768,16}, { 65536,17}, { 131072,18}, \
- { 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \
- {4194304,23}, {8388608,24} }
-#define MUL_FFT_TABLE3_SIZE 50
+ { 25,10}, { 7, 9}, { 15, 8}, { 33, 9}, \
+ { 19, 8}, { 39, 9}, { 23,10}, { 15, 9}, \
+ { 39,10}, { 23,11}, { 15,10}, { 31, 9}, \
+ { 63,10}, { 39, 9}, { 79,10}, { 47,11}, \
+ { 31,10}, { 63, 9}, { 127, 8}, { 255,10}, \
+ { 71, 9}, { 143, 8}, { 287,10}, { 79,11}, \
+ { 47,12}, { 31,11}, { 63,10}, { 127, 9}, \
+ { 255, 8}, { 511,10}, { 143,11}, { 79,10}, \
+ { 159, 9}, { 319,10}, { 175, 9}, { 351, 8}, \
+ { 703,11}, { 95,10}, { 191, 9}, { 383,10}, \
+ { 207,11}, { 111,10}, { 223,12}, { 63,11}, \
+ { 127,10}, { 255, 9}, { 511,11}, { 143,10}, \
+ { 287, 9}, { 575, 8}, { 1151,10}, { 319,11}, \
+ { 175,10}, { 351, 9}, { 703,12}, { 95,11}, \
+ { 191,10}, { 383, 9}, { 767,11}, { 207,10}, \
+ { 415, 9}, { 831,11}, { 223,13}, { 63,12}, \
+ { 127,11}, { 255,10}, { 511,11}, { 287,10}, \
+ { 575, 9}, { 1151,12}, { 159,11}, { 319,10}, \
+ { 639,11}, { 351,10}, { 703, 9}, { 1407, 8}, \
+ { 2815,11}, { 383,10}, { 767,11}, { 415,10}, \
+ { 831,12}, { 223,11}, { 447, 9}, { 1791,11}, \
+ { 479,13}, { 8192,14}, { 16384,15}, { 32768,16}, \
+ { 65536,17}, { 131072,18}, { 262144,19}, { 524288,20}, \
+ {1048576,21}, {2097152,22}, {4194304,23}, {8388608,24} }
+#define MUL_FFT_TABLE3_SIZE 124
#define MUL_FFT_THRESHOLD 2240
#define SQR_FFT_MODF_THRESHOLD 184 /* k = 5 */
#define SQR_FFT_TABLE3 \
- { { 184, 5}, { 11, 6}, { 13, 7}, { 7, 6}, \
- { 15, 7}, { 13, 8}, { 7, 7}, { 16, 8}, \
- { 9, 7}, { 19, 8}, { 11, 7}, { 23, 8}, \
- { 13, 9}, { 7, 8}, { 19, 9}, { 11, 8}, \
- { 23,10}, { 7, 9}, { 15, 8}, { 31, 9}, \
- { 23,10}, { 15, 9}, { 39,10}, { 23,11}, \
- { 15,10}, { 31, 9}, { 63, 8}, { 127,10}, \
- { 47,11}, { 2048,12}, { 4096,13}, { 8192,14}, \
+ { { 184, 5}, { 6, 4}, { 13, 5}, { 13, 6}, \
+ { 7, 5}, { 15, 6}, { 15, 7}, { 8, 6}, \
+ { 17, 7}, { 16, 8}, { 9, 7}, { 19, 8}, \
+ { 11, 7}, { 23, 8}, { 13, 9}, { 7, 8}, \
+ { 19, 9}, { 11, 8}, { 25,10}, { 7, 9}, \
+ { 15, 8}, { 31, 9}, { 23,10}, { 15, 9}, \
+ { 39,10}, { 23,11}, { 15,10}, { 31, 9}, \
+ { 63,10}, { 47,11}, { 31,10}, { 63, 9}, \
+ { 127, 8}, { 255,10}, { 71, 9}, { 143, 8}, \
+ { 287, 7}, { 575,10}, { 79,11}, { 47,12}, \
+ { 31,11}, { 63,10}, { 127, 9}, { 255,10}, \
+ { 143, 9}, { 287,11}, { 79,10}, { 159, 9}, \
+ { 319, 8}, { 639,10}, { 175, 9}, { 351,11}, \
+ { 95,10}, { 191, 9}, { 383, 8}, { 767,11}, \
+ { 111,10}, { 223,12}, { 63,11}, { 127,10}, \
+ { 255, 9}, { 511,11}, { 143,10}, { 287, 9}, \
+ { 575,11}, { 159,10}, { 319, 9}, { 639,11}, \
+ { 175,10}, { 351,12}, { 95,11}, { 191,10}, \
+ { 383, 9}, { 767,11}, { 207,10}, { 415, 9}, \
+ { 831,11}, { 223,13}, { 63,12}, { 127,11}, \
+ { 255,10}, { 511,11}, { 287,10}, { 575,12}, \
+ { 159,11}, { 319,10}, { 639,11}, { 351,10}, \
+ { 703,12}, { 191,11}, { 383,10}, { 767,11}, \
+ { 415,12}, { 223,11}, { 447,13}, { 8192,14}, \
{ 16384,15}, { 32768,16}, { 65536,17}, { 131072,18}, \
{ 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \
{4194304,23}, {8388608,24} }
-#define SQR_FFT_TABLE3_SIZE 42
-#define SQR_FFT_THRESHOLD 1728
-
-#define MULLO_BASECASE_THRESHOLD 2
-#define MULLO_DC_THRESHOLD 45
-#define MULLO_MUL_N_THRESHOLD 4392
-
-#define DC_DIV_QR_THRESHOLD 40
-#define DC_DIVAPPR_Q_THRESHOLD 154
-#define DC_BDIV_QR_THRESHOLD 42
-#define DC_BDIV_Q_THRESHOLD 102
-
-#define INV_MULMOD_BNM1_THRESHOLD 26
-#define INV_NEWTON_THRESHOLD 226
-#define INV_APPR_THRESHOLD 171
-
-#define BINV_NEWTON_THRESHOLD 222
-#define REDC_1_TO_REDC_N_THRESHOLD 46
-
-#define MU_DIV_QR_THRESHOLD 855
-#define MU_DIVAPPR_Q_THRESHOLD 942
-#define MUPI_DIV_QR_THRESHOLD 99
-#define MU_BDIV_QR_THRESHOLD 680
-#define MU_BDIV_Q_THRESHOLD 855
-
-#define MATRIX22_STRASSEN_THRESHOLD 15
-#define HGCD_THRESHOLD 89
-#define GCD_DC_THRESHOLD 273
-#define GCDEXT_DC_THRESHOLD 209
-#define JACOBI_BASE_METHOD 2
-
-#define GET_STR_DC_THRESHOLD 32
-#define GET_STR_PRECOMPUTE_THRESHOLD 47
-#define SET_STR_DC_THRESHOLD 532
-#define SET_STR_PRECOMPUTE_THRESHOLD 1336
+#define SQR_FFT_TABLE3_SIZE 106
+#define SQR_FFT_THRESHOLD 1600
+
+#define MULLO_BASECASE_THRESHOLD 3
+#define MULLO_DC_THRESHOLD 33
+#define MULLO_MUL_N_THRESHOLD 5240
+
+#define DC_DIV_QR_THRESHOLD 28
+#define DC_DIVAPPR_Q_THRESHOLD 106
+#define DC_BDIV_QR_THRESHOLD 31
+#define DC_BDIV_Q_THRESHOLD 78
+
+#define INV_MULMOD_BNM1_THRESHOLD 43
+#define INV_NEWTON_THRESHOLD 130
+#define INV_APPR_THRESHOLD 117
+
+#define BINV_NEWTON_THRESHOLD 149
+#define REDC_1_TO_REDC_N_THRESHOLD 38
+
+#define MU_DIV_QR_THRESHOLD 680
+#define MU_DIVAPPR_Q_THRESHOLD 748
+#define MUPI_DIV_QR_THRESHOLD 66
+#define MU_BDIV_QR_THRESHOLD 562
+#define MU_BDIV_Q_THRESHOLD 680
+
+#define MATRIX22_STRASSEN_THRESHOLD 11
+#define HGCD_THRESHOLD 75
+#define HGCD_APPR_THRESHOLD 59
+#define HGCD_REDUCE_THRESHOLD 901
+#define GCD_DC_THRESHOLD 186
+#define GCDEXT_DC_THRESHOLD 150
+#define JACOBI_BASE_METHOD 3
+
+#define GET_STR_DC_THRESHOLD 27
+#define GET_STR_PRECOMPUTE_THRESHOLD 40
+#define SET_STR_DC_THRESHOLD 418
+#define SET_STR_PRECOMPUTE_THRESHOLD 1111
--- /dev/null
+dnl S/390-64 mpn_invert_limb
+
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C z900 142
+C z990 86
+C z9 ?
+C z10 ?
+C z196 ?
+
+ASM_START()
+ TEXT
+ ALIGN(16)
+PROLOGUE(mpn_invert_limb)
+C Compute a reciprocal approximation of the limb d passed in r2 and return it
+C in r2.  A 16-bit table value v0 is refined in three steps (v1, v2, v3) and
+C then adjusted once using the 128-bit product v3*d.
+C NOTE(review): the table index is (d >> 55) with a bias of 256 entries, so
+C the lookup is in bounds only when bit 63 of d is set; presumably callers
+C always pass a normalized divisor - confirm.
+C NOTE(review): the step structure looks like the Moller-Granlund reciprocal
+C algorithm - confirm against the reference before relying on error bounds.
+ stg %r9, 72(%r15) C save r9, the only call-saved register used
+ srlg %r9, %r2, 55 C top 9 bits of d
+ agr %r9, %r9 C times 2, byte offset of a halfword entry
+ larl %r4, approx_tab-512 C table base, biased by 256 entries
+ srlg %r3, %r2, 24
+ aghi %r3, 1 C d40 = (d >> 24) + 1
+ lghi %r5, 1
+ llgh %r4, 0(%r9, %r4) C v0 = table entry, zero extended
+ sllg %r9, %r4, 11 C v0 << 11
+ msgr %r4, %r4 C v0 * v0
+ msgr %r4, %r3 C v0 * v0 * d40
+ srlg %r4, %r4, 40
+ aghi %r9, -1
+ sgr %r9, %r4 C v1 = (v0 << 11) - 1 - ((v0*v0*d40) >> 40)
+ sllg %r0, %r9, 60 C v1 << 60
+ sllg %r1, %r9, 13 C v1 << 13
+ msgr %r9, %r9 C v1 * v1
+ msgr %r9, %r3 C v1 * v1 * d40
+ sgr %r0, %r9 C (v1 << 60) - v1*v1*d40
+ ngr %r5, %r2 C d0 = d & 1
+ srlg %r4, %r2, 1 C d >> 1
+ srlg %r3, %r0, 47
+ agr %r3, %r1 C v2 = (v1 << 13) + (((v1 << 60) - v1*v1*d40) >> 47)
+ agr %r4, %r5 C d63 = (d >> 1) + d0
+ msgr %r4, %r3 C v2 * d63
+ srlg %r1, %r3, 1 C v2 >> 1
+ lcgr %r5, %r5 C -d0, all ones when d is odd, else zero
+ ngr %r1, %r5 C (v2 >> 1) & -d0
+ sgr %r1, %r4 C e = ((v2 >> 1) & -d0) - v2*d63
+ mlgr %r0, %r3 C r0:r1 = e * v2, high half in r0
+ srlg %r9, %r0, 1 C high half >> 1
+ sllg %r4, %r3, 31 C v2 << 31
+ agr %r4, %r9 C v3 = (v2 << 31) + (high(e*v2) >> 1)
+ lgr %r1, %r4
+ mlgr %r0, %r2 C r0:r1 = v3 * d
+ algr %r1, %r2 C low half + d, only the carry is needed
+ alcgr %r0, %r2 C high half + d + carry
+ lgr %r2, %r4
+ sgr %r2, %r0 C result = v3 - (high(v3*d + d) + d)
+ lg %r9, 72(%r15) C restore r9
+ br %r14
+EPILOGUE()
+ RODATA
+ ALIGN(2)
+approx_tab:
+ .word 0x7fd,0x7f5,0x7ed,0x7e5,0x7dd,0x7d5,0x7ce,0x7c6
+ .word 0x7bf,0x7b7,0x7b0,0x7a8,0x7a1,0x79a,0x792,0x78b
+ .word 0x784,0x77d,0x776,0x76f,0x768,0x761,0x75b,0x754
+ .word 0x74d,0x747,0x740,0x739,0x733,0x72c,0x726,0x720
+ .word 0x719,0x713,0x70d,0x707,0x700,0x6fa,0x6f4,0x6ee
+ .word 0x6e8,0x6e2,0x6dc,0x6d6,0x6d1,0x6cb,0x6c5,0x6bf
+ .word 0x6ba,0x6b4,0x6ae,0x6a9,0x6a3,0x69e,0x698,0x693
+ .word 0x68d,0x688,0x683,0x67d,0x678,0x673,0x66e,0x669
+ .word 0x664,0x65e,0x659,0x654,0x64f,0x64a,0x645,0x640
+ .word 0x63c,0x637,0x632,0x62d,0x628,0x624,0x61f,0x61a
+ .word 0x616,0x611,0x60c,0x608,0x603,0x5ff,0x5fa,0x5f6
+ .word 0x5f1,0x5ed,0x5e9,0x5e4,0x5e0,0x5dc,0x5d7,0x5d3
+ .word 0x5cf,0x5cb,0x5c6,0x5c2,0x5be,0x5ba,0x5b6,0x5b2
+ .word 0x5ae,0x5aa,0x5a6,0x5a2,0x59e,0x59a,0x596,0x592
+ .word 0x58e,0x58a,0x586,0x583,0x57f,0x57b,0x577,0x574
+ .word 0x570,0x56c,0x568,0x565,0x561,0x55e,0x55a,0x556
+ .word 0x553,0x54f,0x54c,0x548,0x545,0x541,0x53e,0x53a
+ .word 0x537,0x534,0x530,0x52d,0x52a,0x526,0x523,0x520
+ .word 0x51c,0x519,0x516,0x513,0x50f,0x50c,0x509,0x506
+ .word 0x503,0x500,0x4fc,0x4f9,0x4f6,0x4f3,0x4f0,0x4ed
+ .word 0x4ea,0x4e7,0x4e4,0x4e1,0x4de,0x4db,0x4d8,0x4d5
+ .word 0x4d2,0x4cf,0x4cc,0x4ca,0x4c7,0x4c4,0x4c1,0x4be
+ .word 0x4bb,0x4b9,0x4b6,0x4b3,0x4b0,0x4ad,0x4ab,0x4a8
+ .word 0x4a5,0x4a3,0x4a0,0x49d,0x49b,0x498,0x495,0x493
+ .word 0x490,0x48d,0x48b,0x488,0x486,0x483,0x481,0x47e
+ .word 0x47c,0x479,0x477,0x474,0x472,0x46f,0x46d,0x46a
+ .word 0x468,0x465,0x463,0x461,0x45e,0x45c,0x459,0x457
+ .word 0x455,0x452,0x450,0x44e,0x44b,0x449,0x447,0x444
+ .word 0x442,0x440,0x43e,0x43b,0x439,0x437,0x435,0x432
+ .word 0x430,0x42e,0x42c,0x42a,0x428,0x425,0x423,0x421
+ .word 0x41f,0x41d,0x41b,0x419,0x417,0x414,0x412,0x410
+ .word 0x40e,0x40c,0x40a,0x408,0x406,0x404,0x402,0x400
+ASM_END()
--- /dev/null
+dnl S/390-64 logops.
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb variant 1 variant 2 variant 3
+C rp!=up rp=up
+C z900 4.5 2.25 5.5 5.5
+C z990 2.75 2 3.25 3.25
+C z9 ? ? ?
+C z10 ? ? ?
+C z196 ? ? ?
+
+C INPUT PARAMETERS
+define(`rp', `%r2')
+define(`up', `%r3')
+define(`vp', `%r4')
+define(`n', `%r5')
+
+ifdef(`OPERATION_and_n',`
+ define(`func',`mpn_and_n')
+ define(`VARIANT_1')
+ define(`LOGOPC',`nc')
+ define(`LOGOP',`ng')')
+ifdef(`OPERATION_andn_n',`
+ define(`func',`mpn_andn_n')
+ define(`VARIANT_2')
+ define(`LOGOP',`ng')')
+ifdef(`OPERATION_nand_n',`
+ define(`func',`mpn_nand_n')
+ define(`VARIANT_3')
+ define(`LOGOP',`ng')')
+ifdef(`OPERATION_ior_n',`
+ define(`func',`mpn_ior_n')
+ define(`VARIANT_1')
+ define(`LOGOPC',`oc')
+ define(`LOGOP',`og')')
+ifdef(`OPERATION_iorn_n',`
+ define(`func',`mpn_iorn_n')
+ define(`VARIANT_2')
+ define(`LOGOP',`og')')
+ifdef(`OPERATION_nior_n',`
+ define(`func',`mpn_nior_n')
+ define(`VARIANT_3')
+ define(`LOGOP',`og')')
+ifdef(`OPERATION_xor_n',`
+ define(`func',`mpn_xor_n')
+ define(`VARIANT_1')
+ define(`LOGOPC',`xc')
+ define(`LOGOP',`xg')')
+ifdef(`OPERATION_xnor_n',`
+ define(`func',`mpn_xnor_n')
+ define(`VARIANT_2')
+ define(`LOGOP',`xg')')
+
+MULFUNC_PROLOGUE(mpn_and_n mpn_andn_n mpn_nand_n mpn_ior_n mpn_iorn_n mpn_nior_n mpn_xor_n mpn_xnor_n)
+
+ASM_START()
+PROLOGUE(func)
+C Limb-wise logical operation on the n-limb operands at up and vp, result
+C stored at rp.  Exactly one of the three variants below is assembled,
+C selected by the OPERATION_xxx symbol:
+C   VARIANT_1:  rp[i] = up[i] LOGOP vp[i]
+C   VARIANT_2:  rp[i] = up[i] LOGOP (NOT vp[i])
+C   VARIANT_3:  rp[i] = NOT (up[i] LOGOP vp[i])
+C All variants bias n by 3 and enter a 4-way unrolled loop through a feed-in
+C block chosen from the low two bits, so the loop counter r0 is ceil(n/4).
+ifdef(`VARIANT_1',`
+ cgr rp, up
+ jne L(normal) C rp != up, use the register loop
+
+C rp = up: operate in place with the storage-to-storage form LOGOPC, in
+C 256-byte blocks followed by one LOGOPC of run-time length executed via ex.
+ sllg n, n, 3 C limbs -> bytes
+ aghi n, -1 C bytes-1, the form the length field uses
+ srlg %r1, n, 8 C number of 256-byte blocks for the loop
+ ltgr %r1, %r1 C < 256 bytes to process?
+ je L(1)
+
+L(tp): LOGOPC 0(256, rp), 0(vp)
+ la rp, 256(rp)
+ la vp, 256(vp)
+ brctg %r1, L(tp)
+
+L(1): bras %r1, L(2) C make r1 point to the LOGOPC insn below
+ LOGOPC 0(1, rp), 0(vp) C template only, run via ex below
+L(2): ex n, 0(%r1) C execute LOGOPC with length ((bytes-1) mod 256)+1
+L(rtn): br %r14
+
+
+L(normal):
+ stmg %r6, %r8, 48(%r15) C save call-saved registers r6-r8
+ aghi n, 3
+ lghi %r7, 3
+ srlg %r0, n, 2 C loop count, ceil(n/4) since n is biased by 3
+ ngr %r7, n C (n+3) mod 4, zero when n mod 4 = 1
+ je L(b1)
+ cghi %r7, 2
+ jl L(b2) C (n+3) mod 4 = 1, two leftover limbs
+ jne L(top) C (n+3) mod 4 = 3, n is a multiple of 4
+
+C Three leftover limbs.  Loading r5 overwrites n, which is no longer needed.
+L(b3): lmg %r5, %r7, 0(up)
+ la up, 24(up)
+ LOGOP %r5, 0(vp)
+ LOGOP %r6, 8(vp)
+ LOGOP %r7, 16(vp)
+ stmg %r5, %r7, 0(rp)
+ la rp, 24(rp)
+ la vp, 24(vp)
+ j L(mid)
+
+C One leftover limb.
+L(b1): lg %r5, 0(up)
+ la up, 8(up)
+ LOGOP %r5, 0(vp)
+ stg %r5, 0(rp)
+ la rp, 8(rp)
+ la vp, 8(vp)
+ j L(mid)
+
+C Two leftover limbs.
+L(b2): lmg %r5, %r6, 0(up)
+ la up, 16(up)
+ LOGOP %r5, 0(vp)
+ LOGOP %r6, 8(vp)
+ stmg %r5, %r6, 0(rp)
+ la rp, 16(rp)
+ la vp, 16(vp)
+ j L(mid)
+
+C Main loop, four limbs per iteration.
+L(top): lmg %r5, %r8, 0(up)
+ la up, 32(up)
+ LOGOP %r5, 0(vp)
+ LOGOP %r6, 8(vp)
+ LOGOP %r7, 16(vp)
+ LOGOP %r8, 24(vp)
+ stmg %r5, %r8, 0(rp)
+ la rp, 32(rp)
+ la vp, 32(vp)
+L(mid): brctg %r0, L(top)
+
+ lmg %r6, %r8, 48(%r15) C restore r6-r8
+ br %r14
+')
+
+ifdef(`VARIANT_2',`
+ stmg %r6, %r8, 48(%r15) C save call-saved registers r6-r8
+ lghi %r1, -1 C all-ones mask used to complement limbs
+
+ aghi n, 3
+ lghi %r7, 3
+ srlg %r0, n, 2 C loop count, ceil(n/4) since n is biased by 3
+ ngr %r7, n C (n+3) mod 4, zero when n mod 4 = 1
+ je L(b1)
+ cghi %r7, 2
+ jl L(b2) C (n+3) mod 4 = 1, two leftover limbs
+ jne L(top) C (n+3) mod 4 = 3, n is a multiple of 4
+
+C Three leftover limbs: complement the vp limbs, then combine with up.
+L(b3): lmg %r5, %r7, 0(vp)
+ la vp, 24(vp)
+ xgr %r5, %r1
+ xgr %r6, %r1
+ xgr %r7, %r1
+ LOGOP %r5, 0(up)
+ LOGOP %r6, 8(up)
+ LOGOP %r7, 16(up)
+ stmg %r5, %r7, 0(rp)
+ la rp, 24(rp)
+ la up, 24(up)
+ j L(mid)
+
+C One leftover limb.
+L(b1): lg %r5, 0(vp)
+ la vp, 8(vp)
+ xgr %r5, %r1
+ LOGOP %r5, 0(up)
+ stg %r5, 0(rp)
+ la rp, 8(rp)
+ la up, 8(up)
+ j L(mid)
+
+C Two leftover limbs.
+L(b2): lmg %r5, %r6, 0(vp)
+ la vp, 16(vp)
+ xgr %r5, %r1
+ xgr %r6, %r1
+ LOGOP %r5, 0(up)
+ LOGOP %r6, 8(up)
+ stmg %r5, %r6, 0(rp)
+ la rp, 16(rp)
+ la up, 16(up)
+ j L(mid)
+
+C Main loop, four limbs per iteration.
+L(top): lmg %r5, %r8, 0(vp)
+ la vp, 32(vp)
+ xgr %r5, %r1
+ xgr %r6, %r1
+ xgr %r7, %r1
+ xgr %r8, %r1
+ LOGOP %r5, 0(up)
+ LOGOP %r6, 8(up)
+ LOGOP %r7, 16(up)
+ LOGOP %r8, 24(up)
+ la up, 32(up)
+ stmg %r5, %r8, 0(rp)
+ la rp, 32(rp)
+L(mid): brctg %r0, L(top)
+
+ lmg %r6, %r8, 48(%r15) C restore r6-r8
+ br %r14
+')
+
+ifdef(`VARIANT_3',`
+ stmg %r6, %r8, 48(%r15) C save call-saved registers r6-r8
+ lghi %r1, -1 C all-ones mask used to complement results
+
+ aghi n, 3
+ lghi %r7, 3
+ srlg %r0, n, 2 C loop count, ceil(n/4) since n is biased by 3
+ ngr %r7, n C (n+3) mod 4, zero when n mod 4 = 1
+ je L(b1)
+ cghi %r7, 2
+ jl L(b2) C (n+3) mod 4 = 1, two leftover limbs
+ jne L(top) C (n+3) mod 4 = 3, n is a multiple of 4
+
+C Three leftover limbs: combine vp with up, then complement the result.
+L(b3): lmg %r5, %r7, 0(vp)
+ la vp, 24(vp)
+ LOGOP %r5, 0(up)
+ LOGOP %r6, 8(up)
+ xgr %r5, %r1
+ xgr %r6, %r1
+ LOGOP %r7, 16(up)
+ xgr %r7, %r1
+ stmg %r5, %r7, 0(rp)
+ la rp, 24(rp)
+ la up, 24(up)
+ j L(mid)
+
+C One leftover limb.
+L(b1): lg %r5, 0(vp)
+ la vp, 8(vp)
+ LOGOP %r5, 0(up)
+ xgr %r5, %r1
+ stg %r5, 0(rp)
+ la rp, 8(rp)
+ la up, 8(up)
+ j L(mid)
+
+C Two leftover limbs.
+L(b2): lmg %r5, %r6, 0(vp)
+ la vp, 16(vp)
+ LOGOP %r5, 0(up)
+ LOGOP %r6, 8(up)
+ xgr %r5, %r1
+ xgr %r6, %r1
+ stmg %r5, %r6, 0(rp)
+ la rp, 16(rp)
+ la up, 16(up)
+ j L(mid)
+
+C Main loop, four limbs per iteration.
+L(top): lmg %r5, %r8, 0(vp)
+ la vp, 32(vp)
+ LOGOP %r5, 0(up)
+ LOGOP %r6, 8(up)
+ xgr %r5, %r1
+ xgr %r6, %r1
+ LOGOP %r7, 16(up)
+ LOGOP %r8, 24(up)
+ xgr %r7, %r1
+ xgr %r8, %r1
+ stmg %r5, %r8, 0(rp)
+ la up, 32(up)
+ la rp, 32(rp)
+L(mid): brctg %r0, L(top)
+
+ lmg %r6, %r8, 48(%r15) C restore r6-r8
+ br %r14
+')
+
+EPILOGUE()
--- /dev/null
+dnl S/390-64 mpn_lshift.
+
+dnl Copyright 2011, 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C z900 7
+C z990 3
+C z9 ?
+C z10 ?
+C z196 ?
+
+C NOTES
+C * This uses discrete loads and stores in a software pipeline. Using lmg and
+C stmg is not faster.
+C * One could assume more pipelining could approach 2.5 c/l, but we have not
+C found any 8-way loop that runs better than the current 4-way loop.
+C * Consider using the same feed-in code for 1 <= n <= 3 as for n mod 4,
+C similarly to the x86_64 sqr_basecase feed-in.
+
+C INPUT PARAMETERS
+define(`rp', `%r2')
+define(`up', `%r3')
+define(`n', `%r4')
+define(`cnt', `%r5')
+
+define(`tnc', `%r6')
+
+ASM_START()
+PROLOGUE(mpn_lshift)
+C Shift the n-limb operand at up left by cnt bits, store the n result limbs
+C at rp and return in r2 the bits shifted out of the most significant limb.
+C Limbs are processed from the most significant end downwards.  n = 1, 2, 3
+C are dispatched through a jump table; larger n use a 4-way software
+C pipelined loop with four feed-in paths selected by n mod 4.
+C NOTE(review): n = 0 is not handled (the table dispatch has no entry for it)
+C and cnt is presumably in 1..63 - confirm against the callers.
+ cghi n, 3
+ jh L(gt1) C n > 3, use the pipelined loop
+
+ stmg %r6, %r7, 48(%r15) C save r6 (tnc) and r7
+ larl %r1, L(tab)-4 C table base, biased so n = 1 selects entry 0
+ lcgr tnc, cnt C tnc = -cnt, shifts use only the low 6 bits
+ sllg n, n, 2 C 4 bytes per table entry
+ b 0(n,%r1) C dispatch on n
+L(tab): j L(n1)
+ j L(n2)
+ j L(n3)
+
+L(n1): lg %r1, 0(up)
+ sllg %r0, %r1, 0(cnt)
+ stg %r0, 0(rp)
+ srlg %r2, %r1, 0(tnc) C return value, bits shifted out
+ lg %r6, 48(%r15) C restoring r7 not needed
+ br %r14
+
+L(n2): lg %r1, 8(up) C most significant limb
+ srlg %r4, %r1, 0(tnc) C bits shifted out, returned later via r2
+ sllg %r0, %r1, 0(cnt)
+ j L(cj)
+
+L(n3): lg %r1, 16(up) C most significant limb
+ srlg %r4, %r1, 0(tnc) C bits shifted out, returned later via r2
+ sllg %r0, %r1, 0(cnt)
+ lg %r1, 8(up)
+ srlg %r7, %r1, 0(tnc)
+ ogr %r7, %r0 C combine with the bits from the limb above
+ sllg %r0, %r1, 0(cnt)
+ stg %r7, 16(rp)
+L(cj): lg %r1, 0(up) C common tail for n = 2 and n = 3
+ srlg %r7, %r1, 0(tnc)
+ ogr %r7, %r0
+ sllg %r0, %r1, 0(cnt)
+ stg %r7, 8(rp)
+ stg %r0, 0(rp) C least significant limb gets zeros shifted in
+ lgr %r2, %r4 C return value
+ lmg %r6, %r7, 48(%r15)
+ br %r14
+
+L(gt1): stmg %r6, %r13, 48(%r15) C save call-saved registers r6-r13
+ lcgr tnc, cnt C tnc = -cnt
+
+ sllg %r1, n, 3 C operand size in bytes
+ srlg %r0, n, 2 C loop count
+
+ agr up, %r1 C point up at end of U
+ agr rp, %r1 C point rp at end of R
+ aghi up, -56 C bias so the feed-in code can use offsets 32..48
+ aghi rp, -40
+
+ lghi %r7, 3
+ ngr %r7, n C n mod 4 selects the feed-in path
+ je L(b0)
+ cghi %r7, 2
+ jl L(b1)
+ je L(b2)
+
+C Each feed-in path leaves the bits shifted out of the top limb in r9, which
+C becomes the return value, and enters the loop at the matching L(lmN) label.
+L(b3): lg %r7, 48(up)
+ srlg %r9, %r7, 0(tnc)
+ sllg %r11, %r7, 0(cnt)
+ lg %r8, 40(up)
+ lg %r7, 32(up)
+ srlg %r4, %r8, 0(tnc)
+ sllg %r13, %r8, 0(cnt)
+ ogr %r11, %r4
+ la rp, 16(rp)
+ j L(lm3)
+
+L(b2): lg %r8, 48(up)
+ lg %r7, 40(up)
+ srlg %r9, %r8, 0(tnc)
+ sllg %r13, %r8, 0(cnt)
+ la rp, 24(rp)
+ la up, 8(up)
+ j L(lm2)
+
+L(b1): lg %r7, 48(up)
+ srlg %r9, %r7, 0(tnc)
+ sllg %r11, %r7, 0(cnt)
+ lg %r8, 40(up)
+ lg %r7, 32(up)
+ srlg %r4, %r8, 0(tnc)
+ sllg %r10, %r8, 0(cnt)
+ ogr %r11, %r4
+ la rp, 32(rp)
+ la up, 16(up)
+ j L(lm1)
+
+L(b0): lg %r8, 48(up)
+ lg %r7, 40(up)
+ srlg %r9, %r8, 0(tnc)
+ sllg %r10, %r8, 0(cnt)
+ la rp, 40(rp)
+ la up, 24(up)
+ j L(lm0)
+
+C Main loop, four limbs per iteration, walking towards lower addresses.
+C ALIGN(16)
+L(top): srlg %r4, %r8, 0(tnc)
+ sllg %r13, %r8, 0(cnt)
+ ogr %r11, %r4
+ stg %r10, 24(rp)
+L(lm3): stg %r11, 16(rp)
+L(lm2): srlg %r12, %r7, 0(tnc)
+ sllg %r11, %r7, 0(cnt)
+ lg %r8, 24(up)
+ lg %r7, 16(up)
+ ogr %r13, %r12
+ srlg %r4, %r8, 0(tnc)
+ sllg %r10, %r8, 0(cnt)
+ ogr %r11, %r4
+ stg %r13, 8(rp)
+L(lm1): stg %r11, 0(rp)
+L(lm0): srlg %r12, %r7, 0(tnc)
+ aghi rp, -32
+ sllg %r11, %r7, 0(cnt)
+ lg %r8, 8(up)
+ lg %r7, 0(up)
+ aghi up, -32
+ ogr %r10, %r12
+ brctg %r0, L(top)
+
+C Wind-down: store the four limbs still held in registers.
+L(end): srlg %r4, %r8, 0(tnc)
+ sllg %r13, %r8, 0(cnt)
+ ogr %r11, %r4
+ stg %r10, 24(rp)
+ stg %r11, 16(rp)
+ srlg %r12, %r7, 0(tnc)
+ sllg %r11, %r7, 0(cnt)
+ ogr %r13, %r12
+ stg %r13, 8(rp)
+ stg %r11, 0(rp) C least significant limb gets zeros shifted in
+ lgr %r2, %r9 C return value
+
+ lmg %r6, %r13, 48(%r15) C restore r6-r13
+ br %r14
+EPILOGUE()
--- /dev/null
+dnl S/390-64 mpn_lshiftc.
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C z900 9
+C z990 3.5
+C z9 ?
+C z10 ?
+C z196 ?
+
+C NOTES
+C * See notes in lshift.asm.
+
+C INPUT PARAMETERS
+define(`rp', `%r2')
+define(`up', `%r3')
+define(`n', `%r4')
+define(`cnt', `%r5')
+
+define(`tnc', `%r6')
+
+ASM_START()
+PROLOGUE(mpn_lshiftc)
+C Shift the n-limb operand at up left by cnt bits and store the one-s
+C complement of the n result limbs at rp.  The value returned in r2 is the
+C bits shifted out of the most significant limb and is not complemented.
+C The structure is the same as mpn_lshift, with an all-ones mask (r8 on the
+C small-n paths, r14 in the loop) xored into every limb before it is stored.
+C NOTE(review): n = 0 is not handled (the table dispatch has no entry for it)
+C and cnt is presumably in 1..63 - confirm against the callers.
+ cghi n, 3
+ jh L(gt1) C n > 3, use the pipelined loop
+
+ stmg %r6, %r8, 48(%r15) C save r6 (tnc), r7 and r8
+ larl %r1, L(tab)-4 C table base, biased so n = 1 selects entry 0
+ lcgr tnc, cnt C tnc = -cnt, shifts use only the low 6 bits
+ sllg n, n, 2 C 4 bytes per table entry
+ lghi %r8, -1 C all-ones mask for complementing
+ b 0(n,%r1) C dispatch on n
+L(tab): j L(n1)
+ j L(n2)
+ j L(n3)
+
+L(n1): lg %r1, 0(up)
+ sllg %r0, %r1, 0(cnt)
+ xgr %r0, %r8 C complement
+ stg %r0, 0(rp)
+ srlg %r2, %r1, 0(tnc) C return value, bits shifted out
+ lmg %r6, %r8, 48(%r15)
+ br %r14
+
+L(n2): lg %r1, 8(up) C most significant limb
+ srlg %r4, %r1, 0(tnc) C bits shifted out, returned later via r2
+ sllg %r0, %r1, 0(cnt)
+ j L(cj)
+
+L(n3): lg %r1, 16(up) C most significant limb
+ srlg %r4, %r1, 0(tnc) C bits shifted out, returned later via r2
+ sllg %r0, %r1, 0(cnt)
+ lg %r1, 8(up)
+ srlg %r7, %r1, 0(tnc)
+ ogr %r7, %r0 C combine with the bits from the limb above
+ sllg %r0, %r1, 0(cnt)
+ xgr %r7, %r8 C complement
+ stg %r7, 16(rp)
+L(cj): lg %r1, 0(up) C common tail for n = 2 and n = 3
+ srlg %r7, %r1, 0(tnc)
+ ogr %r7, %r0
+ sllg %r0, %r1, 0(cnt)
+ xgr %r7, %r8 C complement
+ xgr %r0, %r8 C complement
+ stg %r7, 8(rp)
+ stg %r0, 0(rp)
+ lgr %r2, %r4 C return value
+ lmg %r6, %r8, 48(%r15)
+ br %r14
+
+L(gt1): stmg %r6, %r14, 48(%r15) C save r6-r14, r14 is reused as the mask
+ lcgr tnc, cnt C tnc = -cnt
+
+ sllg %r1, n, 3 C operand size in bytes
+ srlg %r0, n, 2 C loop count
+
+ agr up, %r1 C point up at end of U
+ agr rp, %r1 C point rp at end of R
+ aghi up, -56 C bias so the feed-in code can use offsets 32..48
+ aghi rp, -40
+
+ lghi %r7, 3
+ lghi %r14, -1 C all-ones mask for complementing
+ ngr %r7, n C n mod 4 selects the feed-in path
+ je L(b0)
+ cghi %r7, 2
+ jl L(b1)
+ je L(b2)
+
+C Each feed-in path leaves the bits shifted out of the top limb in r9, which
+C becomes the return value, and enters the loop at the matching L(lmN) label.
+C Paths entering at a store (lm3, lm1) complement r11 themselves.
+L(b3): lg %r7, 48(up)
+ srlg %r9, %r7, 0(tnc)
+ sllg %r11, %r7, 0(cnt)
+ lg %r8, 40(up)
+ lg %r7, 32(up)
+ srlg %r4, %r8, 0(tnc)
+ sllg %r13, %r8, 0(cnt)
+ ogr %r11, %r4
+ la rp, 16(rp)
+ xgr %r11, %r14
+ j L(lm3)
+
+L(b2): lg %r8, 48(up)
+ lg %r7, 40(up)
+ srlg %r9, %r8, 0(tnc)
+ sllg %r13, %r8, 0(cnt)
+ la rp, 24(rp)
+ la up, 8(up)
+ j L(lm2)
+
+L(b1): lg %r7, 48(up)
+ srlg %r9, %r7, 0(tnc)
+ sllg %r11, %r7, 0(cnt)
+ lg %r8, 40(up)
+ lg %r7, 32(up)
+ srlg %r4, %r8, 0(tnc)
+ sllg %r10, %r8, 0(cnt)
+ ogr %r11, %r4
+ la rp, 32(rp)
+ la up, 16(up)
+ xgr %r11, %r14
+ j L(lm1)
+
+L(b0): lg %r8, 48(up)
+ lg %r7, 40(up)
+ srlg %r9, %r8, 0(tnc)
+ sllg %r10, %r8, 0(cnt)
+ la rp, 40(rp)
+ la up, 24(up)
+ j L(lm0)
+
+C Main loop, four limbs per iteration, walking towards lower addresses.
+C ALIGN(16)
+L(top): srlg %r4, %r8, 0(tnc)
+ sllg %r13, %r8, 0(cnt)
+ ogr %r11, %r4
+ xgr %r10, %r14 C complement
+ xgr %r11, %r14 C complement
+ stg %r10, 24(rp)
+L(lm3): stg %r11, 16(rp)
+L(lm2): srlg %r12, %r7, 0(tnc)
+ sllg %r11, %r7, 0(cnt)
+ lg %r8, 24(up)
+ lg %r7, 16(up)
+ ogr %r13, %r12
+ srlg %r4, %r8, 0(tnc)
+ sllg %r10, %r8, 0(cnt)
+ ogr %r11, %r4
+ xgr %r13, %r14 C complement
+ xgr %r11, %r14 C complement
+ stg %r13, 8(rp)
+L(lm1): stg %r11, 0(rp)
+L(lm0): srlg %r12, %r7, 0(tnc)
+ aghi rp, -32
+ sllg %r11, %r7, 0(cnt)
+ lg %r8, 8(up)
+ lg %r7, 0(up)
+ aghi up, -32
+ ogr %r10, %r12
+ brctg %r0, L(top)
+
+C Wind-down: complement and store the four limbs still held in registers.
+L(end): srlg %r4, %r8, 0(tnc)
+ sllg %r13, %r8, 0(cnt)
+ ogr %r11, %r4
+ xgr %r10, %r14
+ xgr %r11, %r14
+ stg %r10, 24(rp)
+ stg %r11, 16(rp)
+ srlg %r12, %r7, 0(tnc)
+ sllg %r11, %r7, 0(cnt)
+ ogr %r13, %r12
+ xgr %r13, %r14
+ xgr %r11, %r14
+ stg %r13, 8(rp)
+ stg %r11, 0(rp)
+ lgr %r2, %r9 C return value
+
+ lmg %r6, %r14, 48(%r15) C restore r6-r14, including the return address
+ br %r14
+EPILOGUE()
--- /dev/null
+dnl S/390-64 mpn_mod_34lsub1
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C z900 5.8
+C z990 2
+C z9 ?
+C z10 ?
+C z196 ?
+
+C TODO
+C * Optimise summation code, see x86_64.
+
+C INPUT PARAMETERS
+define(`rp', `%r2')
+define(`n', `%r3')
+
+ASM_START()
+PROLOGUE(mpn_mod_34lsub1)
+C Return in r2 a value congruent to the n-limb operand at r2 modulo 2^48-1.
+C The result is a sum of folded 48-bit pieces and is not fully reduced.
+C Limbs are summed three at a time into r0, r12 and r11 (limb index mod 3)
+C with the carry-outs of each triple collected in r8; the accumulators are
+C then folded using 2^64 = 2^16, 2^128 = 2^32 and 2^192 = 1 mod 2^48-1.
+ stmg %r7, %r12, 56(%r15) C save call-saved registers r7-r12
+ lghi %r11, 0 C accumulator for limbs 2 mod 3
+ lghi %r12, 0 C accumulator for limbs 1 mod 3
+ lghi %r0, 0 C accumulator for limbs 0 mod 3
+ lghi %r8, 0 C carry-outs of the three-limb chain
+ lghi %r9, 0 C carry-out of the tail add into r12
+ lghi %r10, 0 C carry-out of the tail add into r0
+ lghi %r7, 0 C zero in the loop, later the folded r11 and r8
+ aghi %r3, -3
+ jl .L3 C fewer than 3 limbs, skip the main loop
+
+L(top): alg %r0, 0(%r2)
+ alcg %r12, 8(%r2)
+ alcg %r11, 16(%r2)
+ alcgr %r8, %r7 C r7 = 0 here, so this just collects the carry
+ la %r2, 24(%r2)
+ aghi %r3, -3
+ jnl L(top)
+
+C Fold r8 (weight 1) and r11 (weight 2^32) into r7.
+ lgr %r7, %r8
+ srlg %r1, %r11, 16 C part of r11 that wraps to weight 1
+ nihh %r7, 0 C 0xffffffffffff
+ agr %r7, %r1
+ srlg %r8, %r8, 48 C part of r8 above 48 bits
+ agr %r7, %r8
+ sllg %r11, %r11, 32
+ nihh %r11, 0 C keep the low 16 bits of r11 at weight 2^32
+ agr %r7, %r11
+C Tail: r3 is now -3, -2 or -1, meaning 0, 1 or 2 limbs remain.
+.L3:
+ cghi %r3, -3
+ je .L6
+ alg %r0, 0(%r2)
+ alcgr %r10, %r10 C r10 was 0, now holds the carry-out
+ cghi %r3, -2
+ je .L6
+ alg %r12, 8(%r2)
+ alcgr %r9, %r9 C r9 was 0, now holds the carry-out
+C Final fold of r0 (weight 1), r12 and r10 (weight 2^16), r9 (weight 2^32)
+C and r7 into the return value.
+.L6:
+ srlg %r1, %r0, 48
+ nihh %r0, 0 C 0xffffffffffff
+ agr %r0, %r1
+ agr %r0, %r7
+ srlg %r1, %r12, 32 C part of r12 that wraps to weight 1
+ agr %r0, %r1
+ srlg %r1, %r10, 32 C part of r10 that wraps to weight 1
+ agr %r0, %r1
+ llgfr %r12, %r12 C low 32 bits of r12
+ srlg %r1, %r9, 16 C part of r9 that wraps to weight 1
+ sllg %r12, %r12, 16
+ llgfr %r10, %r10 C low 32 bits of r10
+ agr %r0, %r1
+ llill %r2, 65535 C r2 = 0xffff
+ agr %r0, %r12
+ sllg %r10, %r10, 16
+ ngr %r2, %r9 C low 16 bits of r9
+ agr %r0, %r10
+ sllg %r2, %r2, 32
+ agr %r2, %r0 C return value
+ lmg %r7, %r12, 56(%r15) C restore r7-r12
+ br %r14
+EPILOGUE()
--- /dev/null
+dnl S/390-64 mpn_mul_1
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C z900 29
+C z990 22
+C z9 ?
+C z10 ?
+C z196 ?
+
+C INPUT PARAMETERS
+define(`rp', `%r2')
+define(`up', `%r3')
+define(`n', `%r4')
+define(`v0', `%r5')
+
+ASM_START()
+PROLOGUE(mpn_mul_1)
+C Multiply the n-limb operand at up by the single limb v0, store the n low
+C result limbs at rp and return the most significant (carry-out) limb in r2.
+C The carry flag stays live across loop iterations; none of the other
+C instructions in the loop change the condition code.
+C NOTE(review): brctg decrements before testing, so n = 0 would not terminate
+C promptly; presumably callers guarantee n >= 1 - confirm.
+ stmg %r11, %r12, 88(%r15) C save call-saved registers r11-r12
+ lghi %r12, 0 C zero index register
+ aghi %r12, 0 C clear carry flag
+ lghi %r11, 0 C clear carry limb
+
+L(top): lg %r1, 0(%r12,up)
+ mlgr %r0, v0 C r0:r1 = up[i] * v0, high half in r0
+ alcgr %r1, %r11 C add previous high limb plus carry
+ lgr %r11, %r0 C copy high part to carry limb
+ stg %r1, 0(%r12,rp)
+ la %r12, 8(%r12) C advance index by one limb
+ brctg n, L(top)
+
+ lghi %r2, 0
+ alcgr %r2, %r11 C return value = last high limb + final carry
+
+ lmg %r11, %r12, 88(%r15) C restore r11-r12
+ br %r14
+EPILOGUE()
--- /dev/null
+dnl S/390-64 mpn_mul_basecase.
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C z900 ?
+C z990 23
+C z9 ?
+C z10 ?
+C z196 ?
+
+C TODO
+C * Perhaps add special case for un <= 2.
+C * Replace loops by faster code. The mul_1 and addmul_1 loops could be sped
+C up by about 10%.
+
+C INPUT PARAMETERS
+define(`rp', `%r2')
+define(`up', `%r3')
+define(`un', `%r4')
+define(`vp', `%r5')
+define(`vn', `%r6')
+
+define(`zero', `%r8')
+
+ASM_START()
+PROLOGUE(mpn_mul_basecase)
+C Schoolbook multiplication: store the un+vn limb product of the operands at
+C up (un limbs) and vp (vn limbs) at rp.  The first row is a mul_1 pass that
+C writes rp[0..un]; each further limb of vp is an addmul_1 pass that adds into
+C the result one limb higher.
+C NOTE(review): the un < 2 path multiplies a single limb pair and ignores vn,
+C so it presumably relies on un >= vn >= 1 - confirm against the callers.
+ cghi un, 2
+ jhe L(ge2)
+
+C un = vn = 1
+ lg %r1, 0(vp)
+ mlg %r0, 0(up) C r0:r1 = vp[0] * up[0], high half in r0
+ stg %r1, 0(rp)
+ stg %r0, 8(rp)
+ br %r14
+
+L(ge2): C jne L(gen)
+
+
+L(gen):
+C mul_1 =======================================================================
+
+ stmg %r6, %r12, 48(%r15) C save r6-r12, vn lives in call-saved r6
+ lghi zero, 0
+ aghi un, -1 C the first limb is handled outside the loops
+
+ lg %r7, 0(vp) C r7 = vp[0], the multiplier of this row
+ lg %r11, 0(up)
+ lghi %r12, 8 C init index register
+ mlgr %r10, %r7 C r10:r11 = up[0] * vp[0]
+ lgr %r9, un C inner loop count
+ stg %r11, 0(rp)
+ cr %r15, %r15 C clear carry flag
+
+L(tm): lg %r1, 0(%r12,up)
+ mlgr %r0, %r7 C r0:r1 = up[i] * vp[0]
+ alcgr %r1, %r10 C add previous high limb plus carry
+ lgr %r10, %r0 C copy high part to carry limb
+ stg %r1, 0(%r12,rp)
+ la %r12, 8(%r12)
+ brctg %r9, L(tm)
+
+ alcgr %r0, zero C fold the last carry into the top limb
+ stg %r0, 0(%r12,rp)
+
+C addmul_1 loop ===============================================================
+
+ aghi vn, -1
+ je L(outer_end) C vn = 1, nothing left to add
+L(outer_loop):
+
+ la rp, 8(rp) C rp += 1
+ la vp, 8(vp) C vp += 1
+ lg %r7, 0(vp) C multiplier limb of this row
+ lg %r11, 0(up)
+ lghi %r12, 8 C init index register
+ mlgr %r10, %r7 C r10:r11 = up[0] * vp[j]
+ lgr %r9, un C inner loop count
+ alg %r11, 0(rp) C add existing result limb, sets carry
+ stg %r11, 0(rp)
+
+L(tam): lg %r1, 0(%r12,up)
+ lg %r11, 0(%r12,rp)
+ mlgr %r0, %r7 C r0:r1 = up[i] * vp[j]
+ alcgr %r1, %r11 C low half + rp[i] + carry from previous limb
+ alcgr %r0, zero C propagate carry into high half
+ algr %r1, %r10 C add previous high limb, carry stays live
+ lgr %r10, %r0
+ stg %r1, 0(%r12,rp)
+ la %r12, 8(%r12)
+ brctg %r9, L(tam)
+
+ alcgr %r0, zero C fold the last carry into the top limb
+ stg %r0, 0(%r12,rp)
+
+ brctg vn, L(outer_loop)
+L(outer_end):
+
+ lmg %r6, %r12, 48(%r15) C restore r6-r12
+ br %r14
+EPILOGUE()
--- /dev/null
+dnl S/390-64 mpn_rshift.
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C z900 7
+C z990 3
+C z9 ?
+C z10 ?
+C z196 ?
+
+C NOTES
+C * See notes in lshift.asm.
+
+C INPUT PARAMETERS
+define(`rp', `%r2')
+define(`up', `%r3')
+define(`n', `%r4')
+define(`cnt', `%r5')
+
+define(`tnc', `%r6')
+
+ASM_START()
+C mpn_rshift: shift the n limbs at up right by cnt bits and store the n result
+C limbs at rp.  The bits shifted out of the lowest limb, computed as
+C u[0] << tnc, are left in %r2 on return.
+C NOTE(review): tnc is -cnt; this presumably acts as 64-cnt because the shift
+C instructions only use the low six bits of the amount -- confirm against the
+C z/Architecture Principles of Operation.
+PROLOGUE(mpn_rshift)
+ cghi n, 3
+ jh L(gt1) C n > 3 takes the unrolled loop path
+
+ stmg %r6, %r7, 48(%r15)
+ larl %r1, L(tab)-4 C table base biased by one entry, so n = 1 hits L(tab)
+ lcgr tnc, cnt C tnc = -cnt
+ sllg n, n, 2 C scale n by 4 to match the 4-byte bias used above
+ b 0(n,%r1) C dispatch on n = 1, 2, 3
+L(tab): j L(n1)
+ j L(n2)
+ j L(n3)
+
+C n = 1: a single limb, nothing to OR in from a neighbour.
+L(n1): lg %r1, 0(up)
+ srlg %r0, %r1, 0(cnt)
+ stg %r0, 0(rp)
+ sllg %r2, %r1, 0(tnc) C return value: bits shifted out of u[0]
+ lg %r6, 48(%r15) C restoring r7 not needed
+ br %r14
+
+C n = 2: the return value is kept in %r4 until rp (%r2) is no longer needed.
+L(n2): lg %r1, 0(up)
+ sllg %r4, %r1, 0(tnc) C bits shifted out of u[0]
+ srlg %r0, %r1, 0(cnt)
+ lg %r1, 8(up)
+ sllg %r7, %r1, 0(tnc)
+ ogr %r7, %r0 C r[0] = (u[0] >> cnt) | (u[1] << tnc)
+ srlg %r0, %r1, 0(cnt)
+ stg %r7, 0(rp)
+ stg %r0, 8(rp)
+ lgr %r2, %r4
+ lmg %r6, %r7, 48(%r15)
+ br %r14
+
+
+C n = 3: same scheme as n = 2 with one more limb.
+L(n3): lg %r1, 0(up)
+ sllg %r4, %r1, 0(tnc) C bits shifted out of u[0]
+ srlg %r0, %r1, 0(cnt)
+ lg %r1, 8(up)
+ sllg %r7, %r1, 0(tnc)
+ ogr %r7, %r0
+ srlg %r0, %r1, 0(cnt)
+ stg %r7, 0(rp)
+ lg %r1, 16(up)
+ sllg %r7, %r1, 0(tnc)
+ ogr %r7, %r0
+ srlg %r0, %r1, 0(cnt)
+ stg %r7, 8(rp)
+ stg %r0, 16(rp)
+ lgr %r2, %r4
+ lmg %r6, %r7, 48(%r15)
+ br %r14
+
+C General case (n > 3).  The loop handles four limbs per pass.  One of
+C L(b0)..L(b3) is selected by n mod 4; it pre-biases rp, loads the first
+C limbs, puts the return value in %r9 and jumps to the matching entry point
+C L(lm0)..L(lm3) inside the loop.
+L(gt1): stmg %r6, %r13, 48(%r15)
+ lcgr tnc, cnt C tnc = -cnt
+
+ sllg %r1, n, 3 C NOTE(review): %r1 is not read again on this path
+ srlg %r0, n, 2 C loop count
+
+ lghi %r7, 3
+ ngr %r7, n C %r7 = n mod 4
+ je L(b0)
+ cghi %r7, 2
+ jl L(b1)
+ je L(b2)
+
+C n mod 4 = 3 falls through to here.
+L(b3): aghi rp, -8
+ lg %r7, 0(up)
+ sllg %r9, %r7, 0(tnc) C return value
+ srlg %r11, %r7, 0(cnt)
+ lg %r8, 8(up)
+ lg %r7, 16(up)
+ sllg %r4, %r8, 0(tnc)
+ srlg %r13, %r8, 0(cnt)
+ ogr %r11, %r4
+ la up, 24(up)
+ j L(lm3)
+
+L(b2): aghi rp, -16
+ lg %r8, 0(up)
+ lg %r7, 8(up)
+ sllg %r9, %r8, 0(tnc) C return value
+ srlg %r13, %r8, 0(cnt)
+ la up, 16(up)
+ j L(lm2)
+
+L(b1): aghi rp, -24
+ lg %r7, 0(up)
+ sllg %r9, %r7, 0(tnc) C return value
+ srlg %r11, %r7, 0(cnt)
+ lg %r8, 8(up)
+ lg %r7, 16(up)
+ sllg %r4, %r8, 0(tnc)
+ srlg %r10, %r8, 0(cnt)
+ ogr %r11, %r4
+ la up, 8(up)
+ j L(lm1)
+
+L(b0): aghi rp, -32
+ lg %r8, 0(up)
+ lg %r7, 8(up)
+ sllg %r9, %r8, 0(tnc) C return value
+ srlg %r10, %r8, 0(cnt)
+ j L(lm0)
+
+C Main loop.  %r7 and %r8 hold the two most recently loaded source limbs;
+C %r10, %r11 and %r13 hold partially or fully assembled result limbs.
+C ALIGN(16)
+L(top): sllg %r4, %r8, 0(tnc)
+ srlg %r13, %r8, 0(cnt)
+ ogr %r11, %r4
+ stg %r10, 0(rp)
+L(lm3): stg %r11, 8(rp)
+L(lm2): sllg %r12, %r7, 0(tnc)
+ srlg %r11, %r7, 0(cnt)
+ lg %r8, 0(up)
+ lg %r7, 8(up)
+ ogr %r13, %r12
+ sllg %r4, %r8, 0(tnc)
+ srlg %r10, %r8, 0(cnt)
+ ogr %r11, %r4
+ stg %r13, 16(rp)
+L(lm1): stg %r11, 24(rp)
+L(lm0): sllg %r12, %r7, 0(tnc)
+ aghi rp, 32
+ srlg %r11, %r7, 0(cnt)
+ lg %r8, 16(up)
+ lg %r7, 24(up)
+ aghi up, 32
+ ogr %r10, %r12
+ brctg %r0, L(top)
+
+C Wind-down: finish and store the last four result limbs.  The top limb gets
+C only the right-shifted part, as there is no higher source limb.
+L(end): sllg %r4, %r8, 0(tnc)
+ srlg %r13, %r8, 0(cnt)
+ ogr %r11, %r4
+ stg %r10, 0(rp)
+ stg %r11, 8(rp)
+ sllg %r12, %r7, 0(tnc)
+ srlg %r11, %r7, 0(cnt)
+ ogr %r13, %r12
+ stg %r13, 16(rp)
+ stg %r11, 24(rp)
+ lgr %r2, %r9 C return value saved by the L(bN) feed-in code
+
+ lmg %r6, %r13, 48(%r15)
+ br %r14
+EPILOGUE()
--- /dev/null
+dnl S/390-64 mpn_sqr_basecase.
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C z900 ?
+C z990 23
+C z9 ?
+C z10 ?
+C z196 ?
+
+C TODO
+C * Clean up.
+C * Stop iterating addmul_1 loop at latest for n = 2, implement longer tail.
+C This will ask for basecase handling of n = 3.
+C * Update counters and pointers more straightforwardly, possibly lowering
+C register usage.
+C * Should we use this allocation-free style for more sqr_basecase asm
+C implementations? The only disadvantage is that it requires R != U.
+C * Replace loops by faster code. The mul_1 and addmul_1 loops could be sped
+C up by about 10%. The sqr_diag_addlsh1 loop could probably be sped up even
+C more.
+
+C INPUT PARAMETERS
+define(`rp', `%r2')
+define(`up', `%r3')
+define(`n', `%r4')
+
+define(`zero', `%r8')
+define(`rp_saved', `%r9')
+define(`up_saved', `%r13')
+define(`n_saved', `%r14')
+
+ASM_START()
+C mpn_sqr_basecase: n = 1 and n = 2 are handled by the straight-line code
+C below, which writes 2 and 4 result limbs.  Larger n runs the three phases
+C marked mul_1, addmul_1 loop and sqr_diag_addlsh1.
+PROLOGUE(mpn_sqr_basecase)
+ aghi n, -2 C n -= 2; the condition code feeds both branches below
+ jhe L(ge2)
+
+C n = 1
+ lg %r5, 0(up)
+ mlgr %r4, %r5 C u0 * u0, high limb in %r4 and low limb in %r5
+ stg %r5, 0(rp)
+ stg %r4, 8(rp)
+ br %r14
+
+L(ge2): jne L(gen) C n - 2 != 0: general case
+
+C n = 2
+ stmg %r6, %r8, 48(%r15)
+ lghi zero, 0
+
+ lg %r5, 0(up)
+ mlgr %r4, %r5 C u0 * u0
+ lg %r1, 8(up)
+ mlgr %r0, %r1 C u1 * u1
+ stg %r5, 0(rp)
+
+ lg %r7, 0(up)
+ mlg %r6, 8(up) C u0 * u1
+ algr %r7, %r7 C double the cross product ...
+ alcgr %r6, %r6
+ alcgr %r0, zero C ... carrying its top bit into the high limb
+
+ algr %r4, %r7 C add the doubled cross product to the squares
+ alcgr %r1, %r6
+ alcgr %r0, zero
+ stg %r4, 8(rp)
+ stg %r1, 16(rp)
+ stg %r0, 24(rp)
+
+ lmg %r6, %r8, 48(%r15)
+ br %r14
+
+L(gen):
+C mul_1 =======================================================================
+
+C n_saved is %r14, the register used by the final br; it is saved by this
+C stmg and reloaded by the lmg at the end.
+ stmg %r6, %r14, 48(%r15)
+ lghi zero, 0
+ lgr up_saved, up
+ lgr rp_saved, rp
+ lgr n_saved, n C n has already been reduced by 2 here
+
+ lg %r6, 0(up) C multiplier for this pass: u[0]
+ lg %r11, 8(up)
+ lghi %r12, 16 C init index register
+ mlgr %r10, %r6
+ lgr %r5, n C inner loop count
+ stg %r11, 8(rp) C rp[0] is left for the sqr_diag_addlsh1 phase
+ cr %r15, %r15 C clear carry flag
+
+L(tm): lg %r1, 0(%r12,up)
+ mlgr %r0, %r6
+ alcgr %r1, %r10
+ lgr %r10, %r0 C copy high part to carry limb
+ stg %r1, 0(%r12,rp)
+ la %r12, 8(%r12)
+ brctg %r5, L(tm)
+
+ alcgr %r0, zero
+ stg %r0, 0(%r12,rp)
+
+C addmul_1 loop ===============================================================
+
+ aghi n, -1
+ je L(outer_end)
+L(outer_loop):
+
+ la rp, 16(rp) C rp += 2
+ la up, 8(up) C up += 1
+ lg %r6, 0(up) C multiplier for this pass
+ lg %r11, 8(up)
+ lghi %r12, 16 C init index register
+ mlgr %r10, %r6
+ lgr %r5, n C inner loop count shrinks with each outer pass
+ alg %r11, 8(rp)
+ stg %r11, 8(rp)
+
+L(tam): lg %r1, 0(%r12,up)
+ lg %r7, 0(%r12,rp)
+ mlgr %r0, %r6
+ alcgr %r1, %r7 C add the existing result limb, with carry in
+ alcgr %r0, zero
+ algr %r1, %r10 C add the carry limb from the previous product
+ lgr %r10, %r0
+ stg %r1, 0(%r12,rp)
+ la %r12, 8(%r12)
+ brctg %r5, L(tam)
+
+ alcgr %r0, zero
+ stg %r0, 0(%r12,rp)
+
+ brctg n, L(outer_loop)
+L(outer_end):
+
+C Last cross product, done without a loop.
+ lg %r6, 8(up)
+ lg %r1, 16(up)
+ lgr %r7, %r0 C Same as: lg %r7, 24(,rp)
+ mlgr %r0, %r6
+ algr %r1, %r7
+ alcgr %r0, zero
+ stg %r1, 24(rp)
+ stg %r0, 32(rp)
+
+C sqr_diag_addlsh1 ============================================================
+
+C From here on up and rp expand to the saved original pointers.
+define(`up', `up_saved')
+define(`rp', `rp_saved')
+ la n, 1(n_saved) C loop count = n_saved + 1
+
+ lg %r1, 0(up)
+ mlgr %r0, %r1 C u[0] * u[0]
+ stg %r1, 0(rp)
+C clr %r15, %r15 C clear carry (already clear per above)
+
+C Each pass doubles two result limbs and adds in the next diagonal square.
+L(top): lg %r11, 8(up)
+ la up, 8(up)
+ lg %r6, 8(rp)
+ lg %r7, 16(rp)
+ mlgr %r10, %r11
+ alcgr %r6, %r6
+ alcgr %r7, %r7
+ alcgr %r10, zero C propagate carry to high product limb
+ algr %r6, %r0
+ alcgr %r7, %r11
+ stmg %r6, %r7, 8(rp)
+ la rp, 16(rp)
+ lgr %r0, %r10 C copy carry limb
+ brctg n, L(top)
+
+ alcgr %r0, zero
+ stg %r0, 8(rp)
+
+ lmg %r6, %r14, 48(%r15)
+ br %r14
+EPILOGUE()
--- /dev/null
+dnl S/390-64 mpn_sublsh1_n
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C z900 10
+C z990 5
+C z9 ?
+C z10 ?
+C z196 ?
+
+C TODO
+C * Optimise for small n
+C * Compute RETVAL for sublsh1_n less stupidly
+
+C INPUT PARAMETERS
+define(`rp', `%r2')
+define(`up', `%r3')
+define(`vp', `%r4')
+define(`n', `%r5')
+
+ifdef(`OPERATION_addlsh1_n',`
+ define(ADSBR, algr)
+ define(ADSBCR, alcgr)
+ define(INITCY, `lghi %r13, -1')
+ define(RETVAL, `la %r2, 2(%r1,%r13)')
+ define(func, mpn_addlsh1_n)
+')
+ifdef(`OPERATION_sublsh1_n',`
+ define(ADSBR, slgr)
+ define(ADSBCR, slbgr)
+ define(INITCY, `lghi %r13, 0')
+ define(RETVAL,`dnl
+ slgr %r1, %r13
+ lghi %r2, 1
+ algr %r2, %r1')
+ define(func, mpn_sublsh1_n)
+')
+
+ASM_START()
+C Entry point.  Each step doubles a group of vp limbs (add with carry to
+C itself) and then combines them with the matching up limbs using
+C ADSBR/ADSBCR, storing to rp.  Two carries are kept between groups:
+C   %r1   carry out of the doubling,
+C   %r13  carry/borrow out of the ADSBR/ADSBCR chain,
+C each saved with slbgr x,x and re-created with aghi x,1.  RETVAL combines
+C both into %r2.
+C NOTE(review): the symbol name is spelled out here; the func macro defined
+C above is not used.
+PROLOGUE(mpn_sublsh1_n)
+ stmg %r6, %r13, 48(%r15)
+
+ aghi n, 3 C bias n so the shift below rounds up
+ lghi %r7, 3
+ srlg %r0, n, 2 C group count, including a leading partial group
+ ngr %r7, n C (n + 3) mod 4, n being the biased value
+ je L(b1) C original n mod 4 = 1
+ cghi %r7, 2
+ jl L(b2) C original n mod 4 = 2
+ jne L(b0) C original n mod 4 = 0
+
+C Original n mod 4 = 3: handle three limbs first.  The lmg below overwrites
+C n (%r5); the count is already in %r0.
+L(b3): lmg %r5, %r7, 0(up)
+ la up, 24(up)
+ lmg %r9, %r11, 0(vp)
+ la vp, 24(vp)
+
+ algr %r9, %r9
+ alcgr %r10, %r10
+ alcgr %r11, %r11
+ slbgr %r1, %r1 C save doubling carry
+
+ ADSBR %r5, %r9
+ ADSBCR %r6, %r10
+ ADSBCR %r7, %r11
+ slbgr %r13, %r13 C save add/sub carry
+
+ stmg %r5, %r7, 0(rp)
+ la rp, 24(rp)
+ brctg %r0, L(top)
+ j L(end)
+
+C No partial group: preset both saved carries and enter the loop directly.
+L(b0): lghi %r1, -1
+ INITCY
+ j L(top)
+
+C One leading limb.
+L(b1): lg %r5, 0(up)
+ la up, 8(up)
+ lg %r9, 0(vp)
+ la vp, 8(vp)
+
+ algr %r9, %r9
+ slbgr %r1, %r1 C save doubling carry
+ ADSBR %r5, %r9
+ slbgr %r13, %r13 C save add/sub carry
+
+ stg %r5, 0(rp)
+ la rp, 8(rp)
+ brctg %r0, L(top)
+ j L(end)
+
+C Two leading limbs.
+L(b2): lmg %r5, %r6, 0(up)
+ la up, 16(up)
+ lmg %r9, %r10, 0(vp)
+ la vp, 16(vp)
+
+ algr %r9, %r9
+ alcgr %r10, %r10
+ slbgr %r1, %r1 C save doubling carry
+
+ ADSBR %r5, %r9
+ ADSBCR %r6, %r10
+ slbgr %r13, %r13 C save add/sub carry
+
+ stmg %r5, %r6, 0(rp)
+ la rp, 16(rp)
+ brctg %r0, L(top)
+ j L(end)
+
+C Main loop, four limbs per pass.
+L(top): lmg %r9, %r12, 0(vp)
+ la vp, 32(vp)
+
+ aghi %r1, 1 C restore carry
+
+ alcgr %r9, %r9
+ alcgr %r10, %r10
+ alcgr %r11, %r11
+ alcgr %r12, %r12
+
+ slbgr %r1, %r1 C save carry
+
+ lmg %r5, %r8, 0(up)
+ la up, 32(up)
+
+ aghi %r13, 1 C restore carry
+
+ ADSBCR %r5, %r9
+ ADSBCR %r6, %r10
+ ADSBCR %r7, %r11
+ ADSBCR %r8, %r12
+
+ slbgr %r13, %r13 C save carry
+
+ stmg %r5, %r8, 0(rp)
+ la rp, 32(rp)
+ brctg %r0, L(top)
+
+L(end): RETVAL
+ lmg %r6, %r13, 48(%r15)
+ br %r14
+EPILOGUE()
--- /dev/null
+dnl S/390-64 mpn_submul_1
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C z900 35
+C z990 24
+C z9 ?
+C z10 ?
+C z196 ?
+
+C INPUT PARAMETERS
+define(`rp', `%r2')
+define(`up', `%r3')
+define(`n', `%r4')
+define(`v0', `%r5')
+
+ASM_START()
+C mpn_submul_1: for each of the n limbs, subtract the low half of up[i] * v0
+C and the running carry limb from rp[i].  The final carry limb is left in %r2.
+C The borrow flag is deliberately kept live across the loop back-edge: the
+C slbgr at the top of the loop consumes the borrow produced by the slgr of
+C the carry limb in the previous pass.
+PROLOGUE(mpn_submul_1)
+ stmg %r9, %r12, 72(%r15)
+ lghi %r12, 0 C byte index into up and rp
+ slgr %r11, %r11 C carry limb = 0
+
+L(top): lg %r1, 0(%r12, up)
+ lg %r10, 0(%r12, rp)
+ mlgr %r0, v0 C product: high limb in %r0, low limb in %r1
+ slbgr %r10, %r1 C rp limb - low product limb - incoming borrow
+ slbgr %r9, %r9 C %r9 = 0 or -1 depending on the borrow
+ slgr %r0, %r9 C conditional incr
+ slgr %r10, %r11 C subtract carry limb from previous pass
+ lgr %r11, %r0 C high limb becomes the next carry limb
+ stg %r10, 0(%r12, rp)
+ la %r12, 8(%r12)
+ brctg %r4, L(top)
+
+ lgr %r2, %r11
+ slbgr %r9, %r9 C pick up the borrow from the last slgr
+ slgr %r2, %r9 C and fold it into the returned carry limb
+
+ lmg %r9, %r12, 72(%r15)
+ br %r14
+EPILOGUE()
--- /dev/null
+dnl SPARC T1 32-bit mpn_add_n.
+
+dnl Copyright 2010 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+define(`rp', %o0)
+define(`ap', %o1)
+define(`bp', %o2)
+define(`n', %o3)
+define(`cy', %o4)
+
+define(`i', %o3)
+
+MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc)
+
+ASM_START()
+C mpn_add_nc enters with a caller-supplied carry in cy; mpn_add_n starts with
+C cy = 0.  Both share the code from L(ent) on.  The instruction after each
+C branch or retl sits in the SPARC branch delay slot.
+PROLOGUE(mpn_add_nc)
+ b L(ent)
+ srl cy, 0, cy C strip any bogus high bits
+EPILOGUE()
+
+PROLOGUE(mpn_add_n)
+ mov 0, cy
+L(ent): srl n, 0, n C strip any bogus high bits
+ sll n, 2, n C n = operand size in bytes (4 per limb)
+ add ap, n, ap C point ap, bp and rp just past their last limb
+ add bp, n, bp
+ add rp, n, rp
+ neg n, i C i = negative byte offset, counting up to 0
+
+L(top): lduw [ap+i], %g1
+ lduw [bp+i], %g2
+ add %g1, %g2, %g3 C 64-bit sum of the two 32-bit limbs
+ add %g3, cy, %g3 C plus incoming carry
+ stw %g3, [rp+i] C store the low 32 bits
+ add i, 4, i
+ brnz i, L(top)
+ srlx %g3, 32, cy C carry out = bits above the low 32
+
+ retl
+ mov cy, %o0 C return value
+EPILOGUE()
--- /dev/null
+dnl SPARC T1 32-bit mpn_addmul_1.
+
+dnl Copyright 2010 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C UltraSPARC T1: 27
+
+C INPUT PARAMETERS
+define(`rp', `%o0')
+define(`up', `%o1')
+define(`n', `%o2')
+define(`v0', `%o3')
+
+ASM_START()
+C mpn_addmul_1: for each of the n 32-bit limbs, add up[i] * v0 and the carry
+C limb to rp[i], using 64-bit arithmetic.  The final carry limb is returned
+C in %o0.  The dec after brnz sits in the branch delay slot, so the loop
+C tests n before that decrement.
+PROLOGUE(mpn_addmul_1)
+ mov 0, %g4 C carry limb = 0
+ srl v0, 0, v0 C keep only the low 32 bits of v0
+ srl n, 0, n C keep only the low 32 bits of n
+ dec n C n--
+
+L(top): lduw [up+0], %g1
+ add up, 4, up C up++
+ mulx %g1, v0, %g3 C 64-bit product of two 32-bit values
+ lduw [rp+0], %g2
+ add %g2, %g3, %g3 C add the existing result limb
+ add %g4, %g3, %g3 C add the carry limb
+ stw %g3, [rp+0] C store the low 32 bits
+ add rp, 4, rp C rp++
+ srlx %g3, 32, %g4 C new carry limb = upper 32 bits
+ brnz n, L(top)
+ dec n C n--
+
+ retl
+ mov %g4, %o0 C return value
+EPILOGUE()
--- /dev/null
+/* UltraSPARC T 32-bit gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007,
+2008, 2009, 2010, 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#define DIVREM_1_NORM_THRESHOLD 0 /* always */
+#define DIVREM_1_UNNORM_THRESHOLD 3
+#define MOD_1_1P_METHOD 2
+#define MOD_1_NORM_THRESHOLD 0 /* always */
+#define MOD_1_UNNORM_THRESHOLD 0 /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 9
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 10
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 0 /* never mpn_mod_1_1p */
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD 21
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 22
+#define USE_PREINV_DIVREM_1 1
+#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
+#define DIVEXACT_1_THRESHOLD 0 /* always */
+#define BMOD_1_TO_MOD_1_THRESHOLD 35
+
+#define MUL_TOOM22_THRESHOLD 14
+#define MUL_TOOM33_THRESHOLD 98
+#define MUL_TOOM44_THRESHOLD 166
+#define MUL_TOOM6H_THRESHOLD 226
+#define MUL_TOOM8H_THRESHOLD 333
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD 97
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 139
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 97
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD 98
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD 120
+
+#define SQR_BASECASE_THRESHOLD 6
+#define SQR_TOOM2_THRESHOLD 34
+#define SQR_TOOM3_THRESHOLD 110
+#define SQR_TOOM4_THRESHOLD 178
+#define SQR_TOOM6_THRESHOLD 240
+#define SQR_TOOM8_THRESHOLD 333
+
+#define MULMID_TOOM42_THRESHOLD 22
+
+#define MULMOD_BNM1_THRESHOLD 9
+#define SQRMOD_BNM1_THRESHOLD 13
+
+#define MUL_FFT_MODF_THRESHOLD 280 /* k = 5; matches the first MUL_FFT_TABLE3 entry */
+#define MUL_FFT_TABLE3 \
+ { { 280, 5}, { 15, 6}, { 8, 5}, { 17, 6}, \
+ { 9, 5}, { 19, 6}, { 13, 7}, { 7, 6}, \
+ { 17, 7}, { 9, 6}, { 20, 7}, { 11, 6}, \
+ { 23, 7}, { 13, 8}, { 7, 7}, { 21, 8}, \
+ { 11, 7}, { 25, 9}, { 7, 8}, { 15, 7}, \
+ { 33, 8}, { 19, 7}, { 41, 8}, { 23, 7}, \
+ { 49, 8}, { 27, 9}, { 15, 8}, { 31, 7}, \
+ { 63, 8}, { 39, 9}, { 23, 8}, { 47,10}, \
+ { 15, 9}, { 31, 8}, { 67, 9}, { 39, 8}, \
+ { 79, 9}, { 47,10}, { 31, 9}, { 79,10}, \
+ { 47,11}, { 31,10}, { 63, 9}, { 127, 8}, \
+ { 255,10}, { 79, 9}, { 159, 8}, { 319,10}, \
+ { 95, 9}, { 191, 8}, { 383,11}, { 63,10}, \
+ { 127, 9}, { 255,10}, { 143, 9}, { 287,10}, \
+ { 159, 9}, { 319,10}, { 175,11}, { 95,10}, \
+ { 191, 9}, { 383,12}, { 4096,13}, { 8192,14}, \
+ { 16384,15}, { 32768,16} }
+#define MUL_FFT_TABLE3_SIZE 66 /* number of pairs listed in MUL_FFT_TABLE3 */
+#define MUL_FFT_THRESHOLD 3712
+
+#define SQR_FFT_MODF_THRESHOLD 240 /* k = 5; matches the first SQR_FFT_TABLE3 entry */
+#define SQR_FFT_TABLE3 \
+ { { 240, 5}, { 15, 6}, { 8, 5}, { 17, 6}, \
+ { 13, 7}, { 7, 6}, { 17, 7}, { 9, 6}, \
+ { 20, 7}, { 11, 6}, { 23, 7}, { 13, 8}, \
+ { 7, 7}, { 19, 8}, { 11, 7}, { 25, 9}, \
+ { 7, 8}, { 15, 7}, { 33, 8}, { 19, 7}, \
+ { 39, 8}, { 23, 7}, { 47, 8}, { 27, 9}, \
+ { 15, 8}, { 39, 9}, { 23, 8}, { 47,10}, \
+ { 15, 9}, { 31, 8}, { 63, 9}, { 39, 8}, \
+ { 79, 9}, { 47,10}, { 31, 9}, { 63, 8}, \
+ { 127, 9}, { 71, 8}, { 143, 9}, { 79,10}, \
+ { 47,11}, { 31,10}, { 63, 9}, { 127, 8}, \
+ { 255, 9}, { 143,10}, { 79, 9}, { 159, 8}, \
+ { 319, 9}, { 175,10}, { 95, 9}, { 191, 8}, \
+ { 383, 9}, { 207,11}, { 63,10}, { 127, 9}, \
+ { 255,10}, { 143, 9}, { 287,10}, { 159, 9}, \
+ { 319,10}, { 175,11}, { 95,10}, { 191, 9}, \
+ { 383,10}, { 207,12}, { 4096,13}, { 8192,14}, \
+ { 16384,15}, { 32768,16} }
+#define SQR_FFT_TABLE3_SIZE 70 /* number of pairs listed in SQR_FFT_TABLE3 */
+#define SQR_FFT_THRESHOLD 2624
+
+#define MULLO_BASECASE_THRESHOLD 0 /* always */
+#define MULLO_DC_THRESHOLD 51
+#define MULLO_MUL_N_THRESHOLD 6633
+
+#define DC_DIV_QR_THRESHOLD 51
+#define DC_DIVAPPR_Q_THRESHOLD 202
+#define DC_BDIV_QR_THRESHOLD 47
+#define DC_BDIV_Q_THRESHOLD 124
+
+#define INV_MULMOD_BNM1_THRESHOLD 26
+#define INV_NEWTON_THRESHOLD 266
+#define INV_APPR_THRESHOLD 222
+
+#define BINV_NEWTON_THRESHOLD 296
+#define REDC_1_TO_REDC_N_THRESHOLD 59
+
+#define MU_DIV_QR_THRESHOLD 1334
+#define MU_DIVAPPR_Q_THRESHOLD 1499
+#define MUPI_DIV_QR_THRESHOLD 116
+#define MU_BDIV_QR_THRESHOLD 1057
+#define MU_BDIV_Q_THRESHOLD 1334
+
+#define POWM_SEC_TABLE 6,35,213,724,2618
+
+#define MATRIX22_STRASSEN_THRESHOLD 15
+#define HGCD_THRESHOLD 84
+#define HGCD_APPR_THRESHOLD 101
+#define HGCD_REDUCE_THRESHOLD 1437
+#define GCD_DC_THRESHOLD 372
+#define GCDEXT_DC_THRESHOLD 253
+#define JACOBI_BASE_METHOD 2
+
+#define GET_STR_DC_THRESHOLD 12
+#define GET_STR_PRECOMPUTE_THRESHOLD 27
+#define SET_STR_DC_THRESHOLD 399
+#define SET_STR_PRECOMPUTE_THRESHOLD 885
+
+#define FAC_DSC_THRESHOLD 179
+#define FAC_ODD_THRESHOLD 29
--- /dev/null
+dnl SPARC T1 32-bit mpn_mul_1.
+
+dnl Copyright 2010 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C UltraSPARC T1: 23
+
+C INPUT PARAMETERS
+define(`rp', `%o0')
+define(`up', `%o1')
+define(`n', `%o2')
+define(`v0', `%o3')
+
+ASM_START()
+C mpn_mul_1: for each of the n 32-bit limbs, store the low 32 bits of
+C up[i] * v0 + carry limb at rp[i], using 64-bit arithmetic.  The final carry
+C limb is returned in %o0.  The dec after brnz sits in the branch delay slot,
+C so the loop tests n before that decrement.
+PROLOGUE(mpn_mul_1)
+ mov 0, %g4 C carry limb = 0
+ srl v0, 0, v0 C keep only the low 32 bits of v0
+ srl n, 0, n C keep only the low 32 bits of n
+ dec n C n--
+
+L(top): lduw [up+0], %g1
+ add up, 4, up C up++
+ mulx %g1, v0, %g3 C 64-bit product of two 32-bit values
+ add %g4, %g3, %g3 C add the carry limb
+ stw %g3, [rp+0] C store the low 32 bits
+ add rp, 4, rp C rp++
+ srlx %g3, 32, %g4 C new carry limb = upper 32 bits
+ brnz n, L(top)
+ dec n C n--
+
+ retl
+ mov %g4, %o0 C return value
+EPILOGUE()
--- /dev/null
+dnl SPARC T1 32-bit mpn_sqr_diagonal.
+
+dnl Copyright 2010 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+define(`rp', `%o0')
+define(`up', `%o1')
+define(`n', `%o2')
+
+ASM_START()
+C mpn_sqr_diagonal: for each of the n 32-bit limbs at up, store the 64-bit
+C square as two 32-bit words at rp (low word first), advancing rp by two
+C limbs per source limb.  Nothing is placed in %o0 before returning.
+C Loop control: bnz tests the condition codes set by the previous deccc; the
+C deccc in its delay slot prepares the test for the following pass.
+PROLOGUE(mpn_sqr_diagonal)
+ deccc n C n--
+ nop
+
+L(top): lduw [up+0], %g1
+ add up, 4, up C up++
+ mulx %g1, %g1, %g3 C 64-bit square of a 32-bit limb
+ stw %g3, [rp+0] C low word
+ srlx %g3, 32, %g4
+ stw %g4, [rp+4] C high word
+ add rp, 8, rp C rp += 2
+ bnz %icc, L(top)
+ deccc n C n--
+
+ retl
+ nop
+EPILOGUE()
--- /dev/null
+dnl SPARC T1 32-bit mpn_sub_n.
+
+dnl Copyright 2010 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+define(`rp', %o0)
+define(`ap', %o1)
+define(`bp', %o2)
+define(`n', %o3)
+define(`cy', %o4)
+
+define(`i', %o3)
+
+MULFUNC_PROLOGUE(mpn_sub_n mpn_sub_nc)
+
+ASM_START()
+C mpn_sub_nc enters with a caller-supplied borrow in cy; mpn_sub_n starts
+C with cy = 0.  Both share the code from L(ent) on.  The instruction after
+C each branch or retl sits in the SPARC branch delay slot.
+PROLOGUE(mpn_sub_nc)
+ b L(ent)
+ srl cy, 0, cy C strip any bogus high bits
+EPILOGUE()
+
+PROLOGUE(mpn_sub_n)
+ mov 0, cy
+L(ent): srl n, 0, n C strip any bogus high bits
+ sll n, 2, n C n = operand size in bytes (4 per limb)
+ add ap, n, ap C point ap, bp and rp just past their last limb
+ add bp, n, bp
+ add rp, n, rp
+ neg n, i C i = negative byte offset, counting up to 0
+
+L(top): lduw [ap+i], %g1
+ lduw [bp+i], %g2
+ sub %g1, %g2, %g3 C 64-bit difference of the two 32-bit limbs
+ sub %g3, cy, %g3 C minus incoming borrow
+ stw %g3, [rp+i] C store the low 32 bits
+ add i, 4, i
+ brnz i, L(top)
+ srlx %g3, 63, cy C borrow out = sign bit of the 64-bit difference
+
+ retl
+ mov cy, %o0 C return value
+EPILOGUE()
--- /dev/null
+dnl SPARC T1 32-bit mpn_submul_1.
+
+dnl Copyright 2010 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C UltraSPARC T1: 27
+
+C INPUT PARAMETERS
+define(`rp', `%o0')
+define(`up', `%o1')
+define(`n', `%o2')
+define(`v0', `%o3')
+
+ASM_START()
+C mpn_submul_1: for each of the n 32-bit limbs, subtract the low 32 bits of
+C up[i] * v0 + carry limb from rp[i].  The borrow of that subtraction is
+C carried to the next pass in the carry flag and consumed by addx; none of
+C the instructions between subcc and the next addx set the condition codes.
+C The final carry limb plus the last borrow is returned in %o0.
+PROLOGUE(mpn_submul_1)
+ subcc %g0, %g0, %g4 C clear CF and g4
+ srl v0, 0, v0 C keep only the low 32 bits of v0
+ srl n, 0, n C keep only the low 32 bits of n
+ dec n C n--
+
+L(top): lduw [up+0], %g1
+ add up, 4, up C up++
+ mulx %g1, v0, %g3 C 64-bit product of two 32-bit values
+ lduw [rp+0], %g2
+ addx %g4, %g3, %g3 C product + carry limb + borrow from last pass
+ srlx %g3, 32, %g4 C new carry limb = upper 32 bits
+ subcc %g2, %g3, %g3 C subtract from the result limb, setting the borrow
+ stw %g3, [rp+0] C store the low 32 bits
+ add rp, 4, rp C rp++
+ brnz n, L(top)
+ dec n C n--
+
+ retl
+ addx %g4, 0, %o0 C return value
+EPILOGUE()
/* SPARC v9 32-bit gmp-mparam.h -- Compiler/machine parameter header file.
-Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2004, 2009, 2010 Free
-Software Foundation, Inc.
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2004, 2009, 2010, 2011
+Free Software Foundation, Inc.
This file is part of the GNU MP Library.
#define BYTES_PER_MP_LIMB 4
#define DIVREM_1_NORM_THRESHOLD 3
-#define DIVREM_1_UNNORM_THRESHOLD 5
-#define MOD_1_NORM_THRESHOLD 4
-#define MOD_1_UNNORM_THRESHOLD 7
-#define MOD_1N_TO_MOD_1_1_THRESHOLD MP_SIZE_T_MAX /* never */
-#define MOD_1U_TO_MOD_1_1_THRESHOLD MP_SIZE_T_MAX
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD MP_SIZE_T_MAX
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD MP_SIZE_T_MAX
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD MP_SIZE_T_MAX /* never */
+#define DIVREM_1_UNNORM_THRESHOLD 4
+#define MOD_1_1P_METHOD 2
+#define MOD_1_NORM_THRESHOLD 3
+#define MOD_1_UNNORM_THRESHOLD 4
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 11
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 11
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 0 /* never mpn_mod_1_1p */
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD 22
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 61
#define USE_PREINV_DIVREM_1 1
-#define DIVREM_2_THRESHOLD 0 /* always */
+#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
#define DIVEXACT_1_THRESHOLD 0 /* always */
#define BMOD_1_TO_MOD_1_THRESHOLD MP_SIZE_T_MAX /* never */
-#define MUL_TOOM22_THRESHOLD 32
-#define MUL_TOOM33_THRESHOLD 96
-#define MUL_TOOM44_THRESHOLD 143
-#define MUL_TOOM6H_THRESHOLD 216
-#define MUL_TOOM8H_THRESHOLD 494
-
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD 96
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD 145
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD 97
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD 92
-
-#define SQR_BASECASE_THRESHOLD 12
-#define SQR_TOOM2_THRESHOLD 62
-#define SQR_TOOM3_THRESHOLD 103
-#define SQR_TOOM4_THRESHOLD 274
-#define SQR_TOOM6_THRESHOLD 274
-#define SQR_TOOM8_THRESHOLD 542
-
-#define MULMOD_BNM1_THRESHOLD 14
-#define SQRMOD_BNM1_THRESHOLD 21
-
-#define MUL_FFT_TABLE { 272, 736, 1152, 3584, 10240, 24576, 98304, 917504, 0 }
-#define MUL_FFT_MODF_THRESHOLD 248
-#define MUL_FFT_THRESHOLD 2112
-
-#define SQR_FFT_TABLE { 336, 800, 1408, 3584, 10240, 24576, 98304, 393216, 0 }
-#define SQR_FFT_MODF_THRESHOLD 248
-#define SQR_FFT_THRESHOLD 2112
+#define MUL_TOOM22_THRESHOLD 27
+#define MUL_TOOM33_THRESHOLD 112
+#define MUL_TOOM44_THRESHOLD 124
+#define MUL_TOOM6H_THRESHOLD 160
+#define MUL_TOOM8H_THRESHOLD 242
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD 69
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 93
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 71
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD 53
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD 70
+
+#define SQR_BASECASE_THRESHOLD 5
+#define SQR_TOOM2_THRESHOLD 64
+#define SQR_TOOM3_THRESHOLD 85
+#define SQR_TOOM4_THRESHOLD 158
+#define SQR_TOOM6_THRESHOLD 185
+#define SQR_TOOM8_THRESHOLD 224
+
+#define MULMID_TOOM42_THRESHOLD 64
+
+#define MULMOD_BNM1_THRESHOLD 11
+#define SQRMOD_BNM1_THRESHOLD 16
+
+#define MUL_FFT_MODF_THRESHOLD 212 /* k = 5 */
+#define MUL_FFT_TABLE3 \
+ { { 212, 5}, { 11, 6}, { 7, 5}, { 17, 6}, \
+ { 9, 5}, { 20, 6}, { 13, 7}, { 7, 6}, \
+ { 16, 7}, { 9, 6}, { 20, 7}, { 13, 8}, \
+ { 7, 7}, { 19, 8}, { 11, 7}, { 25, 9}, \
+ { 7, 8}, { 15, 7}, { 31, 8}, { 19, 7}, \
+ { 39, 8}, { 27, 9}, { 15, 8}, { 39, 9}, \
+ { 23,10}, { 15, 9}, { 31, 8}, { 67, 9}, \
+ { 39, 8}, { 79, 7}, { 159, 8}, { 83, 7}, \
+ { 175, 8}, { 91, 9}, { 47, 8}, { 95,10}, \
+ { 31, 9}, { 63, 8}, { 127, 9}, { 71, 8}, \
+ { 143, 9}, { 79, 8}, { 159, 9}, { 87,10}, \
+ { 47, 9}, { 95,11}, { 31,10}, { 63, 9}, \
+ { 127, 8}, { 255, 9}, { 143,10}, { 79, 9}, \
+ { 175,10}, { 95, 9}, { 191, 8}, { 415,10}, \
+ { 111,11}, { 63,10}, { 127, 9}, { 271,10}, \
+ { 143, 9}, { 287, 8}, { 575,10}, { 175,11}, \
+ { 95,10}, { 191, 9}, { 415, 8}, { 831,10}, \
+ { 223,12}, { 63,11}, { 127,10}, { 287, 9}, \
+ { 575,11}, { 159,10}, { 319, 9}, { 639, 8}, \
+ { 1407,11}, { 191,10}, { 415, 9}, { 831,11}, \
+ { 223,10}, { 447,12}, { 4096,13}, { 8192,14}, \
+ { 16384,15}, { 32768,16} }
+#define MUL_FFT_TABLE3_SIZE 86
+#define MUL_FFT_THRESHOLD 2688
+
+#define SQR_FFT_MODF_THRESHOLD 180 /* k = 5 */
+#define SQR_FFT_TABLE3 \
+ { { 180, 5}, { 6, 4}, { 13, 5}, { 13, 6}, \
+ { 7, 5}, { 15, 6}, { 13, 7}, { 7, 6}, \
+ { 17, 7}, { 9, 6}, { 20, 7}, { 11, 6}, \
+ { 23, 7}, { 13, 8}, { 7, 7}, { 19, 8}, \
+ { 11, 7}, { 25, 9}, { 7, 8}, { 15, 7}, \
+ { 31, 8}, { 23, 9}, { 15, 8}, { 39, 9}, \
+ { 23,10}, { 15, 9}, { 31, 8}, { 63, 7}, \
+ { 127, 9}, { 47,10}, { 31, 9}, { 63, 8}, \
+ { 127, 9}, { 71, 8}, { 143, 7}, { 287, 6}, \
+ { 575,10}, { 47, 9}, { 95,11}, { 31,10}, \
+ { 63, 9}, { 127, 8}, { 255, 9}, { 143,10}, \
+ { 79, 9}, { 159, 8}, { 319, 9}, { 175, 8}, \
+ { 351, 7}, { 703,10}, { 95, 9}, { 191, 8}, \
+ { 383, 9}, { 207,10}, { 111,11}, { 63,10}, \
+ { 127, 9}, { 255,10}, { 143, 9}, { 287, 8}, \
+ { 575,10}, { 159, 9}, { 319,10}, { 175, 9}, \
+ { 351, 8}, { 703,11}, { 95,10}, { 191, 9}, \
+ { 383,10}, { 207, 9}, { 415, 8}, { 831,12}, \
+ { 63,11}, { 127,10}, { 255, 9}, { 543,10}, \
+ { 287, 9}, { 575,11}, { 159,10}, { 319, 9}, \
+ { 639,10}, { 351, 9}, { 703, 8}, { 1407,11}, \
+ { 191,10}, { 415, 9}, { 831,11}, { 223,10}, \
+ { 447, 9}, { 895,12}, { 4096,13}, { 8192,14}, \
+ { 16384,15}, { 32768,16} }
+#define SQR_FFT_TABLE3_SIZE 94
+#define SQR_FFT_THRESHOLD 1856
#define MULLO_BASECASE_THRESHOLD 0 /* always */
-#define MULLO_DC_THRESHOLD 106
-#define MULLO_MUL_N_THRESHOLD 3493
+#define MULLO_DC_THRESHOLD 145
+#define MULLO_MUL_N_THRESHOLD 5333
-#define DC_DIV_QR_THRESHOLD 123
-#define DC_DIVAPPR_Q_THRESHOLD 396
-#define DC_BDIV_QR_THRESHOLD 121
-#define DC_BDIV_Q_THRESHOLD 280
+#define DC_DIV_QR_THRESHOLD 78
+#define DC_DIVAPPR_Q_THRESHOLD 414
+#define DC_BDIV_QR_THRESHOLD 75
+#define DC_BDIV_Q_THRESHOLD 360
-#define INV_MULMOD_BNM1_THRESHOLD 62
+#define INV_MULMOD_BNM1_THRESHOLD 52
#define INV_NEWTON_THRESHOLD 351
-#define INV_APPR_THRESHOLD 357
-
-#define BINV_NEWTON_THRESHOLD 324
-#define REDC_1_TO_REDC_N_THRESHOLD 78
-
-#define MU_DIV_QR_THRESHOLD 1895
-#define MU_DIVAPPR_Q_THRESHOLD 1895
-#define MUPI_DIV_QR_THRESHOLD 122
-#define MU_BDIV_QR_THRESHOLD 872
-#define MU_BDIV_Q_THRESHOLD 2801
-
-#define MATRIX22_STRASSEN_THRESHOLD 13
-#define HGCD_THRESHOLD 144
-#define GCD_DC_THRESHOLD 630
-#define GCDEXT_DC_THRESHOLD 416
-#define JACOBI_BASE_METHOD 2
-
-#define GET_STR_DC_THRESHOLD 9
-#define GET_STR_PRECOMPUTE_THRESHOLD 17
-#define SET_STR_DC_THRESHOLD 537
-#define SET_STR_PRECOMPUTE_THRESHOLD 1576
+#define INV_APPR_THRESHOLD 354
+
+#define BINV_NEWTON_THRESHOLD 234
+#define REDC_1_TO_REDC_N_THRESHOLD 60
+
+#define MU_DIV_QR_THRESHOLD 855
+#define MU_DIVAPPR_Q_THRESHOLD 1099
+#define MUPI_DIV_QR_THRESHOLD 112
+#define MU_BDIV_QR_THRESHOLD 839
+#define MU_BDIV_Q_THRESHOLD 979
+
+#define POWM_SEC_TABLE 4,23,127,453,1679,2870
+
+#define MATRIX22_STRASSEN_THRESHOLD 9
+#define HGCD_THRESHOLD 87
+#define HGCD_APPR_THRESHOLD 126
+#define HGCD_REDUCE_THRESHOLD 1679
+#define GCD_DC_THRESHOLD 283
+#define GCDEXT_DC_THRESHOLD 189
+#define JACOBI_BASE_METHOD 4
+
+#define GET_STR_DC_THRESHOLD 14
+#define GET_STR_PRECOMPUTE_THRESHOLD 28
+#define SET_STR_DC_THRESHOLD 262
+#define SET_STR_PRECOMPUTE_THRESHOLD 548
+
+#define FAC_DSC_THRESHOLD 156
+#define FAC_ODD_THRESHOLD 28
instructions. No conditional move can issue 1-5 cycles after a load. (This
might have been fixed for UltraSPARC-3.)
-The UltraSPARC-3 pipeline is very simular to he one of UltraSPARC-1/2 , but is
+The UltraSPARC-3 pipeline is very similar to the one of UltraSPARC-1/2, but is
somewhat slower. Branches execute slower, and there may be other new stalls.
But integer multiply doesn't stall the entire CPU and also has a much lower
latency. But it's still not pipelined, and thus useless for our needs.
+++ /dev/null
-dnl SPARC v9 mpn_add_n -- Add two limb vectors of the same length > 0 and
-dnl store sum in a third limb vector.
-
-dnl Copyright 2001, 2002, 2003 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of the GNU Lesser General Public License as published
-dnl by the Free Software Foundation; either version 3 of the License, or (at
-dnl your option) any later version.
-
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-dnl License for more details.
-
-dnl You should have received a copy of the GNU Lesser General Public License
-dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/limb
-C UltraSPARC 1&2: 4
-C UltraSPARC 3: 4.5
-
-C Compute carry-out from the most significant bits of u,v, and r, where
-C r=u+v+carry_in, using logic operations.
-
-C This code runs at 4 cycles/limb on UltraSPARC 1 and 2. It has a 4 insn
-C recurrency, and the UltraSPARC 1 and 2 the IE units are 100% saturated.
-C Therefore, it seems futile to try to optimize this any further...
-
-C INPUT PARAMETERS
-define(`rp',`%i0')
-define(`up',`%i1')
-define(`vp',`%i2')
-define(`n',`%i3')
-
-define(`u0',`%l0')
-define(`u1',`%l2')
-define(`u2',`%l4')
-define(`u3',`%l6')
-define(`v0',`%l1')
-define(`v1',`%l3')
-define(`v2',`%l5')
-define(`v3',`%l7')
-
-define(`cy',`%i4')
-
-define(`fanop',`fitod %f0,%f2') dnl A quasi nop running in the FA pipe
-define(`fmnop',`fmuld %f0,%f0,%f4') dnl A quasi nop running in the FM pipe
-
-ASM_START()
- REGISTER(%g2,#scratch)
- REGISTER(%g3,#scratch)
-PROLOGUE(mpn_add_n)
- save %sp,-160,%sp
-
- fitod %f0,%f0 C make sure f0 contains small, quiet number
- subcc n,4,%g0
- bl,pn %icc,.Loop0
- mov 0,cy
-
- ldx [up+0],u0
- ldx [vp+0],v0
- add up,32,up
- ldx [up-24],u1
- ldx [vp+8],v1
- add vp,32,vp
- ldx [up-16],u2
- ldx [vp-16],v2
- ldx [up-8],u3
- ldx [vp-8],v3
- subcc n,8,n
- add u0,v0,%g1 C main add
- add %g1,cy,%g4 C carry add
- or u0,v0,%g2
- bl,pn %icc,.Lend4567
- fanop
- b,a .Loop
-
- .align 16
-C START MAIN LOOP
-.Loop: andn %g2,%g4,%g2
- and u0,v0,%g3
- ldx [up+0],u0
- fanop
-C --
- or %g3,%g2,%g2
- ldx [vp+0],v0
- add up,32,up
- fanop
-C --
- srlx %g2,63,cy
- add u1,v1,%g1
- stx %g4,[rp+0]
- fanop
-C --
- add %g1,cy,%g4
- or u1,v1,%g2
- fmnop
- fanop
-C --
- andn %g2,%g4,%g2
- and u1,v1,%g3
- ldx [up-24],u1
- fanop
-C --
- or %g3,%g2,%g2
- ldx [vp+8],v1
- add vp,32,vp
- fanop
-C --
- srlx %g2,63,cy
- add u2,v2,%g1
- stx %g4,[rp+8]
- fanop
-C --
- add %g1,cy,%g4
- or u2,v2,%g2
- fmnop
- fanop
-C --
- andn %g2,%g4,%g2
- and u2,v2,%g3
- ldx [up-16],u2
- fanop
-C --
- or %g3,%g2,%g2
- ldx [vp-16],v2
- add rp,32,rp
- fanop
-C --
- srlx %g2,63,cy
- add u3,v3,%g1
- stx %g4,[rp-16]
- fanop
-C --
- add %g1,cy,%g4
- or u3,v3,%g2
- fmnop
- fanop
-C --
- andn %g2,%g4,%g2
- and u3,v3,%g3
- ldx [up-8],u3
- fanop
-C --
- or %g3,%g2,%g2
- subcc n,4,n
- ldx [vp-8],v3
- fanop
-C --
- srlx %g2,63,cy
- add u0,v0,%g1
- stx %g4,[rp-8]
- fanop
-C --
- add %g1,cy,%g4
- or u0,v0,%g2
- bge,pt %icc,.Loop
- fanop
-C END MAIN LOOP
-.Lend4567:
- andn %g2,%g4,%g2
- and u0,v0,%g3
- or %g3,%g2,%g2
- srlx %g2,63,cy
- add u1,v1,%g1
- stx %g4,[rp+0]
- add %g1,cy,%g4
- or u1,v1,%g2
- andn %g2,%g4,%g2
- and u1,v1,%g3
- or %g3,%g2,%g2
- srlx %g2,63,cy
- add u2,v2,%g1
- stx %g4,[rp+8]
- add %g1,cy,%g4
- or u2,v2,%g2
- andn %g2,%g4,%g2
- and u2,v2,%g3
- or %g3,%g2,%g2
- add rp,32,rp
- srlx %g2,63,cy
- add u3,v3,%g1
- stx %g4,[rp-16]
- add %g1,cy,%g4
- or u3,v3,%g2
- andn %g2,%g4,%g2
- and u3,v3,%g3
- or %g3,%g2,%g2
- srlx %g2,63,cy
- stx %g4,[rp-8]
-
- addcc n,4,n
- bz,pn %icc,.Lret
- fanop
-
-.Loop0: ldx [up],u0
- add up,8,up
- ldx [vp],v0
- add vp,8,vp
- add rp,8,rp
- subcc n,1,n
- add u0,v0,%g1
- or u0,v0,%g2
- add %g1,cy,%g4
- and u0,v0,%g3
- andn %g2,%g4,%g2
- stx %g4,[rp-8]
- or %g3,%g2,%g2
- bnz,pt %icc,.Loop0
- srlx %g2,63,cy
-
-.Lret: mov cy,%i0
- ret
- restore
-EPILOGUE(mpn_add_n)
+++ /dev/null
-dnl SPARC v9 64-bit mpn_addmul_1 -- Multiply a limb vector with a limb and add
-dnl the result to a second limb vector.
-
-dnl Copyright 1998, 2000, 2001, 2002, 2003, 2004 Free Software Foundation,
-dnl Inc.
-
-dnl This file is part of the GNU MP Library.
-
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of the GNU Lesser General Public License as published
-dnl by the Free Software Foundation; either version 3 of the License, or (at
-dnl your option) any later version.
-
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-dnl License for more details.
-
-dnl You should have received a copy of the GNU Lesser General Public License
-dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/limb
-C UltraSPARC 1&2: 14
-C UltraSPARC 3: 17.5
-
-C Algorithm: We use eight floating-point multiplies per limb product, with the
-C invariant v operand split into four 16-bit pieces, and the up operand split
-C into 32-bit pieces. We sum pairs of 48-bit partial products using
-C floating-point add, then convert the four 49-bit product-sums and transfer
-C them to the integer unit.
-
-C Possible optimizations:
-C 0. Rewrite to use algorithm of mpn_addmul_2.
-C 1. Align the stack area where we transfer the four 49-bit product-sums
-C to a 32-byte boundary. That would minimize the cache collision.
-C (UltraSPARC-1/2 use a direct-mapped cache.) (Perhaps even better would
-C be to align the area to map to the area immediately before up?)
-C 2. Sum the 4 49-bit quantities using 32-bit operations, as in the
-C develop mpn_addmul_2. This would save many integer instructions.
-C 3. Unrolling. Questionable if it is worth the code expansion, given that
-C it could only save 1 cycle/limb.
-C 4. Specialize for particular v values. If its upper 32 bits are zero, we
-C could save many operations, in the FPU (fmuld), but more so in the IEU
-C since we'll be summing 48-bit quantities, which might be simpler.
-C 5. Ideally, we should schedule the f2/f3 and f4/f5 RAW further apart, and
-C the i00,i16,i32,i48 RAW less apart. The latter apart-scheduling should
-C not be greater than needed for L2 cache latency, and also not so great
-C that i16 needs to be copied.
-C 6. Avoid performing mem+fa+fm in the same cycle, at least not when we want
-C to get high IEU bandwidth. (12 of the 14 cycles will be free for 2 IEU
-C ops.)
-
-C Instruction classification (as per UltraSPARC-1/2 functional units):
-C 8 FM
-C 10 FA
-C 12 MEM
-C 10 ISHIFT + 14 IADDLOG
-C 1 BRANCH
-C 55 insns totally (plus one mov insn that should be optimized out)
-
-C The loop executes 56 instructions in 14 cycles on UltraSPARC-1/2, i.e we
-C sustain the peak execution rate of 4 instructions/cycle.
-
-C INPUT PARAMETERS
-C rp i0
-C up i1
-C n i2
-C v i3
-
-ASM_START()
- REGISTER(%g2,#scratch)
- REGISTER(%g3,#scratch)
-
-define(`p00', `%f8') define(`p16',`%f10') define(`p32',`%f12') define(`p48',`%f14')
-define(`r32',`%f16') define(`r48',`%f18') define(`r64',`%f20') define(`r80',`%f22')
-define(`v00',`%f24') define(`v16',`%f26') define(`v32',`%f28') define(`v48',`%f30')
-define(`u00',`%f32') define(`u32', `%f34')
-define(`a00',`%f36') define(`a16',`%f38') define(`a32',`%f40') define(`a48',`%f42')
-define(`cy',`%g1')
-define(`rlimb',`%g3')
-define(`i00',`%l0') define(`i16',`%l1') define(`i32',`%l2') define(`i48',`%l3')
-define(`xffffffff',`%l7')
-define(`xffff',`%o0')
-
-PROLOGUE(mpn_addmul_1)
-
-C Initialization. (1) Split v operand into four 16-bit chunks and store them
-C as IEEE double in fp registers. (2) Clear upper 32 bits of fp register pairs
-C f2 and f4. (3) Store masks in registers aliased to `xffff' and `xffffffff'.
-
- save %sp, -256, %sp
- mov -1, %g4
- srlx %g4, 48, xffff C store mask in register `xffff'
- and %i3, xffff, %g2
- stx %g2, [%sp+2223+0]
- srlx %i3, 16, %g3
- and %g3, xffff, %g3
- stx %g3, [%sp+2223+8]
- srlx %i3, 32, %g2
- and %g2, xffff, %g2
- stx %g2, [%sp+2223+16]
- srlx %i3, 48, %g3
- stx %g3, [%sp+2223+24]
- srlx %g4, 32, xffffffff C store mask in register `xffffffff'
-
- sllx %i2, 3, %i2
- mov 0, cy C clear cy
- add %i0, %i2, %i0
- add %i1, %i2, %i1
- neg %i2
- add %i1, 4, %i5
- add %i0, -32, %i4
- add %i0, -16, %i0
-
- ldd [%sp+2223+0], v00
- ldd [%sp+2223+8], v16
- ldd [%sp+2223+16], v32
- ldd [%sp+2223+24], v48
- ld [%sp+2223+0],%f2 C zero f2
- ld [%sp+2223+0],%f4 C zero f4
- ld [%i5+%i2], %f3 C read low 32 bits of up[i]
- ld [%i1+%i2], %f5 C read high 32 bits of up[i]
- fxtod v00, v00
- fxtod v16, v16
- fxtod v32, v32
- fxtod v48, v48
-
-C Start real work. (We sneakingly read f3 and f5 above...)
-C The software pipeline is very deep, requiring 4 feed-in stages.
-
- fxtod %f2, u00
- fxtod %f4, u32
- fmuld u00, v00, a00
- fmuld u00, v16, a16
- fmuld u00, v32, p32
- fmuld u32, v00, r32
- fmuld u00, v48, p48
- addcc %i2, 8, %i2
- bnz,pt %icc, .L_two_or_more
- fmuld u32, v16, r48
-
-.L_one:
- fmuld u32, v32, r64 C FIXME not urgent
- faddd p32, r32, a32
- fdtox a00, a00
- faddd p48, r48, a48
- fmuld u32, v48, r80 C FIXME not urgent
- fdtox a16, a16
- fdtox a32, a32
- fdtox a48, a48
- std a00, [%sp+2223+0]
- std a16, [%sp+2223+8]
- std a32, [%sp+2223+16]
- std a48, [%sp+2223+24]
- add %i2, 8, %i2
-
- fdtox r64, a00
- ldx [%i0+%i2], rlimb C read rp[i]
- fdtox r80, a16
- ldx [%sp+2223+0], i00
- ldx [%sp+2223+8], i16
- ldx [%sp+2223+16], i32
- ldx [%sp+2223+24], i48
- std a00, [%sp+2223+0]
- std a16, [%sp+2223+8]
- add %i2, 8, %i2
-
- srlx rlimb, 32, %g4 C HI(rlimb)
- and rlimb, xffffffff, %g5 C LO(rlimb)
- add i00, %g5, %g5 C i00+ now in g5
- ldx [%sp+2223+0], i00
- srlx i16, 48, %l4 C (i16 >> 48)
- mov i16, %g2
- ldx [%sp+2223+8], i16
- srlx i48, 16, %l5 C (i48 >> 16)
- add i32, %g4, %g4 C i32+ now in g4
- sllx i48, 32, %l6 C (i48 << 32)
- srlx %g4, 32, %o3 C (i32 >> 32)
- add %l5, %l4, %o1 C hi64- in %o1
- std a00, [%sp+2223+0]
- sllx %g4, 16, %o2 C (i32 << 16)
- add %o3, %o1, %o1 C hi64 in %o1 1st ASSIGNMENT
- std a16, [%sp+2223+8]
- sllx %o1, 48, %o3 C (hi64 << 48)
- add %g2, %o2, %o2 C mi64- in %o2
- add %l6, %o2, %o2 C mi64- in %o2
- sub %o2, %o3, %o2 C mi64 in %o2 1st ASSIGNMENT
- add cy, %g5, %o4 C x = prev(i00) + cy
- b .L_out_1
- add %i2, 8, %i2
-
-.L_two_or_more:
- ld [%i5+%i2], %f3 C read low 32 bits of up[i]
- fmuld u32, v32, r64 C FIXME not urgent
- faddd p32, r32, a32
- ld [%i1+%i2], %f5 C read high 32 bits of up[i]
- fdtox a00, a00
- faddd p48, r48, a48
- fmuld u32, v48, r80 C FIXME not urgent
- fdtox a16, a16
- fdtox a32, a32
- fxtod %f2, u00
- fxtod %f4, u32
- fdtox a48, a48
- std a00, [%sp+2223+0]
- fmuld u00, v00, p00
- std a16, [%sp+2223+8]
- fmuld u00, v16, p16
- std a32, [%sp+2223+16]
- fmuld u00, v32, p32
- std a48, [%sp+2223+24]
- faddd p00, r64, a00
- fmuld u32, v00, r32
- faddd p16, r80, a16
- fmuld u00, v48, p48
- addcc %i2, 8, %i2
- bnz,pt %icc, .L_three_or_more
- fmuld u32, v16, r48
-
-.L_two:
- fmuld u32, v32, r64 C FIXME not urgent
- faddd p32, r32, a32
- fdtox a00, a00
- ldx [%i0+%i2], rlimb C read rp[i]
- faddd p48, r48, a48
- fmuld u32, v48, r80 C FIXME not urgent
- fdtox a16, a16
- ldx [%sp+2223+0], i00
- fdtox a32, a32
- ldx [%sp+2223+8], i16
- ldx [%sp+2223+16], i32
- ldx [%sp+2223+24], i48
- fdtox a48, a48
- std a00, [%sp+2223+0]
- std a16, [%sp+2223+8]
- std a32, [%sp+2223+16]
- std a48, [%sp+2223+24]
- add %i2, 8, %i2
-
- fdtox r64, a00
- srlx rlimb, 32, %g4 C HI(rlimb)
- and rlimb, xffffffff, %g5 C LO(rlimb)
- ldx [%i0+%i2], rlimb C read rp[i]
- add i00, %g5, %g5 C i00+ now in g5
- fdtox r80, a16
- ldx [%sp+2223+0], i00
- srlx i16, 48, %l4 C (i16 >> 48)
- mov i16, %g2
- ldx [%sp+2223+8], i16
- srlx i48, 16, %l5 C (i48 >> 16)
- add i32, %g4, %g4 C i32+ now in g4
- ldx [%sp+2223+16], i32
- sllx i48, 32, %l6 C (i48 << 32)
- ldx [%sp+2223+24], i48
- srlx %g4, 32, %o3 C (i32 >> 32)
- add %l5, %l4, %o1 C hi64- in %o1
- std a00, [%sp+2223+0]
- sllx %g4, 16, %o2 C (i32 << 16)
- add %o3, %o1, %o1 C hi64 in %o1 1st ASSIGNMENT
- std a16, [%sp+2223+8]
- sllx %o1, 48, %o3 C (hi64 << 48)
- add %g2, %o2, %o2 C mi64- in %o2
- add %l6, %o2, %o2 C mi64- in %o2
- sub %o2, %o3, %o2 C mi64 in %o2 1st ASSIGNMENT
- add cy, %g5, %o4 C x = prev(i00) + cy
- b .L_out_2
- add %i2, 8, %i2
-
-.L_three_or_more:
- ld [%i5+%i2], %f3 C read low 32 bits of up[i]
- fmuld u32, v32, r64 C FIXME not urgent
- faddd p32, r32, a32
- ld [%i1+%i2], %f5 C read high 32 bits of up[i]
- fdtox a00, a00
- ldx [%i0+%i2], rlimb C read rp[i]
- faddd p48, r48, a48
- fmuld u32, v48, r80 C FIXME not urgent
- fdtox a16, a16
- ldx [%sp+2223+0], i00
- fdtox a32, a32
- ldx [%sp+2223+8], i16
- fxtod %f2, u00
- ldx [%sp+2223+16], i32
- fxtod %f4, u32
- ldx [%sp+2223+24], i48
- fdtox a48, a48
- std a00, [%sp+2223+0]
- fmuld u00, v00, p00
- std a16, [%sp+2223+8]
- fmuld u00, v16, p16
- std a32, [%sp+2223+16]
- fmuld u00, v32, p32
- std a48, [%sp+2223+24]
- faddd p00, r64, a00
- fmuld u32, v00, r32
- faddd p16, r80, a16
- fmuld u00, v48, p48
- addcc %i2, 8, %i2
- bnz,pt %icc, .L_four_or_more
- fmuld u32, v16, r48
-
-.L_three:
- fmuld u32, v32, r64 C FIXME not urgent
- faddd p32, r32, a32
- fdtox a00, a00
- srlx rlimb, 32, %g4 C HI(rlimb)
- and rlimb, xffffffff, %g5 C LO(rlimb)
- ldx [%i0+%i2], rlimb C read rp[i]
- faddd p48, r48, a48
- add i00, %g5, %g5 C i00+ now in g5
- fmuld u32, v48, r80 C FIXME not urgent
- fdtox a16, a16
- ldx [%sp+2223+0], i00
- fdtox a32, a32
- srlx i16, 48, %l4 C (i16 >> 48)
- mov i16, %g2
- ldx [%sp+2223+8], i16
- srlx i48, 16, %l5 C (i48 >> 16)
- add i32, %g4, %g4 C i32+ now in g4
- ldx [%sp+2223+16], i32
- sllx i48, 32, %l6 C (i48 << 32)
- ldx [%sp+2223+24], i48
- fdtox a48, a48
- srlx %g4, 32, %o3 C (i32 >> 32)
- add %l5, %l4, %o1 C hi64- in %o1
- std a00, [%sp+2223+0]
- sllx %g4, 16, %o2 C (i32 << 16)
- add %o3, %o1, %o1 C hi64 in %o1 1st ASSIGNMENT
- std a16, [%sp+2223+8]
- sllx %o1, 48, %o3 C (hi64 << 48)
- add %g2, %o2, %o2 C mi64- in %o2
- std a32, [%sp+2223+16]
- add %l6, %o2, %o2 C mi64- in %o2
- std a48, [%sp+2223+24]
- sub %o2, %o3, %o2 C mi64 in %o2 1st ASSIGNMENT
- add cy, %g5, %o4 C x = prev(i00) + cy
- b .L_out_3
- add %i2, 8, %i2
-
-.L_four_or_more:
- ld [%i5+%i2], %f3 C read low 32 bits of up[i]
- fmuld u32, v32, r64 C FIXME not urgent
- faddd p32, r32, a32
- ld [%i1+%i2], %f5 C read high 32 bits of up[i]
- fdtox a00, a00
- srlx rlimb, 32, %g4 C HI(rlimb)
- and rlimb, xffffffff, %g5 C LO(rlimb)
- ldx [%i0+%i2], rlimb C read rp[i]
- faddd p48, r48, a48
- add i00, %g5, %g5 C i00+ now in g5
- fmuld u32, v48, r80 C FIXME not urgent
- fdtox a16, a16
- ldx [%sp+2223+0], i00
- fdtox a32, a32
- srlx i16, 48, %l4 C (i16 >> 48)
- mov i16, %g2
- ldx [%sp+2223+8], i16
- fxtod %f2, u00
- srlx i48, 16, %l5 C (i48 >> 16)
- add i32, %g4, %g4 C i32+ now in g4
- ldx [%sp+2223+16], i32
- fxtod %f4, u32
- sllx i48, 32, %l6 C (i48 << 32)
- ldx [%sp+2223+24], i48
- fdtox a48, a48
- srlx %g4, 32, %o3 C (i32 >> 32)
- add %l5, %l4, %o1 C hi64- in %o1
- std a00, [%sp+2223+0]
- fmuld u00, v00, p00
- sllx %g4, 16, %o2 C (i32 << 16)
- add %o3, %o1, %o1 C hi64 in %o1 1st ASSIGNMENT
- std a16, [%sp+2223+8]
- fmuld u00, v16, p16
- sllx %o1, 48, %o3 C (hi64 << 48)
- add %g2, %o2, %o2 C mi64- in %o2
- std a32, [%sp+2223+16]
- fmuld u00, v32, p32
- add %l6, %o2, %o2 C mi64- in %o2
- std a48, [%sp+2223+24]
- faddd p00, r64, a00
- fmuld u32, v00, r32
- sub %o2, %o3, %o2 C mi64 in %o2 1st ASSIGNMENT
- faddd p16, r80, a16
- fmuld u00, v48, p48
- add cy, %g5, %o4 C x = prev(i00) + cy
- addcc %i2, 8, %i2
- bnz,pt %icc, .Loop
- fmuld u32, v16, r48
-
-.L_four:
- b,a .L_out_4
-
-C BEGIN MAIN LOOP
- .align 16
-.Loop:
-C 00
- srlx %o4, 16, %o5 C (x >> 16)
- ld [%i5+%i2], %f3 C read low 32 bits of up[i]
- fmuld u32, v32, r64 C FIXME not urgent
- faddd p32, r32, a32
-C 01
- add %o5, %o2, %o2 C mi64 in %o2 2nd ASSIGNMENT
- and %o4, xffff, %o5 C (x & 0xffff)
- ld [%i1+%i2], %f5 C read high 32 bits of up[i]
- fdtox a00, a00
-C 02
- srlx rlimb, 32, %g4 C HI(rlimb)
- and rlimb, xffffffff, %g5 C LO(rlimb)
- ldx [%i0+%i2], rlimb C read rp[i]
- faddd p48, r48, a48
-C 03
- srlx %o2, 48, %o7 C (mi64 >> 48)
- add i00, %g5, %g5 C i00+ now in g5
- fmuld u32, v48, r80 C FIXME not urgent
- fdtox a16, a16
-C 04
- sllx %o2, 16, %i3 C (mi64 << 16)
- add %o7, %o1, cy C new cy
- ldx [%sp+2223+0], i00
- fdtox a32, a32
-C 05
- srlx i16, 48, %l4 C (i16 >> 48)
- mov i16, %g2
- ldx [%sp+2223+8], i16
- fxtod %f2, u00
-C 06
- srlx i48, 16, %l5 C (i48 >> 16)
- add i32, %g4, %g4 C i32+ now in g4
- ldx [%sp+2223+16], i32
- fxtod %f4, u32
-C 07
- sllx i48, 32, %l6 C (i48 << 32)
- or %i3, %o5, %o5
- ldx [%sp+2223+24], i48
- fdtox a48, a48
-C 08
- srlx %g4, 32, %o3 C (i32 >> 32)
- add %l5, %l4, %o1 C hi64- in %o1
- std a00, [%sp+2223+0]
- fmuld u00, v00, p00
-C 09
- sllx %g4, 16, %o2 C (i32 << 16)
- add %o3, %o1, %o1 C hi64 in %o1 1st ASSIGNMENT
- std a16, [%sp+2223+8]
- fmuld u00, v16, p16
-C 10
- sllx %o1, 48, %o3 C (hi64 << 48)
- add %g2, %o2, %o2 C mi64- in %o2
- std a32, [%sp+2223+16]
- fmuld u00, v32, p32
-C 11
- add %l6, %o2, %o2 C mi64- in %o2
- std a48, [%sp+2223+24]
- faddd p00, r64, a00
- fmuld u32, v00, r32
-C 12
- sub %o2, %o3, %o2 C mi64 in %o2 1st ASSIGNMENT
- stx %o5, [%i4+%i2]
- faddd p16, r80, a16
- fmuld u00, v48, p48
-C 13
- add cy, %g5, %o4 C x = prev(i00) + cy
- addcc %i2, 8, %i2
- bnz,pt %icc, .Loop
- fmuld u32, v16, r48
-C END MAIN LOOP
-
-.L_out_4:
- srlx %o4, 16, %o5 C (x >> 16)
- fmuld u32, v32, r64 C FIXME not urgent
- faddd p32, r32, a32
- add %o5, %o2, %o2 C mi64 in %o2 2nd ASSIGNMENT
- and %o4, xffff, %o5 C (x & 0xffff)
- fdtox a00, a00
- srlx rlimb, 32, %g4 C HI(rlimb)
- and rlimb, xffffffff, %g5 C LO(rlimb)
- ldx [%i0+%i2], rlimb C read rp[i]
- faddd p48, r48, a48
- srlx %o2, 48, %o7 C (mi64 >> 48)
- add i00, %g5, %g5 C i00+ now in g5
- fmuld u32, v48, r80 C FIXME not urgent
- fdtox a16, a16
- sllx %o2, 16, %i3 C (mi64 << 16)
- add %o7, %o1, cy C new cy
- ldx [%sp+2223+0], i00
- fdtox a32, a32
- srlx i16, 48, %l4 C (i16 >> 48)
- mov i16, %g2
- ldx [%sp+2223+8], i16
- srlx i48, 16, %l5 C (i48 >> 16)
- add i32, %g4, %g4 C i32+ now in g4
- ldx [%sp+2223+16], i32
- sllx i48, 32, %l6 C (i48 << 32)
- or %i3, %o5, %o5
- ldx [%sp+2223+24], i48
- fdtox a48, a48
- srlx %g4, 32, %o3 C (i32 >> 32)
- add %l5, %l4, %o1 C hi64- in %o1
- std a00, [%sp+2223+0]
- sllx %g4, 16, %o2 C (i32 << 16)
- add %o3, %o1, %o1 C hi64 in %o1 1st ASSIGNMENT
- std a16, [%sp+2223+8]
- sllx %o1, 48, %o3 C (hi64 << 48)
- add %g2, %o2, %o2 C mi64- in %o2
- std a32, [%sp+2223+16]
- add %l6, %o2, %o2 C mi64- in %o2
- std a48, [%sp+2223+24]
- sub %o2, %o3, %o2 C mi64 in %o2 1st ASSIGNMENT
- stx %o5, [%i4+%i2]
- add cy, %g5, %o4 C x = prev(i00) + cy
- add %i2, 8, %i2
-.L_out_3:
- srlx %o4, 16, %o5 C (x >> 16)
- add %o5, %o2, %o2 C mi64 in %o2 2nd ASSIGNMENT
- and %o4, xffff, %o5 C (x & 0xffff)
- fdtox r64, a00
- srlx rlimb, 32, %g4 C HI(rlimb)
- and rlimb, xffffffff, %g5 C LO(rlimb)
- ldx [%i0+%i2], rlimb C read rp[i]
- srlx %o2, 48, %o7 C (mi64 >> 48)
- add i00, %g5, %g5 C i00+ now in g5
- fdtox r80, a16
- sllx %o2, 16, %i3 C (mi64 << 16)
- add %o7, %o1, cy C new cy
- ldx [%sp+2223+0], i00
- srlx i16, 48, %l4 C (i16 >> 48)
- mov i16, %g2
- ldx [%sp+2223+8], i16
- srlx i48, 16, %l5 C (i48 >> 16)
- add i32, %g4, %g4 C i32+ now in g4
- ldx [%sp+2223+16], i32
- sllx i48, 32, %l6 C (i48 << 32)
- or %i3, %o5, %o5
- ldx [%sp+2223+24], i48
- srlx %g4, 32, %o3 C (i32 >> 32)
- add %l5, %l4, %o1 C hi64- in %o1
- std a00, [%sp+2223+0]
- sllx %g4, 16, %o2 C (i32 << 16)
- add %o3, %o1, %o1 C hi64 in %o1 1st ASSIGNMENT
- std a16, [%sp+2223+8]
- sllx %o1, 48, %o3 C (hi64 << 48)
- add %g2, %o2, %o2 C mi64- in %o2
- add %l6, %o2, %o2 C mi64- in %o2
- sub %o2, %o3, %o2 C mi64 in %o2 1st ASSIGNMENT
- stx %o5, [%i4+%i2]
- add cy, %g5, %o4 C x = prev(i00) + cy
- add %i2, 8, %i2
-.L_out_2:
- srlx %o4, 16, %o5 C (x >> 16)
- add %o5, %o2, %o2 C mi64 in %o2 2nd ASSIGNMENT
- and %o4, xffff, %o5 C (x & 0xffff)
- srlx rlimb, 32, %g4 C HI(rlimb)
- and rlimb, xffffffff, %g5 C LO(rlimb)
- srlx %o2, 48, %o7 C (mi64 >> 48)
- add i00, %g5, %g5 C i00+ now in g5
- sllx %o2, 16, %i3 C (mi64 << 16)
- add %o7, %o1, cy C new cy
- ldx [%sp+2223+0], i00
- srlx i16, 48, %l4 C (i16 >> 48)
- mov i16, %g2
- ldx [%sp+2223+8], i16
- srlx i48, 16, %l5 C (i48 >> 16)
- add i32, %g4, %g4 C i32+ now in g4
- sllx i48, 32, %l6 C (i48 << 32)
- or %i3, %o5, %o5
- srlx %g4, 32, %o3 C (i32 >> 32)
- add %l5, %l4, %o1 C hi64- in %o1
- sllx %g4, 16, %o2 C (i32 << 16)
- add %o3, %o1, %o1 C hi64 in %o1 1st ASSIGNMENT
- sllx %o1, 48, %o3 C (hi64 << 48)
- add %g2, %o2, %o2 C mi64- in %o2
- add %l6, %o2, %o2 C mi64- in %o2
- sub %o2, %o3, %o2 C mi64 in %o2 1st ASSIGNMENT
- stx %o5, [%i4+%i2]
- add cy, %g5, %o4 C x = prev(i00) + cy
- add %i2, 8, %i2
-.L_out_1:
- srlx %o4, 16, %o5 C (x >> 16)
- add %o5, %o2, %o2 C mi64 in %o2 2nd ASSIGNMENT
- and %o4, xffff, %o5 C (x & 0xffff)
- srlx %o2, 48, %o7 C (mi64 >> 48)
- sllx %o2, 16, %i3 C (mi64 << 16)
- add %o7, %o1, cy C new cy
- or %i3, %o5, %o5
- stx %o5, [%i4+%i2]
-
- sllx i00, 0, %g2
- add %g2, cy, cy
- sllx i16, 16, %g3
- add %g3, cy, cy
-
- return %i7+8
- mov cy, %o0
-EPILOGUE(mpn_addmul_1)
+++ /dev/null
-dnl SPARC v9 64-bit mpn_addmul_2 -- Multiply an n limb number with 2-limb
-dnl number and add the result to a n limb vector.
-
-dnl Copyright 2002, 2003 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of the GNU Lesser General Public License as published
-dnl by the Free Software Foundation; either version 3 of the License, or (at
-dnl your option) any later version.
-
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-dnl License for more details.
-
-dnl You should have received a copy of the GNU Lesser General Public License
-dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/limb
-C UltraSPARC 1&2: 9
-C UltraSPARC 3: 10
-
-C Algorithm: We use 16 floating-point multiplies per limb product, with the
-C 2-limb v operand split into eight 16-bit pieces, and the n-limb u operand
-C split into 32-bit pieces. We sum four 48-bit partial products using
-C floating-point add, then convert the resulting four 50-bit quantities and
-C transfer them to the integer unit.
-
-C Possible optimizations:
-C 1. Align the stack area where we transfer the four 50-bit product-sums
-C to a 32-byte boundary. That would minimize the cache collision.
-C (UltraSPARC-1/2 use a direct-mapped cache.) (Perhaps even better would
-C be to align the area to map to the area immediately before up?)
-C 2. Perform two of the fp->int conversions with integer instructions. We
-C can get almost ten free IEU slots, if we clean up bookkeeping and the
-C silly carry-limb code.
-C 3. For an mpn_addmul_1 based on this, we need to fix the silly carry-limb
-C code.
-
-C OSP (Overlapping software pipeline) version of mpn_mul_basecase:
-C Operand swap will require 8 LDDA and 8 FXTOD, which will mean 8 cycles.
-C FI = 20
-C L = 9 x un * vn
-C WDFI = 10 x vn / 2
-C WD = 4
-
-C Instruction classification (as per UltraSPARC functional units).
-C Assuming silly carry code is fixed. Includes bookkeeping.
-C
-C mpn_addmul_X mpn_mul_X
-C 1 2 1 2
-C ========== ==========
-C FM 8 16 8 16
-C FA 10 18 10 18
-C MEM 12 12 10 10
-C ISHIFT 6 6 6 6
-C IADDLOG 11 11 10 10
-C BRANCH 1 1 1 1
-C
-C TOTAL IEU 17 17 16 16
-C TOTAL 48 64 45 61
-C
-C IEU cycles 8.5 8.5 8 8
-C MEM cycles 12 12 10 10
-C ISSUE cycles 12 16 11.25 15.25
-C FPU cycles 10 18 10 18
-C cycles/loop 12 18 12 18
-C cycles/limb 12 9 12 9
-
-
-C INPUT PARAMETERS
-C rp[n + 1] i0
-C up[n] i1
-C n i2
-C vp[2] i3
-
-
-ASM_START()
- REGISTER(%g2,#scratch)
- REGISTER(%g3,#scratch)
-
-C Combine registers:
-C u00_hi= u32_hi
-C u00_lo= u32_lo
-C a000 = out000
-C a016 = out016
-C Free: f52 f54
-
-
-define(`p000', `%f8') define(`p016',`%f10')
-define(`p032',`%f12') define(`p048',`%f14')
-define(`p064',`%f16') define(`p080',`%f18')
-define(`p096a',`%f20') define(`p112a',`%f22')
-define(`p096b',`%f56') define(`p112b',`%f58')
-
-define(`out000',`%f0') define(`out016',`%f6')
-
-define(`v000',`%f24') define(`v016',`%f26')
-define(`v032',`%f28') define(`v048',`%f30')
-define(`v064',`%f44') define(`v080',`%f46')
-define(`v096',`%f48') define(`v112',`%f50')
-
-define(`u00',`%f32') define(`u32', `%f34')
-
-define(`a000',`%f36') define(`a016',`%f38')
-define(`a032',`%f40') define(`a048',`%f42')
-define(`a064',`%f60') define(`a080',`%f62')
-
-define(`u00_hi',`%f2') define(`u32_hi',`%f4')
-define(`u00_lo',`%f3') define(`u32_lo',`%f5')
-
-define(`cy',`%g1')
-define(`rlimb',`%g3')
-define(`i00',`%l0') define(`i16',`%l1')
-define(`r00',`%l2') define(`r32',`%l3')
-define(`xffffffff',`%l7')
-define(`xffff',`%o0')
-
-
-PROLOGUE(mpn_addmul_2)
-
-C Initialization. (1) Split v operand into eight 16-bit chunks and store them
-C as IEEE double in fp registers. (2) Clear upper 32 bits of fp register pairs
-C f2 and f4. (3) Store masks in registers aliased to `xffff' and `xffffffff'.
-C This code could be better scheduled.
-
- save %sp, -256, %sp
-
-ifdef(`HAVE_VIS',
-` mov -1, %g4
- wr %g0, 0xD2, %asi
- srlx %g4, 32, xffffffff C store mask in register `xffffffff'
- ldda [%i3+6] %asi, v000
- ldda [%i3+4] %asi, v016
- ldda [%i3+2] %asi, v032
- ldda [%i3+0] %asi, v048
- fxtod v000, v000
- ldda [%i3+14] %asi, v064
- fxtod v016, v016
- ldda [%i3+12] %asi, v080
- fxtod v032, v032
- ldda [%i3+10] %asi, v096
- fxtod v048, v048
- ldda [%i3+8] %asi, v112
- fxtod v064, v064
- fxtod v080, v080
- fxtod v096, v096
- fxtod v112, v112
- fzero u00_hi
- fzero u32_hi
-',
-` mov -1, %g4
- ldx [%i3+0], %l0 C vp[0]
- srlx %g4, 48, xffff C store mask in register `xffff'
- ldx [%i3+8], %l1 C vp[1]
-
- and %l0, xffff, %g2
- stx %g2, [%sp+2223+0]
- srlx %l0, 16, %g3
- and %g3, xffff, %g3
- stx %g3, [%sp+2223+8]
- srlx %l0, 32, %g2
- and %g2, xffff, %g2
- stx %g2, [%sp+2223+16]
- srlx %l0, 48, %g3
- stx %g3, [%sp+2223+24]
- and %l1, xffff, %g2
- stx %g2, [%sp+2223+32]
- srlx %l1, 16, %g3
- and %g3, xffff, %g3
- stx %g3, [%sp+2223+40]
- srlx %l1, 32, %g2
- and %g2, xffff, %g2
- stx %g2, [%sp+2223+48]
- srlx %l1, 48, %g3
- stx %g3, [%sp+2223+56]
-
- srlx %g4, 32, xffffffff C store mask in register `xffffffff'
-
- ldd [%sp+2223+0], v000
- ldd [%sp+2223+8], v016
- ldd [%sp+2223+16], v032
- ldd [%sp+2223+24], v048
- fxtod v000, v000
- ldd [%sp+2223+32], v064
- fxtod v016, v016
- ldd [%sp+2223+40], v080
- fxtod v032, v032
- ldd [%sp+2223+48], v096
- fxtod v048, v048
- ldd [%sp+2223+56], v112
- fxtod v064, v064
- ld [%sp+2223+0], u00_hi C zero u00_hi
- fxtod v080, v080
- ld [%sp+2223+0], u32_hi C zero u32_hi
- fxtod v096, v096
- fxtod v112, v112
-')
-C Initialization done.
- mov 0, %g2
- mov 0, rlimb
- mov 0, %g4
- add %i0, -8, %i0 C BOOKKEEPING
-
-C Start software pipeline.
-
- ld [%i1+4], u00_lo C read low 32 bits of up[i]
- fxtod u00_hi, u00
-C mid
- ld [%i1+0], u32_lo C read high 32 bits of up[i]
- fmuld u00, v000, a000
- fmuld u00, v016, a016
- fmuld u00, v032, a032
- fmuld u00, v048, a048
- add %i2, -1, %i2 C BOOKKEEPING
- fmuld u00, v064, p064
- add %i1, 8, %i1 C BOOKKEEPING
- fxtod u32_hi, u32
- fmuld u00, v080, p080
- fmuld u00, v096, p096a
- brnz,pt %i2, .L_2_or_more
- fmuld u00, v112, p112a
-
-.L1: fdtox a000, out000
- fmuld u32, v000, p000
- fdtox a016, out016
- fmuld u32, v016, p016
- fmovd p064, a064
- fmuld u32, v032, p032
- fmovd p080, a080
- fmuld u32, v048, p048
- std out000, [%sp+2223+16]
- faddd p000, a032, a000
- fmuld u32, v064, p064
- std out016, [%sp+2223+24]
- fxtod u00_hi, u00
- faddd p016, a048, a016
- fmuld u32, v080, p080
- faddd p032, a064, a032
- fmuld u32, v096, p096b
- faddd p048, a080, a048
- fmuld u32, v112, p112b
-C mid
- fdtox a000, out000
- fdtox a016, out016
- faddd p064, p096a, a064
- faddd p080, p112a, a080
- std out000, [%sp+2223+0]
- b .L_wd2
- std out016, [%sp+2223+8]
-
-.L_2_or_more:
- ld [%i1+4], u00_lo C read low 32 bits of up[i]
- fdtox a000, out000
- fmuld u32, v000, p000
- fdtox a016, out016
- fmuld u32, v016, p016
- fmovd p064, a064
- fmuld u32, v032, p032
- fmovd p080, a080
- fmuld u32, v048, p048
- std out000, [%sp+2223+16]
- faddd p000, a032, a000
- fmuld u32, v064, p064
- std out016, [%sp+2223+24]
- fxtod u00_hi, u00
- faddd p016, a048, a016
- fmuld u32, v080, p080
- faddd p032, a064, a032
- fmuld u32, v096, p096b
- faddd p048, a080, a048
- fmuld u32, v112, p112b
-C mid
- ld [%i1+0], u32_lo C read high 32 bits of up[i]
- fdtox a000, out000
- fmuld u00, v000, p000
- fdtox a016, out016
- fmuld u00, v016, p016
- faddd p064, p096a, a064
- fmuld u00, v032, p032
- faddd p080, p112a, a080
- fmuld u00, v048, p048
- add %i2, -1, %i2 C BOOKKEEPING
- std out000, [%sp+2223+0]
- faddd p000, a032, a000
- fmuld u00, v064, p064
- add %i1, 8, %i1 C BOOKKEEPING
- std out016, [%sp+2223+8]
- fxtod u32_hi, u32
- faddd p016, a048, a016
- fmuld u00, v080, p080
- faddd p032, a064, a032
- fmuld u00, v096, p096a
- faddd p048, a080, a048
- brnz,pt %i2, .L_3_or_more
- fmuld u00, v112, p112a
-
- b .Lend
- nop
-
-C 64 32 0
-C . . .
-C . |__rXXX_| 32
-C . |___cy___| 34
-C . |_______i00__| 50
-C |_______i16__| . 50
-
-
-C BEGIN MAIN LOOP
- .align 16
-.L_3_or_more:
-.Loop: ld [%i1+4], u00_lo C read low 32 bits of up[i]
- and %g2, xffffffff, %g2
- fdtox a000, out000
- fmuld u32, v000, p000
-C
- lduw [%i0+4+8], r00 C read low 32 bits of rp[i]
- add %g2, rlimb, %l5
- fdtox a016, out016
- fmuld u32, v016, p016
-C
- srlx %l5, 32, cy
- ldx [%sp+2223+16], i00
- faddd p064, p096b, a064
- fmuld u32, v032, p032
-C
- add %g4, cy, cy C new cy
- ldx [%sp+2223+24], i16
- faddd p080, p112b, a080
- fmuld u32, v048, p048
-C
- nop
- std out000, [%sp+2223+16]
- faddd p000, a032, a000
- fmuld u32, v064, p064
-C
- add i00, r00, rlimb
- add %i0, 8, %i0 C BOOKKEEPING
- std out016, [%sp+2223+24]
- fxtod u00_hi, u00
-C
- sllx i16, 16, %g2
- add cy, rlimb, rlimb
- faddd p016, a048, a016
- fmuld u32, v080, p080
-C
- srlx i16, 16, %g4
- add %g2, rlimb, %l5
- faddd p032, a064, a032
- fmuld u32, v096, p096b
-C
- stw %l5, [%i0+4]
- nop
- faddd p048, a080, a048
- fmuld u32, v112, p112b
-C midloop
- ld [%i1+0], u32_lo C read high 32 bits of up[i]
- and %g2, xffffffff, %g2
- fdtox a000, out000
- fmuld u00, v000, p000
-C
- lduw [%i0+0], r32 C read high 32 bits of rp[i]
- add %g2, rlimb, %l5
- fdtox a016, out016
- fmuld u00, v016, p016
-C
- srlx %l5, 32, cy
- ldx [%sp+2223+0], i00
- faddd p064, p096a, a064
- fmuld u00, v032, p032
-C
- add %g4, cy, cy C new cy
- ldx [%sp+2223+8], i16
- faddd p080, p112a, a080
- fmuld u00, v048, p048
-C
- add %i2, -1, %i2 C BOOKKEEPING
- std out000, [%sp+2223+0]
- faddd p000, a032, a000
- fmuld u00, v064, p064
-C
- add i00, r32, rlimb
- add %i1, 8, %i1 C BOOKKEEPING
- std out016, [%sp+2223+8]
- fxtod u32_hi, u32
-C
- sllx i16, 16, %g2
- add cy, rlimb, rlimb
- faddd p016, a048, a016
- fmuld u00, v080, p080
-C
- srlx i16, 16, %g4
- add %g2, rlimb, %l5
- faddd p032, a064, a032
- fmuld u00, v096, p096a
-C
- stw %l5, [%i0+0]
- faddd p048, a080, a048
- brnz,pt %i2, .Loop
- fmuld u00, v112, p112a
-C END MAIN LOOP
-
-C WIND-DOWN PHASE 1
-.Lend: and %g2, xffffffff, %g2
- fdtox a000, out000
- fmuld u32, v000, p000
- lduw [%i0+4+8], r00 C read low 32 bits of rp[i]
- add %g2, rlimb, %l5
- fdtox a016, out016
- fmuld u32, v016, p016
- srlx %l5, 32, cy
- ldx [%sp+2223+16], i00
- faddd p064, p096b, a064
- fmuld u32, v032, p032
- add %g4, cy, cy C new cy
- ldx [%sp+2223+24], i16
- faddd p080, p112b, a080
- fmuld u32, v048, p048
- std out000, [%sp+2223+16]
- faddd p000, a032, a000
- fmuld u32, v064, p064
- add i00, r00, rlimb
- add %i0, 8, %i0 C BOOKKEEPING
- std out016, [%sp+2223+24]
- sllx i16, 16, %g2
- add cy, rlimb, rlimb
- faddd p016, a048, a016
- fmuld u32, v080, p080
- srlx i16, 16, %g4
- add %g2, rlimb, %l5
- faddd p032, a064, a032
- fmuld u32, v096, p096b
- stw %l5, [%i0+4]
- faddd p048, a080, a048
- fmuld u32, v112, p112b
-C mid
- and %g2, xffffffff, %g2
- fdtox a000, out000
- lduw [%i0+0], r32 C read high 32 bits of rp[i]
- add %g2, rlimb, %l5
- fdtox a016, out016
- srlx %l5, 32, cy
- ldx [%sp+2223+0], i00
- faddd p064, p096a, a064
- add %g4, cy, cy C new cy
- ldx [%sp+2223+8], i16
- faddd p080, p112a, a080
- std out000, [%sp+2223+0]
- add i00, r32, rlimb
- std out016, [%sp+2223+8]
- sllx i16, 16, %g2
- add cy, rlimb, rlimb
- srlx i16, 16, %g4
- add %g2, rlimb, %l5
- stw %l5, [%i0+0]
-
-C WIND-DOWN PHASE 2
-.L_wd2: and %g2, xffffffff, %g2
- fdtox a032, out000
- lduw [%i0+4+8], r00 C read low 32 bits of rp[i]
- add %g2, rlimb, %l5
- fdtox a048, out016
- srlx %l5, 32, cy
- ldx [%sp+2223+16], i00
- add %g4, cy, cy C new cy
- ldx [%sp+2223+24], i16
- std out000, [%sp+2223+16]
- add i00, r00, rlimb
- add %i0, 8, %i0 C BOOKKEEPING
- std out016, [%sp+2223+24]
- sllx i16, 16, %g2
- add cy, rlimb, rlimb
- srlx i16, 16, %g4
- add %g2, rlimb, %l5
- stw %l5, [%i0+4]
-C mid
- and %g2, xffffffff, %g2
- fdtox a064, out000
- lduw [%i0+0], r32 C read high 32 bits of rp[i]
- add %g2, rlimb, %l5
- fdtox a080, out016
- srlx %l5, 32, cy
- ldx [%sp+2223+0], i00
- add %g4, cy, cy C new cy
- ldx [%sp+2223+8], i16
- std out000, [%sp+2223+0]
- add i00, r32, rlimb
- std out016, [%sp+2223+8]
- sllx i16, 16, %g2
- add cy, rlimb, rlimb
- srlx i16, 16, %g4
- add %g2, rlimb, %l5
- stw %l5, [%i0+0]
-
-C WIND-DOWN PHASE 3
-.L_wd3: and %g2, xffffffff, %g2
- fdtox p096b, out000
- add %g2, rlimb, %l5
- fdtox p112b, out016
- srlx %l5, 32, cy
- ldx [%sp+2223+16], rlimb
- add %g4, cy, cy C new cy
- ldx [%sp+2223+24], i16
- std out000, [%sp+2223+16]
- add %i0, 8, %i0 C BOOKKEEPING
- std out016, [%sp+2223+24]
- sllx i16, 16, %g2
- add cy, rlimb, rlimb
- srlx i16, 16, %g4
- add %g2, rlimb, %l5
- stw %l5, [%i0+4]
-C mid
- and %g2, xffffffff, %g2
- add %g2, rlimb, %l5
- srlx %l5, 32, cy
- ldx [%sp+2223+0], rlimb
- add %g4, cy, cy C new cy
- ldx [%sp+2223+8], i16
- sllx i16, 16, %g2
- add cy, rlimb, rlimb
- srlx i16, 16, %g4
- add %g2, rlimb, %l5
- stw %l5, [%i0+0]
-
- and %g2, xffffffff, %g2
- add %g2, rlimb, %l5
- srlx %l5, 32, cy
- ldx [%sp+2223+16], i00
- add %g4, cy, cy C new cy
- ldx [%sp+2223+24], i16
-
- sllx i16, 16, %g2
- add i00, cy, cy
- return %i7+8
- add %g2, cy, %o0
-EPILOGUE(mpn_addmul_2)
add %g1,%o0,%o0
add %g1,%o1,%o1
addcc %o2,-8,%o2
- bl,pt %icc,L(end01234567)
+ bl,pt %xcc,L(end01234567)
nop
L(loop1):
ldx [%o1-8],%g1
stx %o4,[%o0-56]
stx %o5,[%o0-64]
addcc %o2,-8,%o2
- bge,pt %icc,L(loop1)
+ bge,pt %xcc,L(loop1)
add %o0,-64,%o0
L(end01234567):
addcc %o2,8,%o2
- bz,pn %icc,L(end)
+ bz,pn %xcc,L(end)
nop
L(loop2):
ldx [%o1-8],%g1
add %o1,-8,%o1
addcc %o2,-1,%o2
stx %g1,[%o0-8]
- bg,pt %icc,L(loop2)
+ bg,pt %xcc,L(loop2)
add %o0,-8,%o0
L(end): retl
nop
REGISTER(%g3,#scratch)
PROLOGUE(mpn_copyi)
addcc %o2,-8,%o2
- bl,pt %icc,L(end01234567)
+ bl,pt %xcc,L(end01234567)
nop
L(loop1):
ldx [%o1+0],%g1
stx %o4,[%o0+48]
stx %o5,[%o0+56]
addcc %o2,-8,%o2
- bge,pt %icc,L(loop1)
+ bge,pt %xcc,L(loop1)
add %o0,64,%o0
L(end01234567):
addcc %o2,8,%o2
- bz,pn %icc,L(end)
+ bz,pn %xcc,L(end)
nop
L(loop2):
ldx [%o1+0],%g1
add %o1,8,%o1
addcc %o2,-1,%o2
stx %g1,[%o0+0]
- bg,pt %icc,L(loop2)
+ bg,pt %xcc,L(loop2)
add %o0,8,%o0
L(end): retl
nop
--- /dev/null
+dnl SPARC64 mpn_gcd_1.
+
+dnl Based on the K7 gcd_1.asm, by Kevin Ryde. Rehacked for SPARC by Torbjorn
+dnl Granlund.
+
+dnl Copyright 2000, 2001, 2002, 2005, 2009, 2011, 2012 Free Software
+dnl Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C cycles/bit (approx)
+C UltraSPARC 1&2: 5.1
+C UltraSPARC 3: 5.0
+C UltraSPARC T1: 12.8
+C Numbers measured with: speed -CD -s32-64 -t32 mpn_gcd_1
+
+C ctz_table[n] is the number of trailing zeros on n, or MAXSHIFT if n==0.
+
+deflit(MAXSHIFT, 7)
+deflit(MASK, eval((m4_lshift(1,MAXSHIFT))-1))
+
+ .section ".rodata"
+ctz_table:
+ .byte MAXSHIFT
+forloop(i,1,MASK,
+` .byte m4_count_trailing_zeros(i)
+')
+
+
+C Threshold of when to call bmod when U is one limb. Should be about
+C (time_in_cycles(bmod_1,1) + call_overhead) / (cycles/bit).
+define(`BMOD_THRES_LOG2', 14)
+
+C INPUT PARAMETERS
+define(`up', `%i0')
+define(`n', `%i1')
+define(`v0', `%i2')
+
+
+ASM_START()
+ REGISTER(%g2,#scratch)
+ REGISTER(%g3,#scratch)
+PROLOGUE(mpn_gcd_1)
+ save %sp, -192, %sp C new register window and stack frame
+ ldx [up+0], %g1 C U low limb
+ mov -1, %i4 C shift count, bumped to 0 on first pass of L(twos)
+ or v0, %g1, %g2 C x | y
+
+L(twos): C count trailing zero bits of x | y into %i4
+ inc %i4
+ andcc %g2, 1, %g0 C test bit 0
+ bz,a %xcc, L(twos) C loop while bit 0 is clear
+ srlx %g2, 1, %g2 C annulled delay slot: shift only when looping
+
+L(divide_strip_y): C shift v0 right until it is odd
+ andcc v0, 1, %g0
+ bz,a %xcc, L(divide_strip_y)
+ srlx v0, 1, v0 C annulled delay slot: shift only when looping
+
+ cmp n, 1 C if n > 1 we need
+ bnz %xcc, L(bmod) C to call bmod_1
+ nop
+
+C Both U and V are single limbs, reduce with bmod if u0 >> v0.
+ srlx %g1, BMOD_THRES_LOG2, %g2
+ cmp %g2, v0
+ bleu %xcc, L(noreduce) C skip the call when (u0 >> BMOD_THRES_LOG2) <= v0
+ mov %g1, %o0 C delay slot: u0 is the working value if we skip
+
+L(bmod):
+ mov up, %o0
+ mov n, %o1
+ mov v0, %o2
+ call mpn_modexact_1c_odd
+ mov 0, %o3 C delay slot: fourth argument is 0
+
+L(noreduce): C next: get the address of ctz_table into %i5
+
+ifdef(`PIC',`
+ sethi %hi(_GLOBAL_OFFSET_TABLE_-4), %l7
+ call L(LGETPC0)
+ add %l7, %lo(_GLOBAL_OFFSET_TABLE_+4), %l7
+ sethi %hi(ctz_table), %g1
+ or %g1, %lo(ctz_table), %g1
+ ldx [%l7+%g1], %i5
+',`
+ sethi %hh(ctz_table), %l7
+ or %l7, %hm(ctz_table), %l7
+ sllx %l7, 32, %l7
+ sethi %lm(ctz_table), %g1
+ add %l7, %g1, %l7
+ or %l7, %lo(ctz_table), %i5
+')
+
+ cmp %o0, 0 C is the working value in %o0 zero?
+ bnz %xcc, L(mid) C nonzero: enter the loop
+ andcc %o0, MASK, %g3 C delay slot: low MAXSHIFT bits, sets flags for L(mid)
+
+ return %i7+8 C zero: result is v0 shifted left by the L(twos) count
+ sllx %o2, %o4, %o0 C CAUTION: v0 alias for o2
+
+ ALIGN(16)
+L(top): movcc %xcc, %l4, v0 C v = min(u,v)
+ movcc %xcc, %l2, %o0 C u = |v - u|
+ cmp %g3, 0 C are all MAXSHIFT low bits zero?
+L(mid): ldub [%i5+%g3], %g3 C table lookup: shift count for these low bits
+ bz,a %xcc, L(shift_alot) C low bits all zero: shift by MAXSHIFT and retry
+ srlx %o0, MAXSHIFT, %o0 C annulled delay slot: only when branching
+ srlx %o0, %g3, %l4 C new u, odd
+ nop C force parallel exec of sub insns
+ subcc v0, %l4, %l2 C v - u, set flags for branch and movcc
+ sub %l4, v0, %o0 C u - v
+ bnz %xcc, L(top) C loop while v - u is nonzero
+ and %l2, MASK, %g3 C extract low MAXSHIFT bits from (v-u)
+
+ return %i7+8 C done: result is v0 shifted left by the L(twos) count
+ sllx %o2, %o4, %o0 C CAUTION: v0 alias for o2
+
+L(shift_alot):
+ b L(mid)
+ andcc %o0, MASK, %g3 C delay slot: retest the low bits of the shifted u
+
+ifdef(`PIC',`
+L(LGETPC0):
+ retl
+ add %o7, %l7, %l7
+')
+EPILOGUE()
#define MOD_1_NORM_THRESHOLD 3
#define MOD_1_UNNORM_THRESHOLD 3
#define MOD_1N_TO_MOD_1_1_THRESHOLD MP_SIZE_T_MAX /* never */
-#define MOD_1U_TO_MOD_1_1_THRESHOLD MP_SIZE_T_MAX
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD MP_SIZE_T_MAX
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD MP_SIZE_T_MAX
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 22
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 0 /* never mpn_mod_1_1p */
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD 27
#define PREINV_MOD_1_TO_MOD_1_THRESHOLD MP_SIZE_T_MAX /* never */
#define USE_PREINV_DIVREM_1 1
-#define DIVREM_2_THRESHOLD 7
#define DIVEXACT_1_THRESHOLD 0 /* always */
#define BMOD_1_TO_MOD_1_THRESHOLD MP_SIZE_T_MAX /* never */
+++ /dev/null
-dnl SPARC v9 mpn_lshift
-
-dnl Copyright 1996, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of the GNU Lesser General Public License as published
-dnl by the Free Software Foundation; either version 3 of the License, or (at
-dnl your option) any later version.
-
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-dnl License for more details.
-
-dnl You should have received a copy of the GNU Lesser General Public License
-dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
-
-
-include(`../config.m4')
-
-C cycles/limb
-C UltraSPARC 1&2: 2
-C UltraSPARC 3: 3.25
-
-C INPUT PARAMETERS
-define(`rp',`%i0')
-define(`up',`%i1')
-define(`n',`%i2')
-define(`cnt',`%i3')
-
-define(`u0',`%l0')
-define(`u1',`%l2')
-define(`u2',`%l4')
-define(`u3',`%l6')
-
-define(`tnc',`%i4')
-
-define(`fanop',`fitod %f0,%f2') dnl A quasi nop running in the FA pipe
-define(`fmnop',`fmuld %f0,%f0,%f4') dnl A quasi nop running in the FM pipe
-
-ASM_START()
- REGISTER(%g2,#scratch)
- REGISTER(%g3,#scratch)
-PROLOGUE(mpn_lshift)
- save %sp,-160,%sp
-
- sllx n,3,%g1
- sub %g0,cnt,tnc C negate shift count
- add up,%g1,up C make %o1 point at end of src
- add rp,%g1,rp C make %o0 point at end of res
- ldx [up-8],u3 C load first limb
- subcc n,5,n
- srlx u3,tnc,%i5 C compute function result
- sllx u3,cnt,%g3
- bl,pn %icc,.Lend1234
- fanop
-
- subcc n,4,n
- ldx [up-16],u0
- ldx [up-24],u1
- add up,-32,up
- ldx [up-0],u2
- ldx [up-8],u3
- srlx u0,tnc,%g2
-
- bl,pn %icc,.Lend5678
- fanop
-
- b,a .Loop
- .align 16
-.Loop:
- sllx u0,cnt,%g1
- or %g3,%g2,%g3
- ldx [up-16],u0
- fanop
-C --
- srlx u1,tnc,%g2
- subcc n,4,n
- stx %g3,[rp-8]
- fanop
-C --
- sllx u1,cnt,%g3
- or %g1,%g2,%g1
- ldx [up-24],u1
- fanop
-C --
- srlx u2,tnc,%g2
- stx %g1,[rp-16]
- add up,-32,up
- fanop
-C --
- sllx u2,cnt,%g1
- or %g3,%g2,%g3
- ldx [up-0],u2
- fanop
-C --
- srlx u3,tnc,%g2
- stx %g3,[rp-24]
- add rp,-32,rp
- fanop
-C --
- sllx u3,cnt,%g3
- or %g1,%g2,%g1
- ldx [up-8],u3
- fanop
-C --
- srlx u0,tnc,%g2
- stx %g1,[rp-0]
- bge,pt %icc,.Loop
- fanop
-C --
-.Lend5678:
- sllx u0,cnt,%g1
- or %g3,%g2,%g3
- srlx u1,tnc,%g2
- stx %g3,[rp-8]
- sllx u1,cnt,%g3
- or %g1,%g2,%g1
- srlx u2,tnc,%g2
- stx %g1,[rp-16]
- sllx u2,cnt,%g1
- or %g3,%g2,%g3
- srlx u3,tnc,%g2
- stx %g3,[rp-24]
- add rp,-32,rp
- sllx u3,cnt,%g3 C carry...
- or %g1,%g2,%g1
- stx %g1,[rp-0]
-
-.Lend1234:
- addcc n,4,n
- bz,pn %icc,.Lret
- fanop
-.Loop0:
- add rp,-8,rp
- subcc n,1,n
- ldx [up-16],u3
- add up,-8,up
- srlx u3,tnc,%g2
- or %g3,%g2,%g3
- stx %g3,[rp]
- sllx u3,cnt,%g3
- bnz,pt %icc,.Loop0
- fanop
-.Lret:
- stx %g3,[rp-8]
- mov %i5,%i0
- ret
- restore
-EPILOGUE(mpn_lshift)
/* UltraSPARC 64 mpn_mod_1 -- mpn by limb remainder.
-Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2003 Free Software Foundation,
-Inc.
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2003, 2010 Free Software
+Foundation, Inc.
This file is part of the GNU MP Library.
sizes, but at size==2 it was only about the same speed and at size==3 was
slower. */
-mp_limb_t
-mpn_mod_1 (mp_srcptr src_limbptr, mp_size_t size_limbs, mp_limb_t d_limb)
+static mp_limb_t
+mpn_mod_1_anynorm (mp_srcptr src_limbptr, mp_size_t size_limbs, mp_limb_t d_limb)
{
int norm, norm_rshift;
mp_limb_t src_high_limb;
return r >> norm;
}
}
+
+mp_limb_t
+mpn_mod_1 (mp_srcptr ap, mp_size_t n, mp_limb_t b)
+{ /* Return the remainder of the n-limb operand at ap divided by the single
+ limb b. The work is handed to one of several helpers, chosen by whether
+ b has its high bit set and by the MOD_1*_THRESHOLD size cutoffs. */
+ ASSERT (n >= 0);
+ ASSERT (b != 0);
+
+ /* Should this be handled at all? Rely on callers? Note n==0 is currently
+ required by mpz/fdiv_r_ui.c and possibly other places. */
+ if (n == 0)
+ return 0;
+
+ if (UNLIKELY ((b & GMP_NUMB_HIGHBIT) != 0)) /* b has its high bit set */
+ {
+ if (BELOW_THRESHOLD (n, MOD_1N_TO_MOD_1_1_THRESHOLD))
+ {
+ return mpn_mod_1_anynorm (ap, n, b);
+ }
+ else
+ {
+ mp_limb_t pre[4];
+ mpn_mod_1_1p_cps (pre, b);
+ return mpn_mod_1_1p (ap, n, b, pre); /* b is passed unshifted here */
+ }
+ }
+ else /* high bit of b is clear */
+ {
+ if (BELOW_THRESHOLD (n, MOD_1U_TO_MOD_1_1_THRESHOLD))
+ {
+ return mpn_mod_1_anynorm (ap, n, b);
+ }
+ else if (BELOW_THRESHOLD (n, MOD_1_1_TO_MOD_1_2_THRESHOLD))
+ {
+ mp_limb_t pre[4];
+ mpn_mod_1_1p_cps (pre, b);
+ /* The helpers below receive b shifted left by pre[1]; presumably the
+ leading-zero count, as mpn_mod_1s_4p_cps stores -- confirm for 1p/2p. */
+ return mpn_mod_1_1p (ap, n, b << pre[1], pre);
+ }
+ else if (BELOW_THRESHOLD (n, MOD_1_2_TO_MOD_1_4_THRESHOLD) || UNLIKELY (b > GMP_NUMB_MASK / 4))
+ { /* also taken for any b above GMP_NUMB_MASK / 4, keeping it off the 4p path */
+ mp_limb_t pre[5];
+ mpn_mod_1s_2p_cps (pre, b);
+ return mpn_mod_1s_2p (ap, n, b << pre[1], pre);
+ }
+ else
+ {
+ mp_limb_t pre[7];
+ mpn_mod_1s_4p_cps (pre, b);
+ return mpn_mod_1s_4p (ap, n, b << pre[1], pre);
+ }
+ }
+}
--- /dev/null
+/* mpn_mod_1s_4p (ap, n, b, cps)
+ Divide (ap,,n) by b. Return the single-limb remainder.
+ Requires that b < B / 4.
+
+ Contributed to the GNU project by Torbjorn Granlund.
+ Based on a suggestion by Peter L. Montgomery.
+
+ THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
+ SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
+ GUARANTEED THAT THEY WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2008, 2009, 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+#include "mpn/sparc64/sparc64.h"
+
+void
+mpn_mod_1s_4p_cps (mp_limb_t cps[7], mp_limb_t b)
+{ /* Fill cps[0..6] with the per-divisor constants that mpn_mod_1s_4p reads:
+ the invert_limb value for the normalized b, the normalization shift, and
+ the five residues B1modb..B5modb shifted back down by that shift. */
+ mp_limb_t bi;
+ mp_limb_t B1modb, B2modb, B3modb, B4modb, B5modb;
+ int cnt;
+
+ ASSERT (b <= (~(mp_limb_t) 0) / 4); /* so cnt below is at least 2 */
+
+ count_leading_zeros (cnt, b);
+
+ b <<= cnt; /* normalize: from here on b has its top bit set */
+ invert_limb (bi, b);
+
+ B1modb = -b * ((bi >> (GMP_LIMB_BITS-cnt)) | (CNST_LIMB(1) << cnt));
+ ASSERT (B1modb <= b); /* NB: not fully reduced mod b */
+ udiv_rnnd_preinv (B2modb, B1modb, 0, b, bi); /* each step divides (previous residue, 0) by b */
+ udiv_rnnd_preinv (B3modb, B2modb, 0, b, bi);
+ udiv_rnnd_preinv (B4modb, B3modb, 0, b, bi);
+ udiv_rnnd_preinv (B5modb, B4modb, 0, b, bi);
+
+ cps[0] = bi;
+ cps[1] = cnt;
+ cps[2] = B1modb >> cnt; /* residues are stored with the normalization shift removed */
+ cps[3] = B2modb >> cnt;
+ cps[4] = B3modb >> cnt;
+ cps[5] = B4modb >> cnt;
+ cps[6] = B5modb >> cnt;
+
+#if WANT_ASSERT /* check that cps[2] + ... + cps[6] does not wrap around a limb */
+ {
+ int i;
+ b = cps[2];
+ for (i = 3; i <= 6; i++)
+ {
+ b += cps[i];
+ ASSERT (b >= cps[i]);
+ }
+ }
+#endif
+}
+
+mp_limb_t
+mpn_mod_1s_4p (mp_srcptr ap, mp_size_t n, mp_limb_t b, mp_limb_t cps[7])
+{ /* Return the remainder of the n-limb operand at ap for the divisor that
+ cps[] was built for by mpn_mod_1s_4p_cps. Limbs are folded four at a
+ time into a two-limb value rh:rl using the stored residues, and one
+ udiv_rnnd_preinv at the end produces the result. The caller in
+ mpn_mod_1 passes b already shifted left by cps[1]. */
+ mp_limb_t rh, rl, bi, ph, pl, ch, cl, r;
+ mp_limb_t B1modb, B2modb, B3modb, B4modb, B5modb;
+ mp_size_t i;
+ int cnt;
+
+ ASSERT (n >= 1);
+
+ B1modb = cps[2];
+ B2modb = cps[3];
+ B3modb = cps[4];
+ B4modb = cps[5];
+ B5modb = cps[6];
+
+ if ((b >> 32) == 0) /* This branch uses umul_ppmm_s from
+ mpn/sparc64/sparc64.h, presumably a multiply specialised for a small
+ second operand -- confirm there.
+ NOTE(review): mpn_mod_1 passes b << pre[1], and mpn_mod_1s_4p_cps stores
+ count_leading_zeros of the divisor in cps[1], so b as received here
+ would have its top bit set and this test would never be true. Confirm
+ whether the unshifted divisor (b >> cps[1]) was meant. */
+ {
+ switch (n & 3) /* fold the top 1 to 4 limbs into rh:rl so that n becomes a multiple of 4 */
+ {
+ case 0:
+ umul_ppmm_s (ph, pl, ap[n - 3], B1modb);
+ add_ssaaaa (ph, pl, ph, pl, 0, ap[n - 4]);
+ umul_ppmm_s (ch, cl, ap[n - 2], B2modb);
+ add_ssaaaa (ph, pl, ph, pl, ch, cl);
+ umul_ppmm_s (rh, rl, ap[n - 1], B3modb);
+ add_ssaaaa (rh, rl, rh, rl, ph, pl);
+ n -= 4;
+ break;
+ case 1:
+ rh = 0;
+ rl = ap[n - 1];
+ n -= 1;
+ break;
+ case 2:
+ rh = ap[n - 1];
+ rl = ap[n - 2];
+ n -= 2;
+ break;
+ case 3:
+ umul_ppmm_s (ph, pl, ap[n - 2], B1modb);
+ add_ssaaaa (ph, pl, ph, pl, 0, ap[n - 3]);
+ umul_ppmm_s (rh, rl, ap[n - 1], B2modb);
+ add_ssaaaa (rh, rl, rh, rl, ph, pl);
+ n -= 3;
+ break;
+ }
+
+ for (i = n - 4; i >= 0; i -= 4) /* four limbs per pass, from the high end down */
+ {
+ /* rr = ap[i] < B
+ + ap[i+1] * (B mod b) <= (B-1)(b-1)
+ + ap[i+2] * (B^2 mod b) <= (B-1)(b-1)
+ + ap[i+3] * (B^3 mod b) <= (B-1)(b-1)
+ + LO(rr) * (B^4 mod b) <= (B-1)(b-1)
+ + HI(rr) * (B^5 mod b) <= (B-1)(b-1)
+ */
+ umul_ppmm_s (ph, pl, ap[i + 1], B1modb);
+ add_ssaaaa (ph, pl, ph, pl, 0, ap[i + 0]);
+
+ umul_ppmm_s (ch, cl, ap[i + 2], B2modb);
+ add_ssaaaa (ph, pl, ph, pl, ch, cl);
+
+ umul_ppmm_s (ch, cl, ap[i + 3], B3modb);
+ add_ssaaaa (ph, pl, ph, pl, ch, cl);
+
+ umul_ppmm_s (ch, cl, rl, B4modb);
+ add_ssaaaa (ph, pl, ph, pl, ch, cl);
+
+ umul_ppmm_s (rh, rl, rh, B5modb);
+ add_ssaaaa (rh, rl, rh, rl, ph, pl);
+ }
+
+ umul_ppmm_s (rh, cl, rh, B1modb); /* last fold: rh:rl = rh * B1modb + rl */
+ add_ssaaaa (rh, rl, rh, rl, 0, cl);
+ }
+ else /* same steps as above, using the general umul_ppmm */
+ {
+ switch (n & 3) /* fold the top 1 to 4 limbs into rh:rl so that n becomes a multiple of 4 */
+ {
+ case 0:
+ umul_ppmm (ph, pl, ap[n - 3], B1modb);
+ add_ssaaaa (ph, pl, ph, pl, 0, ap[n - 4]);
+ umul_ppmm (ch, cl, ap[n - 2], B2modb);
+ add_ssaaaa (ph, pl, ph, pl, ch, cl);
+ umul_ppmm (rh, rl, ap[n - 1], B3modb);
+ add_ssaaaa (rh, rl, rh, rl, ph, pl);
+ n -= 4;
+ break;
+ case 1:
+ rh = 0;
+ rl = ap[n - 1];
+ n -= 1;
+ break;
+ case 2:
+ rh = ap[n - 1];
+ rl = ap[n - 2];
+ n -= 2;
+ break;
+ case 3:
+ umul_ppmm (ph, pl, ap[n - 2], B1modb);
+ add_ssaaaa (ph, pl, ph, pl, 0, ap[n - 3]);
+ umul_ppmm (rh, rl, ap[n - 1], B2modb);
+ add_ssaaaa (rh, rl, rh, rl, ph, pl);
+ n -= 3;
+ break;
+ }
+
+ for (i = n - 4; i >= 0; i -= 4) /* four limbs per pass, from the high end down */
+ {
+ /* rr = ap[i] < B
+ + ap[i+1] * (B mod b) <= (B-1)(b-1)
+ + ap[i+2] * (B^2 mod b) <= (B-1)(b-1)
+ + ap[i+3] * (B^3 mod b) <= (B-1)(b-1)
+ + LO(rr) * (B^4 mod b) <= (B-1)(b-1)
+ + HI(rr) * (B^5 mod b) <= (B-1)(b-1)
+ */
+ umul_ppmm (ph, pl, ap[i + 1], B1modb);
+ add_ssaaaa (ph, pl, ph, pl, 0, ap[i + 0]);
+
+ umul_ppmm (ch, cl, ap[i + 2], B2modb);
+ add_ssaaaa (ph, pl, ph, pl, ch, cl);
+
+ umul_ppmm (ch, cl, ap[i + 3], B3modb);
+ add_ssaaaa (ph, pl, ph, pl, ch, cl);
+
+ umul_ppmm (ch, cl, rl, B4modb);
+ add_ssaaaa (ph, pl, ph, pl, ch, cl);
+
+ umul_ppmm (rh, rl, rh, B5modb);
+ add_ssaaaa (rh, rl, rh, rl, ph, pl);
+ }
+
+ umul_ppmm (rh, cl, rh, B1modb); /* last fold: rh:rl = rh * B1modb + rl */
+ add_ssaaaa (rh, rl, rh, rl, 0, cl);
+ }
+
+ bi = cps[0];
+ cnt = cps[1];
+
+ r = (rh << cnt) | (rl >> (GMP_LIMB_BITS - cnt)); /* high limb of rh:rl shifted left by cnt */
+ udiv_rnnd_preinv (r, r, rl << cnt, b, bi); /* remainder of the shifted pair by b */
+
+ return r >> cnt; /* shift the remainder back down by cnt */
+}
+++ /dev/null
-dnl SPARC v9 64-bit mpn_mul_1 -- Multiply a limb vector with a limb and store
-dnl the result in a second limb vector.
-
-dnl Copyright 1998, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of the GNU Lesser General Public License as published
-dnl by the Free Software Foundation; either version 3 of the License, or (at
-dnl your option) any later version.
-
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-dnl License for more details.
-
-dnl You should have received a copy of the GNU Lesser General Public License
-dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/limb
-C UltraSPARC 1&2: 14
-C UltraSPARC 3: 18.5
-
-C Algorithm: We use eight floating-point multiplies per limb product, with the
-C invariant v operand split into four 16-bit pieces, and the s1 operand split
-C into 32-bit pieces. We sum pairs of 48-bit partial products using
-C floating-point add, then convert the four 49-bit product-sums and transfer
-C them to the integer unit.
-
-C Possible optimizations:
-C 1. Align the stack area where we transfer the four 49-bit product-sums
-C to a 32-byte boundary. That would minimize the cache collision.
-C (UltraSPARC-1/2 use a direct-mapped cache.) (Perhaps even better would
-C be to align the area to map to the area immediately before s1?)
-C 2. Sum the 4 49-bit quantities using 32-bit operations, as in the
-C develop mpn_addmul_2. This would save many integer instructions.
-C 3. Unrolling. Questionable if it is worth the code expansion, given that
-C it could only save 1 cycle/limb.
-C 4. Specialize for particular v values. If its upper 32 bits are zero, we
-C could save many operations, in the FPU (fmuld), but more so in the IEU
-C since we'll be summing 48-bit quantities, which might be simpler.
-C 5. Ideally, we should schedule the f2/f3 and f4/f5 RAW further apart, and
-C the i00,i16,i32,i48 RAW less apart. The latter apart-scheduling should
-C not be greater than needed for L2 cache latency, and also not so great
-C that i16 needs to be copied.
-C 6. Avoid performing mem+fa+fm in the same cycle, at least not when we want
-C to get high IEU bandwidth. (12 of the 14 cycles will be free for 2 IEU
-C ops.)
-
-C Instruction classification (as per UltraSPARC-1/2 functional units):
-C 8 FM
-C 10 FA
-C 11 MEM
-C 9 ISHIFT + 10? IADDLOG
-C 1 BRANCH
-C 49 insns totally (plus three mov insns that should be optimized out)
-
-C The loop executes 53 instructions in 14 cycles on UltraSPARC-1/2, i.e we
-C sustain 3.79 instructions/cycle.
-
-C INPUT PARAMETERS
-C rp i0
-C up i1
-C n i2
-C v i3
-
-ASM_START()
- REGISTER(%g2,#scratch)
- REGISTER(%g3,#scratch)
-
-define(`p00', `%f8') define(`p16',`%f10') define(`p32',`%f12') define(`p48',`%f14')
-define(`r32',`%f16') define(`r48',`%f18') define(`r64',`%f20') define(`r80',`%f22')
-define(`v00',`%f24') define(`v16',`%f26') define(`v32',`%f28') define(`v48',`%f30')
-define(`u00',`%f32') define(`u32', `%f34')
-define(`a00',`%f36') define(`a16',`%f38') define(`a32',`%f40') define(`a48',`%f42')
-define(`cy',`%g1')
-define(`rlimb',`%g3')
-define(`i00',`%l0') define(`i16',`%l1') define(`i32',`%l2') define(`i48',`%l3')
-define(`xffffffff',`%l7')
-define(`xffff',`%o0')
-
-PROLOGUE(mpn_mul_1)
-
-C Initialization. (1) Split v operand into four 16-bit chunks and store them
-C as IEEE double in fp registers. (2) Clear upper 32 bits of fp register pairs
-C f2 and f4. (3) Store masks in registers aliased to `xffff' and `xffffffff'.
-
- save %sp, -256, %sp
- mov -1, %g4
- srlx %g4, 48, xffff C store mask in register `xffff'
- and %i3, xffff, %g2
- stx %g2, [%sp+2223+0]
- srlx %i3, 16, %g3
- and %g3, xffff, %g3
- stx %g3, [%sp+2223+8]
- srlx %i3, 32, %g2
- and %g2, xffff, %g2
- stx %g2, [%sp+2223+16]
- srlx %i3, 48, %g3
- stx %g3, [%sp+2223+24]
- srlx %g4, 32, xffffffff C store mask in register `xffffffff'
-
- sllx %i2, 3, %i2
- mov 0, cy C clear cy
- add %i0, %i2, %i0
- add %i1, %i2, %i1
- neg %i2
- add %i1, 4, %i5
- add %i0, -32, %i4
- add %i0, -16, %i0
-
- ldd [%sp+2223+0], v00
- ldd [%sp+2223+8], v16
- ldd [%sp+2223+16], v32
- ldd [%sp+2223+24], v48
- ld [%sp+2223+0],%f2 C zero f2
- ld [%sp+2223+0],%f4 C zero f4
- ld [%i5+%i2], %f3 C read low 32 bits of up[i]
- ld [%i1+%i2], %f5 C read high 32 bits of up[i]
- fxtod v00, v00
- fxtod v16, v16
- fxtod v32, v32
- fxtod v48, v48
-
-C Start real work. (We sneakingly read f3 and f5 above...)
-C The software pipeline is very deep, requiring 4 feed-in stages.
-
- fxtod %f2, u00
- fxtod %f4, u32
- fmuld u00, v00, a00
- fmuld u00, v16, a16
- fmuld u00, v32, p32
- fmuld u32, v00, r32
- fmuld u00, v48, p48
- addcc %i2, 8, %i2
- bnz,pt %icc, .L_two_or_more
- fmuld u32, v16, r48
-
-.L_one:
- fmuld u32, v32, r64 C FIXME not urgent
- faddd p32, r32, a32
- fdtox a00, a00
- faddd p48, r48, a48
- fmuld u32, v48, r80 C FIXME not urgent
- fdtox a16, a16
- fdtox a32, a32
- fdtox a48, a48
- std a00, [%sp+2223+0]
- std a16, [%sp+2223+8]
- std a32, [%sp+2223+16]
- std a48, [%sp+2223+24]
- add %i2, 8, %i2
-
- fdtox r64, a00
- fdtox r80, a16
- ldx [%sp+2223+0], i00
- ldx [%sp+2223+8], i16
- ldx [%sp+2223+16], i32
- ldx [%sp+2223+24], i48
- std a00, [%sp+2223+0]
- std a16, [%sp+2223+8]
- add %i2, 8, %i2
-
- mov i00, %g5 C i00+ now in g5
- ldx [%sp+2223+0], i00
- srlx i16, 48, %l4 C (i16 >> 48)
- mov i16, %g2
- ldx [%sp+2223+8], i16
- srlx i48, 16, %l5 C (i48 >> 16)
- mov i32, %g4 C i32+ now in g4
- sllx i48, 32, %l6 C (i48 << 32)
- srlx %g4, 32, %o3 C (i32 >> 32)
- add %l5, %l4, %o1 C hi64- in %o1
- std a00, [%sp+2223+0]
- sllx %g4, 16, %o2 C (i32 << 16)
- add %o3, %o1, %o1 C hi64 in %o1 1st ASSIGNMENT
- std a16, [%sp+2223+8]
- sllx %o1, 48, %o3 C (hi64 << 48)
- add %g2, %o2, %o2 C mi64- in %o2
- add %l6, %o2, %o2 C mi64- in %o2
- sub %o2, %o3, %o2 C mi64 in %o2 1st ASSIGNMENT
- add cy, %g5, %o4 C x = prev(i00) + cy
- b .L_out_1
- add %i2, 8, %i2
-
-.L_two_or_more:
- ld [%i5+%i2], %f3 C read low 32 bits of up[i]
- fmuld u32, v32, r64 C FIXME not urgent
- faddd p32, r32, a32
- ld [%i1+%i2], %f5 C read high 32 bits of up[i]
- fdtox a00, a00
- faddd p48, r48, a48
- fmuld u32, v48, r80 C FIXME not urgent
- fdtox a16, a16
- fdtox a32, a32
- fxtod %f2, u00
- fxtod %f4, u32
- fdtox a48, a48
- std a00, [%sp+2223+0]
- fmuld u00, v00, p00
- std a16, [%sp+2223+8]
- fmuld u00, v16, p16
- std a32, [%sp+2223+16]
- fmuld u00, v32, p32
- std a48, [%sp+2223+24]
- faddd p00, r64, a00
- fmuld u32, v00, r32
- faddd p16, r80, a16
- fmuld u00, v48, p48
- addcc %i2, 8, %i2
- bnz,pt %icc, .L_three_or_more
- fmuld u32, v16, r48
-
-.L_two:
- fmuld u32, v32, r64 C FIXME not urgent
- faddd p32, r32, a32
- fdtox a00, a00
- faddd p48, r48, a48
- fmuld u32, v48, r80 C FIXME not urgent
- fdtox a16, a16
- ldx [%sp+2223+0], i00
- fdtox a32, a32
- ldx [%sp+2223+8], i16
- ldx [%sp+2223+16], i32
- ldx [%sp+2223+24], i48
- fdtox a48, a48
- std a00, [%sp+2223+0]
- std a16, [%sp+2223+8]
- std a32, [%sp+2223+16]
- std a48, [%sp+2223+24]
- add %i2, 8, %i2
-
- fdtox r64, a00
- mov i00, %g5 C i00+ now in g5
- fdtox r80, a16
- ldx [%sp+2223+0], i00
- srlx i16, 48, %l4 C (i16 >> 48)
- mov i16, %g2
- ldx [%sp+2223+8], i16
- srlx i48, 16, %l5 C (i48 >> 16)
- mov i32, %g4 C i32+ now in g4
- ldx [%sp+2223+16], i32
- sllx i48, 32, %l6 C (i48 << 32)
- ldx [%sp+2223+24], i48
- srlx %g4, 32, %o3 C (i32 >> 32)
- add %l5, %l4, %o1 C hi64- in %o1
- std a00, [%sp+2223+0]
- sllx %g4, 16, %o2 C (i32 << 16)
- add %o3, %o1, %o1 C hi64 in %o1 1st ASSIGNMENT
- std a16, [%sp+2223+8]
- sllx %o1, 48, %o3 C (hi64 << 48)
- add %g2, %o2, %o2 C mi64- in %o2
- add %l6, %o2, %o2 C mi64- in %o2
- sub %o2, %o3, %o2 C mi64 in %o2 1st ASSIGNMENT
- add cy, %g5, %o4 C x = prev(i00) + cy
- b .L_out_2
- add %i2, 8, %i2
-
-.L_three_or_more:
- ld [%i5+%i2], %f3 C read low 32 bits of up[i]
- fmuld u32, v32, r64 C FIXME not urgent
- faddd p32, r32, a32
- ld [%i1+%i2], %f5 C read high 32 bits of up[i]
- fdtox a00, a00
- faddd p48, r48, a48
- fmuld u32, v48, r80 C FIXME not urgent
- fdtox a16, a16
- ldx [%sp+2223+0], i00
- fdtox a32, a32
- ldx [%sp+2223+8], i16
- fxtod %f2, u00
- ldx [%sp+2223+16], i32
- fxtod %f4, u32
- ldx [%sp+2223+24], i48
- fdtox a48, a48
- std a00, [%sp+2223+0]
- fmuld u00, v00, p00
- std a16, [%sp+2223+8]
- fmuld u00, v16, p16
- std a32, [%sp+2223+16]
- fmuld u00, v32, p32
- std a48, [%sp+2223+24]
- faddd p00, r64, a00
- fmuld u32, v00, r32
- faddd p16, r80, a16
- fmuld u00, v48, p48
- addcc %i2, 8, %i2
- bnz,pt %icc, .L_four_or_more
- fmuld u32, v16, r48
-
-.L_three:
- fmuld u32, v32, r64 C FIXME not urgent
- faddd p32, r32, a32
- fdtox a00, a00
- faddd p48, r48, a48
- mov i00, %g5 C i00+ now in g5
- fmuld u32, v48, r80 C FIXME not urgent
- fdtox a16, a16
- ldx [%sp+2223+0], i00
- fdtox a32, a32
- srlx i16, 48, %l4 C (i16 >> 48)
- mov i16, %g2
- ldx [%sp+2223+8], i16
- srlx i48, 16, %l5 C (i48 >> 16)
- mov i32, %g4 C i32+ now in g4
- ldx [%sp+2223+16], i32
- sllx i48, 32, %l6 C (i48 << 32)
- ldx [%sp+2223+24], i48
- fdtox a48, a48
- srlx %g4, 32, %o3 C (i32 >> 32)
- add %l5, %l4, %o1 C hi64- in %o1
- std a00, [%sp+2223+0]
- sllx %g4, 16, %o2 C (i32 << 16)
- add %o3, %o1, %o1 C hi64 in %o1 1st ASSIGNMENT
- std a16, [%sp+2223+8]
- sllx %o1, 48, %o3 C (hi64 << 48)
- add %g2, %o2, %o2 C mi64- in %o2
- std a32, [%sp+2223+16]
- add %l6, %o2, %o2 C mi64- in %o2
- std a48, [%sp+2223+24]
- sub %o2, %o3, %o2 C mi64 in %o2 1st ASSIGNMENT
- add cy, %g5, %o4 C x = prev(i00) + cy
- b .L_out_3
- add %i2, 8, %i2
-
-.L_four_or_more:
- ld [%i5+%i2], %f3 C read low 32 bits of up[i]
- fmuld u32, v32, r64 C FIXME not urgent
- faddd p32, r32, a32
- ld [%i1+%i2], %f5 C read high 32 bits of up[i]
- fdtox a00, a00
- faddd p48, r48, a48
- mov i00, %g5 C i00+ now in g5
- fmuld u32, v48, r80 C FIXME not urgent
- fdtox a16, a16
- ldx [%sp+2223+0], i00
- fdtox a32, a32
- srlx i16, 48, %l4 C (i16 >> 48)
- mov i16, %g2
- ldx [%sp+2223+8], i16
- fxtod %f2, u00
- srlx i48, 16, %l5 C (i48 >> 16)
- mov i32, %g4 C i32+ now in g4
- ldx [%sp+2223+16], i32
- fxtod %f4, u32
- sllx i48, 32, %l6 C (i48 << 32)
- ldx [%sp+2223+24], i48
- fdtox a48, a48
- srlx %g4, 32, %o3 C (i32 >> 32)
- add %l5, %l4, %o1 C hi64- in %o1
- std a00, [%sp+2223+0]
- fmuld u00, v00, p00
- sllx %g4, 16, %o2 C (i32 << 16)
- add %o3, %o1, %o1 C hi64 in %o1 1st ASSIGNMENT
- std a16, [%sp+2223+8]
- fmuld u00, v16, p16
- sllx %o1, 48, %o3 C (hi64 << 48)
- add %g2, %o2, %o2 C mi64- in %o2
- std a32, [%sp+2223+16]
- fmuld u00, v32, p32
- add %l6, %o2, %o2 C mi64- in %o2
- std a48, [%sp+2223+24]
- faddd p00, r64, a00
- fmuld u32, v00, r32
- sub %o2, %o3, %o2 C mi64 in %o2 1st ASSIGNMENT
- faddd p16, r80, a16
- fmuld u00, v48, p48
- add cy, %g5, %o4 C x = prev(i00) + cy
- addcc %i2, 8, %i2
- bnz,pt %icc, .Loop
- fmuld u32, v16, r48
-
-.L_four:
- b,a .L_out_4
-
-C BEGIN MAIN LOOP
- .align 16
-.Loop:
-C 00
- srlx %o4, 16, %o5 C (x >> 16)
- ld [%i5+%i2], %f3 C read low 32 bits of up[i]
- fmuld u32, v32, r64 C FIXME not urgent
- faddd p32, r32, a32
-C 01
- add %o5, %o2, %o2 C mi64 in %o2 2nd ASSIGNMENT
- and %o4, xffff, %o5 C (x & 0xffff)
- ld [%i1+%i2], %f5 C read high 32 bits of up[i]
- fdtox a00, a00
-C 02
- faddd p48, r48, a48
-C 03
- srlx %o2, 48, %o7 C (mi64 >> 48)
- mov i00, %g5 C i00+ now in g5
- fmuld u32, v48, r80 C FIXME not urgent
- fdtox a16, a16
-C 04
- sllx %o2, 16, %i3 C (mi64 << 16)
- add %o7, %o1, cy C new cy
- ldx [%sp+2223+0], i00
- fdtox a32, a32
-C 05
- srlx i16, 48, %l4 C (i16 >> 48)
- mov i16, %g2
- ldx [%sp+2223+8], i16
- fxtod %f2, u00
-C 06
- srlx i48, 16, %l5 C (i48 >> 16)
- mov i32, %g4 C i32+ now in g4
- ldx [%sp+2223+16], i32
- fxtod %f4, u32
-C 07
- sllx i48, 32, %l6 C (i48 << 32)
- or %i3, %o5, %o5
- ldx [%sp+2223+24], i48
- fdtox a48, a48
-C 08
- srlx %g4, 32, %o3 C (i32 >> 32)
- add %l5, %l4, %o1 C hi64- in %o1
- std a00, [%sp+2223+0]
- fmuld u00, v00, p00
-C 09
- sllx %g4, 16, %o2 C (i32 << 16)
- add %o3, %o1, %o1 C hi64 in %o1 1st ASSIGNMENT
- std a16, [%sp+2223+8]
- fmuld u00, v16, p16
-C 10
- sllx %o1, 48, %o3 C (hi64 << 48)
- add %g2, %o2, %o2 C mi64- in %o2
- std a32, [%sp+2223+16]
- fmuld u00, v32, p32
-C 11
- add %l6, %o2, %o2 C mi64- in %o2
- std a48, [%sp+2223+24]
- faddd p00, r64, a00
- fmuld u32, v00, r32
-C 12
- sub %o2, %o3, %o2 C mi64 in %o2 1st ASSIGNMENT
- stx %o5, [%i4+%i2]
- faddd p16, r80, a16
- fmuld u00, v48, p48
-C 13
- add cy, %g5, %o4 C x = prev(i00) + cy
- addcc %i2, 8, %i2
- bnz,pt %icc, .Loop
- fmuld u32, v16, r48
-C END MAIN LOOP
-
-.L_out_4:
- srlx %o4, 16, %o5 C (x >> 16)
- fmuld u32, v32, r64 C FIXME not urgent
- faddd p32, r32, a32
- add %o5, %o2, %o2 C mi64 in %o2 2nd ASSIGNMENT
- and %o4, xffff, %o5 C (x & 0xffff)
- fdtox a00, a00
- faddd p48, r48, a48
- srlx %o2, 48, %o7 C (mi64 >> 48)
- mov i00, %g5 C i00+ now in g5
- fmuld u32, v48, r80 C FIXME not urgent
- fdtox a16, a16
- sllx %o2, 16, %i3 C (mi64 << 16)
- add %o7, %o1, cy C new cy
- ldx [%sp+2223+0], i00
- fdtox a32, a32
- srlx i16, 48, %l4 C (i16 >> 48)
- mov i16, %g2
- ldx [%sp+2223+8], i16
- srlx i48, 16, %l5 C (i48 >> 16)
- mov i32, %g4 C i32+ now in g4
- ldx [%sp+2223+16], i32
- sllx i48, 32, %l6 C (i48 << 32)
- or %i3, %o5, %o5
- ldx [%sp+2223+24], i48
- fdtox a48, a48
- srlx %g4, 32, %o3 C (i32 >> 32)
- add %l5, %l4, %o1 C hi64- in %o1
- std a00, [%sp+2223+0]
- sllx %g4, 16, %o2 C (i32 << 16)
- add %o3, %o1, %o1 C hi64 in %o1 1st ASSIGNMENT
- std a16, [%sp+2223+8]
- sllx %o1, 48, %o3 C (hi64 << 48)
- add %g2, %o2, %o2 C mi64- in %o2
- std a32, [%sp+2223+16]
- add %l6, %o2, %o2 C mi64- in %o2
- std a48, [%sp+2223+24]
- sub %o2, %o3, %o2 C mi64 in %o2 1st ASSIGNMENT
- stx %o5, [%i4+%i2]
- add cy, %g5, %o4 C x = prev(i00) + cy
- add %i2, 8, %i2
-.L_out_3:
- srlx %o4, 16, %o5 C (x >> 16)
- add %o5, %o2, %o2 C mi64 in %o2 2nd ASSIGNMENT
- and %o4, xffff, %o5 C (x & 0xffff)
- fdtox r64, a00
- srlx %o2, 48, %o7 C (mi64 >> 48)
- mov i00, %g5 C i00+ now in g5
- fdtox r80, a16
- sllx %o2, 16, %i3 C (mi64 << 16)
- add %o7, %o1, cy C new cy
- ldx [%sp+2223+0], i00
- srlx i16, 48, %l4 C (i16 >> 48)
- mov i16, %g2
- ldx [%sp+2223+8], i16
- srlx i48, 16, %l5 C (i48 >> 16)
- mov i32, %g4 C i32+ now in g4
- ldx [%sp+2223+16], i32
- sllx i48, 32, %l6 C (i48 << 32)
- or %i3, %o5, %o5
- ldx [%sp+2223+24], i48
- srlx %g4, 32, %o3 C (i32 >> 32)
- add %l5, %l4, %o1 C hi64- in %o1
- std a00, [%sp+2223+0]
- sllx %g4, 16, %o2 C (i32 << 16)
- add %o3, %o1, %o1 C hi64 in %o1 1st ASSIGNMENT
- std a16, [%sp+2223+8]
- sllx %o1, 48, %o3 C (hi64 << 48)
- add %g2, %o2, %o2 C mi64- in %o2
- add %l6, %o2, %o2 C mi64- in %o2
- sub %o2, %o3, %o2 C mi64 in %o2 1st ASSIGNMENT
- stx %o5, [%i4+%i2]
- add cy, %g5, %o4 C x = prev(i00) + cy
- add %i2, 8, %i2
-.L_out_2:
- srlx %o4, 16, %o5 C (x >> 16)
- add %o5, %o2, %o2 C mi64 in %o2 2nd ASSIGNMENT
- and %o4, xffff, %o5 C (x & 0xffff)
- srlx %o2, 48, %o7 C (mi64 >> 48)
- mov i00, %g5 C i00+ now in g5
- sllx %o2, 16, %i3 C (mi64 << 16)
- add %o7, %o1, cy C new cy
- ldx [%sp+2223+0], i00
- srlx i16, 48, %l4 C (i16 >> 48)
- mov i16, %g2
- ldx [%sp+2223+8], i16
- srlx i48, 16, %l5 C (i48 >> 16)
- mov i32, %g4 C i32+ now in g4
- sllx i48, 32, %l6 C (i48 << 32)
- or %i3, %o5, %o5
- srlx %g4, 32, %o3 C (i32 >> 32)
- add %l5, %l4, %o1 C hi64- in %o1
- sllx %g4, 16, %o2 C (i32 << 16)
- add %o3, %o1, %o1 C hi64 in %o1 1st ASSIGNMENT
- sllx %o1, 48, %o3 C (hi64 << 48)
- add %g2, %o2, %o2 C mi64- in %o2
- add %l6, %o2, %o2 C mi64- in %o2
- sub %o2, %o3, %o2 C mi64 in %o2 1st ASSIGNMENT
- stx %o5, [%i4+%i2]
- add cy, %g5, %o4 C x = prev(i00) + cy
- add %i2, 8, %i2
-.L_out_1:
- srlx %o4, 16, %o5 C (x >> 16)
- add %o5, %o2, %o2 C mi64 in %o2 2nd ASSIGNMENT
- and %o4, xffff, %o5 C (x & 0xffff)
- srlx %o2, 48, %o7 C (mi64 >> 48)
- sllx %o2, 16, %i3 C (mi64 << 16)
- add %o7, %o1, cy C new cy
- or %i3, %o5, %o5
- stx %o5, [%i4+%i2]
-
- sllx i00, 0, %g2
- add %g2, cy, cy
- sllx i16, 16, %g3
- add %g3, cy, cy
-
- return %i7+8
- mov cy, %o0
-EPILOGUE(mpn_mul_1)
+++ /dev/null
-dnl SPARC v9 mpn_rshift
-
-dnl Copyright 1996, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of the GNU Lesser General Public License as published
-dnl by the Free Software Foundation; either version 3 of the License, or (at
-dnl your option) any later version.
-
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-dnl License for more details.
-
-dnl You should have received a copy of the GNU Lesser General Public License
-dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
-
-
-include(`../config.m4')
-
-C cycles/limb
-C UltraSPARC 1&2: 2
-C UltraSPARC 3: 3.25
-
-C INPUT PARAMETERS
-define(`rp',`%i0')
-define(`up',`%i1')
-define(`n',`%i2')
-define(`cnt',`%i3')
-
-define(`u0',`%l0')
-define(`u1',`%l2')
-define(`u2',`%l4')
-define(`u3',`%l6')
-
-define(`tnc',`%i4')
-
-define(`fanop',`fitod %f0,%f2') dnl A quasi nop running in the FA pipe
-define(`fmnop',`fmuld %f0,%f0,%f4') dnl A quasi nop running in the FM pipe
-
-ASM_START()
- REGISTER(%g2,#scratch)
- REGISTER(%g3,#scratch)
-PROLOGUE(mpn_rshift)
- save %sp,-160,%sp
-
- sub %g0,cnt,tnc C negate shift count
- ldx [up],u3 C load first limb
- subcc n,5,n
- sllx u3,tnc,%i5 C compute function result
- srlx u3,cnt,%g3
- bl,pn %icc,.Lend1234
- fanop
-
- subcc n,4,n
- ldx [up+8],u0
- ldx [up+16],u1
- add up,32,up
- ldx [up-8],u2
- ldx [up+0],u3
- sllx u0,tnc,%g2
-
- bl,pn %icc,.Lend5678
- fanop
-
- b,a .Loop
- .align 16
-.Loop:
- srlx u0,cnt,%g1
- or %g3,%g2,%g3
- ldx [up+8],u0
- fanop
-C --
- sllx u1,tnc,%g2
- subcc n,4,n
- stx %g3,[rp+0]
- fanop
-C --
- srlx u1,cnt,%g3
- or %g1,%g2,%g1
- ldx [up+16],u1
- fanop
-C --
- sllx u2,tnc,%g2
- stx %g1,[rp+8]
- add up,32,up
- fanop
-C --
- srlx u2,cnt,%g1
- or %g3,%g2,%g3
- ldx [up-8],u2
- fanop
-C --
- sllx u3,tnc,%g2
- stx %g3,[rp+16]
- add rp,32,rp
- fanop
-C --
- srlx u3,cnt,%g3
- or %g1,%g2,%g1
- ldx [up+0],u3
- fanop
-C --
- sllx u0,tnc,%g2
- stx %g1,[rp-8]
- bge,pt %icc,.Loop
- fanop
-C --
-.Lend5678:
- srlx u0,cnt,%g1
- or %g3,%g2,%g3
- sllx u1,tnc,%g2
- stx %g3,[rp+0]
- srlx u1,cnt,%g3
- or %g1,%g2,%g1
- sllx u2,tnc,%g2
- stx %g1,[rp+8]
- srlx u2,cnt,%g1
- or %g3,%g2,%g3
- sllx u3,tnc,%g2
- stx %g3,[rp+16]
- add rp,32,rp
- srlx u3,cnt,%g3 C carry...
- or %g1,%g2,%g1
- stx %g1,[rp-8]
-
-.Lend1234:
- addcc n,4,n
- bz,pn %icc,.Lret
- fanop
-.Loop0:
- add rp,8,rp
- subcc n,1,n
- ldx [up+8],u3
- add up,8,up
- sllx u3,tnc,%g2
- or %g3,%g2,%g3
- stx %g3,[rp-8]
- srlx u3,cnt,%g3
- bnz,pt %icc,.Loop0
- fanop
-.Lret:
- stx %g3,[rp+0]
- mov %i5,%i0
- ret
- restore
-EPILOGUE(mpn_rshift)
#endif
+/* Multiply u and v, where v < 2^32. */
+#define umul_ppmm_s(w1, w0, u, v) \
+ do { \
+ UWtype __x0, __x2; \
+ UWtype __ul, __vl, __uh; \
+ UWtype __u = (u), __v = (v); \
+ \
+ __ul = __ll_lowpart (__u); \
+ __uh = __ll_highpart (__u); \
+ __vl = __ll_lowpart (__v); \
+ \
+ __x0 = (UWtype) __ul * __vl; \
+ __x2 = (UWtype) __uh * __vl; \
+ \
+ (w1) = (__x2 + (__x0 >> W_TYPE_SIZE/2)) >> W_TYPE_SIZE/2; \
+ (w0) = (__x2 << W_TYPE_SIZE/2) + __x0; \
+ } while (0)
+
/* Count the leading zeros on a limb, but assuming it fits in 32 bits.
The count returned will be in the range 32 to 63.
This is the 32-bit generic C count_leading_zeros from longlong.h. */
+++ /dev/null
-dnl SPARC v9 64-bit mpn_sqr_diagonal.
-
-dnl Copyright 2001, 2002 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of the GNU Lesser General Public License as published
-dnl by the Free Software Foundation; either version 3 of the License, or (at
-dnl your option) any later version.
-
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-dnl License for more details.
-
-dnl You should have received a copy of the GNU Lesser General Public License
-dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/limb
-C UltraSPARC 1&2: 22
-C UltraSPARC 3: 36
-
-C This was generated by the Sun C compiler. It runs at 22 cycles/limb on the
-C UltraSPARC-1/2, three cycles slower than theoretically possible for optimal
-C code using the same algorithm. For 1-3 limbs, a special loop was generated,
-C which causes performance problems in particular for 2 and 3 limbs.
-C Ultimately, this should be replaced by hand-written code in the same software
-C pipeline style as e.g., addmul_1.asm.
-
-ASM_START()
- REGISTER(%g2,#scratch)
- REGISTER(%g3,#scratch)
-PROLOGUE(mpn_sqr_diagonal)
- save %sp, -240, %sp
-
- sethi %hi(0x1ffc00), %o0
- sethi %hi(0x3ffc00), %o1
- add %o0, 1023, %o7
- cmp %i2, 4
- add %o1, 1023, %o4
- or %g0, %i1, %g1
- or %g0, %i0, %o0
- bl,pn %xcc, .Lsmall
- or %g0, 0, %g2
-
- ldx [%i1], %o1
- add %i1, 24, %g1
- or %g0, 3, %g2
- srlx %o1, 42, %g3
- stx %g3, [%sp+2279]
- and %o1, %o7, %o2
- stx %o2, [%sp+2263]
- srlx %o1, 21, %o1
- ldd [%sp+2279], %f0
- and %o1, %o7, %o1
- stx %o1, [%sp+2271]
- ldx [%i1+8], %o2
- fxtod %f0, %f12
- srlx %o2, 21, %o1
- and %o2, %o7, %g3
- ldd [%sp+2263], %f2
- fmuld %f12, %f12, %f10
- srlx %o2, 42, %o2
- ldd [%sp+2271], %f0
- and %o1, %o7, %o1
- fxtod %f2, %f8
- stx %o2, [%sp+2279]
- stx %o1, [%sp+2271]
- fxtod %f0, %f0
- stx %g3, [%sp+2263]
- fdtox %f10, %f14
- fmuld %f12, %f8, %f6
- ldx [%i1+16], %o2
- std %f14, [%sp+2255]
- fmuld %f0, %f0, %f2
- fmuld %f8, %f8, %f10
- srlx %o2, 42, %o1
- faddd %f6, %f6, %f6
- fmuld %f12, %f0, %f12
- fmuld %f0, %f8, %f8
- ldd [%sp+2279], %f0
- ldd [%sp+2263], %f4
- fdtox %f10, %f10
- std %f10, [%sp+2239]
- faddd %f2, %f6, %f6
- ldd [%sp+2271], %f2
- fdtox %f12, %f12
- std %f12, [%sp+2247]
- fdtox %f8, %f8
- std %f8, [%sp+2231]
- fdtox %f6, %f6
- std %f6, [%sp+2223]
-
-.Loop: srlx %o2, 21, %g3
- stx %o1, [%sp+2279]
- add %g2, 1, %g2
- and %g3, %o7, %o1
- ldx [%sp+2255], %g4
- cmp %g2, %i2
- stx %o1, [%sp+2271]
- add %g1, 8, %g1
- add %o0, 16, %o0
- ldx [%sp+2239], %o1
- fxtod %f0, %f10
- fxtod %f4, %f14
- ldx [%sp+2231], %i0
- ldx [%sp+2223], %g5
- ldx [%sp+2247], %g3
- and %o2, %o7, %o2
- fxtod %f2, %f8
- fmuld %f10, %f10, %f0
- stx %o2, [%sp+2263]
- fmuld %f10, %f14, %f6
- ldx [%g1-8], %o2
- fmuld %f10, %f8, %f12
- fdtox %f0, %f2
- ldd [%sp+2279], %f0
- fmuld %f8, %f8, %f4
- faddd %f6, %f6, %f6
- fmuld %f14, %f14, %f10
- std %f2, [%sp+2255]
- sllx %g4, 20, %g4
- ldd [%sp+2271], %f2
- fmuld %f8, %f14, %f8
- sllx %i0, 22, %i1
- fdtox %f12, %f12
- std %f12, [%sp+2247]
- sllx %g5, 42, %i0
- add %o1, %i1, %o1
- faddd %f4, %f6, %f6
- ldd [%sp+2263], %f4
- add %o1, %i0, %o1
- add %g3, %g4, %g3
- fdtox %f10, %f10
- std %f10, [%sp+2239]
- srlx %o1, 42, %g4
- and %g5, %o4, %i0
- fdtox %f8, %f8
- std %f8, [%sp+2231]
- srlx %g5, 22, %g5
- sub %g4, %i0, %g4
- fdtox %f6, %f6
- std %f6, [%sp+2223]
- srlx %g4, 63, %g4
- add %g3, %g5, %g3
- add %g3, %g4, %g3
- stx %o1, [%o0-16]
- srlx %o2, 42, %o1
- bl,pt %xcc, .Loop
- stx %g3, [%o0-8]
-
- stx %o1, [%sp+2279]
- srlx %o2, 21, %o1
- fxtod %f0, %f16
- ldx [%sp+2223], %g3
- fxtod %f4, %f6
- and %o2, %o7, %o3
- stx %o3, [%sp+2263]
- fxtod %f2, %f4
- and %o1, %o7, %o1
- ldx [%sp+2231], %o2
- sllx %g3, 42, %g4
- fmuld %f16, %f16, %f14
- stx %o1, [%sp+2271]
- fmuld %f16, %f6, %f8
- add %o0, 48, %o0
- ldx [%sp+2239], %o1
- sllx %o2, 22, %o2
- fmuld %f4, %f4, %f10
- ldx [%sp+2255], %o3
- fdtox %f14, %f14
- fmuld %f4, %f6, %f2
- std %f14, [%sp+2255]
- faddd %f8, %f8, %f12
- add %o1, %o2, %o2
- fmuld %f16, %f4, %f4
- ldd [%sp+2279], %f0
- sllx %o3, 20, %g5
- add %o2, %g4, %o2
- fmuld %f6, %f6, %f6
- srlx %o2, 42, %o3
- and %g3, %o4, %g4
- srlx %g3, 22, %g3
- faddd %f10, %f12, %f16
- ldd [%sp+2271], %f12
- ldd [%sp+2263], %f8
- fxtod %f0, %f0
- sub %o3, %g4, %o3
- ldx [%sp+2247], %o1
- srlx %o3, 63, %o3
- fdtox %f2, %f10
- fxtod %f8, %f8
- std %f10, [%sp+2231]
- fdtox %f6, %f6
- std %f6, [%sp+2239]
- add %o1, %g5, %o1
- fmuld %f0, %f0, %f2
- fdtox %f16, %f16
- std %f16, [%sp+2223]
- add %o1, %g3, %o1
- fdtox %f4, %f4
- std %f4, [%sp+2247]
- fmuld %f0, %f8, %f10
- fxtod %f12, %f12
- add %o1, %o3, %o1
- stx %o2, [%o0-48]
- fmuld %f8, %f8, %f6
- stx %o1, [%o0-40]
- fdtox %f2, %f2
- ldx [%sp+2231], %o2
- faddd %f10, %f10, %f10
- ldx [%sp+2223], %g3
- fmuld %f12, %f12, %f4
- fdtox %f6, %f6
- ldx [%sp+2239], %o1
- sllx %o2, 22, %o2
- fmuld %f12, %f8, %f8
- sllx %g3, 42, %g5
- ldx [%sp+2255], %o3
- fmuld %f0, %f12, %f0
- add %o1, %o2, %o2
- faddd %f4, %f10, %f4
- ldx [%sp+2247], %o1
- add %o2, %g5, %o2
- and %g3, %o4, %g4
- fdtox %f8, %f8
- sllx %o3, 20, %g5
- std %f8, [%sp+2231]
- fdtox %f0, %f0
- srlx %o2, 42, %o3
- add %o1, %g5, %o1
- fdtox %f4, %f4
- srlx %g3, 22, %g3
- sub %o3, %g4, %o3
- std %f6, [%sp+2239]
- std %f4, [%sp+2223]
- srlx %o3, 63, %o3
- add %o1, %g3, %o1
- std %f2, [%sp+2255]
- add %o1, %o3, %o1
- std %f0, [%sp+2247]
- stx %o2, [%o0-32]
- stx %o1, [%o0-24]
- ldx [%sp+2231], %o2
- ldx [%sp+2223], %o3
- ldx [%sp+2239], %o1
- sllx %o2, 22, %o2
- sllx %o3, 42, %g5
- ldx [%sp+2255], %g4
- and %o3, %o4, %g3
- add %o1, %o2, %o2
- ldx [%sp+2247], %o1
- add %o2, %g5, %o2
- stx %o2, [%o0-16]
- sllx %g4, 20, %g4
- srlx %o2, 42, %o2
- add %o1, %g4, %o1
- srlx %o3, 22, %o3
- sub %o2, %g3, %o2
- srlx %o2, 63, %o2
- add %o1, %o3, %o1
- add %o1, %o2, %o1
- stx %o1, [%o0-8]
- ret
- restore %g0, %g0, %g0
-.Lsmall:
- ldx [%g1], %o2
-.Loop0:
- and %o2, %o7, %o1
- stx %o1, [%sp+2263]
- add %g2, 1, %g2
- srlx %o2, 21, %o1
- add %g1, 8, %g1
- srlx %o2, 42, %o2
- stx %o2, [%sp+2279]
- and %o1, %o7, %o1
- ldd [%sp+2263], %f0
- cmp %g2, %i2
- stx %o1, [%sp+2271]
- fxtod %f0, %f6
- ldd [%sp+2279], %f0
- ldd [%sp+2271], %f4
- fxtod %f0, %f2
- fmuld %f6, %f6, %f0
- fxtod %f4, %f10
- fmuld %f2, %f6, %f4
- fdtox %f0, %f0
- std %f0, [%sp+2239]
- fmuld %f10, %f6, %f8
- fmuld %f10, %f10, %f0
- faddd %f4, %f4, %f6
- fmuld %f2, %f2, %f4
- fdtox %f8, %f8
- std %f8, [%sp+2231]
- fmuld %f2, %f10, %f2
- faddd %f0, %f6, %f0
- fdtox %f4, %f4
- std %f4, [%sp+2255]
- fdtox %f2, %f2
- std %f2, [%sp+2247]
- fdtox %f0, %f0
- std %f0, [%sp+2223]
- ldx [%sp+2239], %o1
- ldx [%sp+2255], %g4
- ldx [%sp+2231], %o2
- sllx %g4, 20, %g4
- ldx [%sp+2223], %o3
- sllx %o2, 22, %o2
- sllx %o3, 42, %g5
- add %o1, %o2, %o2
- ldx [%sp+2247], %o1
- add %o2, %g5, %o2
- stx %o2, [%o0]
- and %o3, %o4, %g3
- srlx %o2, 42, %o2
- add %o1, %g4, %o1
- srlx %o3, 22, %o3
- sub %o2, %g3, %o2
- srlx %o2, 63, %o2
- add %o1, %o3, %o1
- add %o1, %o2, %o1
- stx %o1, [%o0+8]
- add %o0, 16, %o0
- bl,a,pt %xcc, .Loop0
- ldx [%g1], %o2
- ret
- restore %g0, %g0, %g0
-EPILOGUE(mpn_sqr_diagonal)
+++ /dev/null
-dnl SPARC v9 mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
-dnl store difference in a third limb vector.
-
-dnl Copyright 2001, 2002, 2003 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of the GNU Lesser General Public License as published
-dnl by the Free Software Foundation; either version 3 of the License, or (at
-dnl your option) any later version.
-
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-dnl License for more details.
-
-dnl You should have received a copy of the GNU Lesser General Public License
-dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/limb
-C UltraSPARC 1&2: 4
-C UltraSPARC 3: 4.5
-
-C Compute carry-out from the most significant bits of u,v, and r, where
-C r=u-v-carry_in, using logic operations.
-
-C This code runs at 4 cycles/limb on UltraSPARC 1 and 2. It has a 4 insn
-C recurrency, and the UltraSPARC 1 and 2 the IE units are 100% saturated.
-C Therefore, it seems futile to try to optimize this any further...
-
-C INPUT PARAMETERS
-define(`rp',`%i0')
-define(`up',`%i1')
-define(`vp',`%i2')
-define(`n',`%i3')
-
-define(`u0',`%l0')
-define(`u1',`%l2')
-define(`u2',`%l4')
-define(`u3',`%l6')
-define(`v0',`%l1')
-define(`v1',`%l3')
-define(`v2',`%l5')
-define(`v3',`%l7')
-
-define(`cy',`%i4')
-
-define(`fanop',`fitod %f0,%f2') dnl A quasi nop running in the FA pipe
-define(`fmnop',`fmuld %f0,%f0,%f4') dnl A quasi nop running in the FM pipe
-
-ASM_START()
- REGISTER(%g2,#scratch)
- REGISTER(%g3,#scratch)
-PROLOGUE(mpn_sub_n)
- save %sp,-160,%sp
-
- fitod %f0,%f0 C make sure f0 contains small, quiet number
- subcc n,4,%g0
- bl,pn %icc,.Loop0
- mov 0,cy
-
- ldx [up+0],u0
- ldx [vp+0],v0
- add up,32,up
- ldx [up-24],u1
- ldx [vp+8],v1
- add vp,32,vp
- ldx [up-16],u2
- ldx [vp-16],v2
- ldx [up-8],u3
- ldx [vp-8],v3
- subcc n,8,n
- sub u0,v0,%g1 C main sub
- sub %g1,cy,%g4 C carry sub
- orn u0,v0,%g2
- bl,pn %icc,.Lend4567
- fanop
- b,a .Loop
-
- .align 16
-C START MAIN LOOP
-.Loop: orn %g4,%g2,%g2
- andn u0,v0,%g3
- ldx [up+0],u0
- fanop
-C --
- andn %g2,%g3,%g2
- ldx [vp+0],v0
- add up,32,up
- fanop
-C --
- srlx %g2,63,cy
- sub u1,v1,%g1
- stx %g4,[rp+0]
- fanop
-C --
- sub %g1,cy,%g4
- orn u1,v1,%g2
- fmnop
- fanop
-C --
- orn %g4,%g2,%g2
- andn u1,v1,%g3
- ldx [up-24],u1
- fanop
-C --
- andn %g2,%g3,%g2
- ldx [vp+8],v1
- add vp,32,vp
- fanop
-C --
- srlx %g2,63,cy
- sub u2,v2,%g1
- stx %g4,[rp+8]
- fanop
-C --
- sub %g1,cy,%g4
- orn u2,v2,%g2
- fmnop
- fanop
-C --
- orn %g4,%g2,%g2
- andn u2,v2,%g3
- ldx [up-16],u2
- fanop
-C --
- andn %g2,%g3,%g2
- ldx [vp-16],v2
- add rp,32,rp
- fanop
-C --
- srlx %g2,63,cy
- sub u3,v3,%g1
- stx %g4,[rp-16]
- fanop
-C --
- sub %g1,cy,%g4
- orn u3,v3,%g2
- fmnop
- fanop
-C --
- orn %g4,%g2,%g2
- andn u3,v3,%g3
- ldx [up-8],u3
- fanop
-C --
- andn %g2,%g3,%g2
- subcc n,4,n
- ldx [vp-8],v3
- fanop
-C --
- srlx %g2,63,cy
- sub u0,v0,%g1
- stx %g4,[rp-8]
- fanop
-C --
- sub %g1,cy,%g4
- orn u0,v0,%g2
- bge,pt %icc,.Loop
- fanop
-C END MAIN LOOP
-.Lend4567:
- orn %g4,%g2,%g2
- andn u0,v0,%g3
- andn %g2,%g3,%g2
- srlx %g2,63,cy
- sub u1,v1,%g1
- stx %g4,[rp+0]
- sub %g1,cy,%g4
- orn u1,v1,%g2
- orn %g4,%g2,%g2
- andn u1,v1,%g3
- andn %g2,%g3,%g2
- srlx %g2,63,cy
- sub u2,v2,%g1
- stx %g4,[rp+8]
- sub %g1,cy,%g4
- orn u2,v2,%g2
- orn %g4,%g2,%g2
- andn u2,v2,%g3
- andn %g2,%g3,%g2
- add rp,32,rp
- srlx %g2,63,cy
- sub u3,v3,%g1
- stx %g4,[rp-16]
- sub %g1,cy,%g4
- orn u3,v3,%g2
- orn %g4,%g2,%g2
- andn u3,v3,%g3
- andn %g2,%g3,%g2
- srlx %g2,63,cy
- stx %g4,[rp-8]
-
- addcc n,4,n
- bz,pn %icc,.Lret
- fanop
-
-.Loop0: ldx [up],u0
- add up,8,up
- ldx [vp],v0
- add vp,8,vp
- add rp,8,rp
- subcc n,1,n
- sub u0,v0,%g1
- orn u0,v0,%g2
- sub %g1,cy,%g4
- andn u0,v0,%g3
- orn %g4,%g2,%g2
- stx %g4,[rp-8]
- andn %g2,%g3,%g2
- bnz,pt %icc,.Loop0
- srlx %g2,63,cy
-
-.Lret: mov cy,%i0
- ret
- restore
-EPILOGUE(mpn_sub_n)
+++ /dev/null
-dnl SPARC v9 64-bit mpn_submul_1 -- Multiply a limb vector with a limb and
-dnl subtract the result from a second limb vector.
-
-dnl Copyright 2001, 2002, 2003 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of the GNU Lesser General Public License as published
-dnl by the Free Software Foundation; either version 3 of the License, or (at
-dnl your option) any later version.
-
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-dnl License for more details.
-
-dnl You should have received a copy of the GNU Lesser General Public License
-dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/limb
-C UltraSPARC 1&2: 18
-C UltraSPARC 3: 23
-
-C INPUT PARAMETERS
-C rp i0
-C up i1
-C n i2
-C v i3
-
-ASM_START()
- REGISTER(%g2,#scratch)
-
-PROLOGUE(mpn_submul_1)
- save %sp,-176,%sp
-
- sllx %i2, 3, %g2
- or %g0, %i1, %o1
- add %g2, 15, %o0
- or %g0, %i2, %o2
- and %o0, -16, %o0
- sub %sp, %o0, %sp
- add %sp, 2223, %o0
- or %g0, %o0, %l0
- call mpn_mul_1
- or %g0, %i3, %o3
- or %g0, %o0, %l1 C preserve carry value from mpn_mul_1
- or %g0, %i0, %o0
- or %g0, %i0, %o1
- or %g0, %l0, %o2
- call mpn_sub_n
- or %g0, %i2, %o3
- ret
- restore %l1, %o0, %o0 C sum carry values
-EPILOGUE(mpn_submul_1)
--- /dev/null
+dnl SPARC v9 mpn_add_n -- Add two limb vectors of the same length > 0 and
+dnl store sum in a third limb vector.
+
+dnl Copyright 2001, 2002, 2003, 2011 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C UltraSPARC 1&2: 4
+C UltraSPARC 3: 4.5
+
+C Compute carry-out from the most significant bits of u, v, and r, where
+C r=u+v+carry_in, using logic operations: cy = msb((u&v) | ((u|v) & ~r)).
+
+C This code runs at 4 cycles/limb on UltraSPARC 1 and 2. It has a 4 insn
+C recurrency, and on UltraSPARC 1 and 2 the IE units are 100% saturated.
+C Therefore, it seems futile to try to optimize this any further...
+
+C INPUT PARAMETERS
+define(`rp',`%i0')
+define(`up',`%i1')
+define(`vp',`%i2')
+define(`n',`%i3')
+
+define(`u0',`%l0')
+define(`u1',`%l2')
+define(`u2',`%l4')
+define(`u3',`%l6')
+define(`v0',`%l1')
+define(`v1',`%l3')
+define(`v2',`%l5')
+define(`v3',`%l7')
+
+define(`cy',`%i4')
+
+define(`fanop',`fitod %f0,%f2') dnl A quasi nop running in the FA pipe
+define(`fmnop',`fmuld %f0,%f0,%f4') dnl A quasi nop running in the FM pipe
+
+ASM_START()
+ REGISTER(%g2,#scratch)
+ REGISTER(%g3,#scratch)
+PROLOGUE(mpn_add_nc)
+ save %sp,-160,%sp
+
+ fitod %f0,%f0 C make sure f0 contains small, quiet number
+ subcc n,4,%g0
+ bl,pn %xcc,.Loop0
+ nop
+ b,a L(com)
+EPILOGUE()
+
+PROLOGUE(mpn_add_n)
+ save %sp,-160,%sp
+
+ fitod %f0,%f0 C make sure f0 contains small, quiet number
+ subcc n,4,%g0
+ bl,pn %xcc,.Loop0
+ mov 0,cy
+L(com):
+ ldx [up+0],u0
+ ldx [vp+0],v0
+ add up,32,up
+ ldx [up-24],u1
+ ldx [vp+8],v1
+ add vp,32,vp
+ ldx [up-16],u2
+ ldx [vp-16],v2
+ ldx [up-8],u3
+ ldx [vp-8],v3
+ subcc n,8,n
+ add u0,v0,%g1 C main add
+ add %g1,cy,%g4 C carry add
+ or u0,v0,%g2
+ bl,pn %xcc,.Lend4567
+ fanop
+ b,a .Loop
+
+ .align 16
+C START MAIN LOOP
+.Loop: andn %g2,%g4,%g2
+ and u0,v0,%g3
+ ldx [up+0],u0
+ fanop
+C --
+ or %g3,%g2,%g2
+ ldx [vp+0],v0
+ add up,32,up
+ fanop
+C --
+ srlx %g2,63,cy
+ add u1,v1,%g1
+ stx %g4,[rp+0]
+ fanop
+C --
+ add %g1,cy,%g4
+ or u1,v1,%g2
+ fmnop
+ fanop
+C --
+ andn %g2,%g4,%g2
+ and u1,v1,%g3
+ ldx [up-24],u1
+ fanop
+C --
+ or %g3,%g2,%g2
+ ldx [vp+8],v1
+ add vp,32,vp
+ fanop
+C --
+ srlx %g2,63,cy
+ add u2,v2,%g1
+ stx %g4,[rp+8]
+ fanop
+C --
+ add %g1,cy,%g4
+ or u2,v2,%g2
+ fmnop
+ fanop
+C --
+ andn %g2,%g4,%g2
+ and u2,v2,%g3
+ ldx [up-16],u2
+ fanop
+C --
+ or %g3,%g2,%g2
+ ldx [vp-16],v2
+ add rp,32,rp
+ fanop
+C --
+ srlx %g2,63,cy
+ add u3,v3,%g1
+ stx %g4,[rp-16]
+ fanop
+C --
+ add %g1,cy,%g4
+ or u3,v3,%g2
+ fmnop
+ fanop
+C --
+ andn %g2,%g4,%g2
+ and u3,v3,%g3
+ ldx [up-8],u3
+ fanop
+C --
+ or %g3,%g2,%g2
+ subcc n,4,n
+ ldx [vp-8],v3
+ fanop
+C --
+ srlx %g2,63,cy
+ add u0,v0,%g1
+ stx %g4,[rp-8]
+ fanop
+C --
+ add %g1,cy,%g4
+ or u0,v0,%g2
+ bge,pt %xcc,.Loop
+ fanop
+C END MAIN LOOP
+.Lend4567:
+ andn %g2,%g4,%g2
+ and u0,v0,%g3
+ or %g3,%g2,%g2
+ srlx %g2,63,cy
+ add u1,v1,%g1
+ stx %g4,[rp+0]
+ add %g1,cy,%g4
+ or u1,v1,%g2
+ andn %g2,%g4,%g2
+ and u1,v1,%g3
+ or %g3,%g2,%g2
+ srlx %g2,63,cy
+ add u2,v2,%g1
+ stx %g4,[rp+8]
+ add %g1,cy,%g4
+ or u2,v2,%g2
+ andn %g2,%g4,%g2
+ and u2,v2,%g3
+ or %g3,%g2,%g2
+ add rp,32,rp
+ srlx %g2,63,cy
+ add u3,v3,%g1
+ stx %g4,[rp-16]
+ add %g1,cy,%g4
+ or u3,v3,%g2
+ andn %g2,%g4,%g2
+ and u3,v3,%g3
+ or %g3,%g2,%g2
+ srlx %g2,63,cy
+ stx %g4,[rp-8]
+
+ addcc n,4,n
+ bz,pn %xcc,.Lret
+ fanop
+
+.Loop0: ldx [up],u0
+ add up,8,up
+ ldx [vp],v0
+ add vp,8,vp
+ add rp,8,rp
+ subcc n,1,n
+ add u0,v0,%g1
+ or u0,v0,%g2
+ add %g1,cy,%g4
+ and u0,v0,%g3
+ andn %g2,%g4,%g2
+ stx %g4,[rp-8]
+ or %g3,%g2,%g2
+ bnz,pt %xcc,.Loop0
+ srlx %g2,63,cy
+
+.Lret: mov cy,%i0
+ ret
+ restore
+EPILOGUE()
--- /dev/null
+dnl SPARC v9 64-bit mpn_addmul_1 -- Multiply a limb vector with a limb and add
+dnl the result to a second limb vector.
+
+dnl Copyright 1998, 2000, 2001, 2002, 2003, 2004 Free Software Foundation,
+dnl Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C UltraSPARC 1&2: 14
+C UltraSPARC 3: 17.5
+
+C Algorithm: We use eight floating-point multiplies per limb product, with the
+C invariant v operand split into four 16-bit pieces, and the up operand split
+C into 32-bit pieces. We sum pairs of 48-bit partial products using
+C floating-point add, then convert the four 49-bit product-sums and transfer
+C them to the integer unit.
+
+C Possible optimizations:
+C 0. Rewrite to use algorithm of mpn_addmul_2.
+C 1. Align the stack area where we transfer the four 49-bit product-sums
+C to a 32-byte boundary. That would minimize the cache collision.
+C (UltraSPARC-1/2 use a direct-mapped cache.) (Perhaps even better would
+C be to align the area to map to the area immediately before up?)
+C 2. Sum the 4 49-bit quantities using 32-bit operations, as in the
+C develop mpn_addmul_2. This would save many integer instructions.
+C 3. Unrolling. Questionable if it is worth the code expansion, given that
+C it could only save 1 cycle/limb.
+C 4. Specialize for particular v values. If its upper 32 bits are zero, we
+C could save many operations, in the FPU (fmuld), but more so in the IEU
+C since we'll be summing 48-bit quantities, which might be simpler.
+C 5. Ideally, we should schedule the f2/f3 and f4/f5 RAW further apart, and
+C the i00,i16,i32,i48 RAW closer together. The latter distance should
+C not be greater than needed for L2 cache latency, and also not so great
+C that i16 needs to be copied.
+C 6. Avoid performing mem+fa+fm in the same cycle, at least not when we want
+C to get high IEU bandwidth. (12 of the 14 cycles will be free for 2 IEU
+C ops.)
+
+C Instruction classification (as per UltraSPARC-1/2 functional units):
+C 8 FM
+C 10 FA
+C 12 MEM
+C 10 ISHIFT + 14 IADDLOG
+C 1 BRANCH
+C 55 insns in total (plus one mov insn that should be optimized out)
+
+C The loop executes 56 instructions in 14 cycles on UltraSPARC-1/2, i.e., we
+C sustain the peak execution rate of 4 instructions/cycle.
+
+C INPUT PARAMETERS
+C rp i0
+C up i1
+C n i2
+C v i3
+
+ASM_START()
+ REGISTER(%g2,#scratch)
+ REGISTER(%g3,#scratch)
+
+define(`p00', `%f8') define(`p16',`%f10') define(`p32',`%f12') define(`p48',`%f14')
+define(`r32',`%f16') define(`r48',`%f18') define(`r64',`%f20') define(`r80',`%f22')
+define(`v00',`%f24') define(`v16',`%f26') define(`v32',`%f28') define(`v48',`%f30')
+define(`u00',`%f32') define(`u32', `%f34')
+define(`a00',`%f36') define(`a16',`%f38') define(`a32',`%f40') define(`a48',`%f42')
+define(`cy',`%g1')
+define(`rlimb',`%g3')
+define(`i00',`%l0') define(`i16',`%l1') define(`i32',`%l2') define(`i48',`%l3')
+define(`xffffffff',`%l7')
+define(`xffff',`%o0')
+
+PROLOGUE(mpn_addmul_1)
+
+C Initialization. (1) Split v operand into four 16-bit chunks and store them
+C as IEEE double in fp registers. (2) Clear upper 32 bits of fp register pairs
+C f2 and f4. (3) Store masks in registers aliased to `xffff' and `xffffffff'.
+
+ save %sp, -256, %sp
+ mov -1, %g4
+ srlx %g4, 48, xffff C store mask in register `xffff'
+ and %i3, xffff, %g2
+ stx %g2, [%sp+2223+0]
+ srlx %i3, 16, %g3
+ and %g3, xffff, %g3
+ stx %g3, [%sp+2223+8]
+ srlx %i3, 32, %g2
+ and %g2, xffff, %g2
+ stx %g2, [%sp+2223+16]
+ srlx %i3, 48, %g3
+ stx %g3, [%sp+2223+24]
+ srlx %g4, 32, xffffffff C store mask in register `xffffffff'
+
+ sllx %i2, 3, %i2
+ mov 0, cy C clear cy
+ add %i0, %i2, %i0
+ add %i1, %i2, %i1
+ neg %i2
+ add %i1, 4, %i5
+ add %i0, -32, %i4
+ add %i0, -16, %i0
+
+ ldd [%sp+2223+0], v00
+ ldd [%sp+2223+8], v16
+ ldd [%sp+2223+16], v32
+ ldd [%sp+2223+24], v48
+ ld [%sp+2223+0],%f2 C zero f2
+ ld [%sp+2223+0],%f4 C zero f4
+ ld [%i5+%i2], %f3 C read low 32 bits of up[i]
+ ld [%i1+%i2], %f5 C read high 32 bits of up[i]
+ fxtod v00, v00
+ fxtod v16, v16
+ fxtod v32, v32
+ fxtod v48, v48
+
+C Start real work. (We sneakily read f3 and f5 above...)
+C The software pipeline is very deep, requiring 4 feed-in stages.
+
+ fxtod %f2, u00
+ fxtod %f4, u32
+ fmuld u00, v00, a00
+ fmuld u00, v16, a16
+ fmuld u00, v32, p32
+ fmuld u32, v00, r32
+ fmuld u00, v48, p48
+ addcc %i2, 8, %i2
+ bnz,pt %xcc, .L_two_or_more
+ fmuld u32, v16, r48
+
+.L_one:
+ fmuld u32, v32, r64 C FIXME not urgent
+ faddd p32, r32, a32
+ fdtox a00, a00
+ faddd p48, r48, a48
+ fmuld u32, v48, r80 C FIXME not urgent
+ fdtox a16, a16
+ fdtox a32, a32
+ fdtox a48, a48
+ std a00, [%sp+2223+0]
+ std a16, [%sp+2223+8]
+ std a32, [%sp+2223+16]
+ std a48, [%sp+2223+24]
+ add %i2, 8, %i2
+
+ fdtox r64, a00
+ ldx [%i0+%i2], rlimb C read rp[i]
+ fdtox r80, a16
+ ldx [%sp+2223+0], i00
+ ldx [%sp+2223+8], i16
+ ldx [%sp+2223+16], i32
+ ldx [%sp+2223+24], i48
+ std a00, [%sp+2223+0]
+ std a16, [%sp+2223+8]
+ add %i2, 8, %i2
+
+ srlx rlimb, 32, %g4 C HI(rlimb)
+ and rlimb, xffffffff, %g5 C LO(rlimb)
+ add i00, %g5, %g5 C i00+ now in g5
+ ldx [%sp+2223+0], i00
+ srlx i16, 48, %l4 C (i16 >> 48)
+ mov i16, %g2
+ ldx [%sp+2223+8], i16
+ srlx i48, 16, %l5 C (i48 >> 16)
+ add i32, %g4, %g4 C i32+ now in g4
+ sllx i48, 32, %l6 C (i48 << 32)
+ srlx %g4, 32, %o3 C (i32 >> 32)
+ add %l5, %l4, %o1 C hi64- in %o1
+ std a00, [%sp+2223+0]
+ sllx %g4, 16, %o2 C (i32 << 16)
+ add %o3, %o1, %o1 C hi64 in %o1 1st ASSIGNMENT
+ std a16, [%sp+2223+8]
+ sllx %o1, 48, %o3 C (hi64 << 48)
+ add %g2, %o2, %o2 C mi64- in %o2
+ add %l6, %o2, %o2 C mi64- in %o2
+ sub %o2, %o3, %o2 C mi64 in %o2 1st ASSIGNMENT
+ add cy, %g5, %o4 C x = prev(i00) + cy
+ b .L_out_1
+ add %i2, 8, %i2
+
+.L_two_or_more:
+ ld [%i5+%i2], %f3 C read low 32 bits of up[i]
+ fmuld u32, v32, r64 C FIXME not urgent
+ faddd p32, r32, a32
+ ld [%i1+%i2], %f5 C read high 32 bits of up[i]
+ fdtox a00, a00
+ faddd p48, r48, a48
+ fmuld u32, v48, r80 C FIXME not urgent
+ fdtox a16, a16
+ fdtox a32, a32
+ fxtod %f2, u00
+ fxtod %f4, u32
+ fdtox a48, a48
+ std a00, [%sp+2223+0]
+ fmuld u00, v00, p00
+ std a16, [%sp+2223+8]
+ fmuld u00, v16, p16
+ std a32, [%sp+2223+16]
+ fmuld u00, v32, p32
+ std a48, [%sp+2223+24]
+ faddd p00, r64, a00
+ fmuld u32, v00, r32
+ faddd p16, r80, a16
+ fmuld u00, v48, p48
+ addcc %i2, 8, %i2
+ bnz,pt %xcc, .L_three_or_more
+ fmuld u32, v16, r48
+
+.L_two:
+ fmuld u32, v32, r64 C FIXME not urgent
+ faddd p32, r32, a32
+ fdtox a00, a00
+ ldx [%i0+%i2], rlimb C read rp[i]
+ faddd p48, r48, a48
+ fmuld u32, v48, r80 C FIXME not urgent
+ fdtox a16, a16
+ ldx [%sp+2223+0], i00
+ fdtox a32, a32
+ ldx [%sp+2223+8], i16
+ ldx [%sp+2223+16], i32
+ ldx [%sp+2223+24], i48
+ fdtox a48, a48
+ std a00, [%sp+2223+0]
+ std a16, [%sp+2223+8]
+ std a32, [%sp+2223+16]
+ std a48, [%sp+2223+24]
+ add %i2, 8, %i2
+
+ fdtox r64, a00
+ srlx rlimb, 32, %g4 C HI(rlimb)
+ and rlimb, xffffffff, %g5 C LO(rlimb)
+ ldx [%i0+%i2], rlimb C read rp[i]
+ add i00, %g5, %g5 C i00+ now in g5
+ fdtox r80, a16
+ ldx [%sp+2223+0], i00
+ srlx i16, 48, %l4 C (i16 >> 48)
+ mov i16, %g2
+ ldx [%sp+2223+8], i16
+ srlx i48, 16, %l5 C (i48 >> 16)
+ add i32, %g4, %g4 C i32+ now in g4
+ ldx [%sp+2223+16], i32
+ sllx i48, 32, %l6 C (i48 << 32)
+ ldx [%sp+2223+24], i48
+ srlx %g4, 32, %o3 C (i32 >> 32)
+ add %l5, %l4, %o1 C hi64- in %o1
+ std a00, [%sp+2223+0]
+ sllx %g4, 16, %o2 C (i32 << 16)
+ add %o3, %o1, %o1 C hi64 in %o1 1st ASSIGNMENT
+ std a16, [%sp+2223+8]
+ sllx %o1, 48, %o3 C (hi64 << 48)
+ add %g2, %o2, %o2 C mi64- in %o2
+ add %l6, %o2, %o2 C mi64- in %o2
+ sub %o2, %o3, %o2 C mi64 in %o2 1st ASSIGNMENT
+ add cy, %g5, %o4 C x = prev(i00) + cy
+ b .L_out_2
+ add %i2, 8, %i2
+
+.L_three_or_more:
+ ld [%i5+%i2], %f3 C read low 32 bits of up[i]
+ fmuld u32, v32, r64 C FIXME not urgent
+ faddd p32, r32, a32
+ ld [%i1+%i2], %f5 C read high 32 bits of up[i]
+ fdtox a00, a00
+ ldx [%i0+%i2], rlimb C read rp[i]
+ faddd p48, r48, a48
+ fmuld u32, v48, r80 C FIXME not urgent
+ fdtox a16, a16
+ ldx [%sp+2223+0], i00
+ fdtox a32, a32
+ ldx [%sp+2223+8], i16
+ fxtod %f2, u00
+ ldx [%sp+2223+16], i32
+ fxtod %f4, u32
+ ldx [%sp+2223+24], i48
+ fdtox a48, a48
+ std a00, [%sp+2223+0]
+ fmuld u00, v00, p00
+ std a16, [%sp+2223+8]
+ fmuld u00, v16, p16
+ std a32, [%sp+2223+16]
+ fmuld u00, v32, p32
+ std a48, [%sp+2223+24]
+ faddd p00, r64, a00
+ fmuld u32, v00, r32
+ faddd p16, r80, a16
+ fmuld u00, v48, p48
+ addcc %i2, 8, %i2
+ bnz,pt %xcc, .L_four_or_more
+ fmuld u32, v16, r48
+
+.L_three:
+ fmuld u32, v32, r64 C FIXME not urgent
+ faddd p32, r32, a32
+ fdtox a00, a00
+ srlx rlimb, 32, %g4 C HI(rlimb)
+ and rlimb, xffffffff, %g5 C LO(rlimb)
+ ldx [%i0+%i2], rlimb C read rp[i]
+ faddd p48, r48, a48
+ add i00, %g5, %g5 C i00+ now in g5
+ fmuld u32, v48, r80 C FIXME not urgent
+ fdtox a16, a16
+ ldx [%sp+2223+0], i00
+ fdtox a32, a32
+ srlx i16, 48, %l4 C (i16 >> 48)
+ mov i16, %g2
+ ldx [%sp+2223+8], i16
+ srlx i48, 16, %l5 C (i48 >> 16)
+ add i32, %g4, %g4 C i32+ now in g4
+ ldx [%sp+2223+16], i32
+ sllx i48, 32, %l6 C (i48 << 32)
+ ldx [%sp+2223+24], i48
+ fdtox a48, a48
+ srlx %g4, 32, %o3 C (i32 >> 32)
+ add %l5, %l4, %o1 C hi64- in %o1
+ std a00, [%sp+2223+0]
+ sllx %g4, 16, %o2 C (i32 << 16)
+ add %o3, %o1, %o1 C hi64 in %o1 1st ASSIGNMENT
+ std a16, [%sp+2223+8]
+ sllx %o1, 48, %o3 C (hi64 << 48)
+ add %g2, %o2, %o2 C mi64- in %o2
+ std a32, [%sp+2223+16]
+ add %l6, %o2, %o2 C mi64- in %o2
+ std a48, [%sp+2223+24]
+ sub %o2, %o3, %o2 C mi64 in %o2 1st ASSIGNMENT
+ add cy, %g5, %o4 C x = prev(i00) + cy
+ b .L_out_3
+ add %i2, 8, %i2
+
+.L_four_or_more:
+ ld [%i5+%i2], %f3 C read low 32 bits of up[i]
+ fmuld u32, v32, r64 C FIXME not urgent
+ faddd p32, r32, a32
+ ld [%i1+%i2], %f5 C read high 32 bits of up[i]
+ fdtox a00, a00
+ srlx rlimb, 32, %g4 C HI(rlimb)
+ and rlimb, xffffffff, %g5 C LO(rlimb)
+ ldx [%i0+%i2], rlimb C read rp[i]
+ faddd p48, r48, a48
+ add i00, %g5, %g5 C i00+ now in g5
+ fmuld u32, v48, r80 C FIXME not urgent
+ fdtox a16, a16
+ ldx [%sp+2223+0], i00
+ fdtox a32, a32
+ srlx i16, 48, %l4 C (i16 >> 48)
+ mov i16, %g2
+ ldx [%sp+2223+8], i16
+ fxtod %f2, u00
+ srlx i48, 16, %l5 C (i48 >> 16)
+ add i32, %g4, %g4 C i32+ now in g4
+ ldx [%sp+2223+16], i32
+ fxtod %f4, u32
+ sllx i48, 32, %l6 C (i48 << 32)
+ ldx [%sp+2223+24], i48
+ fdtox a48, a48
+ srlx %g4, 32, %o3 C (i32 >> 32)
+ add %l5, %l4, %o1 C hi64- in %o1
+ std a00, [%sp+2223+0]
+ fmuld u00, v00, p00
+ sllx %g4, 16, %o2 C (i32 << 16)
+ add %o3, %o1, %o1 C hi64 in %o1 1st ASSIGNMENT
+ std a16, [%sp+2223+8]
+ fmuld u00, v16, p16
+ sllx %o1, 48, %o3 C (hi64 << 48)
+ add %g2, %o2, %o2 C mi64- in %o2
+ std a32, [%sp+2223+16]
+ fmuld u00, v32, p32
+ add %l6, %o2, %o2 C mi64- in %o2
+ std a48, [%sp+2223+24]
+ faddd p00, r64, a00
+ fmuld u32, v00, r32
+ sub %o2, %o3, %o2 C mi64 in %o2 1st ASSIGNMENT
+ faddd p16, r80, a16
+ fmuld u00, v48, p48
+ add cy, %g5, %o4 C x = prev(i00) + cy
+ addcc %i2, 8, %i2
+ bnz,pt %xcc, .Loop
+ fmuld u32, v16, r48
+
+.L_four:
+ b,a .L_out_4
+
+C BEGIN MAIN LOOP
+ .align 16
+.Loop:
+C 00
+ srlx %o4, 16, %o5 C (x >> 16)
+ ld [%i5+%i2], %f3 C read low 32 bits of up[i]
+ fmuld u32, v32, r64 C FIXME not urgent
+ faddd p32, r32, a32
+C 01
+ add %o5, %o2, %o2 C mi64 in %o2 2nd ASSIGNMENT
+ and %o4, xffff, %o5 C (x & 0xffff)
+ ld [%i1+%i2], %f5 C read high 32 bits of up[i]
+ fdtox a00, a00
+C 02
+ srlx rlimb, 32, %g4 C HI(rlimb)
+ and rlimb, xffffffff, %g5 C LO(rlimb)
+ ldx [%i0+%i2], rlimb C read rp[i]
+ faddd p48, r48, a48
+C 03
+ srlx %o2, 48, %o7 C (mi64 >> 48)
+ add i00, %g5, %g5 C i00+ now in g5
+ fmuld u32, v48, r80 C FIXME not urgent
+ fdtox a16, a16
+C 04
+ sllx %o2, 16, %i3 C (mi64 << 16)
+ add %o7, %o1, cy C new cy
+ ldx [%sp+2223+0], i00
+ fdtox a32, a32
+C 05
+ srlx i16, 48, %l4 C (i16 >> 48)
+ mov i16, %g2
+ ldx [%sp+2223+8], i16
+ fxtod %f2, u00
+C 06
+ srlx i48, 16, %l5 C (i48 >> 16)
+ add i32, %g4, %g4 C i32+ now in g4
+ ldx [%sp+2223+16], i32
+ fxtod %f4, u32
+C 07
+ sllx i48, 32, %l6 C (i48 << 32)
+ or %i3, %o5, %o5
+ ldx [%sp+2223+24], i48
+ fdtox a48, a48
+C 08
+ srlx %g4, 32, %o3 C (i32 >> 32)
+ add %l5, %l4, %o1 C hi64- in %o1
+ std a00, [%sp+2223+0]
+ fmuld u00, v00, p00
+C 09
+ sllx %g4, 16, %o2 C (i32 << 16)
+ add %o3, %o1, %o1 C hi64 in %o1 1st ASSIGNMENT
+ std a16, [%sp+2223+8]
+ fmuld u00, v16, p16
+C 10
+ sllx %o1, 48, %o3 C (hi64 << 48)
+ add %g2, %o2, %o2 C mi64- in %o2
+ std a32, [%sp+2223+16]
+ fmuld u00, v32, p32
+C 11
+ add %l6, %o2, %o2 C mi64- in %o2
+ std a48, [%sp+2223+24]
+ faddd p00, r64, a00
+ fmuld u32, v00, r32
+C 12
+ sub %o2, %o3, %o2 C mi64 in %o2 1st ASSIGNMENT
+ stx %o5, [%i4+%i2]
+ faddd p16, r80, a16
+ fmuld u00, v48, p48
+C 13
+ add cy, %g5, %o4 C x = prev(i00) + cy
+ addcc %i2, 8, %i2
+ bnz,pt %xcc, .Loop
+ fmuld u32, v16, r48
+C END MAIN LOOP
+
+.L_out_4:
+ srlx %o4, 16, %o5 C (x >> 16)
+ fmuld u32, v32, r64 C FIXME not urgent
+ faddd p32, r32, a32
+ add %o5, %o2, %o2 C mi64 in %o2 2nd ASSIGNMENT
+ and %o4, xffff, %o5 C (x & 0xffff)
+ fdtox a00, a00
+ srlx rlimb, 32, %g4 C HI(rlimb)
+ and rlimb, xffffffff, %g5 C LO(rlimb)
+ ldx [%i0+%i2], rlimb C read rp[i]
+ faddd p48, r48, a48
+ srlx %o2, 48, %o7 C (mi64 >> 48)
+ add i00, %g5, %g5 C i00+ now in g5
+ fmuld u32, v48, r80 C FIXME not urgent
+ fdtox a16, a16
+ sllx %o2, 16, %i3 C (mi64 << 16)
+ add %o7, %o1, cy C new cy
+ ldx [%sp+2223+0], i00
+ fdtox a32, a32
+ srlx i16, 48, %l4 C (i16 >> 48)
+ mov i16, %g2
+ ldx [%sp+2223+8], i16
+ srlx i48, 16, %l5 C (i48 >> 16)
+ add i32, %g4, %g4 C i32+ now in g4
+ ldx [%sp+2223+16], i32
+ sllx i48, 32, %l6 C (i48 << 32)
+ or %i3, %o5, %o5
+ ldx [%sp+2223+24], i48
+ fdtox a48, a48
+ srlx %g4, 32, %o3 C (i32 >> 32)
+ add %l5, %l4, %o1 C hi64- in %o1
+ std a00, [%sp+2223+0]
+ sllx %g4, 16, %o2 C (i32 << 16)
+ add %o3, %o1, %o1 C hi64 in %o1 1st ASSIGNMENT
+ std a16, [%sp+2223+8]
+ sllx %o1, 48, %o3 C (hi64 << 48)
+ add %g2, %o2, %o2 C mi64- in %o2
+ std a32, [%sp+2223+16]
+ add %l6, %o2, %o2 C mi64- in %o2
+ std a48, [%sp+2223+24]
+ sub %o2, %o3, %o2 C mi64 in %o2 1st ASSIGNMENT
+ stx %o5, [%i4+%i2]
+ add cy, %g5, %o4 C x = prev(i00) + cy
+ add %i2, 8, %i2
+.L_out_3:
+ srlx %o4, 16, %o5 C (x >> 16)
+ add %o5, %o2, %o2 C mi64 in %o2 2nd ASSIGNMENT
+ and %o4, xffff, %o5 C (x & 0xffff)
+ fdtox r64, a00
+ srlx rlimb, 32, %g4 C HI(rlimb)
+ and rlimb, xffffffff, %g5 C LO(rlimb)
+ ldx [%i0+%i2], rlimb C read rp[i]
+ srlx %o2, 48, %o7 C (mi64 >> 48)
+ add i00, %g5, %g5 C i00+ now in g5
+ fdtox r80, a16
+ sllx %o2, 16, %i3 C (mi64 << 16)
+ add %o7, %o1, cy C new cy
+ ldx [%sp+2223+0], i00
+ srlx i16, 48, %l4 C (i16 >> 48)
+ mov i16, %g2
+ ldx [%sp+2223+8], i16
+ srlx i48, 16, %l5 C (i48 >> 16)
+ add i32, %g4, %g4 C i32+ now in g4
+ ldx [%sp+2223+16], i32
+ sllx i48, 32, %l6 C (i48 << 32)
+ or %i3, %o5, %o5
+ ldx [%sp+2223+24], i48
+ srlx %g4, 32, %o3 C (i32 >> 32)
+ add %l5, %l4, %o1 C hi64- in %o1
+ std a00, [%sp+2223+0]
+ sllx %g4, 16, %o2 C (i32 << 16)
+ add %o3, %o1, %o1 C hi64 in %o1 1st ASSIGNMENT
+ std a16, [%sp+2223+8]
+ sllx %o1, 48, %o3 C (hi64 << 48)
+ add %g2, %o2, %o2 C mi64- in %o2
+ add %l6, %o2, %o2 C mi64- in %o2
+ sub %o2, %o3, %o2 C mi64 in %o2 1st ASSIGNMENT
+ stx %o5, [%i4+%i2]
+ add cy, %g5, %o4 C x = prev(i00) + cy
+ add %i2, 8, %i2
+.L_out_2:
+ srlx %o4, 16, %o5 C (x >> 16)
+ add %o5, %o2, %o2 C mi64 in %o2 2nd ASSIGNMENT
+ and %o4, xffff, %o5 C (x & 0xffff)
+ srlx rlimb, 32, %g4 C HI(rlimb)
+ and rlimb, xffffffff, %g5 C LO(rlimb)
+ srlx %o2, 48, %o7 C (mi64 >> 48)
+ add i00, %g5, %g5 C i00+ now in g5
+ sllx %o2, 16, %i3 C (mi64 << 16)
+ add %o7, %o1, cy C new cy
+ ldx [%sp+2223+0], i00
+ srlx i16, 48, %l4 C (i16 >> 48)
+ mov i16, %g2
+ ldx [%sp+2223+8], i16
+ srlx i48, 16, %l5 C (i48 >> 16)
+ add i32, %g4, %g4 C i32+ now in g4
+ sllx i48, 32, %l6 C (i48 << 32)
+ or %i3, %o5, %o5
+ srlx %g4, 32, %o3 C (i32 >> 32)
+ add %l5, %l4, %o1 C hi64- in %o1
+ sllx %g4, 16, %o2 C (i32 << 16)
+ add %o3, %o1, %o1 C hi64 in %o1 1st ASSIGNMENT
+ sllx %o1, 48, %o3 C (hi64 << 48)
+ add %g2, %o2, %o2 C mi64- in %o2
+ add %l6, %o2, %o2 C mi64- in %o2
+ sub %o2, %o3, %o2 C mi64 in %o2 1st ASSIGNMENT
+ stx %o5, [%i4+%i2]
+ add cy, %g5, %o4 C x = prev(i00) + cy
+ add %i2, 8, %i2
+.L_out_1:
+ srlx %o4, 16, %o5 C (x >> 16)
+ add %o5, %o2, %o2 C mi64 in %o2 2nd ASSIGNMENT
+ and %o4, xffff, %o5 C (x & 0xffff)
+ srlx %o2, 48, %o7 C (mi64 >> 48)
+ sllx %o2, 16, %i3 C (mi64 << 16)
+ add %o7, %o1, cy C new cy
+ or %i3, %o5, %o5
+ stx %o5, [%i4+%i2]
+
+ sllx i00, 0, %g2
+ add %g2, cy, cy
+ sllx i16, 16, %g3
+ add %g3, cy, cy
+
+ return %i7+8
+ mov cy, %o0
+EPILOGUE(mpn_addmul_1)
--- /dev/null
+dnl SPARC v9 64-bit mpn_addmul_2 -- Multiply an n-limb number by a 2-limb
+dnl number and add the result to an n-limb vector.
+
+dnl Copyright 2002, 2003 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C UltraSPARC 1&2: 9
+C UltraSPARC 3: 10
+
+C Algorithm: We use 16 floating-point multiplies per limb product, with the
+C 2-limb v operand split into eight 16-bit pieces, and the n-limb u operand
+C split into 32-bit pieces. We sum four 48-bit partial products using
+C floating-point add, then convert the resulting four 50-bit quantities and
+C transfer them to the integer unit.
+
+C Possible optimizations:
+C 1. Align the stack area where we transfer the four 50-bit product-sums
+C to a 32-byte boundary. That would minimize the cache collision.
+C (UltraSPARC-1/2 use a direct-mapped cache.) (Perhaps even better would
+C be to align the area to map to the area immediately before up?)
+C 2. Perform two of the fp->int conversions with integer instructions. We
+C can get almost ten free IEU slots, if we clean up bookkeeping and the
+C silly carry-limb code.
+C 3. For an mpn_addmul_1 based on this, we need to fix the silly carry-limb
+C code.
+
+C OSP (Overlapping software pipeline) version of mpn_mul_basecase:
+C Operand swap will require 8 LDDA and 8 FXTOD, which will mean 8 cycles.
+C FI = 20
+C L = 9 x un * vn
+C WDFI = 10 x vn / 2
+C WD = 4
+
+C Instruction classification (as per UltraSPARC functional units).
+C Assuming silly carry code is fixed. Includes bookkeeping.
+C
+C mpn_addmul_X mpn_mul_X
+C 1 2 1 2
+C ========== ==========
+C FM 8 16 8 16
+C FA 10 18 10 18
+C MEM 12 12 10 10
+C ISHIFT 6 6 6 6
+C IADDLOG 11 11 10 10
+C BRANCH 1 1 1 1
+C
+C TOTAL IEU 17 17 16 16
+C TOTAL 48 64 45 61
+C
+C IEU cycles 8.5 8.5 8 8
+C MEM cycles 12 12 10 10
+C ISSUE cycles 12 16 11.25 15.25
+C FPU cycles 10 18 10 18
+C cycles/loop 12 18 12 18
+C cycles/limb 12 9 12 9
+
+
+C INPUT PARAMETERS
+C rp[n + 1] i0
+C up[n] i1
+C n i2
+C vp[2] i3
+
+
+ASM_START()
+ REGISTER(%g2,#scratch)
+ REGISTER(%g3,#scratch)
+
+C Combine registers:
+C u00_hi= u32_hi
+C u00_lo= u32_lo
+C a000 = out000
+C a016 = out016
+C Free: f52 f54
+
+
+C Register aliases used by mpn_addmul_2 below. Each name is an m4 macro that
+C expands to the SPARC register given in its definition.
+C
+C pNNN: destinations of the fmuld partial products. The a/b suffixed pairs
+C (p096a/p096b, p112a/p112b) are separate registers written by the u00 and
+C u32 multiplies respectively.
+define(`p000', `%f8') define(`p016',`%f10')
+define(`p032',`%f12') define(`p048',`%f14')
+define(`p064',`%f16') define(`p080',`%f18')
+define(`p096a',`%f20') define(`p112a',`%f22')
+define(`p096b',`%f56') define(`p112b',`%f58')
+
+C out000/out016: results of fdtox, stored to the stack scratch area with std.
+define(`out000',`%f0') define(`out016',`%f6')
+
+C vNNN: the eight 16-bit pieces of the 2-limb v operand, held as doubles
+C (set up once by the initialization code and only read afterwards).
+define(`v000',`%f24') define(`v016',`%f26')
+define(`v032',`%f28') define(`v048',`%f30')
+define(`v064',`%f44') define(`v080',`%f46')
+define(`v096',`%f48') define(`v112',`%f50')
+
+C u00/u32: the two 32-bit halves of the current up limb, converted to double
+C by fxtod from the u00_hi/u32_hi register pairs defined further down.
+define(`u00',`%f32') define(`u32', `%f34')
+
+C aNNN: sums produced by faddd (or copies made with fmovd) that are later
+C converted with fdtox.
+define(`a000',`%f36') define(`a016',`%f38')
+define(`a032',`%f40') define(`a048',`%f42')
+define(`a064',`%f60') define(`a080',`%f62')
+
+C Even/odd single-precision halves of the double registers f2 and f4. The
+C _hi halves are cleared once during initialization; the _lo halves are
+C loaded with 32-bit words of up, so that fxtod on the _hi name converts a
+C zero-extended 32-bit value.
+define(`u00_hi',`%f2') define(`u32_hi',`%f4')
+define(`u00_lo',`%f3') define(`u32_lo',`%f5')
+
+C Integer registers: running carry, sum accumulator, the two values read
+C back from the scratch area, the two 32-bit words read from rp, and the
+C constant masks 0xffffffff and 0xffff.
+define(`cy',`%g1')
+define(`rlimb',`%g3')
+define(`i00',`%l0') define(`i16',`%l1')
+define(`r00',`%l2') define(`r32',`%l3')
+define(`xffffffff',`%l7')
+define(`xffff',`%o0')
+
+
+PROLOGUE(mpn_addmul_2)
+
+C Initialization. (1) Split v operand into eight 16-bit chunks and store them
+C as IEEE double in fp registers. (2) Clear upper 32 bits of fp register pairs
+C f2 and f4. (3) Store masks in registers aliased to `xffff' and `xffffffff'.
+C This code could be better scheduled.
+C
+C Two variants follow, selected at m4 time by whether HAVE_VIS is defined.
+C With HAVE_VIS, the 16-bit pieces of vp[0] (offsets 6,4,2,0) and vp[1]
+C (offsets 14,12,10,8) are loaded directly into fp registers with ldda
+C through %asi = 0xD2, and f2/f4 are cleared with fzero.
+C NOTE(review): 0xD2 is presumably the VIS 16-bit short fp load ASI - confirm.
+C Without HAVE_VIS, the pieces are extracted with integer shifts and masks,
+C stored as 64-bit values to the scratch area at %sp+2223+0..56, and reloaded
+C with ldd; f2/f4 are then cleared by loading the 32-bit word at offset 0,
+C which is the zero upper half of the first stored chunk.
+C Only the non-VIS variant sets the xffff mask, and only that variant uses it
+C in this routine.
+C NOTE(review): 2223 is presumably the V9 stack bias 2047 plus 176 - confirm.
+
+ save %sp, -256, %sp
+
+ifdef(`HAVE_VIS',
+` mov -1, %g4
+ wr %g0, 0xD2, %asi
+ srlx %g4, 32, xffffffff C store mask in register `xffffffff'
+ ldda [%i3+6] %asi, v000
+ ldda [%i3+4] %asi, v016
+ ldda [%i3+2] %asi, v032
+ ldda [%i3+0] %asi, v048
+ fxtod v000, v000
+ ldda [%i3+14] %asi, v064
+ fxtod v016, v016
+ ldda [%i3+12] %asi, v080
+ fxtod v032, v032
+ ldda [%i3+10] %asi, v096
+ fxtod v048, v048
+ ldda [%i3+8] %asi, v112
+ fxtod v064, v064
+ fxtod v080, v080
+ fxtod v096, v096
+ fxtod v112, v112
+ fzero u00_hi
+ fzero u32_hi
+',
+` mov -1, %g4
+ ldx [%i3+0], %l0 C vp[0]
+ srlx %g4, 48, xffff C store mask in register `xffff'
+ ldx [%i3+8], %l1 C vp[1]
+
+ and %l0, xffff, %g2
+ stx %g2, [%sp+2223+0]
+ srlx %l0, 16, %g3
+ and %g3, xffff, %g3
+ stx %g3, [%sp+2223+8]
+ srlx %l0, 32, %g2
+ and %g2, xffff, %g2
+ stx %g2, [%sp+2223+16]
+ srlx %l0, 48, %g3
+ stx %g3, [%sp+2223+24]
+ and %l1, xffff, %g2
+ stx %g2, [%sp+2223+32]
+ srlx %l1, 16, %g3
+ and %g3, xffff, %g3
+ stx %g3, [%sp+2223+40]
+ srlx %l1, 32, %g2
+ and %g2, xffff, %g2
+ stx %g2, [%sp+2223+48]
+ srlx %l1, 48, %g3
+ stx %g3, [%sp+2223+56]
+
+ srlx %g4, 32, xffffffff C store mask in register `xffffffff'
+
+ ldd [%sp+2223+0], v000
+ ldd [%sp+2223+8], v016
+ ldd [%sp+2223+16], v032
+ ldd [%sp+2223+24], v048
+ fxtod v000, v000
+ ldd [%sp+2223+32], v064
+ fxtod v016, v016
+ ldd [%sp+2223+40], v080
+ fxtod v032, v032
+ ldd [%sp+2223+48], v096
+ fxtod v048, v048
+ ldd [%sp+2223+56], v112
+ fxtod v064, v064
+ ld [%sp+2223+0], u00_hi C zero u00_hi
+ fxtod v080, v080
+ ld [%sp+2223+0], u32_hi C zero u32_hi
+ fxtod v096, v096
+ fxtod v112, v112
+')
+C Initialization done.
+C Clear the integer carry state (%g2, rlimb, %g4) that the first pass through
+C the summation code reads before anything has been accumulated.
+ mov 0, %g2
+ mov 0, rlimb
+ mov 0, %g4
+C Bias rp by one limb: the summation code adds 8 to %i0 before each store.
+ add %i0, -8, %i0 C BOOKKEEPING
+
+C Start software pipeline.
+C
+C Feed-in stage. The low word of up[0] is loaded into u00_lo and converted
+C (together with the zero u00_hi) to the double u00, which is then multiplied
+C by all eight pieces of v. The high word goes through u32_lo/u32 the same
+C way. %i2 counts the limbs still to be loaded and %i1 advances one limb.
+
+ ld [%i1+4], u00_lo C read low 32 bits of up[i]
+ fxtod u00_hi, u00
+C mid
+ ld [%i1+0], u32_lo C read high 32 bits of up[i]
+ fmuld u00, v000, a000
+ fmuld u00, v016, a016
+ fmuld u00, v032, a032
+ fmuld u00, v048, a048
+ add %i2, -1, %i2 C BOOKKEEPING
+ fmuld u00, v064, p064
+ add %i1, 8, %i1 C BOOKKEEPING
+ fxtod u32_hi, u32
+ fmuld u00, v080, p080
+ fmuld u00, v096, p096a
+ brnz,pt %i2, .L_2_or_more
+ fmuld u00, v112, p112a
+
+C n = 1: no further limb of up is loaded. Convert the two lowest products
+C directly, form the remaining sums from the u32 products and the earlier
+C u00 products, store the converted values to the scratch area and join the
+C wind-down code at .L_wd2.
+.L1: fdtox a000, out000
+ fmuld u32, v000, p000
+ fdtox a016, out016
+ fmuld u32, v016, p016
+ fmovd p064, a064
+ fmuld u32, v032, p032
+ fmovd p080, a080
+ fmuld u32, v048, p048
+ std out000, [%sp+2223+16]
+ faddd p000, a032, a000
+ fmuld u32, v064, p064
+ std out016, [%sp+2223+24]
+ fxtod u00_hi, u00
+ faddd p016, a048, a016
+ fmuld u32, v080, p080
+ faddd p032, a064, a032
+ fmuld u32, v096, p096b
+ faddd p048, a080, a048
+ fmuld u32, v112, p112b
+C mid
+ fdtox a000, out000
+ fdtox a016, out016
+ faddd p064, p096a, a064
+ faddd p080, p112a, a080
+ std out000, [%sp+2223+0]
+ b .L_wd2
+ std out016, [%sp+2223+8] C delay slot, executed before .L_wd2
+
+C n >= 2: same fp work as .L1, but the next limb of up is loaded and its low
+C half is already multiplied by v in the second part. No integer summation
+C happens yet; the first converted values are only stored to the scratch
+C area.
+.L_2_or_more:
+ ld [%i1+4], u00_lo C read low 32 bits of up[i]
+ fdtox a000, out000
+ fmuld u32, v000, p000
+ fdtox a016, out016
+ fmuld u32, v016, p016
+ fmovd p064, a064
+ fmuld u32, v032, p032
+ fmovd p080, a080
+ fmuld u32, v048, p048
+ std out000, [%sp+2223+16]
+ faddd p000, a032, a000
+ fmuld u32, v064, p064
+ std out016, [%sp+2223+24]
+ fxtod u00_hi, u00
+ faddd p016, a048, a016
+ fmuld u32, v080, p080
+ faddd p032, a064, a032
+ fmuld u32, v096, p096b
+ faddd p048, a080, a048
+ fmuld u32, v112, p112b
+C mid
+ ld [%i1+0], u32_lo C read high 32 bits of up[i]
+ fdtox a000, out000
+ fmuld u00, v000, p000
+ fdtox a016, out016
+ fmuld u00, v016, p016
+ faddd p064, p096a, a064
+ fmuld u00, v032, p032
+ faddd p080, p112a, a080
+ fmuld u00, v048, p048
+ add %i2, -1, %i2 C BOOKKEEPING
+ std out000, [%sp+2223+0]
+ faddd p000, a032, a000
+ fmuld u00, v064, p064
+ add %i1, 8, %i1 C BOOKKEEPING
+ std out016, [%sp+2223+8]
+ fxtod u32_hi, u32
+ faddd p016, a048, a016
+ fmuld u00, v080, p080
+ faddd p032, a064, a032
+ fmuld u00, v096, p096a
+ faddd p048, a080, a048
+ brnz,pt %i2, .L_3_or_more
+ fmuld u00, v112, p112a
+
+C n = 2: skip the main loop and go straight to the wind-down code.
+ b .Lend
+ nop
+
+C 64 32 0
+C . . .
+C . |__rXXX_| 32
+C . |___cy___| 34
+C . |_______i00__| 50
+C |_______i16__| . 50
+
+
+C BEGIN MAIN LOOP
+C
+C One pass handles one limb of up, in two halves separated by the midloop
+C marker. In each half the fp side issues eight fmuld (one 32-bit half of the
+C up limb times the eight pieces of v), adds them to the products of the
+C other half with faddd, and converts the two lowest sums with fdtox, storing
+C them to the scratch area. The integer side of each half reloads a
+C previously stored pair (i00, i16) from the scratch area, adds one 32-bit
+C word of rp and the running carry, and stores one 32-bit result word with
+C stw: the first half writes the word at offset 4 of the limb, the second
+C half the word at offset 0.
+C
+C Carry handling across halves: a half leaves %g2 = i16 << 16, %g4 =
+C i16 >> 16 and rlimb = i00 + rp word + cy. The next half then computes
+C cy = (((%g2 & 0xffffffff) + rlimb) >> 32) + %g4.
+ .align 16
+.L_3_or_more:
+.Loop: ld [%i1+4], u00_lo C read low 32 bits of up[i]
+ and %g2, xffffffff, %g2
+ fdtox a000, out000
+ fmuld u32, v000, p000
+C
+ lduw [%i0+4+8], r00 C read low 32 bits of rp[i]
+ add %g2, rlimb, %l5
+ fdtox a016, out016
+ fmuld u32, v016, p016
+C
+ srlx %l5, 32, cy
+ ldx [%sp+2223+16], i00
+ faddd p064, p096b, a064
+ fmuld u32, v032, p032
+C
+ add %g4, cy, cy C new cy
+ ldx [%sp+2223+24], i16
+ faddd p080, p112b, a080
+ fmuld u32, v048, p048
+C
+ nop
+ std out000, [%sp+2223+16]
+ faddd p000, a032, a000
+ fmuld u32, v064, p064
+C
+ add i00, r00, rlimb
+ add %i0, 8, %i0 C BOOKKEEPING
+ std out016, [%sp+2223+24]
+ fxtod u00_hi, u00
+C
+ sllx i16, 16, %g2
+ add cy, rlimb, rlimb
+ faddd p016, a048, a016
+ fmuld u32, v080, p080
+C
+ srlx i16, 16, %g4
+ add %g2, rlimb, %l5
+ faddd p032, a064, a032
+ fmuld u32, v096, p096b
+C
+ stw %l5, [%i0+4] C store low 32 bits of the result limb
+ nop
+ faddd p048, a080, a048
+ fmuld u32, v112, p112b
+C midloop
+ ld [%i1+0], u32_lo C read high 32 bits of up[i]
+ and %g2, xffffffff, %g2
+ fdtox a000, out000
+ fmuld u00, v000, p000
+C
+ lduw [%i0+0], r32 C read high 32 bits of rp[i]
+ add %g2, rlimb, %l5
+ fdtox a016, out016
+ fmuld u00, v016, p016
+C
+ srlx %l5, 32, cy
+ ldx [%sp+2223+0], i00
+ faddd p064, p096a, a064
+ fmuld u00, v032, p032
+C
+ add %g4, cy, cy C new cy
+ ldx [%sp+2223+8], i16
+ faddd p080, p112a, a080
+ fmuld u00, v048, p048
+C
+ add %i2, -1, %i2 C BOOKKEEPING
+ std out000, [%sp+2223+0]
+ faddd p000, a032, a000
+ fmuld u00, v064, p064
+C
+ add i00, r32, rlimb
+ add %i1, 8, %i1 C BOOKKEEPING
+ std out016, [%sp+2223+8]
+ fxtod u32_hi, u32
+C
+ sllx i16, 16, %g2
+ add cy, rlimb, rlimb
+ faddd p016, a048, a016
+ fmuld u00, v080, p080
+C
+ srlx i16, 16, %g4
+ add %g2, rlimb, %l5
+ faddd p032, a064, a032
+ fmuld u00, v096, p096a
+C
+ stw %l5, [%i0+0] C store high 32 bits of the result limb
+ faddd p048, a080, a048
+ brnz,pt %i2, .Loop C loop while limbs of up remain
+ fmuld u00, v112, p112a
+C END MAIN LOOP
+
+C WIND-DOWN PHASE 1
+C Same as one pass of the main loop, except that no further words of up are
+C loaded or converted, and the second half issues no multiplies. The integer
+C side still adds one limb of rp and stores one result limb.
+.Lend: and %g2, xffffffff, %g2
+ fdtox a000, out000
+ fmuld u32, v000, p000
+ lduw [%i0+4+8], r00 C read low 32 bits of rp[i]
+ add %g2, rlimb, %l5
+ fdtox a016, out016
+ fmuld u32, v016, p016
+ srlx %l5, 32, cy
+ ldx [%sp+2223+16], i00
+ faddd p064, p096b, a064
+ fmuld u32, v032, p032
+ add %g4, cy, cy C new cy
+ ldx [%sp+2223+24], i16
+ faddd p080, p112b, a080
+ fmuld u32, v048, p048
+ std out000, [%sp+2223+16]
+ faddd p000, a032, a000
+ fmuld u32, v064, p064
+ add i00, r00, rlimb
+ add %i0, 8, %i0 C BOOKKEEPING
+ std out016, [%sp+2223+24]
+ sllx i16, 16, %g2
+ add cy, rlimb, rlimb
+ faddd p016, a048, a016
+ fmuld u32, v080, p080
+ srlx i16, 16, %g4
+ add %g2, rlimb, %l5
+ faddd p032, a064, a032
+ fmuld u32, v096, p096b
+ stw %l5, [%i0+4]
+ faddd p048, a080, a048
+ fmuld u32, v112, p112b
+C mid
+ and %g2, xffffffff, %g2
+ fdtox a000, out000
+ lduw [%i0+0], r32 C read high 32 bits of rp[i]
+ add %g2, rlimb, %l5
+ fdtox a016, out016
+ srlx %l5, 32, cy
+ ldx [%sp+2223+0], i00
+ faddd p064, p096a, a064
+ add %g4, cy, cy C new cy
+ ldx [%sp+2223+8], i16
+ faddd p080, p112a, a080
+ std out000, [%sp+2223+0]
+ add i00, r32, rlimb
+ std out016, [%sp+2223+8]
+ sllx i16, 16, %g2
+ add cy, rlimb, rlimb
+ srlx i16, 16, %g4
+ add %g2, rlimb, %l5
+ stw %l5, [%i0+0]
+
+C WIND-DOWN PHASE 2
+C No fp arithmetic is left except conversions: the sums a032/a048 and then
+C a064/a080 are converted and stored. The integer side consumes the values
+C stored by the previous phase, adds the last limb of rp and stores it.
+C The n = 1 path (.L1) enters here.
+.L_wd2: and %g2, xffffffff, %g2
+ fdtox a032, out000
+ lduw [%i0+4+8], r00 C read low 32 bits of rp[i]
+ add %g2, rlimb, %l5
+ fdtox a048, out016
+ srlx %l5, 32, cy
+ ldx [%sp+2223+16], i00
+ add %g4, cy, cy C new cy
+ ldx [%sp+2223+24], i16
+ std out000, [%sp+2223+16]
+ add i00, r00, rlimb
+ add %i0, 8, %i0 C BOOKKEEPING
+ std out016, [%sp+2223+24]
+ sllx i16, 16, %g2
+ add cy, rlimb, rlimb
+ srlx i16, 16, %g4
+ add %g2, rlimb, %l5
+ stw %l5, [%i0+4]
+C mid
+ and %g2, xffffffff, %g2
+ fdtox a064, out000
+ lduw [%i0+0], r32 C read high 32 bits of rp[i]
+ add %g2, rlimb, %l5
+ fdtox a080, out016
+ srlx %l5, 32, cy
+ ldx [%sp+2223+0], i00
+ add %g4, cy, cy C new cy
+ ldx [%sp+2223+8], i16
+ std out000, [%sp+2223+0]
+ add i00, r32, rlimb
+ std out016, [%sp+2223+8]
+ sllx i16, 16, %g2
+ add cy, rlimb, rlimb
+ srlx i16, 16, %g4
+ add %g2, rlimb, %l5
+ stw %l5, [%i0+0]
+
+C WIND-DOWN PHASE 3
+C The two highest products (p096b, p112b) are converted. No word of rp is
+C read in this phase: the scratch values are loaded straight into rlimb, so
+C the limb stored here is written rather than accumulated.
+.L_wd3: and %g2, xffffffff, %g2
+ fdtox p096b, out000
+ add %g2, rlimb, %l5
+ fdtox p112b, out016
+ srlx %l5, 32, cy
+ ldx [%sp+2223+16], rlimb
+ add %g4, cy, cy C new cy
+ ldx [%sp+2223+24], i16
+ std out000, [%sp+2223+16]
+ add %i0, 8, %i0 C BOOKKEEPING
+ std out016, [%sp+2223+24]
+ sllx i16, 16, %g2
+ add cy, rlimb, rlimb
+ srlx i16, 16, %g4
+ add %g2, rlimb, %l5
+ stw %l5, [%i0+4]
+C mid
+ and %g2, xffffffff, %g2
+ add %g2, rlimb, %l5
+ srlx %l5, 32, cy
+ ldx [%sp+2223+0], rlimb
+ add %g4, cy, cy C new cy
+ ldx [%sp+2223+8], i16
+ sllx i16, 16, %g2
+ add cy, rlimb, rlimb
+ srlx i16, 16, %g4
+ add %g2, rlimb, %l5
+ stw %l5, [%i0+0]
+
+C Final carry and return value: cy + i00 + (i16 << 16), using the last pair
+C of converted values.
+ and %g2, xffffffff, %g2
+ add %g2, rlimb, %l5
+ srlx %l5, 32, cy
+ ldx [%sp+2223+16], i00
+ add %g4, cy, cy C new cy
+ ldx [%sp+2223+24], i16
+
+ sllx i16, 16, %g2
+ add i00, cy, cy
+C NOTE(review): per the V9 RETURN semantics the delay slot presumably runs in
+C the restored window, making %o0 the register the caller reads - confirm.
+ return %i7+8
+ add %g2, cy, %o0
+EPILOGUE(mpn_addmul_2)
--- /dev/null
+dnl SPARC v9 mpn_lshift
+
+dnl Copyright 1996, 2000, 2001, 2002, 2003, 2010 Free Software Foundation,
+dnl Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+C cycles/limb
+C UltraSPARC 1&2: 2
+C UltraSPARC 3: 2.5
+
+C INPUT PARAMETERS
+define(`rp', `%i0')
+define(`up', `%i1')
+define(`n', `%i2')
+define(`cnt',`%i3')
+
+define(`u0', `%l0')
+define(`u1', `%l2')
+define(`u2', `%l4')
+define(`u3', `%l6')
+
+define(`tnc',`%i4')
+
+define(`fanop',`fitod %f0,%f2') dnl A quasi nop running in the FA pipe
+
+C mpn_lshift: shift the n limbs at up left by cnt bits and store the n result
+C limbs at rp. The bits shifted out of the most significant limb are the
+C function result. Limbs are processed from the most significant end
+C downwards: both pointers are first moved to the end of their operands and
+C then decremented.
+C
+C tnc holds the negated count and is used as the right-shift amount.
+C NOTE(review): this presumably relies on srlx using only the low 6 bits of
+C the count, so that -cnt acts as 64-cnt, with cnt in 1..63 - confirm.
+C
+C Structure: operands of fewer than 5 limbs go straight to the one limb per
+C pass loop at .Loop0. Otherwise four more limbs are preloaded and the
+C unrolled loop at .Loop handles four limbs per pass; .Lend5678 drains the
+C four limbs still held in registers and .Loop0 finishes the remaining
+C 0 to 3 limbs. %g3 always carries the left-shifted part of the last limb
+C read, waiting to be combined with the top bits of the next lower limb.
+ASM_START()
+ REGISTER(%g2,#scratch)
+ REGISTER(%g3,#scratch)
+PROLOGUE(mpn_lshift)
+ save %sp,-160,%sp
+
+ sllx n,3,%g1 C operand size in bytes
+ sub %g0,cnt,tnc C negate shift count
+ add up,%g1,up C make up point at end of src
+ add rp,%g1,rp C make rp point at end of res
+ ldx [up-8],u3 C load most significant limb
+ subcc n,5,n
+ srlx u3,tnc,%i5 C compute function result
+ bl,pn %xcc,.Lend1234 C fewer than 5 limbs
+ sllx u3,cnt,%g3 C delay slot, executed on both paths
+
+ subcc n,4,n
+ ldx [up-16],u0
+ ldx [up-24],u1
+ add up,-32,up
+ ldx [up-0],u2
+ ldx [up-8],u3
+
+ bl,pn %xcc,.Lend5678 C fewer than 9 limbs: skip unrolled loop
+ srlx u0,tnc,%g2
+
+ b,a .Loop C jump over the alignment padding
+ ALIGN(16)
+C Unrolled loop: each pass stores four result limbs and loads four new source
+C limbs; n drops by 4 and the loop repeats while n stays non-negative.
+.Loop:
+ sllx u0,cnt,%g1
+ or %g3,%g2,%g3
+ ldx [up-16],u0
+ fanop
+C --
+ srlx u1,tnc,%g2
+ subcc n,4,n
+ stx %g3,[rp-8]
+ fanop
+C --
+ sllx u1,cnt,%g3
+ or %g1,%g2,%g1
+ ldx [up-24],u1
+ fanop
+C --
+ srlx u2,tnc,%g2
+ stx %g1,[rp-16]
+ add up,-32,up
+ fanop
+C --
+ sllx u2,cnt,%g1
+ or %g3,%g2,%g3
+ ldx [up-0],u2
+ fanop
+C --
+ srlx u3,tnc,%g2
+ stx %g3,[rp-24]
+ add rp,-32,rp
+ fanop
+C --
+ sllx u3,cnt,%g3
+ or %g1,%g2,%g1
+ ldx [up-8],u3
+ fanop
+C --
+ srlx u0,tnc,%g2
+ stx %g1,[rp-0]
+ bge,pt %xcc,.Loop
+ fanop
+C --
+C Drain the four limbs already loaded into u0..u3; no further loads.
+.Lend5678:
+ sllx u0,cnt,%g1
+ or %g3,%g2,%g3
+ srlx u1,tnc,%g2
+ stx %g3,[rp-8]
+ sllx u1,cnt,%g3
+ or %g1,%g2,%g1
+ srlx u2,tnc,%g2
+ stx %g1,[rp-16]
+ sllx u2,cnt,%g1
+ or %g3,%g2,%g3
+ srlx u3,tnc,%g2
+ stx %g3,[rp-24]
+ add rp,-32,rp
+ sllx u3,cnt,%g3 C carry...
+ or %g1,%g2,%g1
+ stx %g1,[rp-0]
+
+C n + 4 is the number of source limbs not yet read (0 to 3).
+.Lend1234:
+ addcc n,4,n
+ bz,pn %xcc,.Lret
+ fanop
+C One limb per pass: combine the pending high part in %g3 with the top bits of
+C the next lower limb, store, and keep the new high part in %g3.
+.Loop0:
+ add rp,-8,rp
+ subcc n,1,n
+ ldx [up-16],u3
+ add up,-8,up
+ srlx u3,tnc,%g2
+ or %g3,%g2,%g3
+ stx %g3,[rp]
+ sllx u3,cnt,%g3
+ bnz,pt %xcc,.Loop0
+ fanop
+C Store the least significant result limb and return the shifted-out bits.
+.Lret:
+ stx %g3,[rp-8]
+ mov %i5,%i0
+ ret
+ restore
+EPILOGUE(mpn_lshift)
--- /dev/null
+dnl SPARC v9 mpn_lshiftc
+
+dnl Copyright 1996, 2000, 2001, 2002, 2003, 2010 Free Software Foundation,
+dnl Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+C cycles/limb
+C UltraSPARC 1&2: ?
+C UltraSPARC 3: 2.67
+
+C INPUT PARAMETERS
+define(`rp', `%i0')
+define(`up', `%i1')
+define(`n', `%i2')
+define(`cnt',`%i3')
+
+define(`u0', `%l0')
+define(`u1', `%l2')
+define(`u2', `%l4')
+define(`u3', `%l6')
+
+define(`tnc',`%i4')
+
+define(`fanop',`fitod %f0,%f2') dnl A quasi nop running in the FA pipe
+
+C mpn_lshiftc: shift the n limbs at up left by cnt bits and store the bitwise
+C complement of the n result limbs at rp. The function result is the bits
+C shifted out of the most significant limb; it is not complemented.
+C
+C Each stored limb is built as (not hi) andn lo, i.e. the complement of
+C (hi or lo), where hi is the left-shifted part of one limb and lo the
+C right-shifted part of the next lower limb. Limbs are processed from the
+C most significant end downwards.
+C
+C tnc holds the negated count and is used as the right-shift amount.
+C NOTE(review): this presumably relies on srlx using only the low 6 bits of
+C the count, so that -cnt acts as 64-cnt, with cnt in 1..63 - confirm.
+C
+C Structure: operands of fewer than 5 limbs go straight to the one limb per
+C pass loop at .Loop0. Otherwise the unrolled loop at .Loop handles four
+C limbs per pass, .Lend5678 drains the four limbs still held in registers,
+C and .Loop0 finishes the remaining 0 to 3 limbs.
+ASM_START()
+ REGISTER(%g2,#scratch)
+ REGISTER(%g3,#scratch)
+PROLOGUE(mpn_lshiftc)
+ save %sp,-160,%sp
+
+ sllx n,3,%g1 C operand size in bytes
+ sub %g0,cnt,tnc C negate shift count
+ add up,%g1,up C make up point at end of src
+ add rp,%g1,rp C make rp point at end of res
+ ldx [up-8],u3 C load most significant limb
+ subcc n,5,n
+ srlx u3,tnc,%i5 C compute function result
+ bl,pn %xcc,.Lend1234 C fewer than 5 limbs
+ sllx u3,cnt,%g3 C delay slot, executed on both paths
+
+ subcc n,4,n
+ ldx [up-16],u0
+ ldx [up-24],u1
+ add up,-32,up
+ ldx [up-0],u2
+ ldx [up-8],u3
+ srlx u0,tnc,%g2
+ bl,pn %xcc,.Lend5678 C fewer than 9 limbs: skip unrolled loop
+ not %g3, %g3 C delay slot, executed on both paths
+
+ b,a .Loop C jump over the alignment padding
+ ALIGN(16)
+C Unrolled loop: each pass stores four complemented result limbs and loads
+C four new source limbs; n drops by 4 and the loop repeats while n stays
+C non-negative. On entry %g3 is already complemented.
+.Loop:
+ sllx u0,cnt,%g1
+ andn %g3,%g2,%g3
+ ldx [up-16],u0
+ fanop
+C --
+ srlx u1,tnc,%g2
+ subcc n,4,n
+ stx %g3,[rp-8]
+ not %g1, %g1
+C --
+ sllx u1,cnt,%g3
+ andn %g1,%g2,%g1
+ ldx [up-24],u1
+ fanop
+C --
+ srlx u2,tnc,%g2
+ stx %g1,[rp-16]
+ add up,-32,up
+ not %g3, %g3
+C --
+ sllx u2,cnt,%g1
+ andn %g3,%g2,%g3
+ ldx [up-0],u2
+ fanop
+C --
+ srlx u3,tnc,%g2
+ stx %g3,[rp-24]
+ add rp,-32,rp
+ not %g1, %g1
+C --
+ sllx u3,cnt,%g3
+ andn %g1,%g2,%g1
+ ldx [up-8],u3
+ fanop
+C --
+ srlx u0,tnc,%g2
+ stx %g1,[rp-0]
+ bge,pt %xcc,.Loop
+ not %g3, %g3
+C --
+C Drain the four limbs already loaded into u0..u3; no further loads. %g3 is
+C left uncomplemented at the end, as .Loop0 and .Lret expect.
+.Lend5678:
+ sllx u0,cnt,%g1
+ andn %g3,%g2,%g3
+ srlx u1,tnc,%g2
+ stx %g3,[rp-8]
+ not %g1, %g1
+ sllx u1,cnt,%g3
+ andn %g1,%g2,%g1
+ srlx u2,tnc,%g2
+ stx %g1,[rp-16]
+ not %g3, %g3
+ sllx u2,cnt,%g1
+ andn %g3,%g2,%g3
+ srlx u3,tnc,%g2
+ stx %g3,[rp-24]
+ add rp,-32,rp
+ not %g1, %g1
+ sllx u3,cnt,%g3 C carry...
+ andn %g1,%g2,%g1
+ stx %g1,[rp-0]
+
+C n + 4 is the number of source limbs not yet read (0 to 3).
+.Lend1234:
+ addcc n,4,n
+ bz,pn %xcc,.Lret
+ fanop
+C One limb per pass: complement the pending high part in %g3, clear the bits
+C set in the top part of the next lower limb, store, and keep the new
+C (uncomplemented) high part in %g3.
+.Loop0:
+ add rp,-8,rp
+ subcc n,1,n
+ ldx [up-16],u3
+ add up,-8,up
+ srlx u3,tnc,%g2
+ not %g3, %g3
+ andn %g3,%g2,%g3
+ stx %g3,[rp]
+ sllx u3,cnt,%g3
+ bnz,pt %xcc,.Loop0
+ fanop
+C Store the complemented least significant limb and return the shifted-out
+C bits.
+.Lret:
+ not %g3, %g3
+ stx %g3,[rp-8]
+ mov %i5,%i0
+ ret
+ restore
+EPILOGUE()
--- /dev/null
+dnl SPARC v9 64-bit mpn_mul_1 -- Multiply a limb vector with a limb and store
+dnl the result in a second limb vector.
+
+dnl Copyright 1998, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C UltraSPARC 1&2: 14
+C UltraSPARC 3: 18.5
+
+C Algorithm: We use eight floating-point multiplies per limb product, with the
+C invariant v operand split into four 16-bit pieces, and the s1 operand split
+C into 32-bit pieces. We sum pairs of 48-bit partial products using
+C floating-point add, then convert the four 49-bit product-sums and transfer
+C them to the integer unit.
+
+C Possible optimizations:
+C 1. Align the stack area where we transfer the four 49-bit product-sums
+C to a 32-byte boundary. That would minimize the cache collision.
+C (UltraSPARC-1/2 use a direct-mapped cache.) (Perhaps even better would
+C be to align the area to map to the area immediately before s1?)
+C 2. Sum the 4 49-bit quantities using 32-bit operations, as in the
+C develop mpn_addmul_2. This would save many integer instructions.
+C 3. Unrolling. Questionable if it is worth the code expansion, given that
+C it could only save 1 cycle/limb.
+C 4. Specialize for particular v values. If its upper 32 bits are zero, we
+C could save many operations, in the FPU (fmuld), but more so in the IEU
+C since we'll be summing 48-bit quantities, which might be simpler.
+C 5. Ideally, we should schedule the f2/f3 and f4/f5 RAW further apart, and
+C the i00,i16,i32,i48 RAW closer together. The distance for the latter should
+C not be greater than needed for L2 cache latency, and also not so great
+C that i16 needs to be copied.
+C 6. Avoid performing mem+fa+fm in the same cycle, at least not when we want
+C to get high IEU bandwidth. (12 of the 14 cycles will be free for 2 IEU
+C ops.)
+
+C Instruction classification (as per UltraSPARC-1/2 functional units):
+C 8 FM
+C 10 FA
+C 11 MEM
+C 9 ISHIFT + 11 IADDLOG
+C 1 BRANCH
+C 50 insns in total (plus three mov insns that should be optimized out)
+
+C The loop executes 53 instructions in 14 cycles on UltraSPARC-1/2, i.e., we
+C sustain 3.79 instructions/cycle.
+
+C INPUT PARAMETERS
+C rp i0
+C up i1
+C n i2
+C v i3
+
+ASM_START()
+ REGISTER(%g2,#scratch)
+ REGISTER(%g3,#scratch)
+
+C Symbolic register names used by mpn_mul_1 below.
+C p00..p48, r32..r80 fmuld results: products of one 32-bit half of an up[]
+C limb (u00 or u32) and one 16-bit piece of v
+C v00..v48 the four 16-bit pieces of v, converted to double
+C u00, u32 the two 32-bit halves of the current up[] limb, as doubles
+C a00..a48 product sums; converted in place with fdtox and then
+C stored to the stack transfer area
+define(`p00', `%f8') define(`p16',`%f10') define(`p32',`%f12') define(`p48',`%f14')
+define(`r32',`%f16') define(`r48',`%f18') define(`r64',`%f20') define(`r80',`%f22')
+define(`v00',`%f24') define(`v16',`%f26') define(`v32',`%f28') define(`v48',`%f30')
+define(`u00',`%f32') define(`u32', `%f34')
+define(`a00',`%f36') define(`a16',`%f38') define(`a32',`%f40') define(`a48',`%f42')
+C cy carry propagated from limb to limb; its final value is returned
+C rlimb defined here but not referenced in mpn_mul_1
+C i00..i48 integer copies of a00..a48, reloaded from the stack transfer area
+C xffffffff, xffff masks set up in the prologue; only xffff is read afterwards
+define(`cy',`%g1')
+define(`rlimb',`%g3')
+define(`i00',`%l0') define(`i16',`%l1') define(`i32',`%l2') define(`i48',`%l3')
+define(`xffffffff',`%l7')
+define(`xffff',`%o0')
+PROLOGUE(mpn_mul_1)
+
+C Initialization. (1) Split v operand into four 16-bit chunks and store them
+C as IEEE double in fp registers. (2) Clear upper 32 bits of fp register pairs
+C f2 and f4. (3) Store masks in registers aliased to `xffff' and `xffffffff'.
+C
+C The 32 bytes at [%sp+2223+0] .. [%sp+2223+31] are used throughout this
+C routine as the transfer area between the integer and fp register files.
+
+ save %sp, -256, %sp
+ mov -1, %g4
+ srlx %g4, 48, xffff C store mask in register `xffff'
+ and %i3, xffff, %g2 C v bits 0..15
+ stx %g2, [%sp+2223+0]
+ srlx %i3, 16, %g3
+ and %g3, xffff, %g3 C v bits 16..31
+ stx %g3, [%sp+2223+8]
+ srlx %i3, 32, %g2
+ and %g2, xffff, %g2 C v bits 32..47
+ stx %g2, [%sp+2223+16]
+ srlx %i3, 48, %g3 C v bits 48..63
+ stx %g3, [%sp+2223+24]
+ srlx %g4, 32, xffffffff C store mask in register `xffffffff'
+
+C Bias both pointers to the end of their vectors and count a negative byte
+C index in %i2 up towards zero.
+ sllx %i2, 3, %i2 C n limbs -> n*8 bytes
+ mov 0, cy C clear cy
+ add %i0, %i2, %i0
+ add %i1, %i2, %i1
+ neg %i2
+ add %i1, 4, %i5 C base for the second 32-bit word of each limb
+ add %i0, -32, %i4 C base for the result stores, [%i4+%i2]
+ add %i0, -16, %i0 C %i0 is not referenced again in this routine
+
+ ldd [%sp+2223+0], v00
+ ldd [%sp+2223+8], v16
+ ldd [%sp+2223+16], v32
+ ldd [%sp+2223+24], v48
+C The word at [%sp+2223+0] is the upper half of the stored 16-bit chunk, i.e.
+C zero, so these two loads clear f2 and f4.
+ ld [%sp+2223+0],%f2 C zero f2
+ ld [%sp+2223+0],%f4 C zero f4
+ ld [%i5+%i2], %f3 C read low 32 bits of up[i]
+ ld [%i1+%i2], %f5 C read high 32 bits of up[i]
+ fxtod v00, v00
+ fxtod v16, v16
+ fxtod v32, v32
+ fxtod v48, v48
+
+C Start real work. (We sneakingly read f3 and f5 above...)
+C The software pipeline is very deep, requiring 4 feed-in stages.
+C Feed-in stage 1: begin the products for the first limb. In this stage the
+C u00*v00 and u00*v16 products go directly into a00 and a16.
+
+ fxtod %f2, u00
+ fxtod %f4, u32
+ fmuld u00, v00, a00
+ fmuld u00, v16, a16
+ fmuld u00, v32, p32
+ fmuld u32, v00, r32
+ fmuld u00, v48, p48
+ addcc %i2, 8, %i2 C index reaches zero here iff n = 1
+ bnz,pt %xcc, .L_two_or_more
+ fmuld u32, v16, r48 C delay slot, executed on both paths
+
+C n = 1: there are no further limbs to load. Finish the products of the only
+C limb, pass them through the stack transfer area and perform the first
+C integer summation step, then join the common wind-down at .L_out_1.
+.L_one:
+ fmuld u32, v32, r64 C FIXME not urgent
+ faddd p32, r32, a32
+ fdtox a00, a00
+ faddd p48, r48, a48
+ fmuld u32, v48, r80 C FIXME not urgent
+ fdtox a16, a16
+ fdtox a32, a32
+ fdtox a48, a48
+ std a00, [%sp+2223+0]
+ std a16, [%sp+2223+8]
+ std a32, [%sp+2223+16]
+ std a48, [%sp+2223+24]
+ add %i2, 8, %i2
+
+C r64 and r80 have no p00/p16 partner here; convert and store them as they are.
+ fdtox r64, a00
+ fdtox r80, a16
+ ldx [%sp+2223+0], i00
+ ldx [%sp+2223+8], i16
+ ldx [%sp+2223+16], i32
+ ldx [%sp+2223+24], i48
+ std a00, [%sp+2223+0]
+ std a16, [%sp+2223+8]
+ add %i2, 8, %i2
+
+ mov i00, %g5 C i00+ now in g5
+ ldx [%sp+2223+0], i00
+ srlx i16, 48, %l4 C (i16 >> 48)
+ mov i16, %g2
+ ldx [%sp+2223+8], i16
+ srlx i48, 16, %l5 C (i48 >> 16)
+ mov i32, %g4 C i32+ now in g4
+ sllx i48, 32, %l6 C (i48 << 32)
+ srlx %g4, 32, %o3 C (i32 >> 32)
+ add %l5, %l4, %o1 C hi64- in %o1
+ std a00, [%sp+2223+0]
+ sllx %g4, 16, %o2 C (i32 << 16)
+ add %o3, %o1, %o1 C hi64 in %o1 1st ASSIGNMENT
+ std a16, [%sp+2223+8]
+ sllx %o1, 48, %o3 C (hi64 << 48)
+ add %g2, %o2, %o2 C mi64- in %o2
+ add %l6, %o2, %o2 C mi64- in %o2
+ sub %o2, %o3, %o2 C mi64 in %o2 1st ASSIGNMENT
+ add cy, %g5, %o4 C x = prev(i00) + cy
+ b .L_out_1
+ add %i2, 8, %i2 C delay slot
+
+C Feed-in stage 2 (n >= 2): load the second limb and start its products while
+C the first limb's sums are converted and stored to the transfer area.
+.L_two_or_more:
+ ld [%i5+%i2], %f3 C read low 32 bits of up[i]
+ fmuld u32, v32, r64 C FIXME not urgent
+ faddd p32, r32, a32
+ ld [%i1+%i2], %f5 C read high 32 bits of up[i]
+ fdtox a00, a00
+ faddd p48, r48, a48
+ fmuld u32, v48, r80 C FIXME not urgent
+ fdtox a16, a16
+ fdtox a32, a32
+ fxtod %f2, u00
+ fxtod %f4, u32
+ fdtox a48, a48
+ std a00, [%sp+2223+0]
+ fmuld u00, v00, p00
+ std a16, [%sp+2223+8]
+ fmuld u00, v16, p16
+ std a32, [%sp+2223+16]
+ fmuld u00, v32, p32
+ std a48, [%sp+2223+24]
+ faddd p00, r64, a00 C previous limb's r64 joins this limb's p00
+ fmuld u32, v00, r32
+ faddd p16, r80, a16 C previous limb's r80 joins this limb's p16
+ fmuld u00, v48, p48
+ addcc %i2, 8, %i2 C index reaches zero here iff n = 2
+ bnz,pt %xcc, .L_three_or_more
+ fmuld u32, v16, r48 C delay slot, executed on both paths
+
+C n = 2: no further limbs to load. Drain the fp pipeline for the second limb,
+C perform the first integer summation step, and join the wind-down at .L_out_2.
+.L_two:
+ fmuld u32, v32, r64 C FIXME not urgent
+ faddd p32, r32, a32
+ fdtox a00, a00
+ faddd p48, r48, a48
+ fmuld u32, v48, r80 C FIXME not urgent
+ fdtox a16, a16
+ ldx [%sp+2223+0], i00
+ fdtox a32, a32
+ ldx [%sp+2223+8], i16
+ ldx [%sp+2223+16], i32
+ ldx [%sp+2223+24], i48
+ fdtox a48, a48
+ std a00, [%sp+2223+0]
+ std a16, [%sp+2223+8]
+ std a32, [%sp+2223+16]
+ std a48, [%sp+2223+24]
+ add %i2, 8, %i2
+
+ fdtox r64, a00
+ mov i00, %g5 C i00+ now in g5
+ fdtox r80, a16
+ ldx [%sp+2223+0], i00
+ srlx i16, 48, %l4 C (i16 >> 48)
+ mov i16, %g2
+ ldx [%sp+2223+8], i16
+ srlx i48, 16, %l5 C (i48 >> 16)
+ mov i32, %g4 C i32+ now in g4
+ ldx [%sp+2223+16], i32
+ sllx i48, 32, %l6 C (i48 << 32)
+ ldx [%sp+2223+24], i48
+ srlx %g4, 32, %o3 C (i32 >> 32)
+ add %l5, %l4, %o1 C hi64- in %o1
+ std a00, [%sp+2223+0]
+ sllx %g4, 16, %o2 C (i32 << 16)
+ add %o3, %o1, %o1 C hi64 in %o1 1st ASSIGNMENT
+ std a16, [%sp+2223+8]
+ sllx %o1, 48, %o3 C (hi64 << 48)
+ add %g2, %o2, %o2 C mi64- in %o2
+ add %l6, %o2, %o2 C mi64- in %o2
+ sub %o2, %o3, %o2 C mi64 in %o2 1st ASSIGNMENT
+ add cy, %g5, %o4 C x = prev(i00) + cy
+ b .L_out_2
+ add %i2, 8, %i2 C delay slot
+
+C Feed-in stage 3 (n >= 3): load the third limb and start its products; the
+C sums stored in stage 2 are reloaded into i00..i48 before the transfer area
+C is overwritten with the next set.
+.L_three_or_more:
+ ld [%i5+%i2], %f3 C read low 32 bits of up[i]
+ fmuld u32, v32, r64 C FIXME not urgent
+ faddd p32, r32, a32
+ ld [%i1+%i2], %f5 C read high 32 bits of up[i]
+ fdtox a00, a00
+ faddd p48, r48, a48
+ fmuld u32, v48, r80 C FIXME not urgent
+ fdtox a16, a16
+ ldx [%sp+2223+0], i00
+ fdtox a32, a32
+ ldx [%sp+2223+8], i16
+ fxtod %f2, u00
+ ldx [%sp+2223+16], i32
+ fxtod %f4, u32
+ ldx [%sp+2223+24], i48
+ fdtox a48, a48
+ std a00, [%sp+2223+0]
+ fmuld u00, v00, p00
+ std a16, [%sp+2223+8]
+ fmuld u00, v16, p16
+ std a32, [%sp+2223+16]
+ fmuld u00, v32, p32
+ std a48, [%sp+2223+24]
+ faddd p00, r64, a00
+ fmuld u32, v00, r32
+ faddd p16, r80, a16
+ fmuld u00, v48, p48
+ addcc %i2, 8, %i2 C index reaches zero here iff n = 3
+ bnz,pt %xcc, .L_four_or_more
+ fmuld u32, v16, r48 C delay slot, executed on both paths
+
+C n = 3: no further limbs to load. Drain the fp pipeline for the third limb,
+C perform the first integer summation step, and join the wind-down at .L_out_3.
+.L_three:
+ fmuld u32, v32, r64 C FIXME not urgent
+ faddd p32, r32, a32
+ fdtox a00, a00
+ faddd p48, r48, a48
+ mov i00, %g5 C i00+ now in g5
+ fmuld u32, v48, r80 C FIXME not urgent
+ fdtox a16, a16
+ ldx [%sp+2223+0], i00
+ fdtox a32, a32
+ srlx i16, 48, %l4 C (i16 >> 48)
+ mov i16, %g2
+ ldx [%sp+2223+8], i16
+ srlx i48, 16, %l5 C (i48 >> 16)
+ mov i32, %g4 C i32+ now in g4
+ ldx [%sp+2223+16], i32
+ sllx i48, 32, %l6 C (i48 << 32)
+ ldx [%sp+2223+24], i48
+ fdtox a48, a48
+ srlx %g4, 32, %o3 C (i32 >> 32)
+ add %l5, %l4, %o1 C hi64- in %o1
+ std a00, [%sp+2223+0]
+ sllx %g4, 16, %o2 C (i32 << 16)
+ add %o3, %o1, %o1 C hi64 in %o1 1st ASSIGNMENT
+ std a16, [%sp+2223+8]
+ sllx %o1, 48, %o3 C (hi64 << 48)
+ add %g2, %o2, %o2 C mi64- in %o2
+ std a32, [%sp+2223+16]
+ add %l6, %o2, %o2 C mi64- in %o2
+ std a48, [%sp+2223+24]
+ sub %o2, %o3, %o2 C mi64 in %o2 1st ASSIGNMENT
+ add cy, %g5, %o4 C x = prev(i00) + cy
+ b .L_out_3
+ add %i2, 8, %i2 C delay slot
+
+C Feed-in stage 4 (n >= 4): load the fourth limb and start its products, and
+C perform the first integer summation step for the first limb. This is the
+C main loop body without its result store and carry update.
+.L_four_or_more:
+ ld [%i5+%i2], %f3 C read low 32 bits of up[i]
+ fmuld u32, v32, r64 C FIXME not urgent
+ faddd p32, r32, a32
+ ld [%i1+%i2], %f5 C read high 32 bits of up[i]
+ fdtox a00, a00
+ faddd p48, r48, a48
+ mov i00, %g5 C i00+ now in g5
+ fmuld u32, v48, r80 C FIXME not urgent
+ fdtox a16, a16
+ ldx [%sp+2223+0], i00
+ fdtox a32, a32
+ srlx i16, 48, %l4 C (i16 >> 48)
+ mov i16, %g2
+ ldx [%sp+2223+8], i16
+ fxtod %f2, u00
+ srlx i48, 16, %l5 C (i48 >> 16)
+ mov i32, %g4 C i32+ now in g4
+ ldx [%sp+2223+16], i32
+ fxtod %f4, u32
+ sllx i48, 32, %l6 C (i48 << 32)
+ ldx [%sp+2223+24], i48
+ fdtox a48, a48
+ srlx %g4, 32, %o3 C (i32 >> 32)
+ add %l5, %l4, %o1 C hi64- in %o1
+ std a00, [%sp+2223+0]
+ fmuld u00, v00, p00
+ sllx %g4, 16, %o2 C (i32 << 16)
+ add %o3, %o1, %o1 C hi64 in %o1 1st ASSIGNMENT
+ std a16, [%sp+2223+8]
+ fmuld u00, v16, p16
+ sllx %o1, 48, %o3 C (hi64 << 48)
+ add %g2, %o2, %o2 C mi64- in %o2
+ std a32, [%sp+2223+16]
+ fmuld u00, v32, p32
+ add %l6, %o2, %o2 C mi64- in %o2
+ std a48, [%sp+2223+24]
+ faddd p00, r64, a00
+ fmuld u32, v00, r32
+ sub %o2, %o3, %o2 C mi64 in %o2 1st ASSIGNMENT
+ faddd p16, r80, a16
+ fmuld u00, v48, p48
+ add cy, %g5, %o4 C x = prev(i00) + cy
+ addcc %i2, 8, %i2 C index reaches zero here iff n = 4
+ bnz,pt %xcc, .Loop
+ fmuld u32, v16, r48 C delay slot, executed on both paths
+
+C n = 4: skip the main loop and go straight to the wind-down. The annulled
+C branch (b,a) has no delay-slot instruction.
+.L_four:
+ b,a .L_out_4
+
+C BEGIN MAIN LOOP
+C Entered only when n >= 5. Each iteration loads one new up[] limb and starts
+C its eight fmuld products, moves the product sums of earlier limbs through
+C the stack transfer area, and stores one finished result limb to [%i4+%i2].
+C The stored limb is (mi64 << 16) | (x & 0xffff); the new carry is
+C (mi64 >> 48) + hi64. The "C nn" markers number the 14 cycles of the loop.
+ .align 16
+.Loop:
+C 00
+ srlx %o4, 16, %o5 C (x >> 16)
+ ld [%i5+%i2], %f3 C read low 32 bits of up[i]
+ fmuld u32, v32, r64 C FIXME not urgent
+ faddd p32, r32, a32
+C 01
+ add %o5, %o2, %o2 C mi64 in %o2 2nd ASSIGNMENT
+ and %o4, xffff, %o5 C (x & 0xffff)
+ ld [%i1+%i2], %f5 C read high 32 bits of up[i]
+ fdtox a00, a00
+C 02
+ faddd p48, r48, a48
+C 03
+ srlx %o2, 48, %o7 C (mi64 >> 48)
+ mov i00, %g5 C i00+ now in g5
+ fmuld u32, v48, r80 C FIXME not urgent
+ fdtox a16, a16
+C 04
+ sllx %o2, 16, %i3 C (mi64 << 16)
+ add %o7, %o1, cy C new cy
+ ldx [%sp+2223+0], i00
+ fdtox a32, a32
+C 05
+ srlx i16, 48, %l4 C (i16 >> 48)
+ mov i16, %g2
+ ldx [%sp+2223+8], i16
+ fxtod %f2, u00
+C 06
+ srlx i48, 16, %l5 C (i48 >> 16)
+ mov i32, %g4 C i32+ now in g4
+ ldx [%sp+2223+16], i32
+ fxtod %f4, u32
+C 07
+ sllx i48, 32, %l6 C (i48 << 32)
+ or %i3, %o5, %o5 C finished result limb
+ ldx [%sp+2223+24], i48
+ fdtox a48, a48
+C 08
+ srlx %g4, 32, %o3 C (i32 >> 32)
+ add %l5, %l4, %o1 C hi64- in %o1
+ std a00, [%sp+2223+0]
+ fmuld u00, v00, p00
+C 09
+ sllx %g4, 16, %o2 C (i32 << 16)
+ add %o3, %o1, %o1 C hi64 in %o1 1st ASSIGNMENT
+ std a16, [%sp+2223+8]
+ fmuld u00, v16, p16
+C 10
+ sllx %o1, 48, %o3 C (hi64 << 48)
+ add %g2, %o2, %o2 C mi64- in %o2
+ std a32, [%sp+2223+16]
+ fmuld u00, v32, p32
+C 11
+ add %l6, %o2, %o2 C mi64- in %o2
+ std a48, [%sp+2223+24]
+ faddd p00, r64, a00
+ fmuld u32, v00, r32
+C 12
+ sub %o2, %o3, %o2 C mi64 in %o2 1st ASSIGNMENT
+ stx %o5, [%i4+%i2] C store the finished result limb
+ faddd p16, r80, a16
+ fmuld u00, v48, p48
+C 13
+ add cy, %g5, %o4 C x = prev(i00) + cy
+ addcc %i2, 8, %i2 C advance index; zero after the last limb load
+ bnz,pt %xcc, .Loop
+ fmuld u32, v16, r48 C delay slot, executed on both paths
+C END MAIN LOOP
+
+C Wind-down. The four sections below fall through into each other; each one
+C stores one result limb to [%i4+%i2] and advances %i2 by 8. The short-vector
+C paths (.L_one, .L_two, .L_three) enter at .L_out_1, .L_out_2 and .L_out_3.
+C Each later section has less pipeline work left than the one before it.
+.L_out_4:
+ srlx %o4, 16, %o5 C (x >> 16)
+ fmuld u32, v32, r64 C FIXME not urgent
+ faddd p32, r32, a32
+ add %o5, %o2, %o2 C mi64 in %o2 2nd ASSIGNMENT
+ and %o4, xffff, %o5 C (x & 0xffff)
+ fdtox a00, a00
+ faddd p48, r48, a48
+ srlx %o2, 48, %o7 C (mi64 >> 48)
+ mov i00, %g5 C i00+ now in g5
+ fmuld u32, v48, r80 C FIXME not urgent
+ fdtox a16, a16
+ sllx %o2, 16, %i3 C (mi64 << 16)
+ add %o7, %o1, cy C new cy
+ ldx [%sp+2223+0], i00
+ fdtox a32, a32
+ srlx i16, 48, %l4 C (i16 >> 48)
+ mov i16, %g2
+ ldx [%sp+2223+8], i16
+ srlx i48, 16, %l5 C (i48 >> 16)
+ mov i32, %g4 C i32+ now in g4
+ ldx [%sp+2223+16], i32
+ sllx i48, 32, %l6 C (i48 << 32)
+ or %i3, %o5, %o5
+ ldx [%sp+2223+24], i48
+ fdtox a48, a48
+ srlx %g4, 32, %o3 C (i32 >> 32)
+ add %l5, %l4, %o1 C hi64- in %o1
+ std a00, [%sp+2223+0]
+ sllx %g4, 16, %o2 C (i32 << 16)
+ add %o3, %o1, %o1 C hi64 in %o1 1st ASSIGNMENT
+ std a16, [%sp+2223+8]
+ sllx %o1, 48, %o3 C (hi64 << 48)
+ add %g2, %o2, %o2 C mi64- in %o2
+ std a32, [%sp+2223+16]
+ add %l6, %o2, %o2 C mi64- in %o2
+ std a48, [%sp+2223+24]
+ sub %o2, %o3, %o2 C mi64 in %o2 1st ASSIGNMENT
+ stx %o5, [%i4+%i2]
+ add cy, %g5, %o4 C x = prev(i00) + cy
+ add %i2, 8, %i2
+C Only the r64 and r80 products of the last limb are still in fp registers.
+.L_out_3:
+ srlx %o4, 16, %o5 C (x >> 16)
+ add %o5, %o2, %o2 C mi64 in %o2 2nd ASSIGNMENT
+ and %o4, xffff, %o5 C (x & 0xffff)
+ fdtox r64, a00
+ srlx %o2, 48, %o7 C (mi64 >> 48)
+ mov i00, %g5 C i00+ now in g5
+ fdtox r80, a16
+ sllx %o2, 16, %i3 C (mi64 << 16)
+ add %o7, %o1, cy C new cy
+ ldx [%sp+2223+0], i00
+ srlx i16, 48, %l4 C (i16 >> 48)
+ mov i16, %g2
+ ldx [%sp+2223+8], i16
+ srlx i48, 16, %l5 C (i48 >> 16)
+ mov i32, %g4 C i32+ now in g4
+ ldx [%sp+2223+16], i32
+ sllx i48, 32, %l6 C (i48 << 32)
+ or %i3, %o5, %o5
+ ldx [%sp+2223+24], i48
+ srlx %g4, 32, %o3 C (i32 >> 32)
+ add %l5, %l4, %o1 C hi64- in %o1
+ std a00, [%sp+2223+0]
+ sllx %g4, 16, %o2 C (i32 << 16)
+ add %o3, %o1, %o1 C hi64 in %o1 1st ASSIGNMENT
+ std a16, [%sp+2223+8]
+ sllx %o1, 48, %o3 C (hi64 << 48)
+ add %g2, %o2, %o2 C mi64- in %o2
+ add %l6, %o2, %o2 C mi64- in %o2
+ sub %o2, %o3, %o2 C mi64 in %o2 1st ASSIGNMENT
+ stx %o5, [%i4+%i2]
+ add cy, %g5, %o4 C x = prev(i00) + cy
+ add %i2, 8, %i2
+C No fp work remains; only integer summation and reloads of i00 and i16.
+.L_out_2:
+ srlx %o4, 16, %o5 C (x >> 16)
+ add %o5, %o2, %o2 C mi64 in %o2 2nd ASSIGNMENT
+ and %o4, xffff, %o5 C (x & 0xffff)
+ srlx %o2, 48, %o7 C (mi64 >> 48)
+ mov i00, %g5 C i00+ now in g5
+ sllx %o2, 16, %i3 C (mi64 << 16)
+ add %o7, %o1, cy C new cy
+ ldx [%sp+2223+0], i00
+ srlx i16, 48, %l4 C (i16 >> 48)
+ mov i16, %g2
+ ldx [%sp+2223+8], i16
+ srlx i48, 16, %l5 C (i48 >> 16)
+ mov i32, %g4 C i32+ now in g4
+ sllx i48, 32, %l6 C (i48 << 32)
+ or %i3, %o5, %o5
+ srlx %g4, 32, %o3 C (i32 >> 32)
+ add %l5, %l4, %o1 C hi64- in %o1
+ sllx %g4, 16, %o2 C (i32 << 16)
+ add %o3, %o1, %o1 C hi64 in %o1 1st ASSIGNMENT
+ sllx %o1, 48, %o3 C (hi64 << 48)
+ add %g2, %o2, %o2 C mi64- in %o2
+ add %l6, %o2, %o2 C mi64- in %o2
+ sub %o2, %o3, %o2 C mi64 in %o2 1st ASSIGNMENT
+ stx %o5, [%i4+%i2]
+ add cy, %g5, %o4 C x = prev(i00) + cy
+ add %i2, 8, %i2
+C Store the last result limb.
+.L_out_1:
+ srlx %o4, 16, %o5 C (x >> 16)
+ add %o5, %o2, %o2 C mi64 in %o2 2nd ASSIGNMENT
+ and %o4, xffff, %o5 C (x & 0xffff)
+ srlx %o2, 48, %o7 C (mi64 >> 48)
+ sllx %o2, 16, %i3 C (mi64 << 16)
+ add %o7, %o1, cy C new cy
+ or %i3, %o5, %o5
+ stx %o5, [%i4+%i2]
+
+C Function result: cy + i00 + (i16 << 16), where i00 and i16 hold the last
+C two values reloaded from the transfer area.
+ sllx i00, 0, %g2
+ add %g2, cy, cy
+ sllx i16, 16, %g3
+ add %g3, cy, cy
+
+C cy is a global register (%g1), so it is still readable in the delay slot of
+C `return', which executes after the register window has been restored; %o0
+C there is the caller's result register.
+ return %i7+8
+ mov cy, %o0
+EPILOGUE(mpn_mul_1)
--- /dev/null
+dnl SPARC v9 mpn_rshift
+
+dnl Copyright 1996, 2000, 2001, 2002, 2003, 2010 Free Software Foundation,
+dnl Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+C cycles/limb
+C UltraSPARC 1&2: 2
+C UltraSPARC 3: 2.5 (for some up/rp alignments)
+
+C INPUT PARAMETERS
+C rp destination limb vector
+C up source limb vector
+C n number of limbs
+C cnt shift count in bits
+define(`rp', `%i0')
+define(`up', `%i1')
+define(`n', `%i2')
+define(`cnt',`%i3')
+
+C u0..u3 hold the four source limbs in flight in the unrolled loop.
+define(`u0', `%l0')
+define(`u1', `%l2')
+define(`u2', `%l4')
+define(`u3', `%l6')
+
+C tnc = -cnt. The 64-bit shift instructions use only the low 6 bits of the
+C count, so a shift by tnc acts as a shift by 64-cnt.
+define(`tnc',`%i4')
+
+define(`fanop',`fitod %f0,%f2') dnl A quasi nop running in the FA pipe
+
+ASM_START()
+ REGISTER(%g2,#scratch)
+ REGISTER(%g3,#scratch)
+C mpn_rshift: store {up,n} shifted right by cnt bits at {rp,n}. The value
+C returned in %i0 is up[0] << tnc, i.e. the bits shifted out of the lowest limb.
+PROLOGUE(mpn_rshift)
+ save %sp,-160,%sp
+
+ sub %g0,cnt,tnc C negate shift count
+ ldx [up],u3 C load first limb
+ subcc n,5,n
+ sllx u3,tnc,%i5 C compute function result
+ bl,pn %xcc,.Lend1234 C fewer than 5 limbs: simple loop only
+ srlx u3,cnt,%g3 C delay slot: low part of rp[0]
+
+C Preload four more limbs for the 4-way unrolled loop.
+ subcc n,4,n
+ ldx [up+8],u0
+ ldx [up+16],u1
+ add up,32,up
+ ldx [up-8],u2
+ ldx [up+0],u3
+
+ bl,pn %xcc,.Lend5678 C fewer than 9 limbs: skip the main loop
+ sllx u0,tnc,%g2
+
+ b,a .Loop
+ ALIGN(16)
+C Main loop: four limbs per iteration, eight groups of four instructions.
+C Each result limb is (up[i] >> cnt) | (up[i+1] << tnc); %g1 and %g3
+C alternate as the register holding the low part.
+.Loop:
+ srlx u0,cnt,%g1
+ or %g3,%g2,%g3
+ ldx [up+8],u0
+ fanop
+C --
+ sllx u1,tnc,%g2
+ subcc n,4,n
+ stx %g3,[rp+0]
+ fanop
+C --
+ srlx u1,cnt,%g3
+ or %g1,%g2,%g1
+ ldx [up+16],u1
+ fanop
+C --
+ sllx u2,tnc,%g2
+ stx %g1,[rp+8]
+ add up,32,up
+ fanop
+C --
+ srlx u2,cnt,%g1
+ or %g3,%g2,%g3
+ ldx [up-8],u2
+ fanop
+C --
+ sllx u3,tnc,%g2
+ stx %g3,[rp+16]
+ add rp,32,rp
+ fanop
+C --
+ srlx u3,cnt,%g3
+ or %g1,%g2,%g1
+ ldx [up+0],u3
+ fanop
+C --
+ sllx u0,tnc,%g2
+ stx %g1,[rp-8]
+ bge,pt %xcc,.Loop C condition codes come from the subcc above
+ fanop
+C --
+C Finish the four limbs already loaded, without loading any more.
+.Lend5678:
+ srlx u0,cnt,%g1
+ or %g3,%g2,%g3
+ sllx u1,tnc,%g2
+ stx %g3,[rp+0]
+ srlx u1,cnt,%g3
+ or %g1,%g2,%g1
+ sllx u2,tnc,%g2
+ stx %g1,[rp+8]
+ srlx u2,cnt,%g1
+ or %g3,%g2,%g3
+ sllx u3,tnc,%g2
+ stx %g3,[rp+16]
+ add rp,32,rp
+ srlx u3,cnt,%g3 C carry...
+ or %g1,%g2,%g1
+ stx %g1,[rp-8]
+
+C After the addcc, n is the number of source limbs not yet loaded (0..3);
+C %g3 holds the low part of the next result limb.
+.Lend1234:
+ addcc n,4,n
+ bz,pn %xcc,.Lret
+ fanop
+C One limb per iteration.
+.Loop0:
+ add rp,8,rp
+ subcc n,1,n
+ ldx [up+8],u3
+ add up,8,up
+ sllx u3,tnc,%g2
+ or %g3,%g2,%g3
+ stx %g3,[rp-8]
+ srlx u3,cnt,%g3
+ bnz,pt %xcc,.Loop0
+ fanop
+C The most significant result limb has nothing shifted in from above.
+.Lret:
+ stx %g3,[rp+0]
+ mov %i5,%i0
+ ret
+ restore
+EPILOGUE(mpn_rshift)
--- /dev/null
+dnl SPARC v9 64-bit mpn_sqr_diagonal.
+
+dnl Copyright 2001, 2002 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C UltraSPARC 1&2: 22
+C UltraSPARC 3: 36
+
+C This was generated by the Sun C compiler. It runs at 22 cycles/limb on the
+C UltraSPARC-1/2, three cycles slower than theoretically possible for optimal
+C code using the same algorithm. For 1-3 limbs, a special loop was generated,
+C which causes performance problems in particular for 2 and 3 limbs.
+C Ultimately, this should be replaced by hand-written code in the same software
+C pipeline style as e.g., addmul_1.asm.
+
+ASM_START()
+ REGISTER(%g2,#scratch)
+ REGISTER(%g3,#scratch)
+C mpn_sqr_diagonal: arguments arrive in %i0 (destination), %i1 (source) and
+C %i2 (limb count). For every source limb, two limbs are stored at the
+C destination: the low and high halves of its square.
+C
+C Each source limb x is split as x = lo + mid*2^21 + hi*2^42 (21, 21 and 22
+C bits). The pieces are converted to double, multiplied in the FPU, and the
+C products are recombined in the integer unit:
+C low limb = lo*lo + ((mid*lo) << 22) + (s << 42)
+C high limb = hi*mid + ((hi*hi) << 20) + (s >> 22) + carry from the low limb
+C where s = mid*mid + 2*hi*lo.
+C
+C Stack slots: [%sp+2263] lo, [%sp+2271] mid and [%sp+2279] hi hold the
+C pieces; [%sp+2223] .. [%sp+2255] hold the converted products.
+PROLOGUE(mpn_sqr_diagonal)
+ save %sp, -240, %sp
+
+ sethi %hi(0x1ffc00), %o0
+ sethi %hi(0x3ffc00), %o1
+ add %o0, 1023, %o7 C %o7 = 0x1fffff, 21-bit mask
+ cmp %i2, 4
+ add %o1, 1023, %o4 C %o4 = 0x3fffff, 22-bit mask
+ or %g0, %i1, %g1 C %g1 = source pointer
+ or %g0, %i0, %o0 C %o0 = destination pointer
+ bl,pn %xcc, .Lsmall C fewer than 4 limbs: use the simple loop
+ or %g0, 0, %g2 C delay slot: limb counter = 0
+
+C Feed-in for the pipelined loop: the first three source limbs are loaded
+C here and their pieces and products are started.
+ ldx [%i1], %o1
+ add %i1, 24, %g1 C %g1 -> fourth source limb
+ or %g0, 3, %g2 C three limbs loaded so far
+ srlx %o1, 42, %g3
+ stx %g3, [%sp+2279]
+ and %o1, %o7, %o2
+ stx %o2, [%sp+2263]
+ srlx %o1, 21, %o1
+ ldd [%sp+2279], %f0
+ and %o1, %o7, %o1
+ stx %o1, [%sp+2271]
+ ldx [%i1+8], %o2
+ fxtod %f0, %f12
+ srlx %o2, 21, %o1
+ and %o2, %o7, %g3
+ ldd [%sp+2263], %f2
+ fmuld %f12, %f12, %f10
+ srlx %o2, 42, %o2
+ ldd [%sp+2271], %f0
+ and %o1, %o7, %o1
+ fxtod %f2, %f8
+ stx %o2, [%sp+2279]
+ stx %o1, [%sp+2271]
+ fxtod %f0, %f0
+ stx %g3, [%sp+2263]
+ fdtox %f10, %f14
+ fmuld %f12, %f8, %f6
+ ldx [%i1+16], %o2
+ std %f14, [%sp+2255]
+ fmuld %f0, %f0, %f2
+ fmuld %f8, %f8, %f10
+ srlx %o2, 42, %o1
+ faddd %f6, %f6, %f6
+ fmuld %f12, %f0, %f12
+ fmuld %f0, %f8, %f8
+ ldd [%sp+2279], %f0
+ ldd [%sp+2263], %f4
+ fdtox %f10, %f10
+ std %f10, [%sp+2239]
+ faddd %f2, %f6, %f6
+ ldd [%sp+2271], %f2
+ fdtox %f12, %f12
+ std %f12, [%sp+2247]
+ fdtox %f8, %f8
+ std %f8, [%sp+2231]
+ fdtox %f6, %f6
+ std %f6, [%sp+2223]
+
+
+C Pipelined main loop. Each pass loads the next source limb from [%g1-8],
+C starts the products for the limb whose pieces are on the stack, and stores
+C the two result limbs of an earlier source limb at [%o0-16] and [%o0-8].
+C The cmp near the top sets the condition codes tested by the bl at the
+C bottom; the loop runs while the count of loaded limbs (%g2) is below %i2.
+.Loop: srlx %o2, 21, %g3
+ stx %o1, [%sp+2279]
+ add %g2, 1, %g2
+ and %g3, %o7, %o1
+ ldx [%sp+2255], %g4
+ cmp %g2, %i2
+ stx %o1, [%sp+2271]
+ add %g1, 8, %g1
+ add %o0, 16, %o0
+ ldx [%sp+2239], %o1
+ fxtod %f0, %f10
+ fxtod %f4, %f14
+ ldx [%sp+2231], %i0
+ ldx [%sp+2223], %g5
+ ldx [%sp+2247], %g3
+ and %o2, %o7, %o2
+ fxtod %f2, %f8
+ fmuld %f10, %f10, %f0
+ stx %o2, [%sp+2263]
+ fmuld %f10, %f14, %f6
+ ldx [%g1-8], %o2
+ fmuld %f10, %f8, %f12
+ fdtox %f0, %f2
+ ldd [%sp+2279], %f0
+ fmuld %f8, %f8, %f4
+ faddd %f6, %f6, %f6
+ fmuld %f14, %f14, %f10
+ std %f2, [%sp+2255]
+ sllx %g4, 20, %g4
+ ldd [%sp+2271], %f2
+ fmuld %f8, %f14, %f8
+ sllx %i0, 22, %i1
+ fdtox %f12, %f12
+ std %f12, [%sp+2247]
+ sllx %g5, 42, %i0
+ add %o1, %i1, %o1
+ faddd %f4, %f6, %f6
+ ldd [%sp+2263], %f4
+ add %o1, %i0, %o1 C low result limb
+ add %g3, %g4, %g3
+ fdtox %f10, %f10
+ std %f10, [%sp+2239]
+ srlx %o1, 42, %g4
+ and %g5, %o4, %i0
+ fdtox %f8, %f8
+ std %f8, [%sp+2231]
+ srlx %g5, 22, %g5
+ sub %g4, %i0, %g4
+ fdtox %f6, %f6
+ std %f6, [%sp+2223]
+ srlx %g4, 63, %g4 C carry out of the low limb (sign of the sub)
+ add %g3, %g5, %g3
+ add %g3, %g4, %g3 C high result limb
+ stx %o1, [%o0-16]
+ srlx %o2, 42, %o1
+ bl,pt %xcc, .Loop
+ stx %g3, [%o0-8] C delay slot
+
+C Wind-down: all source limbs have been loaded. Finish the three limbs still
+C in the pipeline; their six result limbs are stored at [%o0-48] .. [%o0-8]
+C after %o0 has been advanced by 48.
+ stx %o1, [%sp+2279]
+ srlx %o2, 21, %o1
+ fxtod %f0, %f16
+ ldx [%sp+2223], %g3
+ fxtod %f4, %f6
+ and %o2, %o7, %o3
+ stx %o3, [%sp+2263]
+ fxtod %f2, %f4
+ and %o1, %o7, %o1
+ ldx [%sp+2231], %o2
+ sllx %g3, 42, %g4
+ fmuld %f16, %f16, %f14
+ stx %o1, [%sp+2271]
+ fmuld %f16, %f6, %f8
+ add %o0, 48, %o0
+ ldx [%sp+2239], %o1
+ sllx %o2, 22, %o2
+ fmuld %f4, %f4, %f10
+ ldx [%sp+2255], %o3
+ fdtox %f14, %f14
+ fmuld %f4, %f6, %f2
+ std %f14, [%sp+2255]
+ faddd %f8, %f8, %f12
+ add %o1, %o2, %o2
+ fmuld %f16, %f4, %f4
+ ldd [%sp+2279], %f0
+ sllx %o3, 20, %g5
+ add %o2, %g4, %o2
+ fmuld %f6, %f6, %f6
+ srlx %o2, 42, %o3
+ and %g3, %o4, %g4
+ srlx %g3, 22, %g3
+ faddd %f10, %f12, %f16
+ ldd [%sp+2271], %f12
+ ldd [%sp+2263], %f8
+ fxtod %f0, %f0
+ sub %o3, %g4, %o3
+ ldx [%sp+2247], %o1
+ srlx %o3, 63, %o3
+ fdtox %f2, %f10
+ fxtod %f8, %f8
+ std %f10, [%sp+2231]
+ fdtox %f6, %f6
+ std %f6, [%sp+2239]
+ add %o1, %g5, %o1
+ fmuld %f0, %f0, %f2
+ fdtox %f16, %f16
+ std %f16, [%sp+2223]
+ add %o1, %g3, %o1
+ fdtox %f4, %f4
+ std %f4, [%sp+2247]
+ fmuld %f0, %f8, %f10
+ fxtod %f12, %f12
+ add %o1, %o3, %o1
+ stx %o2, [%o0-48]
+ fmuld %f8, %f8, %f6
+ stx %o1, [%o0-40]
+ fdtox %f2, %f2
+ ldx [%sp+2231], %o2
+ faddd %f10, %f10, %f10
+ ldx [%sp+2223], %g3
+ fmuld %f12, %f12, %f4
+ fdtox %f6, %f6
+ ldx [%sp+2239], %o1
+ sllx %o2, 22, %o2
+ fmuld %f12, %f8, %f8
+ sllx %g3, 42, %g5
+ ldx [%sp+2255], %o3
+ fmuld %f0, %f12, %f0
+ add %o1, %o2, %o2
+ faddd %f4, %f10, %f4
+ ldx [%sp+2247], %o1
+ add %o2, %g5, %o2
+ and %g3, %o4, %g4
+ fdtox %f8, %f8
+ sllx %o3, 20, %g5
+ std %f8, [%sp+2231]
+ fdtox %f0, %f0
+ srlx %o2, 42, %o3
+ add %o1, %g5, %o1
+ fdtox %f4, %f4
+ srlx %g3, 22, %g3
+ sub %o3, %g4, %o3
+ std %f6, [%sp+2239]
+ std %f4, [%sp+2223]
+ srlx %o3, 63, %o3
+ add %o1, %g3, %o1
+ std %f2, [%sp+2255]
+ add %o1, %o3, %o1
+ std %f0, [%sp+2247]
+ stx %o2, [%o0-32]
+ stx %o1, [%o0-24]
+C Last limb: integer recombination only.
+ ldx [%sp+2231], %o2
+ ldx [%sp+2223], %o3
+ ldx [%sp+2239], %o1
+ sllx %o2, 22, %o2
+ sllx %o3, 42, %g5
+ ldx [%sp+2255], %g4
+ and %o3, %o4, %g3
+ add %o1, %o2, %o2
+ ldx [%sp+2247], %o1
+ add %o2, %g5, %o2
+ stx %o2, [%o0-16]
+ sllx %g4, 20, %g4
+ srlx %o2, 42, %o2
+ add %o1, %g4, %o1
+ srlx %o3, 22, %o3
+ sub %o2, %g3, %o2
+ srlx %o2, 63, %o2
+ add %o1, %o3, %o1
+ add %o1, %o2, %o1
+ stx %o1, [%o0-8]
+ ret
+ restore %g0, %g0, %g0
+C Simple, non-pipelined loop used when the limb count is below 4. One source
+C limb is squared per iteration; %g2 counts limbs done, %g1 is the source
+C pointer and %o0 the destination pointer.
+.Lsmall:
+ ldx [%g1], %o2
+.Loop0:
+ and %o2, %o7, %o1 C lo = x & 0x1fffff
+ stx %o1, [%sp+2263]
+ add %g2, 1, %g2
+ srlx %o2, 21, %o1
+ add %g1, 8, %g1
+ srlx %o2, 42, %o2 C hi = x >> 42
+ stx %o2, [%sp+2279]
+ and %o1, %o7, %o1 C mid = (x >> 21) & 0x1fffff
+ ldd [%sp+2263], %f0
+ cmp %g2, %i2 C sets the condition codes for the bl below
+ stx %o1, [%sp+2271]
+ fxtod %f0, %f6 C %f6 = lo
+ ldd [%sp+2279], %f0
+ ldd [%sp+2271], %f4
+ fxtod %f0, %f2 C %f2 = hi
+ fmuld %f6, %f6, %f0 C lo*lo
+ fxtod %f4, %f10 C %f10 = mid
+ fmuld %f2, %f6, %f4 C hi*lo
+ fdtox %f0, %f0
+ std %f0, [%sp+2239]
+ fmuld %f10, %f6, %f8 C mid*lo
+ fmuld %f10, %f10, %f0 C mid*mid
+ faddd %f4, %f4, %f6 C 2*hi*lo
+ fmuld %f2, %f2, %f4 C hi*hi
+ fdtox %f8, %f8
+ std %f8, [%sp+2231]
+ fmuld %f2, %f10, %f2 C hi*mid
+ faddd %f0, %f6, %f0 C s = mid*mid + 2*hi*lo
+ fdtox %f4, %f4
+ std %f4, [%sp+2255]
+ fdtox %f2, %f2
+ std %f2, [%sp+2247]
+ fdtox %f0, %f0
+ std %f0, [%sp+2223]
+C Integer recombination of the five products.
+ ldx [%sp+2239], %o1
+ ldx [%sp+2255], %g4
+ ldx [%sp+2231], %o2
+ sllx %g4, 20, %g4
+ ldx [%sp+2223], %o3
+ sllx %o2, 22, %o2
+ sllx %o3, 42, %g5
+ add %o1, %o2, %o2
+ ldx [%sp+2247], %o1
+ add %o2, %g5, %o2 C low result limb
+ stx %o2, [%o0]
+ and %o3, %o4, %g3 C s & 0x3fffff
+ srlx %o2, 42, %o2
+ add %o1, %g4, %o1
+ srlx %o3, 22, %o3
+ sub %o2, %g3, %o2
+ srlx %o2, 63, %o2 C carry out of the low limb (sign of the sub)
+ add %o1, %o3, %o1
+ add %o1, %o2, %o1 C high result limb
+ stx %o1, [%o0+8]
+ add %o0, 16, %o0
+C The delay-slot load is annulled (,a) when the branch is not taken.
+ bl,a,pt %xcc, .Loop0
+ ldx [%g1], %o2
+ ret
+ restore %g0, %g0, %g0
+EPILOGUE(mpn_sqr_diagonal)
--- /dev/null
+dnl SPARC v9 mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+dnl store difference in a third limb vector.
+
+dnl Copyright 2001, 2002, 2003, 2011 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C UltraSPARC 1&2: 4
+C UltraSPARC 3: 4.5
+
+C Compute carry-out from the most significant bits of u,v, and r, where
+C r=u-v-carry_in, using logic operations.
+
+C This code runs at 4 cycles/limb on UltraSPARC 1 and 2. It has a 4-insn
+C recurrence, and on the UltraSPARC 1 and 2 the IE units are 100% saturated.
+C Therefore, it seems futile to try to optimize this any further...
+
+C INPUT PARAMETERS
+C rp destination limb vector
+C up minuend limb vector
+C vp subtrahend limb vector
+C n number of limbs
+define(`rp',`%i0')
+define(`up',`%i1')
+define(`vp',`%i2')
+define(`n',`%i3')
+
+C u0..u3 and v0..v3 hold the four limb pairs in flight in the unrolled loop.
+define(`u0',`%l0')
+define(`u1',`%l2')
+define(`u2',`%l4')
+define(`u3',`%l6')
+define(`v0',`%l1')
+define(`v1',`%l3')
+define(`v2',`%l5')
+define(`v3',`%l7')
+
+C cy is the borrow between limbs. It lives in %i4, the register in which
+C mpn_sub_nc receives its fifth argument.
+define(`cy',`%i4')
+
+define(`fanop',`fitod %f0,%f2') dnl A quasi nop running in the FA pipe
+define(`fmnop',`fmuld %f0,%f0,%f4') dnl A quasi nop running in the FM pipe
+
+ASM_START()
+ REGISTER(%g2,#scratch)
+ REGISTER(%g3,#scratch)
+C mpn_sub_nc: same as mpn_sub_n below, except that cy is left as passed in by
+C the caller instead of being cleared. Both exits jump into the body of
+C mpn_sub_n: .Loop0 when n < 4, L(com) otherwise.
+PROLOGUE(mpn_sub_nc)
+ save %sp,-160,%sp
+
+ fitod %f0,%f0 C make sure f0 contains small, quiet number
+ subcc n,4,%g0
+ bl,pn %xcc,.Loop0
+ nop
+ b,a L(com)
+EPILOGUE()
+
+C mpn_sub_n: store {up,n} - {vp,n} at {rp,n} and return the borrow out of the
+C most significant limb in %i0.
+C
+C For each limb, r = u - v - cy is formed with two plain sub instructions and
+C the borrow is rebuilt with logic operations:
+C cy = ((r | ~(u | ~v)) & ~(u & ~v)) >> 63
+C Here orn a,b is a | ~b and andn a,b is a & ~b.
+PROLOGUE(mpn_sub_n)
+ save %sp,-160,%sp
+
+ fitod %f0,%f0 C make sure f0 contains small, quiet number
+ subcc n,4,%g0
+ bl,pn %xcc,.Loop0 C fewer than 4 limbs: simple loop only
+ mov 0,cy C delay slot: no incoming borrow
+C Common entry, also reached from mpn_sub_nc. Preload four limb pairs.
+L(com):
+ ldx [up+0],u0
+ ldx [vp+0],v0
+ add up,32,up
+ ldx [up-24],u1
+ ldx [vp+8],v1
+ add vp,32,vp
+ ldx [up-16],u2
+ ldx [vp-16],v2
+ ldx [up-8],u3
+ ldx [vp-8],v3
+ subcc n,8,n
+ sub u0,v0,%g1 C main sub
+ sub %g1,cy,%g4 C carry sub
+ orn u0,v0,%g2
+ bl,pn %xcc,.Lend4567 C fewer than 8 limbs: skip the main loop
+ fanop
+ b,a .Loop
+
+ .align 16
+C START MAIN LOOP
+C Four limbs per iteration, sixteen groups of four instructions. Each limb
+C takes four groups: finish the borrow logic, reload u and v, store r, and
+C start the subtraction of the next limb.
+.Loop: orn %g4,%g2,%g2
+ andn u0,v0,%g3
+ ldx [up+0],u0
+ fanop
+C --
+ andn %g2,%g3,%g2
+ ldx [vp+0],v0
+ add up,32,up
+ fanop
+C --
+ srlx %g2,63,cy
+ sub u1,v1,%g1
+ stx %g4,[rp+0]
+ fanop
+C --
+ sub %g1,cy,%g4
+ orn u1,v1,%g2
+ fmnop
+ fanop
+C --
+ orn %g4,%g2,%g2
+ andn u1,v1,%g3
+ ldx [up-24],u1
+ fanop
+C --
+ andn %g2,%g3,%g2
+ ldx [vp+8],v1
+ add vp,32,vp
+ fanop
+C --
+ srlx %g2,63,cy
+ sub u2,v2,%g1
+ stx %g4,[rp+8]
+ fanop
+C --
+ sub %g1,cy,%g4
+ orn u2,v2,%g2
+ fmnop
+ fanop
+C --
+ orn %g4,%g2,%g2
+ andn u2,v2,%g3
+ ldx [up-16],u2
+ fanop
+C --
+ andn %g2,%g3,%g2
+ ldx [vp-16],v2
+ add rp,32,rp
+ fanop
+C --
+ srlx %g2,63,cy
+ sub u3,v3,%g1
+ stx %g4,[rp-16]
+ fanop
+C --
+ sub %g1,cy,%g4
+ orn u3,v3,%g2
+ fmnop
+ fanop
+C --
+ orn %g4,%g2,%g2
+ andn u3,v3,%g3
+ ldx [up-8],u3
+ fanop
+C --
+ andn %g2,%g3,%g2
+ subcc n,4,n
+ ldx [vp-8],v3
+ fanop
+C --
+ srlx %g2,63,cy
+ sub u0,v0,%g1
+ stx %g4,[rp-8]
+ fanop
+C --
+ sub %g1,cy,%g4
+ orn u0,v0,%g2
+ bge,pt %xcc,.Loop C condition codes come from the subcc above
+ fanop
+C END MAIN LOOP
+C Finish the four limb pairs already loaded, without loading any more.
+.Lend4567:
+ orn %g4,%g2,%g2
+ andn u0,v0,%g3
+ andn %g2,%g3,%g2
+ srlx %g2,63,cy
+ sub u1,v1,%g1
+ stx %g4,[rp+0]
+ sub %g1,cy,%g4
+ orn u1,v1,%g2
+ orn %g4,%g2,%g2
+ andn u1,v1,%g3
+ andn %g2,%g3,%g2
+ srlx %g2,63,cy
+ sub u2,v2,%g1
+ stx %g4,[rp+8]
+ sub %g1,cy,%g4
+ orn u2,v2,%g2
+ orn %g4,%g2,%g2
+ andn u2,v2,%g3
+ andn %g2,%g3,%g2
+ add rp,32,rp
+ srlx %g2,63,cy
+ sub u3,v3,%g1
+ stx %g4,[rp-16]
+ sub %g1,cy,%g4
+ orn u3,v3,%g2
+ orn %g4,%g2,%g2
+ andn u3,v3,%g3
+ andn %g2,%g3,%g2
+ srlx %g2,63,cy
+ stx %g4,[rp-8]
+
+C After the addcc, n is the number of limbs still to process (0..3).
+ addcc n,4,n
+ bz,pn %xcc,.Lret
+ fanop
+
+C One limb per iteration; also the whole routine when n < 4 on entry.
+.Loop0: ldx [up],u0
+ add up,8,up
+ ldx [vp],v0
+ add vp,8,vp
+ add rp,8,rp
+ subcc n,1,n
+ sub u0,v0,%g1
+ orn u0,v0,%g2
+ sub %g1,cy,%g4
+ andn u0,v0,%g3
+ orn %g4,%g2,%g2
+ stx %g4,[rp-8]
+ andn %g2,%g3,%g2
+ bnz,pt %xcc,.Loop0
+ srlx %g2,63,cy C delay slot: borrow for the next limb
+
+.Lret: mov cy,%i0 C return the final borrow
+ ret
+ restore
+EPILOGUE(mpn_sub_n)
--- /dev/null
+dnl SPARC v9 64-bit mpn_submul_1 -- Multiply a limb vector with a limb and
+dnl subtract the result from a second limb vector.
+
+dnl Copyright 2001, 2002, 2003 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C UltraSPARC 1&2: 18
+C UltraSPARC 3: 23
+
+C INPUT PARAMETERS
+C rp i0
+C up i1
+C n i2
+C v i3
+
+ASM_START()
+ REGISTER(%g2,#scratch)
+
+C mpn_submul_1 is built from two calls: mpn_mul_1 writes the product of
+C {up,n} and v into a temporary area on the stack, then mpn_sub_n subtracts
+C that area from {rp,n} in place. The return value is the sum of the two
+C values returned by those calls.
+PROLOGUE(mpn_submul_1)
+ save %sp,-176,%sp
+
+ sllx %i2, 3, %g2 C n*8 bytes
+ or %g0, %i1, %o1 C arg 2 of mpn_mul_1: up
+ add %g2, 15, %o0
+ or %g0, %i2, %o2 C arg 3 of mpn_mul_1: n
+ and %o0, -16, %o0 C n*8 rounded up to a multiple of 16
+ sub %sp, %o0, %sp C grow the stack by that amount
+ add %sp, 2223, %o0 C arg 1 of mpn_mul_1: temporary area
+ or %g0, %o0, %l0 C keep its address for the second call
+ call mpn_mul_1
+ or %g0, %i3, %o3 C delay slot: arg 4 of mpn_mul_1: v
+ or %g0, %o0, %l1 C preserve carry value from mpn_mul_1
+ or %g0, %i0, %o0 C arg 1 of mpn_sub_n: rp (destination)
+ or %g0, %i0, %o1 C arg 2 of mpn_sub_n: rp (minuend)
+ or %g0, %l0, %o2 C arg 3 of mpn_sub_n: temporary area
+ call mpn_sub_n
+ or %g0, %i2, %o3 C delay slot: arg 4 of mpn_sub_n: n
+ ret
+ restore %l1, %o0, %o0 C sum carry values
+EPILOGUE(mpn_submul_1)
#define DIVREM_1_NORM_THRESHOLD 0 /* always */
#define DIVREM_1_UNNORM_THRESHOLD 0 /* always */
+#define MOD_1_1P_METHOD 2
#define MOD_1_NORM_THRESHOLD 0 /* always */
#define MOD_1_UNNORM_THRESHOLD 0 /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD MP_SIZE_T_MAX /* never */
-#define MOD_1U_TO_MOD_1_1_THRESHOLD MP_SIZE_T_MAX
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD MP_SIZE_T_MAX
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD MP_SIZE_T_MAX
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD MP_SIZE_T_MAX /* never */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 10
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 5
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 20
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD 0 /* never mpn_mod_1s_2p */
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 29
#define USE_PREINV_DIVREM_1 1
-#define DIVREM_2_THRESHOLD 0 /* always */
+#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
#define DIVEXACT_1_THRESHOLD 0 /* always */
#define BMOD_1_TO_MOD_1_THRESHOLD MP_SIZE_T_MAX /* never */
#define MUL_TOOM22_THRESHOLD 30
#define MUL_TOOM33_THRESHOLD 93
-#define MUL_TOOM44_THRESHOLD 143
+#define MUL_TOOM44_THRESHOLD 139
#define MUL_TOOM6H_THRESHOLD 165
-#define MUL_TOOM8H_THRESHOLD 303
+#define MUL_TOOM8H_THRESHOLD 278
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD 93
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD 95
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD 86
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 105
#define MUL_TOOM42_TO_TOOM53_THRESHOLD 85
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD 50
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD 68
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD 67
-#define SQR_BASECASE_THRESHOLD 10
+#define SQR_BASECASE_THRESHOLD 9
#define SQR_TOOM2_THRESHOLD 72
-#define SQR_TOOM3_THRESHOLD 97
-#define SQR_TOOM4_THRESHOLD 179
-#define SQR_TOOM6_THRESHOLD 191
+#define SQR_TOOM3_THRESHOLD 94
+#define SQR_TOOM4_THRESHOLD 184
+#define SQR_TOOM6_THRESHOLD 0 /* always */
#define SQR_TOOM8_THRESHOLD 339
-#define MULMOD_BNM1_THRESHOLD 14
+#define MULMID_TOOM42_THRESHOLD 40
+
+#define MULMOD_BNM1_THRESHOLD 13
#define SQRMOD_BNM1_THRESHOLD 9
#define MUL_FFT_MODF_THRESHOLD 212 /* k = 5 */
{ 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \
{4194304,23}, {8388608,24} }
#define MUL_FFT_TABLE3_SIZE 170
-#define MUL_FFT_THRESHOLD 2240
+#define MUL_FFT_THRESHOLD 1984
-#define SQR_FFT_MODF_THRESHOLD 244 /* k = 5 */
+#define SQR_FFT_MODF_THRESHOLD 236 /* k = 5 */
#define SQR_FFT_TABLE3 \
{ { 244, 5}, { 8, 4}, { 17, 5}, { 17, 6}, \
{ 17, 7}, { 9, 6}, { 19, 7}, { 17, 8}, \
{ 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \
{4194304,23}, {8388608,24} }
#define SQR_FFT_TABLE3_SIZE 182
-#define SQR_FFT_THRESHOLD 1984
+#define SQR_FFT_THRESHOLD 1728
-#define MULLO_BASECASE_THRESHOLD 13
+#define MULLO_BASECASE_THRESHOLD 12
#define MULLO_DC_THRESHOLD 0 /* never mpn_mullo_basecase */
#define MULLO_MUL_N_THRESHOLD 3791
#define DC_DIV_QR_THRESHOLD 16
#define DC_DIVAPPR_Q_THRESHOLD 66
-#define DC_BDIV_QR_THRESHOLD 26
-#define DC_BDIV_Q_THRESHOLD 92
+#define DC_BDIV_QR_THRESHOLD 27
+#define DC_BDIV_Q_THRESHOLD 86
#define INV_MULMOD_BNM1_THRESHOLD 58
-#define INV_NEWTON_THRESHOLD 17
+#define INV_NEWTON_THRESHOLD 16
#define INV_APPR_THRESHOLD 17
-#define BINV_NEWTON_THRESHOLD 134
-#define REDC_1_TO_REDC_2_THRESHOLD 10
-#define REDC_2_TO_REDC_N_THRESHOLD 117
+#define BINV_NEWTON_THRESHOLD 110
+#define REDC_1_TO_REDC_2_THRESHOLD 0 /* always */
+#define REDC_2_TO_REDC_N_THRESHOLD 115
-#define MU_DIV_QR_THRESHOLD 748
-#define MU_DIVAPPR_Q_THRESHOLD 630
+#define MU_DIV_QR_THRESHOLD 618
+#define MU_DIVAPPR_Q_THRESHOLD 551
#define MUPI_DIV_QR_THRESHOLD 0 /* always */
-#define MU_BDIV_QR_THRESHOLD 748
-#define MU_BDIV_Q_THRESHOLD 807
+#define MU_BDIV_QR_THRESHOLD 562
+#define MU_BDIV_Q_THRESHOLD 748
+
+#define POWM_SEC_TABLE 4,23,130,961,1926
#define MATRIX22_STRASSEN_THRESHOLD 12
#define HGCD_THRESHOLD 39
-#define GCD_DC_THRESHOLD 130
-#define GCDEXT_DC_THRESHOLD 134
-#define JACOBI_BASE_METHOD 2
-
-#define GET_STR_DC_THRESHOLD 18
-#define GET_STR_PRECOMPUTE_THRESHOLD 27
-#define SET_STR_DC_THRESHOLD 315
-#define SET_STR_PRECOMPUTE_THRESHOLD 1037
+#define HGCD_APPR_THRESHOLD 50
+#define HGCD_REDUCE_THRESHOLD 1012
+#define GCD_DC_THRESHOLD 134
+#define GCDEXT_DC_THRESHOLD 132
+#define JACOBI_BASE_METHOD 4
+
+#define GET_STR_DC_THRESHOLD 19
+#define GET_STR_PRECOMPUTE_THRESHOLD 28
+#define SET_STR_DC_THRESHOLD 300
+#define SET_STR_PRECOMPUTE_THRESHOLD 1043
+
+#define FAC_DSC_THRESHOLD 462
+#define FAC_ODD_THRESHOLD 0 /* always */
--- /dev/null
+dnl SPARC v9 mpn_add_n for T1/T2.
+
+dnl Copyright 2010 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C UltraSPARC T1: ?
+C UltraSPARC T2: ?
+
+C INPUT PARAMETERS
+define(`rp', `%o0')
+define(`up', `%o1')
+define(`vp', `%o2')
+define(`n', `%o3')
+define(`cy', `%o4')
+
+ASM_START()
+ REGISTER(%g2,#scratch)
+ REGISTER(%g3,#scratch)
+C mpn_add_nc jumps straight into the shared loop with the caller-supplied
+C carry-in still in cy; mpn_add_n clears cy first.  Both return the carry-out
+C in %o0.  The carry is kept in the 32-bit icc carry flag, so each limb needs
+C two add-with-carry steps, one for the full limb and one for the high halves.
+PROLOGUE(mpn_add_nc)
+ b,a L(ent) C annulled branch: no delay slot instruction is executed
+EPILOGUE()
+PROLOGUE(mpn_add_n)
+ mov 0, cy
+L(ent): cmp %g0, cy C 0 - cy: carry flag ends up set for cy = 1, clear for cy = 0
+L(top): ldx [up+0], %o4 C next up limb; reuses the cy register, already consumed
+ add up, 8, up
+ ldx [vp+0], %o5 C next vp limb
+ add vp, 8, vp
+ add rp, 8, rp
+ add n, -1, n
+ srlx %o4, 32, %g1 C high half of the up limb
+ srlx %o5, 32, %g2 C high half of the vp limb
+ addccc %o4, %o5, %g3 C full 64-bit sum plus carry-in; icc carry = carry out of bit 31
+ addccc %g1, %g2, %g0 C high halves plus that carry; result dropped, carry = carry out of bit 63
+ brgz n, L(top) C loop while limbs remain
+ stx %g3, [rp-8] C delay slot: store the sum limb; rp was already advanced
+
+ retl
+ addc %g0, %g0, %o0 C delay slot: return 0 + 0 + carry
+EPILOGUE()
--- /dev/null
+dnl SPARC v9 mpn_addlsh1_n for T1/T2.
+
+dnl Copyright 2010 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C LSH is the left shift count the included addlshC_n.asm applies to each vp
+C limb, and RSH is the matching right shift count, 64 - LSH, it uses to obtain
+C the bits shifted out of that limb.
+define(LSH, 1)
+define(RSH, 63)
+
+C The included body passes func to PROLOGUE, so this sets the entry point name.
+define(func, mpn_addlsh1_n)
+
+MULFUNC_PROLOGUE(mpn_addlsh1_n)
+
+C Parameterized body; it uses LSH, RSH and func as defined above.
+include_mpn(`sparc64/ultrasparct1/addlshC_n.asm')
--- /dev/null
+dnl SPARC v9 mpn_addlsh2_n for T1/T2.
+
+dnl Copyright 2010 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C LSH is the left shift count the included addlshC_n.asm applies to each vp
+C limb, and RSH is the matching right shift count, 64 - LSH, it uses to obtain
+C the bits shifted out of that limb.
+define(LSH, 2)
+define(RSH, 62)
+
+C The included body passes func to PROLOGUE, so this sets the entry point name.
+define(func, mpn_addlsh2_n)
+
+MULFUNC_PROLOGUE(mpn_addlsh2_n)
+
+C Parameterized body; it uses LSH, RSH and func as defined above.
+include_mpn(`sparc64/ultrasparct1/addlshC_n.asm')
--- /dev/null
+dnl SPARC v9 mpn_addlshC_n for T1/T2.
+
+dnl Copyright 2010 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+C cycles/limb
+C UltraSPARC T1: 21
+C UltraSPARC T2: ?
+
+C INPUT PARAMETERS
+define(`rp', `%o0')
+define(`up', `%o1')
+define(`vp', `%o2')
+define(`n', `%o3')
+define(`cy', `%o4')
+
+ASM_START()
+ REGISTER(%g2,#scratch)
+ REGISTER(%g3,#scratch)
+C Shared body for the addlsh wrappers: each result limb is the up limb plus
+C the vp limb shifted left by LSH bits, with the bits shifted out of one vp
+C limb carried into the next via %g5.  The including file must define LSH, RSH
+C and func.  Returns the bits shifted out of the last vp limb plus the final
+C carry.
+PROLOGUE(func)
+ mov 0, cy
+ mov 0, %g5 C no bits shifted in below the first limb
+ cmp %g0, cy C 0 - 0: starts the loop with the carry flag clear
+L(top): ldx [up+0], %o4 C next up limb
+ add up, 8, up
+ ldx [vp+0], %o5 C next vp limb
+ add vp, 8, vp
+ add rp, 8, rp
+
+ sllx %o5, LSH, %g4 C vp limb shifted left
+ add n, -1, n
+ or %g5, %g4, %g4 C merge the bits shifted out of the previous vp limb
+ srlx %o5, RSH, %g5 C bits shifted out of this vp limb, for the next round
+
+ srlx %o4, 32, %g1 C high half of the up limb
+ srlx %g4, 32, %g2 C high half of the shifted limb
+ addccc %o4, %g4, %g3 C full 64-bit sum plus carry-in; icc carry = carry out of bit 31
+ addccc %g1, %g2, %g0 C high halves plus that carry; result dropped, carry = carry out of bit 63
+ brgz n, L(top) C loop while limbs remain
+ stx %g3, [rp-8] C delay slot: store the result limb; rp was already advanced
+
+ retl
+ addc %g5, %g0, %o0 C delay slot: return shifted-out bits + carry
+EPILOGUE()
--- /dev/null
+dnl SPARC v9 mpn_addmul_1 for T1/T2.
+
+dnl Copyright 2010 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C UltraSPARC T1: 74
+C UltraSPARC T2: ?
+
+C INPUT PARAMETERS
+define(`rp', `%i0')
+define(`up', `%i1')
+define(`n', `%i2')
+define(`v0', `%i3')
+
+ASM_START()
+ REGISTER(%g2,#scratch)
+ REGISTER(%g3,#scratch)
+C Adds the product of the n up limbs and v0 into the rp limbs in place and
+C returns the final carry limb.  Each 64x64 product is assembled from four
+C mulx of 32-bit halves.  movlu on xcc acts as move-if-carry after addcc.
+PROLOGUE(mpn_addmul_1)
+ save %sp, -176, %sp
+ mov 1, %o2
+ mov %i0, %g2 C g2 = rp, freeing i0 to hold the running carry limb
+ srlx %i3, 32, %o4 C o4 = high 32 bits of v0
+ sllx %o2, 32, %o2 C o2 = 2^32, correction used when the middle sum wraps
+ srl %i3, 0, %i3 C i3 = low 32 bits of v0, zero extended
+ mov 0, %g3 C g3 = byte offset into up and rp
+ mov 0, %i0 C carry limb starts at zero
+
+L(top): ldx [%i1+%g3], %g1 C next up limb
+ srl %g1, 0, %g4 C g4 = low half of the limb
+ mulx %g4, %i3, %o5 C o5 = ulo * vlo
+ srlx %g1, 32, %g1 C g1 = high half of the limb
+ mulx %g1, %i3, %g5 C g5 = uhi * vlo
+ mulx %g4, %o4, %g4 C g4 = ulo * vhi
+ mulx %g1, %o4, %g1 C g1 = uhi * vhi
+ srlx %o5, 32, %o1
+ add %g5, %o1, %o1 C o1 = uhi*vlo + high half of ulo*vlo
+ addcc %o1, %g4, %g4 C g4 = middle sum; xcc carry set if it wrapped
+ srl %o5, 0, %o0 C o0 = low half of ulo*vlo
+ ldx [%g2+%g3], %o5 C o5 = current rp limb
+ sllx %g4, 32, %o1
+ add %g1, %o2, %l1
+ movlu %xcc, %l1, %g1 C if the middle sum wrapped, add 2^32 to uhi*vhi
+ add %o1, %o0, %l0 C l0 = low limb of the product
+ addcc %l0, %i0, %g5 C add the carry limb from the previous iteration
+ srlx %g4, 32, %i0 C i0 = high half of the middle sum
+ add %i0, 1, %g4
+ movlu %xcc, %g4, %i0 C propagate the carry of that addition
+ addcc %o5, %g5, %g5 C add the rp limb
+ stx %g5, [%g2+%g3] C store the result limb
+ add %i0, 1, %g4
+ movlu %xcc, %g4, %i0 C propagate the carry of adding the rp limb
+ add %i2, -1, %i2 C one limb done
+ add %i0, %g1, %i0 C i0 = carry limb for the next iteration
+ brnz,pt %i2, L(top)
+ add %g3, 8, %g3 C delay slot: advance the byte offset
+ return %i7+8 C window restore makes i0 the caller o0
+ nop
+EPILOGUE()
--- /dev/null
+/* Sparc64 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2004, 2006, 2008, 2009,
+2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#define GMP_LIMB_BITS 64
+#define BYTES_PER_MP_LIMB 8
+
+/* 1000 MHz ultrasparc t1 running GNU/Linux */
+
+#define DIVREM_1_NORM_THRESHOLD 0 /* always */
+#define DIVREM_1_UNNORM_THRESHOLD 0 /* always */
+#define MOD_1_1P_METHOD 2
+#define MOD_1_NORM_THRESHOLD 0 /* always */
+#define MOD_1_UNNORM_THRESHOLD 0 /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 13
+#define MOD_1U_TO_MOD_1_1_THRESHOLD MP_SIZE_T_MAX
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 0 /* never mpn_mod_1_1p */
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD 0 /* never mpn_mod_1s_2p */
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 34
+#define USE_PREINV_DIVREM_1 1
+#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
+#define DIVEXACT_1_THRESHOLD 0 /* always */
+#define BMOD_1_TO_MOD_1_THRESHOLD MP_SIZE_T_MAX /* never */
+
+#define MUL_TOOM22_THRESHOLD 8
+#define MUL_TOOM33_THRESHOLD 50
+#define MUL_TOOM44_THRESHOLD 99
+#define MUL_TOOM6H_THRESHOLD 125
+#define MUL_TOOM8H_THRESHOLD 187
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD 65
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 77
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 65
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD 50
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD 34
+
+#define SQR_BASECASE_THRESHOLD 0 /* always */
+#define SQR_TOOM2_THRESHOLD 14
+#define SQR_TOOM3_THRESHOLD 57
+#define SQR_TOOM4_THRESHOLD 133
+#define SQR_TOOM6_THRESHOLD 156
+#define SQR_TOOM8_THRESHOLD 260
+
+#define MULMID_TOOM42_THRESHOLD 12
+
+#define MULMOD_BNM1_THRESHOLD 7
+#define SQRMOD_BNM1_THRESHOLD 7
+
+#define MUL_FFT_MODF_THRESHOLD 176 /* k = 5 */
+#define MUL_FFT_TABLE3 \
+ { { 176, 5}, { 7, 6}, { 4, 5}, { 9, 6}, \
+ { 5, 5}, { 11, 6}, { 11, 7}, { 6, 6}, \
+ { 13, 7}, { 7, 6}, { 15, 7}, { 9, 8}, \
+ { 5, 7}, { 13, 8}, { 7, 7}, { 15, 6}, \
+ { 32, 7}, { 24, 8}, { 21, 9}, { 11, 8}, \
+ { 23,10}, { 7, 9}, { 15, 8}, { 33, 9}, \
+ { 19, 8}, { 39, 9}, { 23,10}, { 15, 9}, \
+ { 43,10}, { 23,11}, { 15,10}, { 31, 9}, \
+ { 63, 8}, { 127, 9}, { 67,10}, { 39, 9}, \
+ { 79, 8}, { 159,10}, { 47, 9}, { 95,11}, \
+ { 2048,12}, { 4096,13}, { 8192,14}, { 16384,15}, \
+ { 32768,16}, { 65536,17}, { 131072,18}, { 262144,19}, \
+ { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \
+ {8388608,24} }
+#define MUL_FFT_TABLE3_SIZE 53
+#define MUL_FFT_THRESHOLD 1728
+
+
+#define SQR_FFT_MODF_THRESHOLD 148 /* k = 5 */
+#define SQR_FFT_TABLE3 \
+ { { 148, 5}, { 7, 6}, { 4, 5}, { 9, 6}, \
+ { 5, 5}, { 11, 6}, { 11, 7}, { 6, 6}, \
+ { 13, 7}, { 7, 6}, { 15, 7}, { 13, 8}, \
+ { 7, 7}, { 16, 8}, { 9, 6}, { 38, 7}, \
+ { 20, 8}, { 11, 7}, { 24, 8}, { 13, 9}, \
+ { 7, 7}, { 30, 8}, { 19, 9}, { 11, 8}, \
+ { 25,10}, { 7, 9}, { 15, 8}, { 31, 9}, \
+ { 19, 8}, { 39, 9}, { 27,10}, { 15, 9}, \
+ { 39,10}, { 23, 9}, { 47, 8}, { 95, 9}, \
+ { 51,11}, { 15,10}, { 31, 8}, { 127,10}, \
+ { 39, 9}, { 79, 8}, { 159,10}, { 47, 9}, \
+ { 95,11}, { 2048,12}, { 4096,13}, { 8192,14}, \
+ { 16384,15}, { 32768,16}, { 65536,17}, { 131072,18}, \
+ { 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \
+ {4194304,23}, {8388608,24} }
+#define SQR_FFT_TABLE3_SIZE 58
+#define SQR_FFT_THRESHOLD 1344
+
+#define MULLO_BASECASE_THRESHOLD 0 /* always */
+#define MULLO_DC_THRESHOLD 28
+#define MULLO_MUL_N_THRESHOLD 3176
+
+#define DC_DIV_QR_THRESHOLD 27
+#define DC_DIVAPPR_Q_THRESHOLD 106
+#define DC_BDIV_QR_THRESHOLD 27
+#define DC_BDIV_Q_THRESHOLD 62
+
+#define INV_MULMOD_BNM1_THRESHOLD 14
+#define INV_NEWTON_THRESHOLD 163
+#define INV_APPR_THRESHOLD 117
+
+#define BINV_NEWTON_THRESHOLD 166
+#define REDC_1_TO_REDC_N_THRESHOLD 31
+
+#define MU_DIV_QR_THRESHOLD 734
+#define MU_DIVAPPR_Q_THRESHOLD 748
+#define MUPI_DIV_QR_THRESHOLD 67
+#define MU_BDIV_QR_THRESHOLD 562
+#define MU_BDIV_Q_THRESHOLD 734
+
+#define POWM_SEC_TABLE 4,29,188,643,2741
+
+#define MATRIX22_STRASSEN_THRESHOLD 11
+#define HGCD_THRESHOLD 58
+#define HGCD_APPR_THRESHOLD 55
+#define HGCD_REDUCE_THRESHOLD 637
+#define GCD_DC_THRESHOLD 186
+#define GCDEXT_DC_THRESHOLD 140
+#define JACOBI_BASE_METHOD 3
+
+#define GET_STR_DC_THRESHOLD 20
+#define GET_STR_PRECOMPUTE_THRESHOLD 33
+#define SET_STR_DC_THRESHOLD 268
+#define SET_STR_PRECOMPUTE_THRESHOLD 960
+
+#define FAC_DSC_THRESHOLD 268
+#define FAC_ODD_THRESHOLD 0 /* always */
--- /dev/null
+dnl SPARC v9 mpn_lshift for T1/T2.
+
+dnl Copyright 2010 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C UltraSPARC T1: 17
+C UltraSPARC T2: ?
+
+C INPUT PARAMETERS
+define(`rp', `%o0')
+define(`up', `%o1')
+define(`n', `%o2')
+define(`cnt', `%o3')
+
+ASM_START()
+ REGISTER(%g2,#scratch)
+ REGISTER(%g3,#scratch)
+C Shifts the n limbs at up left by cnt bits, storing the result at rp, and
+C returns the bits shifted out of the most significant limb.  Limbs are
+C processed from the most significant end downwards.  The code addresses the
+C arguments by raw register: %o0 = rp, %o1 = up, %o2 = n, %o3 = cnt.
+PROLOGUE(mpn_lshift)
+ add %o1, -8, %o1 C bias up by one limb
+ add %o0, 8, %g1 C g1 = rp biased the other way, for the late stores
+ sllx %o2, 3, %g5 C g5 = n * 8, byte offset
+ sub %g0, %o3, %o4 C o4 = -cnt; shifts use the low 6 bits, so this acts as 64 - cnt
+ ldx [%o1+%g5], %g2 C g2 = most significant limb
+ add %g5, -8, %g5
+ brz,pn %g5, L(end) C single limb: skip the loop
+ sllx %g2, %o3, %g4 C delay slot: g4 = top limb shifted left
+
+L(top): ldx [%o1+%g5], %o5 C next lower limb
+ nop
+ add %g5, -8, %g5
+ srlx %o5, %o4, %g3 C bits of this limb that move into the limb above
+ or %g4, %g3, %g3 C combine with the shifted limb above
+ sllx %o5, %o3, %g4 C this limb shifted left, completed next round
+ stx %g3, [%g1+%g5] C store the completed limb
+ brnz %g5, L(top)
+ nop
+
+L(end): stx %g4, [%g1-8] C least significant result limb, at rp
+ retl
+ srlx %g2, %o4, %o0 C delay slot: return the bits shifted out of the top limb
+EPILOGUE()
--- /dev/null
+dnl SPARC v9 mpn_lshiftc for T1/T2.
+
+dnl Copyright 2010 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C UltraSPARC T1: 17
+C UltraSPARC T2: ?
+
+C INPUT PARAMETERS
+define(`rp', `%o0')
+define(`up', `%o1')
+define(`n', `%o2')
+define(`cnt', `%o3')
+
+ASM_START()
+ REGISTER(%g2,#scratch)
+ REGISTER(%g3,#scratch)
+C Shifts the n limbs at up left by cnt bits and stores the one's complement of
+C the result at rp.  Returns the uncomplemented bits shifted out of the most
+C significant limb.  Limbs are processed from the most significant end
+C downwards.  The code addresses the arguments by raw register: %o0 = rp,
+C %o1 = up, %o2 = n, %o3 = cnt.
+PROLOGUE(mpn_lshiftc)
+ add %o1, -8, %o1 C bias up by one limb
+ add %o0, 8, %g1 C g1 = rp biased the other way, for the late stores
+ sllx %o2, 3, %g5 C g5 = n * 8, byte offset
+ sub %g0, %o3, %o4 C o4 = -cnt; shifts use the low 6 bits, so this acts as 64 - cnt
+ ldx [%o1+%g5], %g2 C g2 = most significant limb
+ add %g5, -8, %g5
+ brz,pn %g5, L(end) C single limb: skip the loop
+ sllx %g2, %o3, %g4 C delay slot: g4 = top limb shifted left
+
+L(top): ldx [%o1+%g5], %o5 C next lower limb
+ not %g4 C complement the shifted limb above
+ add %g5, -8, %g5
+ srlx %o5, %o4, %g3 C bits of this limb that move into the limb above
+ andn %g4, %g3, %g3 C complemented g4 and-not g3 = complement of the combined limb
+ sllx %o5, %o3, %g4 C this limb shifted left, completed next round
+ stx %g3, [%g1+%g5] C store the completed limb
+ brnz %g5, L(top)
+ nop
+
+L(end): not %g4 C complement the least significant result limb
+ stx %g4, [%g1-8] C store it at rp
+ retl
+ srlx %g2, %o4, %o0 C delay slot: return the bits shifted out of the top limb
+EPILOGUE()
--- /dev/null
+dnl SPARC v9 mpn_mul_1 for T1/T2.
+
+dnl Copyright 2010 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C UltraSPARC T1: 68
+C UltraSPARC T2: ?
+
+C INPUT PARAMETERS
+define(`rp', `%i0')
+define(`up', `%i1')
+define(`n', `%i2')
+define(`v0', `%i3')
+
+ASM_START()
+ REGISTER(%g2,#scratch)
+ REGISTER(%g3,#scratch)
+C Multiplies the n up limbs by v0, stores the low limbs of the running product
+C at rp, and returns the final carry limb.  Each 64x64 product is assembled
+C from four mulx of 32-bit halves.  movlu on xcc acts as move-if-carry after
+C addcc.
+PROLOGUE(mpn_mul_1)
+ save %sp, -176, %sp
+ mov 1, %o2
+ mov %i0, %g2 C g2 = rp, freeing i0 to hold the running carry limb
+ srlx %i3, 32, %o4 C o4 = high 32 bits of v0
+ sllx %o2, 32, %o2 C o2 = 2^32, correction used when the middle sum wraps
+ srl %i3, 0, %i3 C i3 = low 32 bits of v0, zero extended
+ mov 0, %g3 C g3 = byte offset into up and rp
+ mov 0, %i0 C carry limb starts at zero
+
+L(top): ldx [%i1+%g3], %g1 C next up limb
+ srl %g1, 0, %g4 C g4 = low half of the limb
+ mulx %g4, %i3, %o5 C o5 = ulo * vlo
+ srlx %g1, 32, %g1 C g1 = high half of the limb
+ mulx %g1, %i3, %g5 C g5 = uhi * vlo
+ mulx %g4, %o4, %g4 C g4 = ulo * vhi
+ mulx %g1, %o4, %g1 C g1 = uhi * vhi
+ srlx %o5, 32, %o1
+ add %g5, %o1, %o1 C o1 = uhi*vlo + high half of ulo*vlo
+ addcc %o1, %g4, %g4 C g4 = middle sum; xcc carry set if it wrapped
+ srl %o5, 0, %o0 C o0 = low half of ulo*vlo
+ sllx %g4, 32, %o1
+ add %g1, %o2, %l1
+ movlu %xcc, %l1, %g1 C if the middle sum wrapped, add 2^32 to uhi*vhi
+ add %o1, %o0, %l0 C l0 = low limb of the product
+ addcc %l0, %i0, %g5 C add the carry limb from the previous iteration
+ srlx %g4, 32, %i0 C i0 = high half of the middle sum
+ add %i0, 1, %g4
+ movlu %xcc, %g4, %i0 C propagate the carry of that addition
+ stx %g5, [%g2+%g3] C store the result limb
+ add %i2, -1, %i2 C one limb done
+ add %i0, %g1, %i0 C i0 = carry limb for the next iteration
+ brnz,pt %i2, L(top)
+ add %g3, 8, %g3 C delay slot: advance the byte offset
+ return %i7+8 C window restore makes i0 the caller o0
+ nop
+EPILOGUE()
--- /dev/null
+dnl SPARC v9 mpn_rsblsh1_n for T1/T2.
+
+dnl Copyright 2010 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C LSH is the left shift count the included rsblshC_n.asm applies to each vp
+C limb, and RSH is the matching right shift count, 64 - LSH, it uses to obtain
+C the bits shifted out of that limb.
+define(LSH, 1)
+define(RSH, 63)
+
+C The included body passes func to PROLOGUE, so this sets the entry point name.
+define(func, mpn_rsblsh1_n)
+
+MULFUNC_PROLOGUE(mpn_rsblsh1_n)
+
+C Parameterized body; it uses LSH, RSH and func as defined above.
+include_mpn(`sparc64/ultrasparct1/rsblshC_n.asm')
--- /dev/null
+dnl SPARC v9 mpn_rsblsh2_n for T1/T2.
+
+dnl Copyright 2010 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C LSH is the left shift count the included rsblshC_n.asm applies to each vp
+C limb, and RSH is the matching right shift count, 64 - LSH, it uses to obtain
+C the bits shifted out of that limb.
+define(LSH, 2)
+define(RSH, 62)
+
+C The included body passes func to PROLOGUE, so this sets the entry point name.
+define(func, mpn_rsblsh2_n)
+
+MULFUNC_PROLOGUE(mpn_rsblsh2_n)
+
+C Parameterized body; it uses LSH, RSH and func as defined above.
+include_mpn(`sparc64/ultrasparct1/rsblshC_n.asm')
--- /dev/null
+dnl SPARC v9 mpn_rsblshC_n for T1/T2.
+
+dnl Copyright 2010 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+C cycles/limb
+C UltraSPARC T1: 21
+C UltraSPARC T2: ?
+
+C INPUT PARAMETERS
+define(`rp', `%o0')
+define(`up', `%o1')
+define(`vp', `%o2')
+define(`n', `%o3')
+define(`cy', `%o4')
+
+ASM_START()
+ REGISTER(%g2,#scratch)
+ REGISTER(%g3,#scratch)
+C Shared body for the rsblsh wrappers: each result limb is the vp limb shifted
+C left by LSH bits minus the up limb, with the bits shifted out of one vp limb
+C carried into the next via %g5.  The including file must define LSH, RSH and
+C func.  Returns the bits shifted out of the last vp limb minus the final
+C borrow.
+PROLOGUE(func)
+ mov 0, cy
+ mov 0, %g5 C no bits shifted in below the first limb
+ cmp %g0, cy C 0 - 0: starts the loop with the carry flag clear
+L(top): ldx [up+0], %o4 C next up limb
+ add up, 8, up
+ ldx [vp+0], %o5 C next vp limb
+ add vp, 8, vp
+ add rp, 8, rp
+
+ sllx %o5, LSH, %g4 C vp limb shifted left
+ add n, -1, n
+ or %g5, %g4, %g4 C merge the bits shifted out of the previous vp limb
+ srlx %o5, RSH, %g5 C bits shifted out of this vp limb, for the next round
+
+ srlx %o4, 32, %g1 C high half of the up limb
+ srlx %g4, 32, %g2 C high half of the shifted limb
+ subccc %g4, %o4, %g3 C shifted limb - up limb - borrow; icc carry = borrow out of bit 31
+ subccc %g2, %g1, %g0 C high halves with that borrow; result dropped, carry = borrow out of bit 63
+ brgz n, L(top) C loop while limbs remain
+ stx %g3, [rp-8] C delay slot: store the result limb; rp was already advanced
+
+ retl
+ subc %g5, %g0, %o0 C delay slot: return shifted-out bits - borrow
+EPILOGUE()
--- /dev/null
+dnl SPARC v9 mpn_rshift for T1/T2.
+
+dnl Copyright 2010 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C UltraSPARC T1: 17
+C UltraSPARC T2: ?
+
+C INPUT PARAMETERS
+define(`rp', `%o0')
+define(`up', `%o1')
+define(`n', `%o2')
+define(`cnt', `%o3')
+
+ASM_START()
+ REGISTER(%g2,#scratch)
+ REGISTER(%g3,#scratch)
+C Shifts the n limbs at up right by cnt bits, storing the result at rp, and
+C returns the bits shifted out of the least significant limb, positioned at
+C the top of the returned limb.  Limbs are processed from the least
+C significant end upwards using a negative byte offset that counts up to zero.
+C The code addresses the arguments by raw register: %o0 = rp, %o1 = up,
+C %o2 = n, %o3 = cnt.
+PROLOGUE(mpn_rshift)
+ add %o1, 0, %o1 C adds zero: leaves up unchanged
+ add %o0, -16, %g1 C g1 = rp biased for the late stores
+ sllx %o2, 3, %g5 C g5 = n * 8
+ add %o1, %g5, %o1 C o1 = end of up
+ add %g1, %g5, %g1 C g1 = biased end of rp
+ neg %g5 C g5 = negative byte offset from the end
+ sub %g0, %o3, %o4 C o4 = -cnt; shifts use the low 6 bits, so this acts as 64 - cnt
+ ldx [%o1+%g5], %g2 C g2 = least significant limb
+ add %g5, 8, %g5
+ brz,pn %g5, L(end) C single limb: skip the loop
+ srlx %g2, %o3, %g4 C delay slot: g4 = lowest limb shifted right
+
+L(top): ldx [%o1+%g5], %o5 C next higher limb
+ add %g5, 8, %g5
+ sllx %o5, %o4, %g3 C bits of this limb that move into the limb below
+ or %g4, %g3, %g3 C combine with the shifted limb below
+ srlx %o5, %o3, %g4 C this limb shifted right, completed next round
+ stx %g3, [%g1+%g5] C store the completed limb
+ brnz %g5, L(top)
+ nop
+
+L(end): stx %g4, [%g1+8] C most significant result limb
+ retl
+ sllx %g2, %o4, %o0 C delay slot: return the bits shifted out of the lowest limb
+EPILOGUE()
--- /dev/null
+dnl SPARC v9 mpn_sub_n for T1/T2.
+
+dnl Copyright 2010 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C UltraSPARC T1: ?
+C UltraSPARC T2: ?
+
+C INPUT PARAMETERS
+define(`rp', `%o0')
+define(`up', `%o1')
+define(`vp', `%o2')
+define(`n', `%o3')
+define(`cy', `%o4')
+
+ASM_START()
+ REGISTER(%g2,#scratch)
+ REGISTER(%g3,#scratch)
+C mpn_sub_nc jumps straight into the shared loop with the caller-supplied
+C borrow-in still in cy; mpn_sub_n clears cy first.  Both return the
+C borrow-out in %o0.  The borrow is kept in the 32-bit icc carry flag, so each
+C limb needs two subtract-with-carry steps, one for the full limb and one for
+C the high halves.
+PROLOGUE(mpn_sub_nc)
+ b,a L(ent) C annulled branch: no delay slot instruction is executed
+EPILOGUE()
+PROLOGUE(mpn_sub_n)
+ mov 0, cy
+L(ent): cmp %g0, cy C 0 - cy: carry flag ends up set for cy = 1, clear for cy = 0
+L(top): ldx [up+0], %o4 C next up limb; reuses the cy register, already consumed
+ add up, 8, up
+ ldx [vp+0], %o5 C next vp limb
+ add vp, 8, vp
+ add rp, 8, rp
+ add n, -1, n
+ srlx %o4, 32, %g1 C high half of the up limb
+ srlx %o5, 32, %g2 C high half of the vp limb
+ subccc %o4, %o5, %g3 C up limb - vp limb - borrow; icc carry = borrow out of bit 31
+ subccc %g1, %g2, %g0 C high halves with that borrow; result dropped, carry = borrow out of bit 63
+ brgz n, L(top) C loop while limbs remain
+ stx %g3, [rp-8] C delay slot: store the difference limb; rp was already advanced
+
+ retl
+ addc %g0, %g0, %o0 C delay slot: return 0 + 0 + borrow
+EPILOGUE()
--- /dev/null
+dnl SPARC v9 mpn_sublsh1_n for T1/T2.
+
+dnl Copyright 2010 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C LSH is the left shift count the included sublshC_n.asm applies to each vp
+C limb, and RSH is the matching right shift count, 64 - LSH, it uses to obtain
+C the bits shifted out of that limb.
+define(LSH, 1)
+define(RSH, 63)
+
+C The included body passes func to PROLOGUE, so this sets the entry point name.
+define(func, mpn_sublsh1_n)
+
+MULFUNC_PROLOGUE(mpn_sublsh1_n)
+
+C Parameterized body; it uses LSH, RSH and func as defined above.
+include_mpn(`sparc64/ultrasparct1/sublshC_n.asm')
--- /dev/null
+dnl SPARC v9 mpn_sublsh2_n for T1/T2.
+
+dnl Copyright 2010 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C LSH is the left shift count the included sublshC_n.asm applies to each vp
+C limb, and RSH is the matching right shift count, 64 - LSH, it uses to obtain
+C the bits shifted out of that limb.
+define(LSH, 2)
+define(RSH, 62)
+
+C The included body passes func to PROLOGUE, so this sets the entry point name.
+define(func, mpn_sublsh2_n)
+
+MULFUNC_PROLOGUE(mpn_sublsh2_n)
+
+C Parameterized body; it uses LSH, RSH and func as defined above.
+include_mpn(`sparc64/ultrasparct1/sublshC_n.asm')
--- /dev/null
+dnl SPARC v9 mpn_sublshC_n for T1/T2.
+
+dnl Copyright 2010 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+C cycles/limb
+C UltraSPARC T1: 21
+C UltraSPARC T2: ?
+
+C INPUT PARAMETERS
+define(`rp', `%o0')
+define(`up', `%o1')
+define(`vp', `%o2')
+define(`n', `%o3')
+define(`cy', `%o4')
+
+ASM_START()
+ REGISTER(%g2,#scratch)
+ REGISTER(%g3,#scratch)
+C Shared body for the sublsh wrappers: each result limb is the up limb minus
+C the vp limb shifted left by LSH bits, with the bits shifted out of one vp
+C limb carried into the next via %g5.  The including file must define LSH, RSH
+C and func.  Returns the bits shifted out of the last vp limb plus the final
+C borrow.
+PROLOGUE(func)
+ mov 0, cy
+ mov 0, %g5 C no bits shifted in below the first limb
+ cmp %g0, cy C 0 - 0: starts the loop with the carry flag clear
+L(top): ldx [up+0], %o4 C next up limb
+ add up, 8, up
+ ldx [vp+0], %o5 C next vp limb
+ add vp, 8, vp
+ add rp, 8, rp
+
+ sllx %o5, LSH, %g4 C vp limb shifted left
+ add n, -1, n
+ or %g5, %g4, %g4 C merge the bits shifted out of the previous vp limb
+ srlx %o5, RSH, %g5 C bits shifted out of this vp limb, for the next round
+
+ srlx %o4, 32, %g1 C high half of the up limb
+ srlx %g4, 32, %g2 C high half of the shifted limb
+ subccc %o4, %g4, %g3 C up limb - shifted limb - borrow; icc carry = borrow out of bit 31
+ subccc %g1, %g2, %g0 C high halves with that borrow; result dropped, carry = borrow out of bit 63
+ brgz n, L(top) C loop while limbs remain
+ stx %g3, [rp-8] C delay slot: store the result limb; rp was already advanced
+
+ retl
+ addc %g5, %g0, %o0 C delay slot: return shifted-out bits + borrow
+EPILOGUE()
--- /dev/null
+dnl SPARC v9 mpn_submul_1 for T1/T2.
+
+dnl Copyright 2010 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C UltraSPARC T1: 74
+C UltraSPARC T2: ?
+
+C INPUT PARAMETERS
+define(`rp', `%i0')
+define(`up', `%i1')
+define(`n', `%i2')
+define(`v0', `%i3')
+
+ASM_START()
+ REGISTER(%g2,#scratch)
+ REGISTER(%g3,#scratch)
+C Subtracts the product of the n up limbs and v0 from the rp limbs in place
+C and returns the final carry limb.  Each 64x64 product is assembled from four
+C mulx of 32-bit halves.  movlu on xcc acts as move-if-carry after addcc and
+C as move-if-borrow after subcc.
+PROLOGUE(mpn_submul_1)
+ save %sp, -176, %sp
+ mov 1, %o2
+ mov %i0, %g2 C g2 = rp, freeing i0 to hold the running carry limb
+ srlx %i3, 32, %o4 C o4 = high 32 bits of v0
+ sllx %o2, 32, %o2 C o2 = 2^32, correction used when the middle sum wraps
+ srl %i3, 0, %i3 C i3 = low 32 bits of v0, zero extended
+ mov 0, %g3 C g3 = byte offset into up and rp
+ mov 0, %i0 C carry limb starts at zero
+
+L(top): ldx [%i1+%g3], %g1 C next up limb
+ srl %g1, 0, %g4 C g4 = low half of the limb
+ mulx %g4, %i3, %o5 C o5 = ulo * vlo
+ srlx %g1, 32, %g1 C g1 = high half of the limb
+ mulx %g1, %i3, %g5 C g5 = uhi * vlo
+ mulx %g4, %o4, %g4 C g4 = ulo * vhi
+ mulx %g1, %o4, %g1 C g1 = uhi * vhi
+ srlx %o5, 32, %o1
+ add %g5, %o1, %o1 C o1 = uhi*vlo + high half of ulo*vlo
+ addcc %o1, %g4, %g4 C g4 = middle sum; xcc carry set if it wrapped
+ srl %o5, 0, %o0 C o0 = low half of ulo*vlo
+ ldx [%g2+%g3], %o5 C o5 = current rp limb
+ sllx %g4, 32, %o1
+ add %g1, %o2, %l1
+ movlu %xcc, %l1, %g1 C if the middle sum wrapped, add 2^32 to uhi*vhi
+ add %o1, %o0, %l0 C l0 = low limb of the product
+ addcc %l0, %i0, %g5 C add the carry limb from the previous iteration
+ srlx %g4, 32, %i0 C i0 = high half of the middle sum
+ add %i0, 1, %g4
+ movlu %xcc, %g4, %i0 C propagate the carry of that addition
+ subcc %o5, %g5, %g5 C rp limb minus the product limb
+ stx %g5, [%g2+%g3] C store the result limb
+ add %i0, 1, %g4
+ movlu %xcc, %g4, %i0 C propagate the borrow of that subtraction
+ add %i2, -1, %i2 C one limb done
+ add %i0, %g1, %i0 C i0 = carry limb for the next iteration
+ brnz,pt %i2, L(top)
+ add %g3, 8, %g3 C delay slot: advance the byte offset
+ return %i7+8 C window restore makes i0 the caller o0
+ nop
+EPILOGUE()
--- /dev/null
+dnl ARM/Thumb mpn_add_n.
+
+dnl Copyright 1997, 2000, 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published by
+dnl the Free Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+define(`rp', r0)
+define(`up', r1)
+define(`vp', r2)
+define(`n', r3)
+
+ASM_START()
+ .thumb
+C mpn_add_nc: enters the mpn_add_n loop with an initial carry.  The carry
+C value is loaded from [sp, #12], i.e. just above the three registers pushed
+C here (presumably the fifth, stack-passed argument -- TODO confirm).
+PROLOGUE(mpn_add_nc)
+ push {r4, r5, r6}
+ ldr r6, [sp, #12] C init carry save register
+C r6 = carry_in - 1: 0 for carry set, -1 for carry clear.  This is the same
+C encoding the sbc at the bottom of the loop produces.
+ sub r6, #1
+ b L(top)
+EPILOGUE()
+C mpn_add_n: {rp,n} = {up,n} + {vp,n}, returning the carry out (0 or 1) in r0.
+C
+C The loop counter update (sub n, #1) feeds the bne and so sets the flags,
+C destroying the carry.  The carry is therefore parked in r6 as 0 (carry) or
+C -1 (no carry) and re-created by the unsigned compare cmp n, r6:
+C n >= 0 always sets C, while n >= 0xffffffff does not for any realistic n.
+C The initial r6 = -n likewise compares as no carry.
+PROLOGUE(mpn_add_n)
+ push {r4, r5, r6}
+ neg r6, n C init carry save register
+
+L(top): ldmia up!, {r4} C load next limb from S1
+ cmp n, r6 C tricky carry restore
+ ldmia vp!, {r5} C load next limb from S2
+ adc r4, r5
+ stmia rp!, {r4} C store result limb to RES
+ sbc r6, r6 C save negated carry
+ sub n, #1
+ bne L(top)
+
+C r6 is 0 or -1 here, so r6 + 1 is the carry out.
+ add r0, r6, #1
+ pop {r4, r5, r6}
+ bx lr
+EPILOGUE()
--- /dev/null
+dnl ARM/Thumb mpn_sub_n.
+
+dnl Copyright 1997, 2000, 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published by
+dnl the Free Software Foundation; either version 3 of the License, or (at your
+dnl option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C INPUT PARAMETERS
+define(`rp', r0)
+define(`up', r1)
+define(`vp', r2)
+define(`n', r3)
+
+ASM_START()
+ .thumb
+C mpn_sub_nc: enters the mpn_sub_n loop with an initial borrow.  The borrow
+C value is loaded from [sp, #12], i.e. just above the three registers pushed
+C here (presumably the fifth, stack-passed argument -- TODO confirm).
+PROLOGUE(mpn_sub_nc)
+ push {r4, r5, r6}
+ ldr r6, [sp, #12] C init carry save register
+C r6 = -borrow_in: 0 for no borrow, -1 for borrow.  This is the same encoding
+C the second sbc in the loop produces.
+ neg r6, r6
+ b L(top)
+EPILOGUE()
+C mpn_sub_n: {rp,n} = {up,n} - {vp,n}, returning the borrow out (0 or 1) in r0.
+C
+C On ARM the C flag is set when there is NO borrow.  The loop counter update
+C (sub n, #1) feeds the bne and so sets the flags, so the borrow state is
+C parked in r6 as 0 (no borrow) or -1 (borrow) and re-created by the unsigned
+C compare cmp n, r6: n >= 0 sets C, n >= 0xffffffff does not for any
+C realistic n.  The initial r6 = n compares equal, i.e. C set, no borrow.
+PROLOGUE(mpn_sub_n)
+ push {r4, r5, r6}
+ mov r6, n C init carry save register
+
+L(top): ldmia up!, {r4} C load next limb from S1
+ cmp n, r6 C tricky carry restore
+ ldmia vp!, {r5} C load next limb from S2
+ sbc r4, r5
+ stmia rp!, {r4} C store result limb to RES
+ sbc r6, r6 C save negated carry
+ sub n, #1
+ bne L(top)
+
+C r6 is 0 or -1 here, so its negation is the borrow out.
+ neg r0, r6
+ pop {r4, r5, r6}
+ bx lr
+EPILOGUE()
--- /dev/null
+dnl VAX mpn_add_n -- Add two limb vectors of the same length > 0 and store sum
+dnl in a third limb vector.
+
+dnl Copyright 1999, 2000, 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+ASM_START()
+C mpn_add_n(res_ptr, s1_ptr, s2_ptr, size): the arguments are read from
+C 4(ap), 8(ap), 12(ap) and 16(ap) into r3, r2, r1 and r0 respectively.
+C Adds the two size-limb vectors, stores the sum at res_ptr and returns the
+C carry out in r0.
+PROLOGUE(mpn_add_n)
+C Empty register mask (presumably the VAX procedure entry mask): only r0-r5
+C are used below.
+ .word 0x0
+ movl 16(ap), r0
+ movl 12(ap), r1
+ movl 8(ap), r2
+ movl 4(ap), r3
+C The loop is unrolled four times.  r0 becomes the iteration count
+C (size+3)/4 and r5 the number of leading limb steps to skip, (-size) & 3.
+ mnegl r0, r5
+ addl2 $3, r0
+ ashl $-2, r0, r0 C unroll loop count
+ bicl2 $-4, r5 C mask out low 2 bits
+C r5 + 8*r5: scale the skip count by 9, presumably the encoded size in bytes
+C of one movl/adwc/movl group, then jump that far into the loop.
+C NOTE(review): the first adwc relies on the carry flag being clear on
+C arrival -- confirm that none of the instructions after ashl set it.
+ movaq (r5)[r5], r5 C 9x
+ jmp L(top)[r5]
+
+L(top): movl (r2)+, r4
+ adwc (r1)+, r4
+ movl r4, (r3)+
+ movl (r2)+, r4
+ adwc (r1)+, r4
+ movl r4, (r3)+
+ movl (r2)+, r4
+ adwc (r1)+, r4
+ movl r4, (r3)+
+ movl (r2)+, r4
+ adwc (r1)+, r4
+ movl r4, (r3)+
+ sobgtr r0, L(top)
+
+C The loop exits with r0 = 0, so this leaves r0 = 0 + 0 + carry.
+ adwc r0, r0
+ ret
+EPILOGUE()
+++ /dev/null
-# VAX __gmpn_add_n -- Add two limb vectors of the same length > 0 and store
-# sum in a third limb vector.
-
-# Copyright 1999, 2000 Free Software Foundation, Inc.
-
-# This file is part of the GNU MP Library.
-
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 3 of the License, or (at your
-# option) any later version.
-
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-# License for more details.
-
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
-
-
-# INPUT PARAMETERS
-# res_ptr (sp + 4)
-# s1_ptr (sp + 8)
-# s2_ptr (sp + 12)
-# size (sp + 16)
-
-.text
- .align 1
-.globl ___gmpn_add_n
-___gmpn_add_n:
- .word 0x0
- movl 16(ap),r0
- movl 12(ap),r1
- movl 8(ap),r2
- movl 4(ap),r3
- mnegl r0,r5
- addl2 $3,r0
- ashl $-2,r0,r0 # unroll loop count
- bicl2 $-4,r5 # mask out low 2 bits
- movaq (r5)[r5],r5 # 9x
- jmp Loop(r5)
-
-Loop: movl (r2)+,r4
- adwc (r1)+,r4
- movl r4,(r3)+
- movl (r2)+,r4
- adwc (r1)+,r4
- movl r4,(r3)+
- movl (r2)+,r4
- adwc (r1)+,r4
- movl r4,(r3)+
- movl (r2)+,r4
- adwc (r1)+,r4
- movl r4,(r3)+
- sobgtr r0,Loop
-
- adwc r0,r0
- ret
--- /dev/null
+dnl VAX mpn_addmul_1 -- Multiply a limb vector with a limb and add the result
+dnl to a second limb vector.
+
+dnl Copyright 1992, 1994, 1996, 2000, 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+ASM_START()
+C mpn_addmul_1(res_ptr, s1_ptr, size, s2_limb): the arguments are read from
+C 4(ap), 8(ap), 12(ap) and 16(ap) into r9, r8, r4 and r6 respectively.
+C Adds {s1_ptr,size} * s2_limb into {res_ptr,size} and returns the high
+C (carry) limb in r0.
+C
+C emul is a signed 32x32->64 bit multiply, so the unsigned product is obtained
+C by adding s2_limb (r6) to the high word when the s1 limb (r1) has its top
+C bit set, and adding r1 to the high word when r6 has its top bit set.  The
+C sign of r6 selects one of the two loops; the sign of each r1 selects the
+C p or n code path inside it.
+C
+C Each loop handles two limbs per iteration, alternating r2/r3 and r10/r11 as
+C low/high product words; the high word of one product is the carry into the
+C next, and the final carry always ends up in r11.
+PROLOGUE(mpn_addmul_1)
+C Register mask 0xfc0 has bits 6-11 set, i.e. r6-r11.
+ .word 0xfc0
+ movl 12(ap), r4
+ movl 8(ap), r8
+ movl 4(ap), r9
+ clrl r3
+C r7 = (size+1)/2 iterations; an odd size enters the loop at its midpoint.
+ incl r4
+ ashl $-1, r4, r7
+ clrl r11
+C The jlss below tests the sign of s2_limb as set by this movl.
+ movl 16(ap), r6
+ jlss L(v0_big)
+ jlbc r4, L(1)
+
+C Loop for v0 < 0x80000000
+L(tp1): movl (r8)+, r1
+ jlss L(1n0)
+ emul r1, r6, $0, r2
+ addl2 r11, r2
+ adwc $0, r3
+ addl2 r2, (r9)+
+ adwc $0, r3
+L(1): movl (r8)+, r1
+ jlss L(1n1)
+L(1p1): emul r1, r6, $0, r10
+ addl2 r3, r10
+ adwc $0, r11
+ addl2 r10, (r9)+
+ adwc $0, r11
+
+ sobgtr r7, L(tp1)
+ movl r11, r0
+ ret
+
+C s1 limb has its top bit set: r6 is folded into the high word together with
+C the carry from the low-word addition.
+L(1n0): emul r1, r6, $0, r2
+ addl2 r11, r2
+ adwc r6, r3
+ addl2 r2, (r9)+
+ adwc $0, r3
+ movl (r8)+, r1
+ jgeq L(1p1)
+L(1n1): emul r1, r6, $0, r10
+ addl2 r3, r10
+ adwc r6, r11
+ addl2 r10, (r9)+
+ adwc $0, r11
+
+ sobgtr r7, L(tp1)
+ movl r11, r0
+ ret
+
+L(v0_big):
+ jlbc r4, L(2)
+
+C Loop for v0 >= 0x80000000
+L(tp2): movl (r8)+, r1
+ jlss L(2n0)
+ emul r1, r6, $0, r2
+ addl2 r11, r2
+ adwc r1, r3
+ addl2 r2, (r9)+
+ adwc $0, r3
+L(2): movl (r8)+, r1
+ jlss L(2n1)
+L(2p1): emul r1, r6, $0, r10
+ addl2 r3, r10
+ adwc r1, r11
+ addl2 r10, (r9)+
+ adwc $0, r11
+
+ sobgtr r7, L(tp2)
+ movl r11, r0
+ ret
+
+C Both operands have their top bit set: r6 and r1 are both folded into the
+C high word, one with each carry propagation.
+L(2n0): emul r1, r6, $0, r2
+ addl2 r11, r2
+ adwc r6, r3
+ addl2 r2, (r9)+
+ adwc r1, r3
+ movl (r8)+, r1
+ jgeq L(2p1)
+L(2n1): emul r1, r6, $0, r10
+ addl2 r3, r10
+ adwc r6, r11
+ addl2 r10, (r9)+
+ adwc r1, r11
+
+ sobgtr r7, L(tp2)
+ movl r11, r0
+ ret
+EPILOGUE()
+++ /dev/null
-# VAX __gmpn_addmul_1 -- Multiply a limb vector with a limb and add
-# the result to a second limb vector.
-
-# Copyright 1992, 1994, 1996, 2000 Free Software Foundation, Inc.
-
-# This file is part of the GNU MP Library.
-
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 3 of the License, or (at your
-# option) any later version.
-
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-# License for more details.
-
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
-
-
-# INPUT PARAMETERS
-# res_ptr (sp + 4)
-# s1_ptr (sp + 8)
-# size (sp + 12)
-# s2_limb (sp + 16)
-
-.text
- .align 1
-.globl ___gmpn_addmul_1
-___gmpn_addmul_1:
- .word 0xfc0
- movl 12(ap),r4
- movl 8(ap),r8
- movl 4(ap),r9
- movl 16(ap),r6
- jlss s2_big
-
- clrl r3
- incl r4
- ashl $-1,r4,r7
- jlbc r4,L1
- clrl r11
-
-# Loop for S2_LIMB < 0x80000000
-Loop1: movl (r8)+,r1
- jlss L1n0
- emul r1,r6,$0,r2
- addl2 r11,r2
- adwc $0,r3
- addl2 r2,(r9)+
- adwc $0,r3
-L1: movl (r8)+,r1
- jlss L1n1
-L1p1: emul r1,r6,$0,r10
- addl2 r3,r10
- adwc $0,r11
- addl2 r10,(r9)+
- adwc $0,r11
-
- sobgtr r7,Loop1
- movl r11,r0
- ret
-
-L1n0: emul r1,r6,$0,r2
- addl2 r11,r2
- adwc r6,r3
- addl2 r2,(r9)+
- adwc $0,r3
- movl (r8)+,r1
- jgeq L1p1
-L1n1: emul r1,r6,$0,r10
- addl2 r3,r10
- adwc r6,r11
- addl2 r10,(r9)+
- adwc $0,r11
-
- sobgtr r7,Loop1
- movl r11,r0
- ret
-
-
-s2_big: clrl r3
- incl r4
- ashl $-1,r4,r7
- jlbc r4,L2
- clrl r11
-
-# Loop for S2_LIMB >= 0x80000000
-Loop2: movl (r8)+,r1
- jlss L2n0
- emul r1,r6,$0,r2
- addl2 r11,r2
- adwc r1,r3
- addl2 r2,(r9)+
- adwc $0,r3
-L2: movl (r8)+,r1
- jlss L2n1
-L2p1: emul r1,r6,$0,r10
- addl2 r3,r10
- adwc r1,r11
- addl2 r10,(r9)+
- adwc $0,r11
-
- sobgtr r7,Loop2
- movl r11,r0
- ret
-
-L2n0: emul r1,r6,$0,r2
- addl2 r11,r2
- adwc r6,r3
- addl2 r2,(r9)+
- adwc r1,r3
- movl (r8)+,r1
- jgeq L2p1
-L2n1: emul r1,r6,$0,r10
- addl2 r3,r10
- adwc r6,r11
- addl2 r10,(r9)+
- adwc r1,r11
-
- sobgtr r7,Loop2
- movl r11,r0
- ret
--- /dev/null
+divert(-1)
+
+dnl m4 macros for VAX assembler.
+
+dnl Copyright 2001, 2012 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+
+dnl Register names.  Each bare name (r0 ... r15, ap) is mapped to the same
+dnl name with a % prefix.  defreg is not defined in this file; presumably it
+dnl comes from the shared GMP m4 definitions, and the empty quote pair in the
+dnl middle of each replacement presumably keeps the result from being
+dnl expanded a second time -- TODO confirm.
+defreg(r0,`%r``''0')
+defreg(r1,`%r``''1')
+defreg(r2,`%r``''2')
+defreg(r3,`%r``''3')
+defreg(r4,`%r``''4')
+defreg(r5,`%r``''5')
+defreg(r6,`%r``''6')
+defreg(r7,`%r``''7')
+defreg(r8,`%r``''8')
+defreg(r9,`%r``''9')
+defreg(r10,`%r``''10')
+defreg(r11,`%r``''11')
+defreg(r12,`%r``''12')
+defreg(r13,`%r``''13')
+defreg(r14,`%r``''14')
+defreg(r15,`%r``''15')
+defreg(ap,`%a``''p')
+
+dnl NOTE(review): the next definition looks like a leftover placeholder;
+dnl nothing in the visible VAX sources refers to it -- confirm and remove.
+define(`foo', blablabla)
+
+divert
--- /dev/null
+dnl VAX mpn_lshift -- left shift.
+
+dnl Copyright 1999, 2000, 2001, 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+ASM_START()
+C mpn_lshift(rptr, sptr, size, cnt): the arguments are read from 4(ap),
+C 8(ap), 12(ap) and 16(ap) into r7, r6, r1 and r8 respectively.
+C Shifts {sptr,size} left by cnt bits into {rptr,size}, working from the most
+C significant limb downwards, and returns the bits shifted out of the top
+C limb in r0.
+PROLOGUE(mpn_lshift)
+C Register mask 0x1c0 has bits 6-8 set, i.e. r6-r8.
+ .word 0x1c0
+ movl 4(ap), r7
+ movl 8(ap), r6
+ movl 12(ap), r1
+ movl 16(ap), r8
+
+C Point r6 and r7 just past the last limb of the source and destination.
+ moval (r6)[r1], r6
+ moval (r7)[r1], r7
+C First step: shift the quadword r3:r2 = 0:top limb.  The high result word
+C r5 holds the bits shifted out and becomes the return value.
+ clrl r3
+ movl -(r6), r2
+ ashq r8, r2, r4
+ movl r5, r0
+ movl r2, r3
+ decl r1
+ jeql L(end)
+
+C Each pass shifts the quadword r3:r2 (previous limb : current limb) and
+C stores the high result word r5; r4 keeps the current limb shifted left.
+L(top): movl -(r6), r2
+ ashq r8, r2, r4
+ movl r5, -(r7)
+ movl r2, r3
+ sobgtr r1, L(top)
+
+C The lowest result limb is the low word of the last shift.
+L(end): movl r4, -4(r7)
+ ret
+EPILOGUE()
+++ /dev/null
-# VAX mpn_lshift -- left shift.
-
-# Copyright 1999, 2000, 2001 Free Software Foundation, Inc.
-
-# This file is part of the GNU MP Library.
-
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 3 of the License, or (at your
-# option) any later version.
-
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-# License for more details.
-
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
-
-
-# INPUT PARAMETERS
-# rptr (sp + 4)
-# sptr (sp + 8)
-# size (sp + 12)
-# cnt (sp + 16)
-# r0=retval r1=size r2,r3=itmp r4,r5=otmp call-used registers
-# r6=sptr r7=rptr r8=cnt r9 r10 r11 call-saved registers
-
-.text
- .align 1
-.globl ___gmpn_lshift
-___gmpn_lshift:
- .word 0x1c0
- movl 4(ap),r7
- movl 8(ap),r6
- movl 12(ap),r1
- movl 16(ap),r8
-
- moval (r6)[r1],r6
- moval (r7)[r1],r7
- clrl r3
- movl -(r6),r2
- ashq r8,r2,r4
- movl r5,r0
- movl r2,r3
- decl r1
- jeql Lend
-
-Loop: movl -(r6),r2
- ashq r8,r2,r4
- movl r5,-(r7)
- movl r2,r3
- sobgtr r1,Loop
-
-Lend: movl r4,-4(r7)
- ret
--- /dev/null
+dnl VAX mpn_mul_1 -- Multiply a limb vector with a limb and store the result
+dnl in a second limb vector.
+
+dnl Copyright 1992, 1994, 1996, 2000, 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+ASM_START()
+C mpn_mul_1(res_ptr, s1_ptr, size, s2_limb): the arguments are read from
+C 4(ap), 8(ap), 12(ap) and 16(ap) into r9, r8, r4 and r6 respectively.
+C Stores {s1_ptr,size} * s2_limb at {res_ptr,size} and returns the high
+C (carry) limb in r0.
+C
+C emul is a signed 32x32->64 bit multiply, so the unsigned product is obtained
+C by adding s2_limb (r6) to the high word when the s1 limb (r1) has its top
+C bit set, and adding r1 to the high word when r6 has its top bit set.  The
+C sign of r6 selects one of the two loops; the sign of each r1 selects the
+C p or n code path inside it.
+C
+C Each loop handles two limbs per iteration, alternating r2/r3 and r10/r11 as
+C low/high product words; the high word of one product is the carry into the
+C next, and the final carry always ends up in r11.
+PROLOGUE(mpn_mul_1)
+C Register mask 0xfc0 has bits 6-11 set, i.e. r6-r11.
+ .word 0xfc0
+ movl 12(ap), r4
+ movl 8(ap), r8
+ movl 4(ap), r9
+ clrl r3
+C r7 = (size+1)/2 iterations; an odd size enters the loop at its midpoint.
+ incl r4
+ ashl $-1, r4, r7
+ clrl r11
+C The jlss below tests the sign of s2_limb as set by this movl.
+ movl 16(ap), r6
+ jlss L(v0_big)
+ jlbc r4, L(1)
+
+C Loop for v0 < 0x80000000
+L(tp1): movl (r8)+, r1
+ jlss L(1n0)
+ emul r1, r6, $0, r2
+ addl2 r11, r2
+ adwc $0, r3
+ movl r2, (r9)+
+L(1): movl (r8)+, r1
+ jlss L(1n1)
+L(1p1): emul r1, r6, $0, r10
+ addl2 r3, r10
+ adwc $0, r11
+ movl r10, (r9)+
+
+ sobgtr r7, L(tp1)
+ movl r11, r0
+ ret
+
+C s1 limb has its top bit set: r6 is folded into the high word together with
+C the carry from the low-word addition.
+L(1n0): emul r1, r6, $0, r2
+ addl2 r11, r2
+ adwc r6, r3
+ movl r2, (r9)+
+ movl (r8)+, r1
+ jgeq L(1p1)
+L(1n1): emul r1, r6, $0, r10
+ addl2 r3, r10
+ adwc r6, r11
+ movl r10, (r9)+
+
+ sobgtr r7, L(tp1)
+ movl r11, r0
+ ret
+
+L(v0_big):
+ jlbc r4, L(2)
+
+C Loop for v0 >= 0x80000000
+L(tp2): movl (r8)+, r1
+ jlss L(2n0)
+ emul r1, r6, $0, r2
+ addl2 r11, r2
+ adwc r1, r3
+ movl r2, (r9)+
+L(2): movl (r8)+, r1
+ jlss L(2n1)
+L(2p1): emul r1, r6, $0, r10
+ addl2 r3, r10
+ adwc r1, r11
+ movl r10, (r9)+
+
+ sobgtr r7, L(tp2)
+ movl r11, r0
+ ret
+
+C Both operands have their top bit set: r1 is added to the high word first,
+C then r6 together with the carry from the low-word addition.
+L(2n0): emul r1, r6, $0, r2
+ addl2 r1, r3
+ addl2 r11, r2
+ adwc r6, r3
+ movl r2, (r9)+
+ movl (r8)+, r1
+ jgeq L(2p1)
+L(2n1): emul r1, r6, $0, r10
+ addl2 r1, r11
+ addl2 r3, r10
+ adwc r6, r11
+ movl r10, (r9)+
+
+ sobgtr r7, L(tp2)
+ movl r11, r0
+ ret
+EPILOGUE()
+++ /dev/null
-# VAX __gmpn_mul_1 -- Multiply a limb vector with a limb and store
-# the result in a second limb vector.
-
-# Copyright 1992, 1994, 1996, 2000 Free Software Foundation, Inc.
-
-# This file is part of the GNU MP Library.
-
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 3 of the License, or (at your
-# option) any later version.
-
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-# License for more details.
-
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
-
-
-# INPUT PARAMETERS
-# res_ptr (sp + 4)
-# s1_ptr (sp + 8)
-# size (sp + 12)
-# s2_limb (sp + 16)
-
-.text
- .align 1
-.globl ___gmpn_mul_1
-___gmpn_mul_1:
- .word 0xfc0
- movl 12(ap),r4
- movl 8(ap),r8
- movl 4(ap),r9
- movl 16(ap),r6
- jlss s2_big
-
-# One might want to combine the addl2 and the store below, but that
-# is actually just slower according to my timing tests. (VAX 3600)
-
- clrl r3
- incl r4
- ashl $-1,r4,r7
- jlbc r4,L1
- clrl r11
-
-# Loop for S2_LIMB < 0x80000000
-Loop1: movl (r8)+,r1
- jlss L1n0
- emul r1,r6,$0,r2
- addl2 r11,r2
- adwc $0,r3
- movl r2,(r9)+
-L1: movl (r8)+,r1
- jlss L1n1
-L1p1: emul r1,r6,$0,r10
- addl2 r3,r10
- adwc $0,r11
- movl r10,(r9)+
-
- sobgtr r7,Loop1
- movl r11,r0
- ret
-
-L1n0: emul r1,r6,$0,r2
- addl2 r11,r2
- adwc r6,r3
- movl r2,(r9)+
- movl (r8)+,r1
- jgeq L1p1
-L1n1: emul r1,r6,$0,r10
- addl2 r3,r10
- adwc r6,r11
- movl r10,(r9)+
-
- sobgtr r7,Loop1
- movl r11,r0
- ret
-
-
-s2_big: clrl r3
- incl r4
- ashl $-1,r4,r7
- jlbc r4,L2
- clrl r11
-
-# Loop for S2_LIMB >= 0x80000000
-Loop2: movl (r8)+,r1
- jlss L2n0
- emul r1,r6,$0,r2
- addl2 r11,r2
- adwc r1,r3
- movl r2,(r9)+
-L2: movl (r8)+,r1
- jlss L2n1
-L2p1: emul r1,r6,$0,r10
- addl2 r3,r10
- adwc r1,r11
- movl r10,(r9)+
-
- sobgtr r7,Loop2
- movl r11,r0
- ret
-
-L2n0: emul r1,r6,$0,r2
- addl2 r1,r3
- addl2 r11,r2
- adwc r6,r3
- movl r2,(r9)+
- movl (r8)+,r1
- jgeq L2p1
-L2n1: emul r1,r6,$0,r10
- addl2 r1,r11
- addl2 r3,r10
- adwc r6,r11
- movl r10,(r9)+
-
- sobgtr r7,Loop2
- movl r11,r0
- ret
--- /dev/null
+dnl VAX mpn_rshift -- right shift.
+
+dnl Copyright 1999, 2000, 2001, 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+ASM_START()
+C mpn_rshift(rptr, sptr, size, cnt): the arguments are read from 4(ap),
+C 8(ap), 12(ap) and 16(ap) into r7, r6, r1 and r8 respectively.
+C Shifts {sptr,size} right by cnt bits into {rptr,size}, working from the
+C least significant limb upwards, and returns the bits shifted out of the
+C lowest limb (left-aligned) in r0.
+PROLOGUE(mpn_rshift)
+C Register mask 0x1c0 has bits 6-8 set, i.e. r6-r8.
+ .word 0x1c0
+ movl 4(ap), r7
+ movl 8(ap), r6
+ movl 12(ap), r1
+ movl 16(ap), r8
+
+C The right shift is done as a left shift by 32 - cnt on a quadword, taking
+C the high result word.  r8 is replaced by 32 - cnt here.
+ movl (r6)+, r2
+ subl3 r8, $32, r8
+ ashl r8, r2, r0
+ decl r1
+ jeql L(end)
+
+C Each pass shifts the quadword r3:r2 (next limb : current limb) and stores
+C the high result word r5.
+L(top): movl (r6)+, r3
+ ashq r8, r2, r4
+ movl r5, (r7)+
+ movl r3, r2
+ sobgtr r1, L(top)
+
+C Top limb: zeros are shifted in from above.
+L(end): clrl r3
+ ashq r8, r2, r4
+ movl r5, (r7)
+ ret
+EPILOGUE()
+++ /dev/null
-# VAX mpn_rshift -- right shift.
-
-# Copyright 1999, 2000, 2001 Free Software Foundation, Inc.
-
-# This file is part of the GNU MP Library.
-
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 3 of the License, or (at your
-# option) any later version.
-
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-# License for more details.
-
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
-
-
-# INPUT PARAMETERS
-# rptr (sp + 4)
-# sptr (sp + 8)
-# size (sp + 12)
-# cnt (sp + 16)
-# r0=retval r1=size r2,r3=itmp r4,r5=otmp call-used registers
-# r6=sptr r7=rptr r8=cnt r9 r10 r11 call-saved registers
-
-.text
- .align 1
-.globl ___gmpn_rshift
-___gmpn_rshift:
- .word 0x1c0
- movl 4(ap),r7
- movl 8(ap),r6
- movl 12(ap),r1
- movl 16(ap),r8
-
- movl (r6)+,r2
- subl3 r8,$32,r8
- ashl r8,r2,r0
- decl r1
- jeql Lend
-
-Loop: movl (r6)+,r3
- ashq r8,r2,r4
- movl r5,(r7)+
- movl r3,r2
- sobgtr r1,Loop
-
-Lend: clrl r3
- ashq r8,r2,r4
- movl r5,(r7)
- ret
--- /dev/null
+dnl VAX mpn_sub_n -- Subtract two limb vectors of the same length > 0 and
+dnl store difference in a third limb vector.
+
+dnl Copyright 1999, 2000, 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+ASM_START()
+C mpn_sub_n(res_ptr, s1_ptr, s2_ptr, size): the arguments are read from
+C 4(ap), 8(ap), 12(ap) and 16(ap) into r3, r2, r1 and r0 respectively.
+C Subtracts {s2_ptr,size} from {s1_ptr,size}, stores the difference at
+C res_ptr and returns the borrow out in r0.
+PROLOGUE(mpn_sub_n)
+C Empty register mask (presumably the VAX procedure entry mask): only r0-r5
+C are used below.
+ .word 0x0
+ movl 16(ap), r0
+ movl 12(ap), r1
+ movl 8(ap), r2
+ movl 4(ap), r3
+C The loop is unrolled four times.  r0 becomes the iteration count
+C (size+3)/4 and r5 the number of leading limb steps to skip, (-size) & 3.
+ mnegl r0, r5
+ addl2 $3, r0
+ ashl $-2, r0, r0 C unroll loop count
+ bicl2 $-4, r5 C mask out low 2 bits
+C r5 + 8*r5: scale the skip count by 9, presumably the encoded size in bytes
+C of one movl/sbwc/movl group, then jump that far into the loop.
+C NOTE(review): the first sbwc relies on the carry flag being clear on
+C arrival -- confirm that none of the instructions after ashl set it.
+ movaq (r5)[r5], r5 C 9x
+ jmp L(top)[r5]
+
+L(top): movl (r2)+, r4
+ sbwc (r1)+, r4
+ movl r4, (r3)+
+ movl (r2)+, r4
+ sbwc (r1)+, r4
+ movl r4, (r3)+
+ movl (r2)+, r4
+ sbwc (r1)+, r4
+ movl r4, (r3)+
+ movl (r2)+, r4
+ sbwc (r1)+, r4
+ movl r4, (r3)+
+ sobgtr r0, L(top)
+
+C The loop exits with r0 = 0, so this leaves r0 = 0 + 0 + borrow.
+ adwc r0, r0
+ ret
+EPILOGUE()
+++ /dev/null
-# VAX __gmpn_sub_n -- Subtract two limb vectors of the same length > 0 and store
-# difference in a third limb vector.
-
-# Copyright 1999, 2000 Free Software Foundation, Inc.
-
-# This file is part of the GNU MP Library.
-
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 3 of the License, or (at your
-# option) any later version.
-
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-# License for more details.
-
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
-
-
-# INPUT PARAMETERS
-# res_ptr (sp + 4)
-# s1_ptr (sp + 8)
-# s2_ptr (sp + 12)
-# size (sp + 16)
-
-.text
- .align 1
-.globl ___gmpn_sub_n
-___gmpn_sub_n:
- .word 0x0
- movl 16(ap),r0
- movl 12(ap),r1
- movl 8(ap),r2
- movl 4(ap),r3
- mnegl r0,r5
- addl2 $3,r0
- ashl $-2,r0,r0 # unroll loop count
- bicl2 $-4,r5 # mask out low 2 bits
- movaq (r5)[r5],r5 # 9x
- jmp Loop(r5)
-
-Loop: movl (r2)+,r4
- sbwc (r1)+,r4
- movl r4,(r3)+
- movl (r2)+,r4
- sbwc (r1)+,r4
- movl r4,(r3)+
- movl (r2)+,r4
- sbwc (r1)+,r4
- movl r4,(r3)+
- movl (r2)+,r4
- sbwc (r1)+,r4
- movl r4,(r3)+
- sobgtr r0,Loop
-
- adwc r0,r0
- ret
--- /dev/null
+dnl VAX mpn_submul_1 -- Multiply a limb vector with a limb and subtract the
+dnl result from a second limb vector.
+
+dnl Copyright 1992, 1994, 1996, 2000, 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+ASM_START()
+C mpn_submul_1(res_ptr, s1_ptr, size, s2_limb): the arguments are read from
+C 4(ap), 8(ap), 12(ap) and 16(ap) into r9, r8, r4 and r6 respectively.
+C Subtracts {s1_ptr,size} * s2_limb from {res_ptr,size} and returns the high
+C (borrow) limb in r0.
+C
+C emul is a signed 32x32->64 bit multiply, so the unsigned product is obtained
+C by adding s2_limb (r6) to the high word when the s1 limb (r1) has its top
+C bit set, and adding r1 to the high word when r6 has its top bit set.  The
+C sign of r6 selects one of the two loops; the sign of each r1 selects the
+C p or n code path inside it.
+C
+C Each loop handles two limbs per iteration, alternating r2/r3 and r10/r11 as
+C low/high product words.  The borrow from each subl2 is added to the high
+C word, which then becomes the amount carried into the next limb; the final
+C value always ends up in r11.
+PROLOGUE(mpn_submul_1)
+C Register mask 0xfc0 has bits 6-11 set, i.e. r6-r11.
+ .word 0xfc0
+ movl 12(ap), r4
+ movl 8(ap), r8
+ movl 4(ap), r9
+ clrl r3
+C r7 = (size+1)/2 iterations; an odd size enters the loop at its midpoint.
+ incl r4
+ ashl $-1, r4, r7
+ clrl r11
+C The jlss below tests the sign of s2_limb as set by this movl.
+ movl 16(ap), r6
+ jlss L(v0_big)
+ jlbc r4, L(1)
+
+C Loop for v0 < 0x80000000
+L(tp1): movl (r8)+, r1
+ jlss L(1n0)
+ emul r1, r6, $0, r2
+ addl2 r11, r2
+ adwc $0, r3
+ subl2 r2, (r9)+
+ adwc $0, r3
+L(1): movl (r8)+, r1
+ jlss L(1n1)
+L(1p1): emul r1, r6, $0, r10
+ addl2 r3, r10
+ adwc $0, r11
+ subl2 r10, (r9)+
+ adwc $0, r11
+
+ sobgtr r7, L(tp1)
+ movl r11, r0
+ ret
+
+C s1 limb has its top bit set: r6 is folded into the high word together with
+C the carry from the low-word addition.
+L(1n0): emul r1, r6, $0, r2
+ addl2 r11, r2
+ adwc r6, r3
+ subl2 r2, (r9)+
+ adwc $0, r3
+ movl (r8)+, r1
+ jgeq L(1p1)
+L(1n1): emul r1, r6, $0, r10
+ addl2 r3, r10
+ adwc r6, r11
+ subl2 r10, (r9)+
+ adwc $0, r11
+
+ sobgtr r7, L(tp1)
+ movl r11, r0
+ ret
+
+L(v0_big):
+ jlbc r4, L(2)
+
+C Loop for v0 >= 0x80000000
+L(tp2): movl (r8)+, r1
+ jlss L(2n0)
+ emul r1, r6, $0, r2
+ addl2 r11, r2
+ adwc r1, r3
+ subl2 r2, (r9)+
+ adwc $0, r3
+L(2): movl (r8)+, r1
+ jlss L(2n1)
+L(2p1): emul r1, r6, $0, r10
+ addl2 r3, r10
+ adwc r1, r11
+ subl2 r10, (r9)+
+ adwc $0, r11
+
+ sobgtr r7, L(tp2)
+ movl r11, r0
+ ret
+
+C Both operands have their top bit set: r6 and r1 are both folded into the
+C high word, one with each carry/borrow propagation.
+L(2n0): emul r1, r6, $0, r2
+ addl2 r11, r2
+ adwc r6, r3
+ subl2 r2, (r9)+
+ adwc r1, r3
+ movl (r8)+, r1
+ jgeq L(2p1)
+L(2n1): emul r1, r6, $0, r10
+ addl2 r3, r10
+ adwc r6, r11
+ subl2 r10, (r9)+
+ adwc r1, r11
+
+ sobgtr r7, L(tp2)
+ movl r11, r0
+ ret
+EPILOGUE()
+++ /dev/null
-# VAX __gmpn_submul_1 -- Multiply a limb vector with a limb and subtract
-# the result from a second limb vector.
-
-# Copyright 1992, 1994, 1996, 2000 Free Software Foundation, Inc.
-
-# This file is part of the GNU MP Library.
-
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 3 of the License, or (at your
-# option) any later version.
-
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-# License for more details.
-
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
-
-
-# INPUT PARAMETERS
-# res_ptr (sp + 4)
-# s1_ptr (sp + 8)
-# size (sp + 12)
-# s2_limb (sp + 16)
-
-.text
- .align 1
-.globl ___gmpn_submul_1
-___gmpn_submul_1:
- .word 0xfc0
- movl 12(ap),r4
- movl 8(ap),r8
- movl 4(ap),r9
- movl 16(ap),r6
- jlss s2_big
-
- clrl r3
- incl r4
- ashl $-1,r4,r7
- jlbc r4,L1
- clrl r11
-
-# Loop for S2_LIMB < 0x80000000
-Loop1: movl (r8)+,r1
- jlss L1n0
- emul r1,r6,$0,r2
- addl2 r11,r2
- adwc $0,r3
- subl2 r2,(r9)+
- adwc $0,r3
-L1: movl (r8)+,r1
- jlss L1n1
-L1p1: emul r1,r6,$0,r10
- addl2 r3,r10
- adwc $0,r11
- subl2 r10,(r9)+
- adwc $0,r11
-
- sobgtr r7,Loop1
- movl r11,r0
- ret
-
-L1n0: emul r1,r6,$0,r2
- addl2 r11,r2
- adwc r6,r3
- subl2 r2,(r9)+
- adwc $0,r3
- movl (r8)+,r1
- jgeq L1p1
-L1n1: emul r1,r6,$0,r10
- addl2 r3,r10
- adwc r6,r11
- subl2 r10,(r9)+
- adwc $0,r11
-
- sobgtr r7,Loop1
- movl r11,r0
- ret
-
-
-s2_big: clrl r3
- incl r4
- ashl $-1,r4,r7
- jlbc r4,L2
- clrl r11
-
-# Loop for S2_LIMB >= 0x80000000
-Loop2: movl (r8)+,r1
- jlss L2n0
- emul r1,r6,$0,r2
- addl2 r11,r2
- adwc r1,r3
- subl2 r2,(r9)+
- adwc $0,r3
-L2: movl (r8)+,r1
- jlss L2n1
-L2p1: emul r1,r6,$0,r10
- addl2 r3,r10
- adwc r1,r11
- subl2 r10,(r9)+
- adwc $0,r11
-
- sobgtr r7,Loop2
- movl r11,r0
- ret
-
-L2n0: emul r1,r6,$0,r2
- addl2 r11,r2
- adwc r6,r3
- subl2 r2,(r9)+
- adwc r1,r3
- movl (r8)+,r1
- jgeq L2p1
-L2n1: emul r1,r6,$0,r10
- addl2 r3,r10
- adwc r6,r11
- subl2 r10,(r9)+
- adwc r1,r11
-
- sobgtr r7,Loop2
- movl r11,r0
- ret
C cycles/limb
-C P5: 3.375
-C P6: 3.125
-C K6: 3.5
-C K7: 2.25
-C P4: 8.75
+C P5 3.375
+C P6 3.125
+C K6 3.5
+C K7 2.25
+C P4 8.75
ifdef(`OPERATION_add_n',`
C possible to simplify.
pushl %ebp FRAME_pushl()
movl PARAM_CARRY,%ebp
- shrl $1,%ebp C shift bit 0 into carry
+ shrl %ebp C shift bit 0 into carry
popl %ebp FRAME_popl()
jmp *%eax C jump into loop
L(oopgo):
pushl %ebp FRAME_pushl()
movl PARAM_CARRY,%ebp
- shrl $1,%ebp C shift bit 0 into carry
+ shrl %ebp C shift bit 0 into carry
popl %ebp FRAME_popl()
ALIGN(16)
include(`../config.m4')
-
-C cycles/limb
-C P5: 14.75
-C P6 model 0-8,10-12) 7.5
-C P6 model 9 (Banias)
-C P6 model 13 (Dothan) 6.75
-C P4 model 0 (Willamette) 24.0
-C P4 model 1 (?) 24.0
-C P4 model 2 (Northwood) 24.0
+C cycles/limb
+C P5 14.75
+C P6 model 0-8,10-12 7.5
+C P6 model 9 (Banias) 6.7
+C P6 model 13 (Dothan) 6.75
+C P4 model 0 (Willamette) 24.0
+C P4 model 1 (?) 24.0
+C P4 model 2 (Northwood) 24.0
C P4 model 3 (Prescott)
C P4 model 4 (Nocona)
-C K6: 12.5
-C K7: 5.25
-C K8:
+C Intel Atom
+C AMD K6 12.5
+C AMD K7 5.25
+C AMD K8
+C AMD K10
ifdef(`OPERATION_addmul_1',`
--- /dev/null
+dnl Intel Atom mpn_addlsh1_n/mpn_rsblsh1_n -- rp[] = (vp[] << 1) +- up[]
+
+dnl Contributed to the GNU project by Marco Bodrato.
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C Shift parameters for the shared implementation included at the end of
+C this file: LSH is the left-shift count and RSH its complement (LSH + RSH
+C = 32).
+define(LSH, 1)
+define(RSH, 31)
+
+C Select, according to the OPERATION_ symbol that is defined, the
+C carry-propagating instruction (M4_inst), the instruction used to adjust
+C an incoming carry word (M4_opp) and the two exported entry point names.
+C Any other configuration stops with the m4_error below.
+ifdef(`OPERATION_addlsh1_n', `
+ define(M4_inst, adc)
+ define(M4_opp, sub)
+ define(M4_function, mpn_addlsh1_n)
+ define(M4_function_c, mpn_addlsh1_nc)
+',`ifdef(`OPERATION_rsblsh1_n', `
+ define(M4_inst, sbb)
+ define(M4_opp, add)
+ define(M4_function, mpn_rsblsh1_n)
+ define(M4_function_c, mpn_rsblsh1_nc)
+',`m4_error(`Need OPERATION_addlsh1_n or OPERATION_rsblsh1_n
+')')')
+
+C All four functions this file can be built as.
+MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_addlsh1_nc mpn_rsblsh1_n mpn_rsblsh1_nc)
+
+C The code itself lives in the generic shift-by-C file, parameterised by
+C the macros defined above.
+include_mpn(`x86/atom/aorrlshC_n.asm')
--- /dev/null
+dnl Intel Atom mpn_addlsh2_n/mpn_rsblsh2_n -- rp[] = (vp[] << 2) +- up[]
+
+dnl Contributed to the GNU project by Marco Bodrato.
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C Shift parameters for the shared implementation included at the end of
+C this file: LSH is the left-shift count and RSH its complement (LSH + RSH
+C = 32).
+define(LSH, 2)
+define(RSH, 30)
+
+C Select, according to the OPERATION_ symbol that is defined, the
+C carry-propagating instruction (M4_inst), the instruction used to adjust
+C an incoming carry word (M4_opp) and the two exported entry point names.
+C Any other configuration stops with the m4_error below.
+ifdef(`OPERATION_addlsh2_n', `
+ define(M4_inst, adcl)
+ define(M4_opp, subl)
+ define(M4_function, mpn_addlsh2_n)
+ define(M4_function_c, mpn_addlsh2_nc)
+',`ifdef(`OPERATION_rsblsh2_n', `
+ define(M4_inst, sbbl)
+ define(M4_opp, addl)
+ define(M4_function, mpn_rsblsh2_n)
+ define(M4_function_c, mpn_rsblsh2_nc)
+',`m4_error(`Need OPERATION_addlsh2_n or OPERATION_rsblsh2_n
+')')')
+
+C All four functions this file can be built as.
+MULFUNC_PROLOGUE(mpn_addlsh2_n mpn_addlsh2_nc mpn_rsblsh2_n mpn_rsblsh2_nc)
+
+C The code itself lives in the generic shift-by-C file, parameterised by
+C the macros defined above.
+include_mpn(`x86/atom/aorrlshC_n.asm')
--- /dev/null
+dnl Intel Atom mpn_addlshC_n/mpn_rsblshC_n -- rp[] = (vp[] << C) +- up[]
+
+dnl Contributed to the GNU project by Marco Bodrato.
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C mp_limb_t mpn_addlshC_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C mp_size_t size);
+C mp_limb_t mpn_addlshC_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C mp_size_t size, mp_limb_t carry);
+C mp_limb_t mpn_rsblshC_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C mp_size_t size);
+C mp_limb_t mpn_rsblshC_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C mp_size_t size, mp_signed_limb_t carry);
+
+C cycles/limb
+C P5
+C P6 model 0-8,10-12
+C P6 model 9 (Banias)
+C P6 model 13 (Dothan)
+C P4 model 0 (Willamette)
+C P4 model 1 (?)
+C P4 model 2 (Northwood)
+C P4 model 3 (Prescott)
+C P4 model 4 (Nocona)
+C Intel Atom 6
+C AMD K6
+C AMD K7
+C AMD K8
+C AMD K10
+
+C Stack offsets of the arguments, in prototype order: dst, src1, src2 (the
+C operand whose limbs get shifted, loaded into vp below), size and, for the
+C _nc entry point only, the carry or borrow word.
+defframe(PARAM_CORB, 20)
+defframe(PARAM_SIZE, 16)
+defframe(PARAM_DBLD, 12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+
+dnl re-use parameter space: each argument slot is read into a register
+dnl first and then serves as storage for a callee-saved register or for
+dnl the loop counter
+define(VAR_COUNT,`PARAM_SIZE')
+define(SAVE_EBP,`PARAM_DBLD')
+define(SAVE_VP,`PARAM_SRC')
+define(SAVE_UP,`PARAM_DST')
+
+C M is 2^LSH; it is used as the lea scale factor that performs the left
+C shift of a limb.
+define(M, eval(m4_lshift(1,LSH)))
+define(`rp', `%edi')
+define(`up', `%esi')
+define(`vp', `%ebx')
+
+ASM_START()
+ TEXT
+ ALIGN(8)
+
+C Entry point taking an explicit carry (or borrow) word as fifth argument.
+C The word is split into two parts before joining the common code at
+C L(start_nc) inside the function that follows: %edx receives bit LSH of
+C the word (0 or 1) and %eax receives the word after M4_opp has applied
+C that bit to it.
+C NOTE(review): presumably this leaves %eax below 2^LSH so that the first
+C lea of the main code can merge it under a shifted limb, while %edx seeds
+C the carry flag -- confirm against the documented range of the argument.
+PROLOGUE(M4_function_c)
+deflit(`FRAME',0)
+ movl PARAM_CORB, %eax
+ movl %eax, %edx
+ shr $LSH, %edx
+ andl $1, %edx
+ M4_opp %edx, %eax
+ jmp L(start_nc)
+EPILOGUE()
+
+C Main entry point without carry-in: both parts of the carry state (%eax
+C and %edx) start at zero.  The carry-in entry point above jumps to
+C L(start_nc) with them preloaded.
+C
+C The loop handles two limbs per iteration.  VAR_COUNT holds (size+1)/2 and
+C the bit shifted out of size+1 selects where the loop is entered.  In each
+C step the limb of vp is scaled by M with lea and merged with the bits
+C shifted out of the previous limb (obtained with shr $RSH), then combined
+C with the limb of up through M4_inst.  Across iterations the carry flag is
+C parked in bit 0 of %edx (adc %edx,%edx) and brought back with shr %edx,
+C because the shr $RSH in between overwrites the flags.
+C
+C The value left in %eax is the bits shifted out of the last limb with the
+C final carry or borrow applied through M4_inst $0.
+PROLOGUE(M4_function)
+deflit(`FRAME',0)
+
+ xor %eax, %eax
+ xor %edx, %edx
+L(start_nc):
+ push rp FRAME_pushl()
+
+C Load the arguments; each slot is overwritten with a saved register (or
+C the loop count) only after it has been read.
+ mov PARAM_SIZE, %ecx C size
+ mov PARAM_DST, rp
+ mov up, SAVE_UP
+ incl %ecx C size + 1
+ mov PARAM_SRC, up
+ mov vp, SAVE_VP
+ shr %ecx C (size+1)\2
+ mov PARAM_DBLD, vp
+ mov %ebp, SAVE_EBP
+ mov %ecx, VAR_COUNT
+ jnc L(entry) C size odd
+
+C Even size: produce the first shifted limb here and enter the loop at its
+C second half, with the pointers biased by one limb to match the 4(...)
+C addressing used there.
+ shr %edx C size even
+ mov (vp), %ecx
+ lea 4(vp), vp
+ lea (%eax,%ecx,M), %edx
+ mov %ecx, %eax
+ lea -4(up), up
+ lea -4(rp), rp
+ jmp L(enteven)
+
+ ALIGN(16)
+L(oop):
+ lea (%eax,%ecx,M), %ebp
+ shr $RSH, %ecx
+ mov 4(vp), %eax
+C restore the carry flag parked in bit 0 of %edx
+ shr %edx
+ lea 8(vp), vp
+ M4_inst (up), %ebp
+ lea (%ecx,%eax,M), %edx
+ mov %ebp, (rp)
+L(enteven):
+ M4_inst 4(up), %edx
+ lea 8(up), up
+ mov %edx, 4(rp)
+C park the carry flag in bit 0 of %edx before shr destroys it
+ adc %edx, %edx
+ shr $RSH, %eax
+ lea 8(rp), rp
+L(entry):
+ mov (vp), %ecx
+ decl VAR_COUNT
+ jnz L(oop)
+
+C Last limb: same step as the first half of the loop, followed by the
+C computation of the return value and the restoring of saved registers.
+ lea (%eax,%ecx,M), %ebp
+ shr $RSH, %ecx
+ shr %edx
+ mov SAVE_VP, vp
+ M4_inst (up), %ebp
+ mov %ecx, %eax
+ mov SAVE_UP, up
+ M4_inst $0, %eax
+ mov %ebp, (rp)
+ mov SAVE_EBP, %ebp
+ pop rp FRAME_popl()
+ ret
+EPILOGUE()
+
+ASM_END()
--- /dev/null
+dnl Intel Atom mpn_add_n/mpn_sub_n -- rp[] = up[] +- vp[].
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+
+dnl Contributed to the GNU project by Marco Bodrato.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C P5
+C P6 model 0-8,10-12
+C P6 model 9 (Banias)
+C P6 model 13 (Dothan)
+C P4 model 0 (Willamette)
+C P4 model 1 (?)
+C P4 model 2 (Northwood)
+C P4 model 3 (Prescott)
+C P4 model 4 (Nocona)
+C Intel Atom 3
+C AMD K6
+C AMD K7
+C AMD K8
+C AMD K10
+
+C Select, according to the OPERATION_ symbol that is defined, the
+C carry-propagating instruction (M4_inst), the two exported entry point
+C names and the word used for the operation in the description further
+C down.  Any other configuration stops with the m4_error below.
+ifdef(`OPERATION_add_n', `
+ define(M4_inst, adcl)
+ define(M4_function_n, mpn_add_n)
+ define(M4_function_nc, mpn_add_nc)
+ define(M4_description, add)
+',`ifdef(`OPERATION_sub_n', `
+ define(M4_inst, sbbl)
+ define(M4_function_n, mpn_sub_n)
+ define(M4_function_nc, mpn_sub_nc)
+ define(M4_description, subtract)
+',`m4_error(`Need OPERATION_add_n or OPERATION_sub_n
+')')')
+
+C All four functions this file can be built as.
+MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
+
+C mp_limb_t M4_function_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C mp_size_t size);
+C mp_limb_t M4_function_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C mp_size_t size, mp_limb_t carry);
+C
+C Calculate src1,size M4_description src2,size, and store the result in
+C dst,size. The return value is the carry bit from the top of the result (1
+C or 0).
+C
+C The _nc version accepts 1 or 0 for an initial carry into the low limb of
+C the calculation. Note values other than 1 or 0 here will lead to garbage
+C results.
+
+C Stack offsets of the arguments, in prototype order: dst, src1, src2, size
+C and, for the _nc entry point only, the initial carry.
+defframe(PARAM_CARRY,20)
+defframe(PARAM_SIZE, 16)
+defframe(PARAM_SRC2, 12)
+defframe(PARAM_SRC1, 8)
+defframe(PARAM_DST, 4)
+
+dnl re-use parameter space: each argument slot is read into a register
+dnl first and then holds the caller value of a callee-saved register
+define(SAVE_RP,`PARAM_SIZE')
+define(SAVE_VP,`PARAM_SRC1')
+define(SAVE_UP,`PARAM_DST')
+
+define(`rp', `%edi')
+define(`up', `%esi')
+define(`vp', `%ebx')
+C cy and r1 are the same register: the incoming carry is moved into the
+C carry flag with shr cy before the first limb is loaded into r1.
+define(`cy', `%ecx')
+define(`r1', `%ecx')
+define(`r2', `%edx')
+
+ASM_START()
+ TEXT
+ ALIGN(16)
+deflit(`FRAME',0)
+
+C Main entry point: no carry-in, so cy is cleared before the common code
+C at L(start), which the _nc entry point also jumps to.
+C
+C %eax holds size >> 1 and counts loop iterations; the loop handles two
+C limbs per iteration and L(end) always handles one final limb.  The flags
+C left by shr %eax are still valid at jz and jc because only mov
+C instructions, which do not change flags, sit in between.
+C The value left in %eax is the final carry or borrow (0 or 1).
+PROLOGUE(M4_function_n)
+ xor cy, cy C carry
+L(start):
+ mov PARAM_SIZE, %eax C size
+ mov rp, SAVE_RP
+ mov PARAM_DST, rp
+ mov up, SAVE_UP
+ mov PARAM_SRC1, up
+ shr %eax C size >> 1
+ mov vp, SAVE_VP
+ mov PARAM_SRC2, vp
+ jz L(one) C size == 1
+ jc L(three) C size % 2 == 1
+
+C Even size: move the initial carry into the carry flag, load the first
+C limb and enter the loop at its second half with pointers biased to match
+C the -4(...) addressing used there.
+ shr cy
+ mov (up), r2
+ lea 4(up), up
+ lea 4(vp), vp
+ lea -4(rp), rp
+ jmp L(entry)
+L(one):
+ shr cy
+ mov (up), r1
+ jmp L(end)
+L(three):
+ shr cy
+ mov (up), r1
+
+ ALIGN(16)
+L(oop):
+ M4_inst (vp), r1
+ lea 8(up), up
+ mov -4(up), r2
+ lea 8(vp), vp
+ mov r1, (rp)
+L(entry):
+ M4_inst -4(vp), r2
+ lea 8(rp), rp
+C dec leaves the carry flag untouched, so the carry chain survives the
+C update of the loop counter
+ dec %eax
+ mov (up), r1
+ mov r2, -4(rp)
+ jnz L(oop)
+
+C Final limb.  Since %eax is zero, adc %eax,%eax turns the carry flag into
+C the return value.
+L(end): C %eax is zero here
+ mov SAVE_UP, up
+ M4_inst (vp), r1
+ mov SAVE_VP, vp
+ mov r1, (rp)
+ adc %eax, %eax
+ mov SAVE_RP, rp
+ ret
+EPILOGUE()
+
+C Carry-in entry point: load the fifth argument into cy and continue in
+C the common code of the function above.  That code moves bit 0 of cy into
+C the carry flag with shr cy.
+PROLOGUE(M4_function_nc)
+ mov PARAM_CARRY, cy C carry
+ jmp L(start)
+EPILOGUE()
+ASM_END()
--- /dev/null
+dnl Intel Atom mpn_addlshC_n/mpn_sublshC_n -- rp[] = up[] +- (vp[] << C)
+
+dnl Contributed to the GNU project by Marco Bodrato.
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C mp_limb_t mpn_addlshC_n_ip1 (mp_ptr dst, mp_srcptr src, mp_size_t size);
+C mp_limb_t mpn_addlshC_nc_ip1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C mp_limb_t carry);
+C mp_limb_t mpn_sublshC_n_ip1 (mp_ptr dst, mp_srcptr src, mp_size_t size);
+C mp_limb_t mpn_sublshC_nc_ip1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C mp_signed_limb_t borrow);
+
+C Stack offsets for the in-place (_ip1) entry points: dst, src, size and
+C the optional carry or borrow word.  The three-operand entry points
+C further down also use PARAM_DST and PARAM_SRC (for dst and src1) and take
+C their remaining arguments from the GPARAM_ offsets.
+defframe(PARAM_CORB, 16)
+defframe(PARAM_SIZE, 12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+
+C mp_limb_t mpn_addlshC_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C mp_size_t size);
+C mp_limb_t mpn_addlshC_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C mp_size_t size, mp_limb_t carry);
+C mp_limb_t mpn_sublshC_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C mp_size_t size);
+C mp_limb_t mpn_sublshC_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+C mp_size_t size, mp_limb_t borrow);
+
+C if src1 == dst, _ip1 is used
+
+C cycles/limb
+C dst!=src1,src2 dst==src1
+C P5
+C P6 model 0-8,10-12
+C P6 model 9 (Banias)
+C P6 model 13 (Dothan)
+C P4 model 0 (Willamette)
+C P4 model 1 (?)
+C P4 model 2 (Northwood)
+C P4 model 3 (Prescott)
+C P4 model 4 (Nocona)
+C Intel Atom 7 6
+C AMD K6
+C AMD K7
+C AMD K8
+C AMD K10
+
+C Stack offsets of the trailing arguments of the three-operand entry
+C points: src2, size and the optional carry or borrow word.
+defframe(GPARAM_CORB, 20)
+defframe(GPARAM_SIZE, 16)
+defframe(GPARAM_SRC2, 12)
+
+dnl re-use parameter space: each argument slot is read into a register
+dnl first and then holds the caller value of a callee-saved register.
+dnl SAVE_EBP (PARAM_SIZE, offset 12) is the same slot as GPARAM_SRC2, so
+dnl the three-operand code reads src2 before storing ebp there.
+define(SAVE_EBP,`PARAM_SIZE')
+define(SAVE_EBX,`PARAM_SRC')
+define(SAVE_UP,`PARAM_DST')
+
+C M is 2^LSH; it is used as the lea scale factor that performs the left
+C shift of a limb.
+define(M, eval(m4_lshift(1,LSH)))
+define(`rp', `%edi')
+define(`up', `%esi')
+
+ASM_START()
+ TEXT
+ ALIGN(8)
+
+C In-place entry point taking an explicit carry (or borrow) word as fourth
+C argument.  The word is split into two parts before joining the common
+C code at L(start_nc) inside the function that follows: %edx receives bit
+C LSH of the word (0 or 1) and %ecx receives the word after M4_opp has
+C applied that bit to it.
+C M4_inst, M4_opp, LSH, RSH and the function name macros are not defined in
+C this file; they are expected from the file that includes it.
+C NOTE(review): the loop of the following function re-creates the carry
+C flag with add %edx,%edx, which produces a carry only when %edx is all
+C ones (as left by sbb), not when it is 1; only the size == 1 path uses
+C shr %edx.  Confirm that a set bit LSH cannot occur here with size > 1.
+PROLOGUE(M4_ip_function_c)
+deflit(`FRAME',0)
+ movl PARAM_CORB, %ecx
+ movl %ecx, %edx
+ shr $LSH, %edx
+ andl $1, %edx
+ M4_opp %edx, %ecx
+ jmp L(start_nc)
+EPILOGUE()
+
+C In-place entry point without carry-in: both parts of the carry state
+C (%ecx and %edx) start at zero.  The carry-in entry point above jumps to
+C L(start_nc), and the three-operand code further down jumps to L(inplace)
+C when its src1 equals dst.
+C
+C The loop handles two limbs per iteration with %ebx counting (size+1)/2.
+C Each source limb is scaled by M with lea, merged with the bits shifted
+C out of the previous limb (obtained with shr $RSH) and applied to the
+C destination limb in memory through M4_inst.  Across iterations the carry
+C flag is parked in %edx as 0 or all ones (sbb %edx,%edx) and re-created
+C with add %edx,%edx, because the shr $RSH in between overwrites the flags.
+C
+C The value left in %eax is the bits shifted out of the last limb plus the
+C final carry flag (adc $0).
+PROLOGUE(M4_ip_function)
+deflit(`FRAME',0)
+
+ xor %ecx, %ecx
+ xor %edx, %edx
+L(start_nc):
+ push rp FRAME_pushl()
+ mov PARAM_DST, rp
+ mov up, SAVE_UP
+ mov PARAM_SRC, up
+ mov %ebx, SAVE_EBX
+ mov PARAM_SIZE, %ebx C size
+L(inplace):
+ incl %ebx C size + 1
+ shr %ebx C (size+1)\2
+ mov %ebp, SAVE_EBP
+ jnc L(entry) C size odd
+
+C Even size: produce the first shifted limb here and enter the loop at its
+C second half, with rp biased by one limb to match the 4(rp) addressing.
+ add %edx, %edx C size even
+ mov %ecx, %ebp
+ mov (up), %ecx
+ lea -4(rp), rp
+ lea (%ebp,%ecx,M), %eax
+ lea 4(up), up
+ jmp L(enteven)
+
+ ALIGN(16)
+L(oop):
+ lea (%ecx,%eax,M), %ebp
+ shr $RSH, %eax
+ mov 4(up), %ecx
+C re-create the carry flag from the value parked in %edx
+ add %edx, %edx
+ lea 8(up), up
+ M4_inst %ebp, (rp)
+ lea (%eax,%ecx,M), %eax
+
+L(enteven):
+ M4_inst %eax, 4(rp)
+ lea 8(rp), rp
+
+C park the carry flag in %edx (0 or all ones) before shr destroys it
+ sbb %edx, %edx
+ shr $RSH, %ecx
+
+L(entry):
+ mov (up), %eax
+ decl %ebx
+ jnz L(oop)
+
+C Last limb, return value and restoring of the saved registers.  Here the
+C parked carry is brought back from bit 0 of %edx with shr.
+ lea (%ecx,%eax,M), %ebp
+ shr $RSH, %eax
+ shr %edx
+ M4_inst %ebp, (rp)
+ mov SAVE_UP, up
+ adc $0, %eax
+ mov SAVE_EBP, %ebp
+ mov SAVE_EBX, %ebx
+ pop rp FRAME_popl()
+ ret
+EPILOGUE()
+
+C Three-operand entry point taking an explicit carry (or borrow) word as
+C fifth argument.  The word is split exactly as in the in-place carry-in
+C entry point (bit LSH into %edx, adjusted word into %ecx) before joining
+C the common code at L(generic_nc) inside the function that follows.
+C NOTE(review): the loops reached from L(generic_nc) re-create the carry
+C flag with add %edx,%edx, which produces a carry only when %edx is all
+C ones, not when it is 1.  Confirm that a set bit LSH cannot occur here
+C with size > 1.
+PROLOGUE(M4_function_c)
+deflit(`FRAME',0)
+ movl GPARAM_CORB, %ecx
+ movl %ecx, %edx
+ shr $LSH, %edx
+ andl $1, %edx
+ M4_opp %edx, %ecx
+ jmp L(generic_nc)
+EPILOGUE()
+
+C Three-operand entry point without carry-in: both parts of the carry
+C state (%ecx and %edx) start at zero.  The carry-in entry point above
+C jumps to L(generic_nc).
+C
+C When src1 equals dst the work is handed to the in-place code at
+C L(inplace), with the size in %ebx and src2 in up as that code expects.
+C Otherwise L(general) runs a loop of the same shape as the in-place one,
+C except that each src1 limb is loaded through %ebx into %edx, combined
+C with the shifted src2 limb by M4_inst and then stored to rp.  The loop
+C count lives in the GPARAM_SIZE stack slot.
+C
+C The value left in %eax is the bits shifted out of the last limb plus the
+C final carry flag (adc $0).
+PROLOGUE(M4_function)
+deflit(`FRAME',0)
+
+ xor %ecx, %ecx
+ xor %edx, %edx
+L(generic_nc):
+ push rp FRAME_pushl()
+ mov PARAM_DST, rp
+ mov up, SAVE_UP
+ mov PARAM_SRC, up
+ cmp rp, up
+ mov %ebx, SAVE_EBX
+ jne L(general)
+ mov GPARAM_SIZE, %ebx C size
+ mov GPARAM_SRC2, up
+ jmp L(inplace)
+
+L(general):
+ mov GPARAM_SIZE, %eax C size
+C %ebx was already stored to SAVE_EBX before the jne above, so this second
+C store writes the same value again
+ mov %ebx, SAVE_EBX
+ incl %eax C size + 1
+ mov up, %ebx C vp
+ mov GPARAM_SRC2, up C up
+ shr %eax C (size+1)\2
+ mov %ebp, SAVE_EBP
+ mov %eax, GPARAM_SIZE
+ jnc L(entry2) C size odd
+
+C Even size: produce the first shifted limb here and enter the loop at its
+C second half, with rp and the src1 pointer biased by one limb to match
+C the 4(...) addressing used there.
+ add %edx, %edx C size even
+ mov %ecx, %ebp
+ mov (up), %ecx
+ lea -4(rp), rp
+ lea -4(%ebx), %ebx
+ lea (%ebp,%ecx,M), %eax
+ lea 4(up), up
+ jmp L(enteven2)
+
+ ALIGN(16)
+L(oop2):
+ lea (%ecx,%eax,M), %ebp
+ shr $RSH, %eax
+ mov 4(up), %ecx
+C re-create the carry flag from the value parked in %edx, which is then
+C free to receive the next src1 limb
+ add %edx, %edx
+ lea 8(up), up
+ mov (%ebx), %edx
+ M4_inst %ebp, %edx
+ lea (%eax,%ecx,M), %eax
+ mov %edx, (rp)
+L(enteven2):
+ mov 4(%ebx), %edx
+ lea 8(%ebx), %ebx
+ M4_inst %eax, %edx
+ mov %edx, 4(rp)
+C park the carry flag in %edx (0 or all ones) before shr destroys it
+ sbb %edx, %edx
+ shr $RSH, %ecx
+ lea 8(rp), rp
+L(entry2):
+ mov (up), %eax
+ decl GPARAM_SIZE
+ jnz L(oop2)
+
+C Last limb, return value and restoring of the saved registers.  Here the
+C parked carry is brought back from bit 0 of %edx with shr.
+ lea (%ecx,%eax,M), %ebp
+ shr $RSH, %eax
+ shr %edx
+ mov (%ebx), %edx
+ M4_inst %ebp, %edx
+ mov %edx, (rp)
+ mov SAVE_UP, up
+ adc $0, %eax
+ mov SAVE_EBP, %ebp
+ mov SAVE_EBX, %ebx
+ pop rp FRAME_popl()
+ ret
+EPILOGUE()
+
+ASM_END()
--- /dev/null
+dnl Intel Atom mpn_bdiv_q_1, mpn_pi1_bdiv_q_1 -- schoolbook Hensel
+dnl division by 1-limb divisor, returning quotient only.
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C This file contains no code of its own: it declares the two functions it
+C provides and includes the implementation from the x86/pentium directory.
+MULFUNC_PROLOGUE(mpn_bdiv_q_1 mpn_pi1_bdiv_q_1)
+include_mpn(`x86/pentium/bdiv_q_1.asm')
--- /dev/null
+dnl Intel Atom mpn_divexact_1 -- mpn by limb exact division.
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C This file contains no code of its own: it declares the function it
+C provides and includes the implementation from the x86/pentium directory.
+MULFUNC_PROLOGUE(mpn_divexact_1)
+include_mpn(`x86/pentium/dive_1.asm')
/* Intel Atom/32 gmp-mparam.h -- Compiler/machine parameter header file.
Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007,
-2008, 2009, 2010 Free Software Foundation, Inc.
+2008, 2009, 2010, 2011 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
/* Generated by tuneup.c */
-#define MOD_1_NORM_THRESHOLD 3
-#define MOD_1_UNNORM_THRESHOLD 9
-#define MOD_1N_TO_MOD_1_1_THRESHOLD 13
-#define MOD_1U_TO_MOD_1_1_THRESHOLD 6
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD 16
+#define MOD_1_NORM_THRESHOLD 4
+#define MOD_1_UNNORM_THRESHOLD 8
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 7
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 4
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 10
#define MOD_1_2_TO_MOD_1_4_THRESHOLD 0 /* never mpn_mod_1s_2p */
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 31
-#define USE_PREINV_DIVREM_1 1
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 11
+#define USE_PREINV_DIVREM_1 1 /* native */
+#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
#define DIVEXACT_1_THRESHOLD 0 /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD 102
+#define BMOD_1_TO_MOD_1_THRESHOLD 33
-#define MUL_TOOM22_THRESHOLD 16
-#define MUL_TOOM33_THRESHOLD 66
-#define MUL_TOOM44_THRESHOLD 171
-#define MUL_TOOM6H_THRESHOLD 258
-#define MUL_TOOM8H_THRESHOLD 357
+#define MUL_TOOM22_THRESHOLD 22
+#define MUL_TOOM33_THRESHOLD 81
+#define MUL_TOOM44_THRESHOLD 178
+#define MUL_TOOM6H_THRESHOLD 270
+#define MUL_TOOM8H_THRESHOLD 406
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD 113
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD 129
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD 115
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD 85
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 126
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 121
#define MUL_TOOM42_TO_TOOM63_THRESHOLD 129
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD 113
#define SQR_BASECASE_THRESHOLD 0 /* always (native) */
-#define SQR_TOOM2_THRESHOLD 16
-#define SQR_TOOM3_THRESHOLD 113
-#define SQR_TOOM4_THRESHOLD 193
-#define SQR_TOOM6_THRESHOLD 254
-#define SQR_TOOM8_THRESHOLD 381
+#define SQR_TOOM2_THRESHOLD 32
+#define SQR_TOOM3_THRESHOLD 109
+#define SQR_TOOM4_THRESHOLD 262
+#define SQR_TOOM6_THRESHOLD 396
+#define SQR_TOOM8_THRESHOLD 547
-#define MULMOD_BNM1_THRESHOLD 13
-#define SQRMOD_BNM1_THRESHOLD 11
+#define MULMID_TOOM42_THRESHOLD 54
-#define MUL_FFT_MODF_THRESHOLD 332 /* k = 5 */
+#define MULMOD_BNM1_THRESHOLD 16
+#define SQRMOD_BNM1_THRESHOLD 18
+
+#define MUL_FFT_MODF_THRESHOLD 404 /* k = 5 */
#define MUL_FFT_TABLE3 \
- { { 332, 5}, { 17, 6}, { 9, 5}, { 19, 6}, \
- { 11, 5}, { 23, 6}, { 17, 7}, { 9, 6}, \
- { 19, 7}, { 11, 6}, { 25, 7}, { 15, 6}, \
- { 31, 7}, { 19, 8}, { 11, 7}, { 27, 8}, \
- { 15, 7}, { 33, 8}, { 19, 7}, { 39, 8}, \
- { 23, 7}, { 47, 8}, { 27, 9}, { 15, 8}, \
- { 35, 7}, { 71, 8}, { 39, 9}, { 23, 8}, \
+ { { 376, 5}, { 21, 6}, { 11, 5}, { 23, 6}, \
+ { 21, 7}, { 11, 6}, { 25, 7}, { 13, 6}, \
+ { 27, 7}, { 15, 6}, { 31, 7}, { 21, 8}, \
+ { 11, 7}, { 27, 8}, { 15, 7}, { 35, 8}, \
+ { 19, 7}, { 39, 8}, { 23, 7}, { 47, 8}, \
+ { 27, 9}, { 15, 8}, { 39, 9}, { 23, 8}, \
{ 51,10}, { 15, 9}, { 31, 8}, { 67, 9}, \
- { 39, 8}, { 79, 9}, { 55,10}, { 31, 9}, \
- { 79,10}, { 47, 9}, { 95,11}, { 31,10}, \
- { 63, 9}, { 127, 8}, { 255, 9}, { 135,10}, \
- { 79, 9}, { 159,10}, { 95, 9}, { 191, 8}, \
- { 383,11}, { 63,10}, { 127, 9}, { 255, 8}, \
- { 511, 9}, { 271,10}, { 143, 9}, { 287, 8}, \
- { 607,10}, { 159, 9}, { 319,11}, { 95,10}, \
- { 191, 9}, { 383,12}, { 63,11}, { 127,10}, \
- { 255, 9}, { 511,10}, { 271, 9}, { 543, 8}, \
- { 1087,10}, { 287, 9}, { 607,11}, { 159,10}, \
- { 351, 9}, { 703, 8}, { 1407,11}, { 191,10}, \
- { 415, 9}, { 831,11}, { 223,10}, { 479, 9}, \
- { 959,12}, { 127,11}, { 255,10}, { 543, 9}, \
- { 1087,11}, { 287,10}, { 607, 9}, { 1215,11}, \
- { 351,10}, { 703, 9}, { 1407,12}, { 191,11}, \
- { 383,10}, { 767,11}, { 415,10}, { 831,11}, \
- { 479,10}, { 959,13}, { 127,12}, { 255,11}, \
- { 543,10}, { 1087,11}, { 607,10}, { 1215,12}, \
- { 319,11}, { 703,10}, { 1407,11}, { 735,10}, \
- { 1471,12}, { 383,11}, { 831,12}, { 447,11}, \
- { 959,10}, { 1919,13}, { 255,12}, { 511,11}, \
- { 1087,12}, { 575,11}, { 1215,10}, { 2431,12}, \
- { 703,11}, { 1471,13}, { 383,12}, { 959,11}, \
- { 1919,14}, { 255,13}, { 511,12}, { 1215,11}, \
- { 2431,13}, { 639,12}, { 1471,11}, { 2943,10}, \
- { 5887,13}, { 767,12}, { 1599,13}, { 895,12}, \
- { 1919,11}, { 3839,14}, { 511,13}, { 1023,12}, \
- { 2111,13}, { 1151,12}, { 2431,13}, { 1407,12}, \
- { 2943,11}, { 5887,14}, { 767,13}, { 1919,12}, \
- { 3839,15}, { 511,14}, { 1023,13}, { 2431,14}, \
- { 1279,13}, { 2943,12}, { 5887,14}, { 1535,13}, \
- { 3199,14}, { 1791,13}, { 3839,12}, { 7679,15}, \
- { 1023,14}, { 16384,15}, { 32768,16} }
-#define MUL_FFT_TABLE3_SIZE 163
-#define MUL_FFT_THRESHOLD 3456
-
-#define SQR_FFT_MODF_THRESHOLD 308 /* k = 5 */
+ { 39, 8}, { 79, 9}, { 47, 8}, { 95,10}, \
+ { 31, 9}, { 79,10}, { 47, 9}, { 95,11}, \
+ { 31,10}, { 63, 9}, { 127, 8}, { 255, 9}, \
+ { 135,10}, { 79, 9}, { 159,10}, { 95, 9}, \
+ { 191,10}, { 111,11}, { 63,10}, { 127, 9}, \
+ { 255, 8}, { 511, 9}, { 271,10}, { 143, 9}, \
+ { 287, 8}, { 575, 9}, { 303,10}, { 159, 9}, \
+ { 319,11}, { 95,10}, { 191, 9}, { 383,12}, \
+ { 63,11}, { 127,10}, { 255, 9}, { 511,10}, \
+ { 271, 9}, { 543,10}, { 287, 9}, { 575,10}, \
+ { 303,11}, { 159,10}, { 319, 9}, { 639,10}, \
+ { 335, 9}, { 671,10}, { 351, 9}, { 703,11}, \
+ { 191,10}, { 383, 9}, { 767,10}, { 415,11}, \
+ { 223,10}, { 447,12}, { 4096,13}, { 8192,14}, \
+ { 16384,15}, { 32768,16} }
+#define MUL_FFT_TABLE3_SIZE 86
+#define MUL_FFT_THRESHOLD 4544
+
+#define SQR_FFT_MODF_THRESHOLD 340 /* k = 5 */
#define SQR_FFT_TABLE3 \
- { { 308, 5}, { 13, 6}, { 7, 5}, { 17, 6}, \
- { 9, 5}, { 19, 6}, { 13, 7}, { 7, 6}, \
- { 17, 7}, { 9, 6}, { 19, 7}, { 11, 6}, \
- { 24, 7}, { 15, 6}, { 31, 7}, { 21, 8}, \
- { 11, 7}, { 25, 8}, { 15, 7}, { 31, 8}, \
+ { { 280, 5}, { 19, 6}, { 10, 5}, { 21, 6}, \
+ { 17, 7}, { 9, 6}, { 21, 7}, { 11, 6}, \
+ { 24, 7}, { 13, 6}, { 27, 7}, { 21, 8}, \
+ { 11, 7}, { 27, 8}, { 15, 7}, { 33, 8}, \
{ 19, 7}, { 39, 8}, { 23, 7}, { 47, 8}, \
- { 27, 9}, { 15, 8}, { 31, 4}, { 607, 5}, \
- { 319, 7}, { 95, 8}, { 55, 9}, { 31, 8}, \
- { 63, 9}, { 39, 8}, { 79, 9}, { 47,10}, \
- { 31, 9}, { 79,10}, { 47,11}, { 31,10}, \
+ { 27, 9}, { 15, 8}, { 39, 9}, { 23, 8}, \
+ { 47,10}, { 15, 9}, { 31, 8}, { 63, 9}, \
+ { 39, 8}, { 79, 9}, { 47,10}, { 31, 9}, \
+ { 79,10}, { 47, 9}, { 95,11}, { 31,10}, \
{ 63, 9}, { 127, 8}, { 255, 7}, { 511,10}, \
- { 79, 9}, { 159, 8}, { 319,10}, { 95, 9}, \
- { 191, 8}, { 383,11}, { 63,10}, { 127, 9}, \
- { 255, 8}, { 543, 7}, { 1087, 9}, { 287, 8}, \
- { 607,10}, { 159, 9}, { 319,11}, { 95,10}, \
- { 191, 9}, { 383,12}, { 63,11}, { 127,10}, \
- { 255, 9}, { 543, 8}, { 1087,10}, { 287, 9}, \
- { 607,11}, { 159,10}, { 351, 9}, { 703, 8}, \
- { 1407, 9}, { 735,11}, { 191,10}, { 415, 9}, \
- { 831,11}, { 223,10}, { 479, 9}, { 959, 8}, \
- { 1919,12}, { 127,11}, { 255,10}, { 543, 9}, \
- { 1087,11}, { 287,10}, { 607, 9}, { 1215,11}, \
- { 351,10}, { 703, 9}, { 1407,12}, { 191,11}, \
- { 415,10}, { 831,11}, { 479,10}, { 959, 9}, \
- { 1919,13}, { 127,12}, { 255,11}, { 543,10}, \
- { 1087,11}, { 607,10}, { 1215,12}, { 319,11}, \
- { 703,10}, { 1407,11}, { 735,12}, { 383,11}, \
- { 831,12}, { 447,11}, { 959,10}, { 1919, 9}, \
- { 3839,13}, { 255,12}, { 511,11}, { 1087,12}, \
- { 575,11}, { 1215,10}, { 2431,12}, { 703,11}, \
- { 1407,13}, { 383,12}, { 959,11}, { 1919,10}, \
- { 3839,14}, { 255,13}, { 511,12}, { 1215,11}, \
- { 2431,13}, { 639,12}, { 1471,11}, { 2943,13}, \
- { 767,12}, { 1599,13}, { 895,12}, { 1919,11}, \
- { 3839,14}, { 511,13}, { 1151,12}, { 2431,13}, \
- { 1407,12}, { 2943,14}, { 767,13}, { 1919,12}, \
- { 3839,15}, { 511,14}, { 1023,13}, { 2431,14}, \
- { 1279,13}, { 2943,14}, { 1791,13}, { 3839,15}, \
- { 1023,14}, { 16384,15}, { 32768,16} }
-#define SQR_FFT_TABLE3_SIZE 155
-#define SQR_FFT_THRESHOLD 2368
-
-#define MULLO_BASECASE_THRESHOLD 0 /* always */
-#define MULLO_DC_THRESHOLD 56
-#define MULLO_MUL_N_THRESHOLD 5240
-
-#define DC_DIV_QR_THRESHOLD 59
-#define DC_DIVAPPR_Q_THRESHOLD 216
-#define DC_BDIV_QR_THRESHOLD 56
-#define DC_BDIV_Q_THRESHOLD 136
-
-#define INV_MULMOD_BNM1_THRESHOLD 30
-#define INV_NEWTON_THRESHOLD 260
-#define INV_APPR_THRESHOLD 244
-
-#define BINV_NEWTON_THRESHOLD 266
-#define REDC_1_TO_REDC_N_THRESHOLD 62
-
-#define MU_DIV_QR_THRESHOLD 1308
-#define MU_DIVAPPR_Q_THRESHOLD 1334
-#define MUPI_DIV_QR_THRESHOLD 130
-#define MU_BDIV_QR_THRESHOLD 1017
-#define MU_BDIV_Q_THRESHOLD 1308
+ { 79, 9}, { 159, 8}, { 319, 9}, { 175,10}, \
+ { 95, 9}, { 191, 8}, { 383, 9}, { 207,11}, \
+ { 63,10}, { 127, 9}, { 255, 8}, { 511, 9}, \
+ { 271,10}, { 143, 9}, { 287,10}, { 159, 9}, \
+ { 319,10}, { 175,11}, { 95,10}, { 191, 9}, \
+ { 383,10}, { 207,12}, { 63,11}, { 127,10}, \
+ { 255, 9}, { 511,10}, { 271, 9}, { 543,10}, \
+ { 287,11}, { 159,10}, { 319, 9}, { 639,10}, \
+ { 351, 9}, { 703,11}, { 191,10}, { 415,11}, \
+ { 223,10}, { 479,12}, { 4096,13}, { 8192,14}, \
+ { 16384,15}, { 32768,16} }
+#define SQR_FFT_TABLE3_SIZE 82
+#define SQR_FFT_THRESHOLD 3712
+
+#define MULLO_BASECASE_THRESHOLD 6
+#define MULLO_DC_THRESHOLD 53
+#define MULLO_MUL_N_THRESHOLD 8907
+
+#define DC_DIV_QR_THRESHOLD 63
+#define DC_DIVAPPR_Q_THRESHOLD 266
+#define DC_BDIV_QR_THRESHOLD 63
+#define DC_BDIV_Q_THRESHOLD 175
+
+#define INV_MULMOD_BNM1_THRESHOLD 42
+#define INV_NEWTON_THRESHOLD 250
+#define INV_APPR_THRESHOLD 250
+
+#define BINV_NEWTON_THRESHOLD 274
+#define REDC_1_TO_REDC_N_THRESHOLD 68
+
+#define MU_DIV_QR_THRESHOLD 1334
+#define MU_DIVAPPR_Q_THRESHOLD 1442
+#define MUPI_DIV_QR_THRESHOLD 114
+#define MU_BDIV_QR_THRESHOLD 1078
+#define MU_BDIV_Q_THRESHOLD 1334
+
+#define POWM_SEC_TABLE 4,35,258,1084
#define MATRIX22_STRASSEN_THRESHOLD 15
-#define HGCD_THRESHOLD 111
-#define GCD_DC_THRESHOLD 606
-#define GCDEXT_DC_THRESHOLD 273
+#define HGCD_THRESHOLD 135
+#define HGCD_APPR_THRESHOLD 164
+#define HGCD_REDUCE_THRESHOLD 2384
+#define GCD_DC_THRESHOLD 487
+#define GCDEXT_DC_THRESHOLD 342
#define JACOBI_BASE_METHOD 3
-#define GET_STR_DC_THRESHOLD 14
-#define GET_STR_PRECOMPUTE_THRESHOLD 26
-#define SET_STR_DC_THRESHOLD 270
-#define SET_STR_PRECOMPUTE_THRESHOLD 860
+#define GET_STR_DC_THRESHOLD 12
+#define GET_STR_PRECOMPUTE_THRESHOLD 27
+#define SET_STR_DC_THRESHOLD 324
+#define SET_STR_PRECOMPUTE_THRESHOLD 1290
+
+#define FAC_DSC_THRESHOLD 250
+#define FAC_ODD_THRESHOLD 34
--- /dev/null
+dnl Intel Atom mpn_and_n,...,mpn_xnor_n -- bitwise logical operations.
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+
+dnl Contributed to the GNU project by Marco Bodrato.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C op nop opn
+C P5
+C P6 model 0-8,10-12
+C P6 model 9 (Banias)
+C P6 model 13 (Dothan)
+C P4 model 0 (Willamette)
+C P4 model 1 (?)
+C P4 model 2 (Northwood)
+C P4 model 3 (Prescott)
+C P4 model 4 (Nocona)
+C Intel Atom 3 3.5 3.5
+C AMD K6
+C AMD K7
+C AMD K8
+C AMD K10
+
+C M4_choose_op(name, post, inst, pre) takes effect only when OPERATION_name
+C is defined.  It then records the function name mpn_name, the logical
+C instruction inst, and whether a complement is wanted on the operand
+C before the instruction (pre) or on the result after it (post).
+define(M4_choose_op,
+`ifdef(`OPERATION_$1',`
+define(`M4_function', `mpn_$1')
+define(`M4_want_pre', `$4')
+define(`M4_inst', `$3')
+define(`M4_want_post',`$2')
+')')
+C M4pre(text) and M4post(text) expand to text only when the corresponding
+C flag recorded above is yes, and to nothing otherwise.
+define(M4pre, `ifelse(M4_want_pre, yes,`$1')')
+define(M4post,`ifelse(M4_want_post,yes,`$1')')
+
+C One line per supported operation; columns are name, post, inst, pre.
+M4_choose_op( and_n, , andl, )
+M4_choose_op( andn_n, , andl, yes)
+M4_choose_op( nand_n, yes, andl, )
+M4_choose_op( ior_n, , orl, )
+M4_choose_op( iorn_n, , orl, yes)
+M4_choose_op( nior_n, yes, orl, )
+M4_choose_op( xor_n, , xorl, )
+M4_choose_op( xnor_n, yes, xorl, )
+
+C If none of the lines above matched, M4_function is still undefined and
+C the build stops here.
+ifdef(`M4_function',,
+`m4_error(`Unrecognised or undefined OPERATION symbol
+')')
+
+MULFUNC_PROLOGUE(mpn_and_n mpn_andn_n mpn_nand_n mpn_ior_n mpn_iorn_n mpn_nior_n mpn_xor_n mpn_xnor_n)
+
+C void M4_function (mp_ptr dst, mp_srcptr src2, mp_srcptr src1, mp_size_t size);
+C
+
+C Stack offsets of the arguments.  Note the naming: PARAM_SRC2 is the
+C second argument (offset 8) and PARAM_SRC1 the third (offset 12), as in
+C the prototype comment above.
+defframe(PARAM_SIZE, 16)
+defframe(PARAM_SRC1, 12)
+defframe(PARAM_SRC2, 8)
+defframe(PARAM_DST, 4)
+
+dnl re-use parameter space: each argument slot is read into a register
+dnl first and then holds the caller value of a callee-saved register
+define(SAVE_RP,`PARAM_SIZE')
+define(SAVE_VP,`PARAM_SRC1')
+define(SAVE_UP,`PARAM_DST')
+
+define(`rp', `%edi')
+define(`up', `%esi')
+define(`vp', `%ebx')
+define(`cnt', `%eax')
+define(`r1', `%ecx')
+define(`r2', `%edx')
+
+ASM_START()
+ TEXT
+ ALIGN(16)
+deflit(`FRAME',0)
+
+C Limb-wise logical operation: each limb read through up (PARAM_SRC1) is
+C combined by M4_inst with the limb at the same index read through vp
+C (PARAM_SRC2) and stored through rp.  Depending on the selected operation
+C the limb from up is complemented before the instruction (M4pre) or the
+C result is complemented after it (M4post).
+C
+C cnt holds size >> 1 and counts loop iterations; the loop handles two
+C limbs per iteration and L(end) always handles one final limb.  The flags
+C left by shr cnt are still valid at jz and jnc because only mov
+C instructions, which do not change flags, sit in between.
+PROLOGUE(M4_function)
+ mov PARAM_SIZE, cnt C size
+ mov rp, SAVE_RP
+ mov PARAM_DST, rp
+ mov up, SAVE_UP
+ mov PARAM_SRC1, up
+ shr cnt C size >> 1
+ mov vp, SAVE_VP
+ mov PARAM_SRC2, vp
+ mov (up), r1
+ jz L(end) C size == 1
+ jnc L(even) C size % 2 == 0
+
+ ALIGN(16)
+L(oop):
+M4pre(` notl_or_xorl_GMP_NUMB_MASK(r1)')
+ M4_inst (vp), r1
+ lea 8(up), up
+ mov -4(up), r2
+M4post(` notl_or_xorl_GMP_NUMB_MASK(r1)')
+ lea 8(vp), vp
+ mov r1, (rp)
+L(entry):
+M4pre(` notl_or_xorl_GMP_NUMB_MASK(r2)')
+ M4_inst -4(vp), r2
+ lea 8(rp), rp
+M4post(` notl_or_xorl_GMP_NUMB_MASK(r2)')
+ dec cnt
+ mov (up), r1
+ mov r2, -4(rp)
+ jnz L(oop)
+
+C Final limb and restoring of the saved registers.
+L(end):
+M4pre(` notl_or_xorl_GMP_NUMB_MASK(r1)')
+ mov SAVE_UP, up
+ M4_inst (vp), r1
+M4post(`notl_or_xorl_GMP_NUMB_MASK(r1)')
+ mov SAVE_VP, vp
+ mov r1, (rp)
+ mov SAVE_RP, rp
+ ret
+
+C Even size: the limb already loaded into r1 is moved to r2 and the loop
+C is entered at its second half, with the pointers biased by one limb to
+C match the -4(...) addressing used there.
+L(even):
+ mov r1, r2
+ lea 4(up), up
+ lea 4(vp), vp
+ lea -4(rp), rp
+ jmp L(entry)
+EPILOGUE()
+ASM_END()
--- /dev/null
+dnl Intel Atom mpn_lshift -- mpn left shift.
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+
+dnl Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C mp_limb_t mpn_lshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C unsigned cnt);
+
+C cycles/limb
+C cnt!=1 cnt==1
+C P5
+C P6 model 0-8,10-12
+C P6 model 9 (Banias)
+C P6 model 13 (Dothan)
+C P4 model 0 (Willamette)
+C P4 model 1 (?)
+C P4 model 2 (Northwood)
+C P4 model 3 (Prescott)
+C P4 model 4 (Nocona)
+C Intel Atom 5 2.5
+C AMD K6
+C AMD K7
+C AMD K8
+C AMD K10
+
+C Offsets of the four stack arguments.
+defframe(PARAM_CNT, 16)
+defframe(PARAM_SIZE,12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+
+dnl re-use parameter space
+C Each argument slot doubles as scratch storage; the code below only writes
+C a slot after the argument it held has been loaded into a register.
+define(SAVE_UP,`PARAM_CNT')
+define(VAR_COUNT,`PARAM_SIZE')
+define(SAVE_EBX,`PARAM_SRC')
+define(SAVE_EBP,`PARAM_DST')
+
+define(`rp', `%edi')
+define(`up', `%esi')
+define(`cnt', `%ecx')
+
+ASM_START()
+ TEXT
+ ALIGN(8)
+deflit(`FRAME',0)
+C mpn_lshift: shift the size-limb vector at src left by cnt bits, store the
+C result at dst, and return in eax the bits shifted out of the top limb.
+C The general path walks from the most significant limb downwards and
+C produces two limbs per loop pass.  A separate add-with-carry path handles
+C cnt == 1 when the pointer test below permits walking upwards.
+PROLOGUE(mpn_lshift)
+ mov PARAM_CNT, cnt
+ mov PARAM_SIZE, %edx
+ mov up, SAVE_UP
+ mov PARAM_SRC, up
+ push rp FRAME_pushl()
+ mov PARAM_DST, rp
+
+C We can use faster code for shift-by-1 under certain conditions.
+ cmp $1,cnt
+ jne L(normal)
+ cmpl rp, up
+ jnc L(special) C jump if s_ptr >= res_ptr
+ leal (up,%edx,4),%eax
+ cmpl %eax,rp
+ jnc L(special) C jump if res_ptr >= s_ptr + size
+
+L(normal):
+C Point up and rp at the most significant limb and save ebx.
+ lea -4(up,%edx,4), up
+ mov %ebx, SAVE_EBX
+ lea -4(rp,%edx,4), rp
+
+C edx = size/2 becomes the loop counter; the carry flag left by shr holds
+C the low bit of size and selects the odd or even entry sequence.
+ shr %edx
+ mov (up), %eax
+ mov %edx, VAR_COUNT
+ jnc L(evn)
+
+C Odd size: ebx = top limb << cnt.  With cnt negated, the shr by cl shifts
+C by 32-cnt (x86 uses only the low five bits of cl), so eax receives the
+C bits shifted out of the top limb, which is the return value.
+ mov %eax, %ebx
+ shl %cl, %ebx
+ neg cnt
+ shr %cl, %eax
+ test %edx, %edx
+ jnz L(gt1)
+C size == 1: store the single result limb and leave.
+ mov %ebx, (rp)
+ jmp L(quit)
+
+C Odd size > 1: park the return value on the stack and enter the loop at
+C its second half.
+C NOTE(review): this push carries no FRAME_pushl, presumably because the
+C FRAME_pushl in L(evn) already accounts for the same stack slot -- confirm.
+L(gt1): mov %ebp, SAVE_EBP
+ push %eax
+ mov -4(up), %eax
+ mov %eax, %ebp
+ shr %cl, %eax
+ jmp L(lo1)
+
+C Even size: ebp = top limb and ebx = next limb (unshifted copies), eax =
+C bits shifted out of the top limb (return value), edx = high bits of the
+C next limb moved down.  cnt is negated for the right shifts and restored.
+L(evn): mov %ebp, SAVE_EBP
+ neg cnt
+ mov %eax, %ebp
+ mov -4(up), %edx
+ shr %cl, %eax
+ mov %edx, %ebx
+ shr %cl, %edx
+ neg cnt
+ decl VAR_COUNT
+ lea 4(rp), rp
+ lea -4(up), up
+ jz L(end)
+ push %eax FRAME_pushl()
+
+C Main loop: two result limbs per pass.  cnt is negated between the two
+C halves so that cl alternates between the left and the right shift count.
+ ALIGN(8)
+L(top): shl %cl, %ebp
+ or %ebp, %edx
+ shl %cl, %ebx
+ neg cnt
+ mov -4(up), %eax
+ mov %eax, %ebp
+ mov %edx, -4(rp)
+ shr %cl, %eax
+ lea -8(rp), rp
+L(lo1): mov -8(up), %edx
+ or %ebx, %eax
+ mov %edx, %ebx
+ shr %cl, %edx
+ lea -8(up), up
+ neg cnt
+ mov %eax, (rp)
+ decl VAR_COUNT
+ jg L(top)
+
+C Recover the return value saved before the loop.
+ pop %eax FRAME_popl()
+C Wind down: combine and store the last two result limbs, restore ebp.
+L(end):
+ shl %cl, %ebp
+ shl %cl, %ebx
+ or %ebp, %edx
+ mov SAVE_EBP, %ebp
+ mov %edx, -4(rp)
+ mov %ebx, -8(rp)
+
+C Common exit: restore esi, ebx and edi; eax holds the return value.
+L(quit):
+ mov SAVE_UP, up
+ mov SAVE_EBX, %ebx
+ pop rp FRAME_popl()
+ ret
+
+C Shift-by-1 path: every limb is doubled with add/adc so that the carry
+C flag hands the outgoing top bit to the next limb, walking upwards.
+C eax counts loop passes and (size - 1) mod 4 selects the entry point.
+C Only ebx-free code runs here, so just esi and edi need restoring.
+L(special):
+deflit(`FRAME',4)
+ lea 3(%edx), %eax C size + 3
+ dec %edx C size - 1
+ mov (up), %ecx
+ shr $2, %eax C (size + 3) / 4
+ and $3, %edx C (size - 1) % 4
+ jz L(goloop) C jmp if size == 1 (mod 4)
+ shr %edx
+ jnc L(odd) C jump if size == 3 (mod 4)
+
+C size == 0 or 2 (mod 4): shift the lowest limb here; the add also sets
+C the carry flag consumed by the first adc that follows.
+ add %ecx, %ecx
+ lea 4(up), up
+ mov %ecx, (rp)
+ mov (up), %ecx
+ lea 4(rp), rp
+
+ dec %edx
+ jnz L(goloop) C jump if size == 2 (mod 4)
+L(odd): lea -8(up), up
+ lea -8(rp), rp
+ jmp L(sentry) C reached if size == 0 or 3 (mod 4)
+
+C Four limbs per pass.  The mov, lea and dec instructions in between leave
+C the carry flag untouched, so the adc chain continues across the branch.
+L(sloop):
+ adc %ecx, %ecx
+ mov 4(up), %edx
+ mov %ecx, (rp)
+ adc %edx, %edx
+ mov 8(up), %ecx
+ mov %edx, 4(rp)
+L(sentry):
+ adc %ecx, %ecx
+ mov 12(up), %edx
+ mov %ecx, 8(rp)
+ adc %edx, %edx
+ lea 16(up), up
+ mov %edx, 12(rp)
+ lea 16(rp), rp
+ mov (up), %ecx
+L(goloop):
+ decl %eax
+ jnz L(sloop)
+
+C Last limb.  eax is zero here, so the final adc turns the outgoing carry
+C into the return value.
+L(squit):
+ adc %ecx, %ecx
+ mov %ecx, (rp)
+ adc %eax, %eax
+
+ mov SAVE_UP, up
+ pop rp FRAME_popl()
+ ret
+EPILOGUE()
+ASM_END()
--- /dev/null
+dnl Intel Atom mpn_lshiftc -- mpn left shift with complement.
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+
+dnl Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C mp_limb_t mpn_lshiftc (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C unsigned cnt);
+
+C cycles/limb
+C P5
+C P6 model 0-8,10-12
+C P6 model 9 (Banias)
+C P6 model 13 (Dothan)
+C P4 model 0 (Willamette)
+C P4 model 1 (?)
+C P4 model 2 (Northwood)
+C P4 model 3 (Prescott)
+C P4 model 4 (Nocona)
+C Intel Atom 5.5
+C AMD K6
+C AMD K7
+C AMD K8
+C AMD K10
+
+C Offsets of the four stack arguments.
+defframe(PARAM_CNT, 16)
+defframe(PARAM_SIZE,12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+
+dnl re-use parameter space
+C Each argument slot doubles as scratch storage; the code below only writes
+C a slot after the argument it held has been loaded into a register.
+define(SAVE_UP,`PARAM_CNT')
+define(VAR_COUNT,`PARAM_SIZE')
+define(SAVE_EBX,`PARAM_SRC')
+define(SAVE_EBP,`PARAM_DST')
+
+define(`rp', `%edi')
+define(`up', `%esi')
+define(`cnt', `%ecx')
+
+ASM_START()
+ TEXT
+
+C mpn_lshiftc: shift the size-limb vector at src left by cnt bits and store
+C the bitwise complement of every result limb at dst.  The value returned
+C in eax is the bits shifted out of the top limb and is not complemented.
+C Limbs are processed from the most significant one downwards, two per
+C loop pass.
+PROLOGUE(mpn_lshiftc)
+deflit(`FRAME',0)
+ mov PARAM_CNT, cnt
+ mov PARAM_SIZE, %edx
+ mov up, SAVE_UP
+ mov PARAM_SRC, up
+ push rp FRAME_pushl()
+ mov PARAM_DST, rp
+
+C Point up and rp at the most significant limb and save ebx.
+ lea -4(up,%edx,4), up
+ mov %ebx, SAVE_EBX
+ lea -4(rp,%edx,4), rp
+
+C edx = size/2 becomes the loop counter; the carry flag left by shr holds
+C the low bit of size and selects the odd or even entry sequence.
+ shr %edx
+ mov (up), %eax
+ mov %edx, VAR_COUNT
+ jnc L(evn)
+
+C Odd size: ebx = top limb << cnt; with cnt negated the shr leaves the
+C shifted-out bits (the return value) in eax.
+ mov %eax, %ebx
+ shl %cl, %ebx
+ neg cnt
+ shr %cl, %eax
+ test %edx, %edx
+ jnz L(gt1)
+C size == 1: complement and store the single result limb.
+ not %ebx
+ mov %ebx, (rp)
+ jmp L(quit)
+
+C Odd size > 1: park the return value on the stack and enter the loop at
+C its second half.
+C NOTE(review): this push carries no FRAME_pushl, presumably because the
+C FRAME_pushl in L(evn) already accounts for the same stack slot -- confirm.
+L(gt1): mov %ebp, SAVE_EBP
+ push %eax
+ mov -4(up), %eax
+ mov %eax, %ebp
+ shr %cl, %eax
+ jmp L(lo1)
+
+C Even size: ebp = top limb, ebx = next limb, eax = return value, edx =
+C high bits of the next limb moved down.
+L(evn): mov %ebp, SAVE_EBP
+ neg cnt
+ mov %eax, %ebp
+ mov -4(up), %edx
+ shr %cl, %eax
+ mov %edx, %ebx
+ shr %cl, %edx
+ neg cnt
+ decl VAR_COUNT
+ lea 4(rp), rp
+ lea -4(up), up
+ jz L(end)
+ push %eax FRAME_pushl()
+
+C Main loop: two result limbs per pass, each complemented just before it
+C is stored.  cnt is negated between the halves so that cl alternates
+C between the left and the right shift count.
+L(top): shl %cl, %ebp
+ or %ebp, %edx
+ shl %cl, %ebx
+ neg cnt
+ not %edx
+ mov -4(up), %eax
+ mov %eax, %ebp
+ mov %edx, -4(rp)
+ shr %cl, %eax
+ lea -8(rp), rp
+L(lo1): mov -8(up), %edx
+ or %ebx, %eax
+ mov %edx, %ebx
+ shr %cl, %edx
+ not %eax
+ lea -8(up), up
+ neg cnt
+ mov %eax, (rp)
+ decl VAR_COUNT
+ jg L(top)
+
+C Recover the return value saved before the loop.
+ pop %eax FRAME_popl()
+C Wind down: combine, complement and store the last two result limbs.
+L(end):
+ shl %cl, %ebp
+ shl %cl, %ebx
+ or %ebp, %edx
+ mov SAVE_EBP, %ebp
+ not %edx
+ not %ebx
+ mov %edx, -4(rp)
+ mov %ebx, -8(rp)
+
+C Common exit: restore esi, ebx and edi; eax holds the return value.
+L(quit):
+ mov SAVE_UP, up
+ mov SAVE_EBX, %ebx
+ pop rp FRAME_popl()
+ ret
+EPILOGUE()
+ASM_END()
--- /dev/null
+dnl Intel Atom mpn_copyd -- copy limb vector, decrementing.
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C No Atom-specific code here: mpn_copyd is taken from the K7 MMX source
+C file included below.
+MULFUNC_PROLOGUE(mpn_copyd)
+include_mpn(`x86/k7/mmx/copyd.asm')
--- /dev/null
+dnl Intel Atom mpn_copyi -- copy limb vector, incrementing.
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C No Atom-specific code here: mpn_copyi is taken from the K7 MMX source
+C file included below.
+MULFUNC_PROLOGUE(mpn_copyi)
+include_mpn(`x86/k7/mmx/copyi.asm')
--- /dev/null
+dnl Intel Atom mpn_hamdist -- hamming distance.
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C No Atom-specific code here: mpn_hamdist is built from the K7 MMX
+C popham.asm source file included below.
+MULFUNC_PROLOGUE(mpn_hamdist)
+include_mpn(`x86/k7/mmx/popham.asm')
--- /dev/null
+dnl Intel Atom mpn_mod_34lsub1 -- remainder modulo 2^24-1.
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C No Atom-specific code here: mpn_mod_34lsub1 is taken from the P6 source
+C file included below.
+MULFUNC_PROLOGUE(mpn_mod_34lsub1)
+include_mpn(`x86/p6/mod_34lsub1.asm')
--- /dev/null
+dnl Intel Atom mpn_modexact_1_odd -- exact division style remainder.
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C No Atom-specific code here: both mpn_modexact_1_odd and
+C mpn_modexact_1c_odd are taken from the Pentium source file included below.
+MULFUNC_PROLOGUE(mpn_modexact_1_odd mpn_modexact_1c_odd)
+include_mpn(`x86/pentium/mode1o.asm')
--- /dev/null
+dnl Intel Atom mpn_rshift -- mpn right shift.
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+
+dnl Converted from AMD64 by Marco Bodrato.
+
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C mp_limb_t mpn_rshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C unsigned cnt);
+
+C cycles/limb
+C P5
+C P6 model 0-8,10-12
+C P6 model 9 (Banias)
+C P6 model 13 (Dothan)
+C P4 model 0 (Willamette)
+C P4 model 1 (?)
+C P4 model 2 (Northwood)
+C P4 model 3 (Prescott)
+C P4 model 4 (Nocona)
+C Intel Atom 5
+C AMD K6
+C AMD K7
+C AMD K8
+C AMD K10
+
+C Offsets of the four stack arguments.
+defframe(PARAM_CNT, 16)
+defframe(PARAM_SIZE,12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+
+dnl re-use parameter space
+C Each argument slot doubles as scratch storage; the code below only writes
+C a slot after the argument it held has been loaded into a register.
+define(SAVE_UP,`PARAM_CNT')
+define(VAR_COUNT,`PARAM_SIZE')
+define(SAVE_EBX,`PARAM_SRC')
+define(SAVE_EBP,`PARAM_DST')
+
+define(`rp', `%edi')
+define(`up', `%esi')
+define(`cnt', `%ecx')
+
+ASM_START()
+ TEXT
+ ALIGN(8)
+deflit(`FRAME',0)
+C mpn_rshift: shift the size-limb vector at src right by cnt bits, store
+C the result at dst, and return in eax the bits shifted out of the lowest
+C limb, positioned at the top of the register.  Limbs are processed from
+C the least significant one upwards, two per loop pass.
+PROLOGUE(mpn_rshift)
+ mov PARAM_CNT, cnt
+ mov PARAM_SIZE, %edx
+ mov up, SAVE_UP
+ mov PARAM_SRC, up
+ push rp FRAME_pushl()
+ mov PARAM_DST, rp
+ mov %ebx, SAVE_EBX
+
+C edx = size/2 becomes the loop counter; the carry flag left by shr holds
+C the low bit of size and selects the odd or even entry sequence.
+ shr %edx
+ mov (up), %eax
+ mov %edx, VAR_COUNT
+ jnc L(evn)
+
+C Odd size: ebx = lowest limb >> cnt.  With cnt negated, the shl by cl
+C shifts by 32-cnt (x86 uses only the low five bits of cl), so eax receives
+C the bits shifted out of the lowest limb, which is the return value.
+ mov %eax, %ebx
+ shr %cl, %ebx
+ neg cnt
+ shl %cl, %eax
+ test %edx, %edx
+ jnz L(gt1)
+C size == 1: store the single result limb and leave.
+ mov %ebx, (rp)
+ jmp L(quit)
+
+C Odd size > 1: park the return value on the stack and enter the loop at
+C its second half.
+C NOTE(review): this push carries no FRAME_pushl, presumably because the
+C FRAME_pushl in L(evn) already accounts for the same stack slot -- confirm.
+L(gt1): mov %ebp, SAVE_EBP
+ push %eax
+ mov 4(up), %eax
+ mov %eax, %ebp
+ shl %cl, %eax
+ jmp L(lo1)
+
+C Even size: ebp = lowest limb, ebx = next limb, eax = return value, edx =
+C low bits of the next limb moved up.
+L(evn): mov %ebp, SAVE_EBP
+ neg cnt
+ mov %eax, %ebp
+ mov 4(up), %edx
+ shl %cl, %eax
+ mov %edx, %ebx
+ shl %cl, %edx
+ neg cnt
+ decl VAR_COUNT
+ lea -4(rp), rp
+ lea 4(up), up
+ jz L(end)
+ push %eax FRAME_pushl()
+
+C Main loop: two result limbs per pass.  cnt is negated between the two
+C halves so that cl alternates between the right and the left shift count.
+ ALIGN(8)
+L(top): shr %cl, %ebp
+ or %ebp, %edx
+ shr %cl, %ebx
+ neg cnt
+ mov 4(up), %eax
+ mov %eax, %ebp
+ mov %edx, 4(rp)
+ shl %cl, %eax
+ lea 8(rp), rp
+L(lo1): mov 8(up), %edx
+ or %ebx, %eax
+ mov %edx, %ebx
+ shl %cl, %edx
+ lea 8(up), up
+ neg cnt
+ mov %eax, (rp)
+ decl VAR_COUNT
+ jg L(top)
+
+C Recover the return value saved before the loop.
+ pop %eax FRAME_popl()
+C Wind down: combine and store the last two result limbs, restore ebp.
+L(end):
+ shr %cl, %ebp
+ shr %cl, %ebx
+ or %ebp, %edx
+ mov SAVE_EBP, %ebp
+ mov %edx, 4(rp)
+ mov %ebx, 8(rp)
+
+C Common exit: restore esi, ebx and edi; eax holds the return value.
+L(quit):
+ mov SAVE_UP, up
+ mov SAVE_EBX, %ebx
+ pop rp FRAME_popl()
+ ret
+EPILOGUE()
+ASM_END()
--- /dev/null
+dnl x86-32 mpn_addmul_1 and mpn_submul_1 optimised for Intel Atom.
+
+dnl Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
+dnl
+dnl Copyright 2011 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C cycles/limb
+C P5 -
+C P6 model 0-8,10-12 -
+C P6 model 9 (Banias)
+C P6 model 13 (Dothan)
+C P4 model 0 (Willamette)
+C P4 model 1 (?)
+C P4 model 2 (Northwood)
+C P4 model 3 (Prescott)
+C P4 model 4 (Nocona)
+C Intel Atom 8
+C AMD K6
+C AMD K7 -
+C AMD K8
+C AMD K10
+
+define(`rp', `%edi')
+define(`up', `%esi')
+define(`n', `%ecx')
+
+C ADDSUB, func_1 and func_1c are chosen by whichever OPERATION_ symbol is
+C defined, so this one source yields either the addmul or the submul pair.
+ifdef(`OPERATION_addmul_1',`
+ define(ADDSUB, add)
+ define(func_1, mpn_addmul_1)
+ define(func_1c, mpn_addmul_1c)')
+ifdef(`OPERATION_submul_1',`
+ define(ADDSUB, sub)
+ define(func_1, mpn_submul_1)
+ define(func_1c, mpn_submul_1c)')
+
+MULFUNC_PROLOGUE(mpn_addmul_1 mpn_addmul_1c mpn_submul_1 mpn_submul_1c)
+
+ TEXT
+ ALIGN(16)
+C func_1: multiply the n limbs at up by the single limb argument and add
+C them to (or subtract them from) the n limbs at rp in place.  The carry
+C limb out of the top position is returned in eax.  Products are formed
+C with pmuludq in MMX registers; the loop handles four limbs per pass and
+C is entered at a point chosen by n mod 4.
+PROLOGUE(func_1)
+C Plain entry: the incoming carry limb in edx is zero.
+ xor %edx, %edx
+C Shared entry.  The argument offsets below include the three pushes.
+L(ent): push %edi
+ push %esi
+ push %ebx
+ mov 16(%esp), rp
+ mov 20(%esp), up
+ mov 24(%esp), n
+ movd 28(%esp), %mm7
+ test $1, n
+ jz L(fi0or2)
+C n is odd.  shr leaves n/4 in n and bit 1 of the old n in the carry flag.
+ movd (up), %mm0
+ pmuludq %mm7, %mm0
+ shr $2, n
+ jnc L(fi1)
+
+C n == 3 (mod 4)
+L(fi3): lea -8(up), up
+ lea -8(rp), rp
+ movd 12(up), %mm1
+ movd %mm0, %ebx
+ pmuludq %mm7, %mm1
+ add $1, n C increment and clear carry
+ jmp L(lo3)
+
+C n == 1 (mod 4); the zero flag from the shr is still valid here, so the jz
+C goes straight to the wind-down code when n == 1.
+L(fi1): movd %mm0, %ebx
+ jz L(wd1)
+ movd 4(up), %mm1
+ pmuludq %mm7, %mm1
+ jmp L(lo1)
+
+C n is even.  The carry flag after the shr separates n == 2 from n == 0
+C (mod 4).
+L(fi0or2):
+ movd (up), %mm1
+ pmuludq %mm7, %mm1
+ shr $2, n
+ movd 4(up), %mm0
+ jc L(fi2)
+ lea -4(up), up
+ lea -4(rp), rp
+ movd %mm1, %eax
+ pmuludq %mm7, %mm0
+ jmp L(lo0)
+
+L(fi2): lea 4(up), up
+ add $1, n C increment and clear carry
+ movd %mm1, %eax
+ lea -12(rp), rp
+ jmp L(lo2)
+
+C Main loop, four limbs per pass.  eax and ebx alternate as the low product
+C word being accumulated into rp, edx carries the high product word plus
+C the carry flag into the next limb.
+C ALIGN(16) C alignment seems irrelevant
+L(top): movd 4(up), %mm1
+ adc $0, %edx
+ ADDSUB %eax, 12(rp)
+ movd %mm0, %ebx
+ pmuludq %mm7, %mm1
+ lea 16(rp), rp
+L(lo1): psrlq $32, %mm0
+ adc %edx, %ebx
+ movd %mm0, %edx
+ movd %mm1, %eax
+ movd 8(up), %mm0
+ pmuludq %mm7, %mm0
+ adc $0, %edx
+ ADDSUB %ebx, (rp)
+L(lo0): psrlq $32, %mm1
+ adc %edx, %eax
+ movd %mm1, %edx
+ movd %mm0, %ebx
+ movd 12(up), %mm1
+ pmuludq %mm7, %mm1
+ adc $0, %edx
+ ADDSUB %eax, 4(rp)
+L(lo3): psrlq $32, %mm0
+ adc %edx, %ebx
+ movd %mm0, %edx
+ movd %mm1, %eax
+ lea 16(up), up
+ movd (up), %mm0
+ adc $0, %edx
+ ADDSUB %ebx, 8(rp)
+L(lo2): psrlq $32, %mm1
+ adc %edx, %eax
+ movd %mm1, %edx
+ pmuludq %mm7, %mm0
+ dec n
+ jnz L(top)
+
+C Wind down: finish the last two limbs and collect the carry limb in eax.
+C n is zero from here on and only serves as a zero operand for adc.
+L(end): adc n, %edx C n is zero here
+ ADDSUB %eax, 12(rp)
+ movd %mm0, %ebx
+ lea 16(rp), rp
+L(wd1): psrlq $32, %mm0
+ adc %edx, %ebx
+ movd %mm0, %eax
+ adc n, %eax
+ ADDSUB %ebx, (rp)
+C Leave MMX state before returning.
+ emms
+ adc n, %eax
+ pop %ebx
+ pop %esi
+ pop %edi
+ ret
+EPILOGUE()
+C func_1c: same operation with an explicit incoming carry limb, read from
+C the fifth argument slot before joining the shared entry point.
+PROLOGUE(func_1c)
+ mov 20(%esp), %edx C carry
+ jmp L(ent)
+EPILOGUE()
--- /dev/null
+dnl Intel Atom mpn_bdiv_dbm1c.
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C No Atom-specific code here: mpn_bdiv_dbm1c is taken from the Pentium 4
+C SSE2 source file included below.
+MULFUNC_PROLOGUE(mpn_bdiv_dbm1c)
+include_mpn(`x86/pentium4/sse2/bdiv_dbm1c.asm')
--- /dev/null
+dnl Intel Atom mpn_divrem_1 -- mpn by limb division.
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C No Atom-specific code here: mpn_preinv_divrem_1, mpn_divrem_1c and
+C mpn_divrem_1 are taken from the Pentium 4 SSE2 source file included below.
+MULFUNC_PROLOGUE(mpn_preinv_divrem_1 mpn_divrem_1c mpn_divrem_1)
+include_mpn(`x86/pentium4/sse2/divrem_1.asm')
--- /dev/null
+dnl Intel Atom/SSE2 mpn_mod_1_1.
+
+dnl Copyright 2009, 2011 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C No Atom-specific code here: mpn_mod_1_1p is taken from the Pentium 4
+C SSE2 source file included below.
+MULFUNC_PROLOGUE(mpn_mod_1_1p)
+include_mpn(`x86/pentium4/sse2/mod_1_1.asm')
--- /dev/null
+dnl Intel Atom/SSE2 mpn_mod_1_4.
+
+dnl Copyright 2009, 2011 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C No Atom-specific code here: mpn_mod_1s_4p is taken from the Pentium 4
+C SSE2 source file included below.
+MULFUNC_PROLOGUE(mpn_mod_1s_4p)
+include_mpn(`x86/pentium4/sse2/mod_1_4.asm')
--- /dev/null
+dnl Intel Atom mpn_mul_1.
+
+dnl Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
+dnl
+dnl Copyright 2011 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C cycles/limb
+C P5 -
+C P6 model 0-8,10-12 -
+C P6 model 9 (Banias)
+C P6 model 13 (Dothan)
+C P4 model 0 (Willamette)
+C P4 model 1 (?)
+C P4 model 2 (Northwood)
+C P4 model 3 (Prescott)
+C P4 model 4 (Nocona)
+C Intel Atom 7.5
+C AMD K6 -
+C AMD K7 -
+C AMD K8
+C AMD K10
+
+C Offsets of the stack arguments; PARAM_CARRY is only read by mpn_mul_1c.
+defframe(PARAM_CARRY,20)
+defframe(PARAM_MUL, 16)
+defframe(PARAM_SIZE, 12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+
+define(`rp', `%edx')
+define(`up', `%esi')
+define(`n', `%ecx')
+
+ASM_START()
+ TEXT
+ ALIGN(16)
+deflit(`FRAME',0)
+
+C mpn_mul_1c: as mpn_mul_1 below, but the 64-bit accumulator mm6 starts
+C with the carry limb argument instead of zero.
+PROLOGUE(mpn_mul_1c)
+ movd PARAM_CARRY, %mm6 C carry
+ jmp L(ent)
+EPILOGUE()
+
+ ALIGN(8) C for compact code
+C mpn_mul_1: multiply the size limbs at src by the single limb argument,
+C store the low size limbs of the product at dst and return the high limb
+C in eax.  mm7 holds the multiplier, mm0 the current 64-bit product and
+C mm6 the running accumulator whose low word is stored and whose high word
+C carries into the next limb.
+PROLOGUE(mpn_mul_1)
+ pxor %mm6, %mm6
+L(ent): push %esi FRAME_pushl()
+ mov PARAM_SRC, up
+ mov PARAM_SIZE, %eax C size
+ movd PARAM_MUL, %mm7
+ movd (up), %mm0
+ mov %eax, n
+ and $3, %eax
+ pmuludq %mm7, %mm0
+ mov PARAM_DST, rp
+C Dispatch on size mod 4 (flags from the and survive the MMX op and mov).
+C For the nonzero residues up and rp are biased so that the fixed offsets
+C inside the loop land on the first limbs.
+ jz L(lo0)
+ cmp $2, %eax
+ lea -16(up,%eax,4),up
+ lea -16(rp,%eax,4),rp
+ jc L(lo1)
+ jz L(lo2)
+ jmp L(lo3)
+
+C Main loop, four limbs per pass.  The ja after sub $4 repeats while more
+C than four limbs remained before the subtraction.
+ ALIGN(16)
+L(top): movd (up), %mm0
+ pmuludq %mm7, %mm0
+ psrlq $32, %mm6
+ lea 16(rp), rp
+L(lo0): paddq %mm0, %mm6
+ movd 4(up), %mm0
+ pmuludq %mm7, %mm0
+ movd %mm6, (rp)
+ psrlq $32, %mm6
+L(lo3): paddq %mm0, %mm6
+ movd 8(up), %mm0
+ pmuludq %mm7, %mm0
+ movd %mm6, 4(rp)
+ psrlq $32, %mm6
+L(lo2): paddq %mm0, %mm6
+ movd 12(up), %mm0
+ pmuludq %mm7, %mm0
+ movd %mm6, 8(rp)
+ psrlq $32, %mm6
+L(lo1): paddq %mm0, %mm6
+ sub $4, n
+ movd %mm6, 12(rp)
+ lea 16(up), up
+ ja L(top)
+
+C The remaining high word of the accumulator is the return value.
+ psrlq $32, %mm6
+ movd %mm6, %eax
+ emms
+ pop %esi FRAME_popl()
+ ret
+EPILOGUE()
+ASM_END()
--- /dev/null
+dnl x86 mpn_mul_basecase -- Multiply two limb vectors and store the result in
+dnl a third limb vector.
+
+dnl Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
+dnl
+dnl Copyright 2011 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C TODO
+C * Check if 'jmp N(%esp)' is well-predicted enough to allow us to combine the
+C 4 large loops into one; we could use it for the outer loop branch.
+C * Optimise code outside of inner loops.
+C * Write combined addmul_1 feed-in and wind-down code, and use when iterating
+C each outer loop. ("Overlapping software pipelining")
+C * Postpone push of ebx until we know vn > 1. Perhaps use caller-saves regs
+C for inlined mul_1, allowing us to postpone all pushes.
+C * Perhaps write special code for vn <= un < M, for some small M.
+
+C void mpn_mul_basecase (mp_ptr wp,
+C mp_srcptr xp, mp_size_t xn,
+C mp_srcptr yp, mp_size_t yn);
+C
+
+C Register assignment.  vn stays in memory: it is the fifth stack argument
+C addressed past the four registers pushed in the prologue, and the code
+C decrements it in place as the outer loop counter.
+define(`rp', `%edi')
+define(`up', `%esi')
+define(`un', `%ecx')
+define(`vp', `%ebp')
+define(`vn', `36(%esp)')
+
+ TEXT
+ ALIGN(16)
+C mpn_mul_basecase: schoolbook product of the un limbs at up and the vn
+C limbs at vp, written to rp.  The first v limb is handled by an inlined
+C multiply loop that stores to rp; every further v limb is handled by an
+C inlined multiply-and-add loop that accumulates into rp one limb higher.
+C There are four copies of both loops, one per value of un mod 4, each
+C entered at the point that matches that residue.
+PROLOGUE(mpn_mul_basecase)
+ push %edi
+ push %esi
+ push %ebx
+ push %ebp
+ mov 20(%esp), rp
+ mov 24(%esp), up
+ mov 28(%esp), un
+ mov 32(%esp), vp
+
+C Start the first product up[0] * vp[0] and clear the accumulator mm6.
+ movd (up), %mm0
+ movd (vp), %mm7
+ pmuludq %mm7, %mm0
+ pxor %mm6, %mm6
+
+C Dispatch on un mod 4; residue 3 falls through to the code below.
+ mov un, %eax
+ and $3, %eax
+ jz L(of0)
+ cmp $2, %eax
+ jc L(of1)
+ jz L(of2)
+
+C ================================================================
+C un == 3 (mod 4).  First row: multiply loop entered at L(m3).
+ jmp L(m3)
+ ALIGN(16)
+L(lm3): movd -4(up), %mm0
+ pmuludq %mm7, %mm0
+ psrlq $32, %mm6
+ lea 16(rp), rp
+ paddq %mm0, %mm6
+ movd (up), %mm0
+ pmuludq %mm7, %mm0
+ movd %mm6, -4(rp)
+ psrlq $32, %mm6
+L(m3): paddq %mm0, %mm6
+ movd 4(up), %mm0
+ pmuludq %mm7, %mm0
+ movd %mm6, (rp)
+ psrlq $32, %mm6
+ paddq %mm0, %mm6
+ movd 8(up), %mm0
+ pmuludq %mm7, %mm0
+ movd %mm6, 4(rp)
+ psrlq $32, %mm6
+ paddq %mm0, %mm6
+ sub $4, un
+ movd %mm6, 8(rp)
+ lea 16(up), up
+ ja L(lm3)
+
+C Store the high limb of the first row; finished if there was one v limb.
+ psrlq $32, %mm6
+ movd %mm6, 12(rp)
+
+ decl vn
+ jz L(done)
+ lea -8(rp), rp
+
+C Outer loop: reload un and up from the argument area, step to the next v
+C limb and rewind rp.  un is negated and divided by four so the inner loop
+C can count upwards to zero.
+L(ol3): mov 28(%esp), un
+ neg un
+ lea 4(vp), vp
+ movd (vp), %mm7 C read next V limb
+ mov 24(%esp), up
+ lea 16(rp,un,4), rp
+
+ movd (up), %mm0
+ pmuludq %mm7, %mm0
+ sar $2, un
+ movd 4(up), %mm1
+ movd %mm0, %ebx
+ pmuludq %mm7, %mm1
+ lea -8(up), up
+ xor %edx, %edx C zero edx and CF
+ jmp L(a3)
+
+C Inner multiply-and-add loop, four limbs per pass.
+L(la3): movd 4(up), %mm1
+ adc $0, %edx
+ add %eax, 12(rp)
+ movd %mm0, %ebx
+ pmuludq %mm7, %mm1
+ lea 16(rp), rp
+ psrlq $32, %mm0
+ adc %edx, %ebx
+ movd %mm0, %edx
+ movd %mm1, %eax
+ movd 8(up), %mm0
+ pmuludq %mm7, %mm0
+ adc $0, %edx
+ add %ebx, (rp)
+ psrlq $32, %mm1
+ adc %edx, %eax
+ movd %mm1, %edx
+ movd %mm0, %ebx
+ movd 12(up), %mm1
+ pmuludq %mm7, %mm1
+ adc $0, %edx
+ add %eax, 4(rp)
+L(a3): psrlq $32, %mm0
+ adc %edx, %ebx
+ movd %mm0, %edx
+ movd %mm1, %eax
+ lea 16(up), up
+ movd (up), %mm0
+ adc $0, %edx
+ add %ebx, 8(rp)
+ psrlq $32, %mm1
+ adc %edx, %eax
+ movd %mm1, %edx
+ pmuludq %mm7, %mm0
+ inc un
+ jnz L(la3)
+
+C Wind down the row: add the last two limbs and store the carry limb as
+C the new top limb.
+ adc un, %edx C un is zero here
+ add %eax, 12(rp)
+ movd %mm0, %ebx
+ psrlq $32, %mm0
+ adc %edx, %ebx
+ movd %mm0, %eax
+ adc un, %eax
+ add %ebx, 16(rp)
+ adc un, %eax
+ mov %eax, 20(rp)
+
+ decl vn
+ jnz L(ol3)
+ jmp L(done)
+
+C ================================================================
+C un == 0 (mod 4).  First row: multiply loop entered at L(of0).
+ ALIGN(16)
+L(lm0): movd (up), %mm0
+ pmuludq %mm7, %mm0
+ psrlq $32, %mm6
+ lea 16(rp), rp
+L(of0): paddq %mm0, %mm6
+ movd 4(up), %mm0
+ pmuludq %mm7, %mm0
+ movd %mm6, (rp)
+ psrlq $32, %mm6
+ paddq %mm0, %mm6
+ movd 8(up), %mm0
+ pmuludq %mm7, %mm0
+ movd %mm6, 4(rp)
+ psrlq $32, %mm6
+ paddq %mm0, %mm6
+ movd 12(up), %mm0
+ pmuludq %mm7, %mm0
+ movd %mm6, 8(rp)
+ psrlq $32, %mm6
+ paddq %mm0, %mm6
+ sub $4, un
+ movd %mm6, 12(rp)
+ lea 16(up), up
+ ja L(lm0)
+
+C Store the high limb of the first row; finished if there was one v limb.
+ psrlq $32, %mm6
+ movd %mm6, 16(rp)
+
+ decl vn
+ jz L(done)
+ lea -4(rp), rp
+
+C Outer loop for this residue: same set-up as in the first section, with
+C the pointer biases that match the L(a0) entry point.
+L(ol0): mov 28(%esp), un
+ neg un
+ lea 4(vp), vp
+ movd (vp), %mm7 C read next V limb
+ mov 24(%esp), up
+ lea 20(rp,un,4), rp
+
+ movd (up), %mm1
+ pmuludq %mm7, %mm1
+ sar $2, un
+ movd 4(up), %mm0
+ lea -4(up), up
+ movd %mm1, %eax
+ pmuludq %mm7, %mm0
+ xor %edx, %edx C zero edx and CF
+ jmp L(a0)
+
+C Inner multiply-and-add loop, four limbs per pass.
+L(la0): movd 4(up), %mm1
+ adc $0, %edx
+ add %eax, 12(rp)
+ movd %mm0, %ebx
+ pmuludq %mm7, %mm1
+ lea 16(rp), rp
+ psrlq $32, %mm0
+ adc %edx, %ebx
+ movd %mm0, %edx
+ movd %mm1, %eax
+ movd 8(up), %mm0
+ pmuludq %mm7, %mm0
+ adc $0, %edx
+ add %ebx, (rp)
+L(a0): psrlq $32, %mm1
+ adc %edx, %eax
+ movd %mm1, %edx
+ movd %mm0, %ebx
+ movd 12(up), %mm1
+ pmuludq %mm7, %mm1
+ adc $0, %edx
+ add %eax, 4(rp)
+ psrlq $32, %mm0
+ adc %edx, %ebx
+ movd %mm0, %edx
+ movd %mm1, %eax
+ lea 16(up), up
+ movd (up), %mm0
+ adc $0, %edx
+ add %ebx, 8(rp)
+ psrlq $32, %mm1
+ adc %edx, %eax
+ movd %mm1, %edx
+ pmuludq %mm7, %mm0
+ inc un
+ jnz L(la0)
+
+C Wind down the row and store the carry limb as the new top limb.
+ adc un, %edx C un is zero here
+ add %eax, 12(rp)
+ movd %mm0, %ebx
+ psrlq $32, %mm0
+ adc %edx, %ebx
+ movd %mm0, %eax
+ adc un, %eax
+ add %ebx, 16(rp)
+ adc un, %eax
+ mov %eax, 20(rp)
+
+ decl vn
+ jnz L(ol0)
+ jmp L(done)
+
+C ================================================================
+C un == 1 (mod 4).  First row: multiply loop entered at L(of1).
+ ALIGN(16)
+L(lm1): movd -12(up), %mm0
+ pmuludq %mm7, %mm0
+ psrlq $32, %mm6
+ lea 16(rp), rp
+ paddq %mm0, %mm6
+ movd -8(up), %mm0
+ pmuludq %mm7, %mm0
+ movd %mm6, -12(rp)
+ psrlq $32, %mm6
+ paddq %mm0, %mm6
+ movd -4(up), %mm0
+ pmuludq %mm7, %mm0
+ movd %mm6, -8(rp)
+ psrlq $32, %mm6
+ paddq %mm0, %mm6
+ movd (up), %mm0
+ pmuludq %mm7, %mm0
+ movd %mm6, -4(rp)
+ psrlq $32, %mm6
+L(of1): paddq %mm0, %mm6
+ sub $4, un
+ movd %mm6, (rp)
+ lea 16(up), up
+ ja L(lm1)
+
+C Store the high limb of the first row; finished if there was one v limb.
+ psrlq $32, %mm6
+ movd %mm6, 4(rp)
+
+ decl vn
+ jz L(done)
+ lea -16(rp), rp
+
+C Outer loop for this residue.  The extra inc un before the jump adjusts
+C the negative counter for the L(a1) entry point.
+L(ol1): mov 28(%esp), un
+ neg un
+ lea 4(vp), vp
+ movd (vp), %mm7 C read next V limb
+ mov 24(%esp), up
+ lea 24(rp,un,4), rp
+
+ movd (up), %mm0
+ pmuludq %mm7, %mm0
+ sar $2, un
+ movd %mm0, %ebx
+ movd 4(up), %mm1
+ pmuludq %mm7, %mm1
+ xor %edx, %edx C zero edx and CF
+ inc un
+ jmp L(a1)
+
+C Inner multiply-and-add loop, four limbs per pass.
+L(la1): movd 4(up), %mm1
+ adc $0, %edx
+ add %eax, 12(rp)
+ movd %mm0, %ebx
+ pmuludq %mm7, %mm1
+ lea 16(rp), rp
+L(a1): psrlq $32, %mm0
+ adc %edx, %ebx
+ movd %mm0, %edx
+ movd %mm1, %eax
+ movd 8(up), %mm0
+ pmuludq %mm7, %mm0
+ adc $0, %edx
+ add %ebx, (rp)
+ psrlq $32, %mm1
+ adc %edx, %eax
+ movd %mm1, %edx
+ movd %mm0, %ebx
+ movd 12(up), %mm1
+ pmuludq %mm7, %mm1
+ adc $0, %edx
+ add %eax, 4(rp)
+ psrlq $32, %mm0
+ adc %edx, %ebx
+ movd %mm0, %edx
+ movd %mm1, %eax
+ lea 16(up), up
+ movd (up), %mm0
+ adc $0, %edx
+ add %ebx, 8(rp)
+ psrlq $32, %mm1
+ adc %edx, %eax
+ movd %mm1, %edx
+ pmuludq %mm7, %mm0
+ inc un
+ jnz L(la1)
+
+C Wind down the row and store the carry limb as the new top limb.
+ adc un, %edx C un is zero here
+ add %eax, 12(rp)
+ movd %mm0, %ebx
+ psrlq $32, %mm0
+ adc %edx, %ebx
+ movd %mm0, %eax
+ adc un, %eax
+ add %ebx, 16(rp)
+ adc un, %eax
+ mov %eax, 20(rp)
+
+ decl vn
+ jnz L(ol1)
+ jmp L(done)
+
+C ================================================================
+C un == 2 (mod 4).  First row: multiply loop entered at L(of2).
+ ALIGN(16)
+L(lm2): movd -8(up), %mm0
+ pmuludq %mm7, %mm0
+ psrlq $32, %mm6
+ lea 16(rp), rp
+ paddq %mm0, %mm6
+ movd -4(up), %mm0
+ pmuludq %mm7, %mm0
+ movd %mm6, -8(rp)
+ psrlq $32, %mm6
+ paddq %mm0, %mm6
+ movd (up), %mm0
+ pmuludq %mm7, %mm0
+ movd %mm6, -4(rp)
+ psrlq $32, %mm6
+L(of2): paddq %mm0, %mm6
+ movd 4(up), %mm0
+ pmuludq %mm7, %mm0
+ movd %mm6, (rp)
+ psrlq $32, %mm6
+ paddq %mm0, %mm6
+ sub $4, un
+ movd %mm6, 4(rp)
+ lea 16(up), up
+ ja L(lm2)
+
+C Store the high limb of the first row; finished if there was one v limb.
+ psrlq $32, %mm6
+ movd %mm6, 8(rp)
+
+ decl vn
+ jz L(done)
+ lea -12(rp), rp
+
+C Outer loop for this residue; the inner loop is entered at L(lo2), where
+C the pending multiply of mm0 is issued.
+L(ol2): mov 28(%esp), un
+ neg un
+ lea 4(vp), vp
+ movd (vp), %mm7 C read next V limb
+ mov 24(%esp), up
+ lea 12(rp,un,4), rp
+
+ movd (up), %mm1
+ pmuludq %mm7, %mm1
+ sar $2, un
+ movd 4(up), %mm0
+ lea 4(up), up
+ movd %mm1, %eax
+ xor %edx, %edx C zero edx and CF
+ jmp L(lo2)
+
+C Inner multiply-and-add loop, four limbs per pass.
+L(la2): movd 4(up), %mm1
+ adc $0, %edx
+ add %eax, 12(rp)
+ movd %mm0, %ebx
+ pmuludq %mm7, %mm1
+ lea 16(rp), rp
+ psrlq $32, %mm0
+ adc %edx, %ebx
+ movd %mm0, %edx
+ movd %mm1, %eax
+ movd 8(up), %mm0
+ pmuludq %mm7, %mm0
+ adc $0, %edx
+ add %ebx, (rp)
+ psrlq $32, %mm1
+ adc %edx, %eax
+ movd %mm1, %edx
+ movd %mm0, %ebx
+ movd 12(up), %mm1
+ pmuludq %mm7, %mm1
+ adc $0, %edx
+ add %eax, 4(rp)
+ psrlq $32, %mm0
+ adc %edx, %ebx
+ movd %mm0, %edx
+ movd %mm1, %eax
+ lea 16(up), up
+ movd (up), %mm0
+ adc $0, %edx
+ add %ebx, 8(rp)
+L(lo2): psrlq $32, %mm1
+ adc %edx, %eax
+ movd %mm1, %edx
+ pmuludq %mm7, %mm0
+ inc un
+ jnz L(la2)
+
+C Wind down the row and store the carry limb as the new top limb.
+ adc un, %edx C un is zero here
+ add %eax, 12(rp)
+ movd %mm0, %ebx
+ psrlq $32, %mm0
+ adc %edx, %ebx
+ movd %mm0, %eax
+ adc un, %eax
+ add %ebx, 16(rp)
+ adc un, %eax
+ mov %eax, 20(rp)
+
+ decl vn
+ jnz L(ol2)
+C jmp L(done)
+
+C ================================================================
+C Common exit: leave MMX state and restore the four saved registers.
+L(done):
+ emms
+ pop %ebp
+ pop %ebx
+ pop %esi
+ pop %edi
+ ret
+EPILOGUE()
--- /dev/null
+dnl Intel Atom mpn_popcount -- population count.
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+MULFUNC_PROLOGUE(mpn_popcount)
+include_mpn(`x86/pentium4/sse2/popcount.asm')
--- /dev/null
+dnl x86 mpn_sqr_basecase -- square an mpn number, optimised for atom.
+
+dnl Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
+dnl
+dnl Copyright 2011 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C TODO
+C * Check if 'jmp N(%esp)' is well-predicted enough to allow us to combine the
+C 4 large loops into one; we could use it for the outer loop branch.
+C * Optimise code outside of inner loops.
+C * Write combined addmul_1 feed-in and wind-down code, and use when iterating
+C each outer loop. ("Overlapping software pipelining")
+C * Perhaps use caller-saves regs for inlined mul_1, allowing us to postpone
+C all pushes.
+C * Perhaps write special code for n < M, for some small M.
+C * Replace inlined addmul_1 with smaller code from aorsmul_1.asm, or perhaps
+C with even less pipelined code.
+C * We run the outer loop until we have a 2-limb by 1-limb addmul_1 left.
+C Consider breaking out earlier, saving the high cost of short loops.
+
+C void mpn_sqr_basecase (mp_ptr wp,
+C mp_srcptr xp, mp_size_t xn);
+
+define(`rp', `%edi')
+define(`up', `%esi')
+define(`n', `%ecx')
+
+define(`un', `%ebp')
+
+C mpn_sqr_basecase (wp, xp, xn): wp and xp are called rp and up in the code,
+C and the limb count is kept in n (%ecx).
+C
+C Structure, as far as it can be read from the code below:
+C 1. A mul_1-style first pass multiplies the limbs from up[1] onwards by up[0]
+C (held in mm7) and stores the products starting at rp[1]. One of four
+C loop entry points (of0, of1, of2, m3) is picked from (xn-1) mod 4.
+C 2. The outer loops ol0..ol3 each load the next invariant limb into mm7 and
+C add its products into rp (addmul_1 style). Control falls through
+C ol1 -> ol0 -> ol3 -> ol2 and then jumps back to ol1; the exits are the
+C jumps to L(re1) and L(re2).
+C 3. L(done) squares the limbs of up and adds each square to twice the value
+C accumulated in rp.
+C xn = 1 is handled separately at L(one).
+ TEXT
+ ALIGN(16)
+PROLOGUE(mpn_sqr_basecase)
+ push %edi
+ push %esi
+C Two registers are pushed, so the three arguments are at 12, 16 and 20(%esp).
+ mov 12(%esp), rp
+ mov 16(%esp), up
+ mov 20(%esp), n
+
+ lea 4(rp), rp C write triangular product starting at rp[1]
+ dec n
+ movd (up), %mm7
+
+C The flags tested here are still those of "dec n": movd does not alter them.
+ jz L(one)
+ lea 4(up), up
+ push %ebx
+ push %ebp
+ mov n, %eax
+
+ movd (up), %mm0
+ neg n
+ pmuludq %mm7, %mm0
+ pxor %mm6, %mm6
+ mov n, un
+
+C Dispatch on (xn-1) mod 4 to the matching first-pass loop entry.
+ and $3, %eax
+ jz L(of0)
+ cmp $2, %eax
+ jc L(of1)
+ jz L(of2)
+
+C ================================================================
+ jmp L(m3)
+ ALIGN(16)
+L(lm3): movd -4(up), %mm0
+ pmuludq %mm7, %mm0
+ psrlq $32, %mm6
+ lea 16(rp), rp
+ paddq %mm0, %mm6
+ movd (up), %mm0
+ pmuludq %mm7, %mm0
+ movd %mm6, -4(rp)
+ psrlq $32, %mm6
+L(m3): paddq %mm0, %mm6
+ movd 4(up), %mm0
+ pmuludq %mm7, %mm0
+ movd %mm6, (rp)
+ psrlq $32, %mm6
+ paddq %mm0, %mm6
+ movd 8(up), %mm0
+ pmuludq %mm7, %mm0
+ movd %mm6, 4(rp)
+ psrlq $32, %mm6
+ paddq %mm0, %mm6
+ add $4, un
+ movd %mm6, 8(rp)
+ lea 16(up), up
+ js L(lm3)
+
+ psrlq $32, %mm6
+ movd %mm6, 12(rp)
+
+ inc n
+C jz L(done)
+ lea -12(up), up
+ lea 4(rp), rp
+ jmp L(ol2)
+
+C ================================================================
+ ALIGN(16)
+L(lm0): movd (up), %mm0
+ pmuludq %mm7, %mm0
+ psrlq $32, %mm6
+ lea 16(rp), rp
+L(of0): paddq %mm0, %mm6
+ movd 4(up), %mm0
+ pmuludq %mm7, %mm0
+ movd %mm6, (rp)
+ psrlq $32, %mm6
+ paddq %mm0, %mm6
+ movd 8(up), %mm0
+ pmuludq %mm7, %mm0
+ movd %mm6, 4(rp)
+ psrlq $32, %mm6
+ paddq %mm0, %mm6
+ movd 12(up), %mm0
+ pmuludq %mm7, %mm0
+ movd %mm6, 8(rp)
+ psrlq $32, %mm6
+ paddq %mm0, %mm6
+ add $4, un
+ movd %mm6, 12(rp)
+ lea 16(up), up
+ js L(lm0)
+
+ psrlq $32, %mm6
+ movd %mm6, 16(rp)
+
+ inc n
+C jz L(done)
+ lea -8(up), up
+ lea 8(rp), rp
+ jmp L(ol3)
+
+C ================================================================
+ ALIGN(16)
+L(lm1): movd -12(up), %mm0
+ pmuludq %mm7, %mm0
+ psrlq $32, %mm6
+ lea 16(rp), rp
+ paddq %mm0, %mm6
+ movd -8(up), %mm0
+ pmuludq %mm7, %mm0
+ movd %mm6, -12(rp)
+ psrlq $32, %mm6
+ paddq %mm0, %mm6
+ movd -4(up), %mm0
+ pmuludq %mm7, %mm0
+ movd %mm6, -8(rp)
+ psrlq $32, %mm6
+ paddq %mm0, %mm6
+ movd (up), %mm0
+ pmuludq %mm7, %mm0
+ movd %mm6, -4(rp)
+ psrlq $32, %mm6
+L(of1): paddq %mm0, %mm6
+ add $4, un
+ movd %mm6, (rp)
+ lea 16(up), up
+ js L(lm1)
+
+ psrlq $32, %mm6
+ movd %mm6, 4(rp)
+
+ inc n
+ jz L(done) C goes away when we add special n=2 code
+ lea -20(up), up
+ lea -4(rp), rp
+ jmp L(ol0)
+
+C ================================================================
+ ALIGN(16)
+L(lm2): movd -8(up), %mm0
+ pmuludq %mm7, %mm0
+ psrlq $32, %mm6
+ lea 16(rp), rp
+ paddq %mm0, %mm6
+ movd -4(up), %mm0
+ pmuludq %mm7, %mm0
+ movd %mm6, -8(rp)
+ psrlq $32, %mm6
+ paddq %mm0, %mm6
+ movd (up), %mm0
+ pmuludq %mm7, %mm0
+ movd %mm6, -4(rp)
+ psrlq $32, %mm6
+L(of2): paddq %mm0, %mm6
+ movd 4(up), %mm0
+ pmuludq %mm7, %mm0
+ movd %mm6, (rp)
+ psrlq $32, %mm6
+ paddq %mm0, %mm6
+ add $4, un
+ movd %mm6, 4(rp)
+ lea 16(up), up
+ js L(lm2)
+
+ psrlq $32, %mm6
+ movd %mm6, 8(rp)
+
+ inc n
+C jz L(done)
+ lea -16(up), up
+C lea (rp), rp
+C jmp L(ol1)
+
+C ================================================================
+
+C Falls through from the block above into the first outer loop.
+L(ol1): lea 4(up,n,4), up
+ movd (up), %mm7 C read next U invariant limb
+ lea 8(rp,n,4), rp
+ mov n, un
+
+ movd 4(up), %mm1
+ pmuludq %mm7, %mm1
+ sar $2, un
+ movd %mm1, %ebx
+ inc un
+ jz L(re1)
+
+ movd 8(up), %mm0
+ pmuludq %mm7, %mm0
+ xor %edx, %edx C zero edx and CF
+ jmp L(a1)
+
+L(la1): adc $0, %edx
+ add %ebx, 12(rp)
+ movd %mm0, %eax
+ pmuludq %mm7, %mm1
+ lea 16(rp), rp
+ psrlq $32, %mm0
+ adc %edx, %eax
+ movd %mm0, %edx
+ movd %mm1, %ebx
+ movd 8(up), %mm0
+ pmuludq %mm7, %mm0
+ adc $0, %edx
+ add %eax, (rp)
+L(a1): psrlq $32, %mm1
+ adc %edx, %ebx
+ movd %mm1, %edx
+ movd %mm0, %eax
+ movd 12(up), %mm1
+ pmuludq %mm7, %mm1
+ adc $0, %edx
+ add %ebx, 4(rp)
+ psrlq $32, %mm0
+ adc %edx, %eax
+ movd %mm0, %edx
+ movd %mm1, %ebx
+ lea 16(up), up
+ movd (up), %mm0
+ adc $0, %edx
+ add %eax, 8(rp)
+ psrlq $32, %mm1
+ adc %edx, %ebx
+ movd %mm1, %edx
+ pmuludq %mm7, %mm0
+ inc un
+ movd 4(up), %mm1
+ jnz L(la1)
+
+ adc un, %edx C un is zero here
+ add %ebx, 12(rp)
+ movd %mm0, %eax
+ pmuludq %mm7, %mm1
+ lea 16(rp), rp
+ psrlq $32, %mm0
+ adc %edx, %eax
+ movd %mm0, %edx
+ movd %mm1, %ebx
+ adc un, %edx
+ add %eax, (rp)
+ psrlq $32, %mm1
+ adc %edx, %ebx
+ movd %mm1, %eax
+ adc un, %eax
+ add %ebx, 4(rp)
+ adc un, %eax
+ mov %eax, 8(rp)
+
+ inc n
+
+C ================================================================
+
+L(ol0): lea (up,n,4), up
+ movd 4(up), %mm7 C read next U invariant limb
+ lea 4(rp,n,4), rp
+ mov n, un
+
+ movd 8(up), %mm0
+ pmuludq %mm7, %mm0
+ sar $2, un
+ movd 12(up), %mm1
+ movd %mm0, %eax
+ pmuludq %mm7, %mm1
+ xor %edx, %edx C zero edx and CF
+ jmp L(a0)
+
+L(la0): adc $0, %edx
+ add %ebx, 12(rp)
+ movd %mm0, %eax
+ pmuludq %mm7, %mm1
+ lea 16(rp), rp
+ psrlq $32, %mm0
+ adc %edx, %eax
+ movd %mm0, %edx
+ movd %mm1, %ebx
+ movd 8(up), %mm0
+ pmuludq %mm7, %mm0
+ adc $0, %edx
+ add %eax, (rp)
+ psrlq $32, %mm1
+ adc %edx, %ebx
+ movd %mm1, %edx
+ movd %mm0, %eax
+ movd 12(up), %mm1
+ pmuludq %mm7, %mm1
+ adc $0, %edx
+ add %ebx, 4(rp)
+L(a0): psrlq $32, %mm0
+ adc %edx, %eax
+ movd %mm0, %edx
+ movd %mm1, %ebx
+ lea 16(up), up
+ movd (up), %mm0
+ adc $0, %edx
+ add %eax, 8(rp)
+ psrlq $32, %mm1
+ adc %edx, %ebx
+ movd %mm1, %edx
+ pmuludq %mm7, %mm0
+ inc un
+ movd 4(up), %mm1
+ jnz L(la0)
+
+ adc un, %edx C un is zero here
+ add %ebx, 12(rp)
+ movd %mm0, %eax
+ pmuludq %mm7, %mm1
+ lea 16(rp), rp
+ psrlq $32, %mm0
+ adc %edx, %eax
+ movd %mm0, %edx
+ movd %mm1, %ebx
+ adc un, %edx
+ add %eax, (rp)
+ psrlq $32, %mm1
+ adc %edx, %ebx
+ movd %mm1, %eax
+ adc un, %eax
+ add %ebx, 4(rp)
+ adc un, %eax
+ mov %eax, 8(rp)
+
+ inc n
+
+C ================================================================
+
+L(ol3): lea 12(up,n,4), up
+ movd -8(up), %mm7 C read next U invariant limb
+ lea (rp,n,4), rp C put rp back
+ mov n, un
+
+ movd -4(up), %mm1
+ pmuludq %mm7, %mm1
+ sar $2, un
+ movd %mm1, %ebx
+ movd (up), %mm0
+ xor %edx, %edx C zero edx and CF
+ jmp L(a3)
+
+L(la3): adc $0, %edx
+ add %ebx, 12(rp)
+ movd %mm0, %eax
+ pmuludq %mm7, %mm1
+ lea 16(rp), rp
+ psrlq $32, %mm0
+ adc %edx, %eax
+ movd %mm0, %edx
+ movd %mm1, %ebx
+ movd 8(up), %mm0
+ pmuludq %mm7, %mm0
+ adc $0, %edx
+ add %eax, (rp)
+ psrlq $32, %mm1
+ adc %edx, %ebx
+ movd %mm1, %edx
+ movd %mm0, %eax
+ movd 12(up), %mm1
+ pmuludq %mm7, %mm1
+ adc $0, %edx
+ add %ebx, 4(rp)
+ psrlq $32, %mm0
+ adc %edx, %eax
+ movd %mm0, %edx
+ movd %mm1, %ebx
+ lea 16(up), up
+ movd (up), %mm0
+ adc $0, %edx
+ add %eax, 8(rp)
+L(a3): psrlq $32, %mm1
+ adc %edx, %ebx
+ movd %mm1, %edx
+ pmuludq %mm7, %mm0
+ inc un
+ movd 4(up), %mm1
+ jnz L(la3)
+
+ adc un, %edx C un is zero here
+ add %ebx, 12(rp)
+ movd %mm0, %eax
+ pmuludq %mm7, %mm1
+ lea 16(rp), rp
+ psrlq $32, %mm0
+ adc %edx, %eax
+ movd %mm0, %edx
+ movd %mm1, %ebx
+ adc un, %edx
+ add %eax, (rp)
+ psrlq $32, %mm1
+ adc %edx, %ebx
+ movd %mm1, %eax
+ adc un, %eax
+ add %ebx, 4(rp)
+ adc un, %eax
+ mov %eax, 8(rp)
+
+ inc n
+
+C ================================================================
+
+L(ol2): lea 8(up,n,4), up
+ movd -4(up), %mm7 C read next U invariant limb
+ lea 12(rp,n,4), rp
+ mov n, un
+
+ movd (up), %mm0
+ pmuludq %mm7, %mm0
+ xor %edx, %edx
+ sar $2, un
+ movd 4(up), %mm1
+ test un, un C clear carry
+ movd %mm0, %eax
+ pmuludq %mm7, %mm1
+ inc un
+ jnz L(a2)
+ jmp L(re2)
+
+L(la2): adc $0, %edx
+ add %ebx, 12(rp)
+ movd %mm0, %eax
+ pmuludq %mm7, %mm1
+ lea 16(rp), rp
+L(a2): psrlq $32, %mm0
+ adc %edx, %eax
+ movd %mm0, %edx
+ movd %mm1, %ebx
+ movd 8(up), %mm0
+ pmuludq %mm7, %mm0
+ adc $0, %edx
+ add %eax, (rp)
+ psrlq $32, %mm1
+ adc %edx, %ebx
+ movd %mm1, %edx
+ movd %mm0, %eax
+ movd 12(up), %mm1
+ pmuludq %mm7, %mm1
+ adc $0, %edx
+ add %ebx, 4(rp)
+ psrlq $32, %mm0
+ adc %edx, %eax
+ movd %mm0, %edx
+ movd %mm1, %ebx
+ lea 16(up), up
+ movd (up), %mm0
+ adc $0, %edx
+ add %eax, 8(rp)
+ psrlq $32, %mm1
+ adc %edx, %ebx
+ movd %mm1, %edx
+ pmuludq %mm7, %mm0
+ inc un
+ movd 4(up), %mm1
+ jnz L(la2)
+
+ adc un, %edx C un is zero here
+ add %ebx, 12(rp)
+ movd %mm0, %eax
+ pmuludq %mm7, %mm1
+ lea 16(rp), rp
+ psrlq $32, %mm0
+ adc %edx, %eax
+ movd %mm0, %edx
+ movd %mm1, %ebx
+ adc un, %edx
+ add %eax, (rp)
+ psrlq $32, %mm1
+ adc %edx, %ebx
+ movd %mm1, %eax
+ adc un, %eax
+ add %ebx, 4(rp)
+ adc un, %eax
+ mov %eax, 8(rp)
+
+ inc n
+ jmp L(ol1)
+
+C ================================================================
+C Wind-down of the outer loops: L(re2) is entered from L(ol2) and falls
+C through into L(re1), which is also entered directly from L(ol1).
+L(re2): psrlq $32, %mm0
+ movd (up), %mm7 C read next U invariant limb
+ adc %edx, %eax
+ movd %mm0, %edx
+ movd %mm1, %ebx
+ adc un, %edx
+ add %eax, (rp)
+ lea 4(rp), rp
+ psrlq $32, %mm1
+ adc %edx, %ebx
+ movd %mm1, %eax
+ movd 4(up), %mm1
+ adc un, %eax
+ add %ebx, (rp)
+ pmuludq %mm7, %mm1
+ adc un, %eax
+ mov %eax, 4(rp)
+ movd %mm1, %ebx
+
+L(re1): psrlq $32, %mm1
+ add %ebx, 4(rp)
+ movd %mm1, %eax
+ adc un, %eax
+ xor n, n C make n zeroness assumption below true
+ mov %eax, 8(rp)
+
+C Four registers are pushed on this path, so the arguments are now at
+C 20(%esp) (wp), 24(%esp) (xp) and 28(%esp) (xn). The xn slot is overwritten
+C below with (xn+1)>>1 and used as the iteration counter.
+L(done): C n is zero here
+ mov 24(%esp), up
+ mov 28(%esp), %eax
+
+ movd (up), %mm0
+ inc %eax
+ pmuludq %mm0, %mm0
+ lea 4(up), up
+ mov 20(%esp), rp
+ shr %eax
+ movd %mm0, (rp)
+ psrlq $32, %mm0
+ lea -12(rp), rp
+ mov %eax, 28(%esp)
+C CF is the bit shifted out of xn+1 by "shr"; it is clear when xn is odd.
+ jnc L(odd)
+
+ movd %mm0, %ebp
+ movd (up), %mm0
+ lea 8(rp), rp
+ pmuludq %mm0, %mm0
+ lea -4(up), up
+ add 8(rp), %ebp
+ movd %mm0, %edx
+ adc 12(rp), %edx
+ rcr n
+ jmp L(ent)
+
+C Each iteration squares two limbs of up. Two carry chains are interleaved;
+C n is used to park one carry bit ("rcr n") while the other chain runs, and
+C "adc n, n" brings it back.
+C ALIGN(16) C alignment seems irrelevant
+L(top): movd (up), %mm1
+ adc n, n
+ movd %mm0, %eax
+ pmuludq %mm1, %mm1
+ movd 4(up), %mm0
+ adc (rp), %eax
+ movd %mm1, %ebx
+ pmuludq %mm0, %mm0
+ psrlq $32, %mm1
+ adc 4(rp), %ebx
+ movd %mm1, %ebp
+ movd %mm0, %edx
+ adc 8(rp), %ebp
+ adc 12(rp), %edx
+ rcr n C FIXME: isn't this awfully slow on atom???
+ adc %eax, (rp)
+ adc %ebx, 4(rp)
+L(ent): lea 8(up), up
+ adc %ebp, 8(rp)
+ psrlq $32, %mm0
+ adc %edx, 12(rp)
+L(odd): decl 28(%esp)
+ lea 16(rp), rp
+ jnz L(top)
+
+L(end): adc n, n
+ movd %mm0, %eax
+ adc n, %eax
+ mov %eax, (rp)
+
+L(rtn): emms
+ pop %ebp
+ pop %ebx
+ pop %esi
+ pop %edi
+ ret
+
+C xn = 1: store the 64-bit square of up[0] at the original rp (rp was advanced
+C by one limb in the prologue). Only %edi and %esi were pushed on this path.
+L(one): pmuludq %mm7, %mm7
+ movq %mm7, -4(rp)
+ emms
+ pop %esi
+ pop %edi
+ ret
+EPILOGUE()
--- /dev/null
+dnl Intel Atom mpn_sublsh1_n -- rp[] = up[] - (vp[] << 1)
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+MULFUNC_PROLOGUE(mpn_sublsh1_n_ip1)
+include_mpn(`x86/k7/sublsh1_n.asm')
--- /dev/null
+dnl Intel Atom mpn_addlsh2_n/mpn_sublsh2_n -- rp[] = up[] +- (vp[] << 2).
+
+dnl Contributed to the GNU project by Marco Bodrato.
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+dnl Shift counts handed to the shared code included at the end of this file.
+dnl LSH is the left shift of 2 named in the file header; RSH is presumably the
+dnl complementary right shift for 32-bit limbs (32 - LSH) -- TODO confirm
+dnl against x86/atom/aorslshC_n.asm.
+define(LSH, 2)
+define(RSH, 30)
+
+dnl Select instruction mnemonics and entry point names for the operation being
+dnl built; exactly one of the two OPERATION_ symbols is expected, otherwise
+dnl m4_error is invoked.
+ifdef(`OPERATION_addlsh2_n', `
+ define(M4_inst, adcl)
+ define(M4_opp, subl)
+ define(M4_function, mpn_addlsh2_n)
+ define(M4_function_c, mpn_addlsh2_nc)
+ define(M4_ip_function_c, mpn_addlsh2_nc_ip1)
+ define(M4_ip_function, mpn_addlsh2_n_ip1)
+',`ifdef(`OPERATION_sublsh2_n', `
+ define(M4_inst, sbbl)
+ define(M4_opp, addl)
+ define(M4_function, mpn_sublsh2_n)
+ define(M4_function_c, mpn_sublsh2_nc)
+ define(M4_ip_function_c, mpn_sublsh2_nc_ip1)
+ define(M4_ip_function, mpn_sublsh2_n_ip1)
+',`m4_error(`Need OPERATION_addlsh2_n or OPERATION_sublsh2_n
+')')')
+
+dnl NOTE(review): only the sublsh2 names are listed here although the ifdef
+dnl above also handles OPERATION_addlsh2_n -- confirm this is intended.
+MULFUNC_PROLOGUE(mpn_sublsh2_n mpn_sublsh2_nc mpn_sublsh2_n_ip1 mpn_sublsh2_nc_ip1)
+
+include_mpn(`x86/atom/aorslshC_n.asm')
dnl x86 mpn_bdiv_dbm1.
-dnl Copyright 2008 Free Software Foundation, Inc.
+dnl Copyright 2008, 2011 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
include(`../config.m4')
-C cycles/limb
-C K7: 3.5
-C P4 m0: ?
-C P4 m1: ?
-C P4 m2: 13.67
-C P4 m3: ?
-C P4 m4: ?
-C P6-13: 5.1
+C cycles/limb
+C P5
+C P6 model 0-8,10-12
+C P6 model 9 (Banias)
+C P6 model 13 (Dothan) 5.1
+C P4 model 0 (Willamette)
+C P4 model 1 (?)
+C P4 model 2 (Northwood) 13.67
+C P4 model 3 (Prescott)
+C P4 model 4 (Nocona)
+C Intel Atom
+C AMD K6
+C AMD K7 3.5
+C AMD K8
+C AMD K10
+
C TODO
C * Optimize for more x86 processors
cmp $2, %eax
jc L(b1)
jz L(b2)
- jmp L(b3)
+
+L(b3): lea -8(%esi), %esi
+ lea 8(%edi), %edi
+ add $-3, %ebp
+ jmp L(3)
L(b0): mov 4(%esi), %eax
lea -4(%esi), %esi
lea 12(%edi), %edi
add $-4, %ebp
jmp L(0)
-L(b3):
- lea -8(%esi), %esi
- lea 8(%edi), %edi
- add $-3, %ebp
- jmp L(3)
L(b2): mov 4(%esi), %eax
lea 4(%esi), %esi
jmp L(2)
ALIGN(8)
-L(top):
- mov 4(%esi), %eax
+L(top): mov 4(%esi), %eax
mul %ecx
lea 16(%edi), %edi
sub %eax, %ebx
--- /dev/null
+dnl x86 mpn_bdiv_q_1 -- mpn by limb exact division.
+
+dnl Copyright 2001, 2002, 2007, 2011 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl Rearranged from mpn/x86/dive_1.asm by Marco Bodrato.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C cycles/limb
+C P54 30.0
+C P55 29.0
+C P6 13.0 odd divisor, 12.0 even (strangely)
+C K6 14.0
+C K7 12.0
+C P4 42.0
+
+MULFUNC_PROLOGUE(mpn_bdiv_q_1 mpn_pi1_bdiv_q_1)
+
+defframe(PARAM_SHIFT, 24)
+defframe(PARAM_INVERSE,20)
+defframe(PARAM_DIVISOR,16)
+defframe(PARAM_SIZE, 12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+
+dnl re-use parameter space
+define(VAR_INVERSE,`PARAM_SRC')
+
+ TEXT
+
+C mp_limb_t
+C mpn_pi1_bdiv_q_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_limb_t divisor,
+C mp_limb_t inverse, int shift)
+C
+C Register state on reaching L(common), which mpn_bdiv_q_1 also jumps to:
+C ecx = shift, eax = inverse, ebp = size, with ebp and ebx already pushed.
+C VAR_INVERSE reuses the PARAM_SRC stack slot, so src is loaded into esi
+C before the inverse is stored there.
+
+ ALIGN(16)
+PROLOGUE(mpn_pi1_bdiv_q_1)
+deflit(`FRAME',0)
+
+ movl PARAM_SHIFT, %ecx
+ pushl %ebp FRAME_pushl()
+
+ movl PARAM_INVERSE, %eax
+ movl PARAM_SIZE, %ebp
+ pushl %ebx FRAME_pushl()
+L(common):
+ pushl %edi FRAME_pushl()
+ pushl %esi FRAME_pushl()
+
+ movl PARAM_SRC, %esi
+ movl PARAM_DST, %edi
+
+ leal (%esi,%ebp,4), %esi C src end
+ leal (%edi,%ebp,4), %edi C dst end
+ negl %ebp C -size
+
+ movl %eax, VAR_INVERSE
+ movl (%esi,%ebp,4), %eax C src[0]
+
+ xorl %ebx, %ebx
+ xorl %edx, %edx
+
+ incl %ebp
+C size == 1 goes straight to L(one) with eax = src[0], ebx = 0 and edx = 0.
+ jz L(one)
+
+ movl (%esi,%ebp,4), %edx C src[1]
+
+ shrdl( %cl, %edx, %eax)
+
+ movl VAR_INVERSE, %edx
+ jmp L(entry)
+
+
+ ALIGN(8)
+ nop C k6 code alignment
+ nop
+L(top):
+ C eax q
+ C ebx carry bit, 0 or -1
+ C ecx shift
+ C edx carry limb
+ C esi src end
+ C edi dst end
+ C ebp counter, limbs, negative
+
+ movl -4(%esi,%ebp,4), %eax
+ subl %ebx, %edx C accumulate carry bit
+
+ movl (%esi,%ebp,4), %ebx
+
+ shrdl( %cl, %ebx, %eax)
+
+ subl %edx, %eax C apply carry limb
+ movl VAR_INVERSE, %edx
+
+ sbbl %ebx, %ebx
+
+L(entry):
+ imull %edx, %eax
+
+ movl %eax, -4(%edi,%ebp,4)
+ movl PARAM_DIVISOR, %edx
+
+ mull %edx
+
+ incl %ebp
+ jnz L(top)
+
+
+ movl -4(%esi), %eax C src high limb
+L(one):
+ shrl %cl, %eax
+ popl %esi FRAME_popl()
+
+ addl %ebx, %eax C apply carry bit
+
+ subl %edx, %eax C apply carry limb
+
+ imull VAR_INVERSE, %eax
+
+ movl %eax, -4(%edi)
+
+ popl %edi
+ popl %ebx
+ popl %ebp
+
+ ret
+
+EPILOGUE()
+
+C mp_limb_t mpn_bdiv_q_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C mp_limb_t divisor);
+C
+C Computes the shift count (number of low zero bits of divisor) and the
+C inverse of the odd part of divisor, stores the odd part back into
+C PARAM_DIVISOR, and then jumps to L(common) in mpn_pi1_bdiv_q_1 with
+C ecx = shift, eax = inverse, ebp = size and ebp, ebx pushed.
+
+ ALIGN(16)
+PROLOGUE(mpn_bdiv_q_1)
+deflit(`FRAME',0)
+
+ movl PARAM_DIVISOR, %eax
+ pushl %ebp FRAME_pushl()
+
+ movl $-1, %ecx C shift count
+ movl PARAM_SIZE, %ebp
+
+ pushl %ebx FRAME_pushl()
+
+C Shift the divisor right until a 1 bit is shifted out, counting in ecx.
+C NOTE(review): a zero divisor would never leave this loop -- presumably
+C callers guarantee divisor != 0; confirm.
+L(strip_twos):
+ incl %ecx
+
+ shrl %eax
+ jnc L(strip_twos)
+
+ leal 1(%eax,%eax), %ebx C d without twos
+ andl $127, %eax C d/2, 7 bits
+
+ifdef(`PIC',`
+ LEA( binvert_limb_table, %edx)
+ movzbl (%eax,%edx), %eax C inv 8 bits
+',`
+ movzbl binvert_limb_table(%eax), %eax C inv 8 bits
+')
+
+C Two refinement steps of the form inv = 2*inv - inv*inv*d follow, starting
+C from the 8-bit table value.
+ leal (%eax,%eax), %edx C 2*inv
+ movl %ebx, PARAM_DIVISOR C d without twos
+ imull %eax, %eax C inv*inv
+ imull %ebx, %eax C inv*inv*d
+ subl %eax, %edx C inv = 2*inv - inv*inv*d
+
+ leal (%edx,%edx), %eax C 2*inv
+ imull %edx, %edx C inv*inv
+ imull %ebx, %edx C inv*inv*d
+ subl %edx, %eax C inv = 2*inv - inv*inv*d
+
+ ASSERT(e,` C expect d*inv == 1 mod 2^GMP_LIMB_BITS
+ pushl %eax FRAME_pushl()
+ imull PARAM_DIVISOR, %eax
+ cmpl $1, %eax
+ popl %eax FRAME_popl()')
+
+ jmp L(common)
+EPILOGUE()
+
--- /dev/null
+/* x86/bobcat gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007,
+2008, 2009, 2010, 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#define GMP_LIMB_BITS 32
+#define BYTES_PER_MP_LIMB 4
+
+/* Generated by tuneup.c, 2011-11-25, gcc 4.2 */
+
+#define MOD_1_NORM_THRESHOLD 0 /* always */
+#define MOD_1_UNNORM_THRESHOLD 0 /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 9
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 3
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 12
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD 23
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 13
+#define USE_PREINV_DIVREM_1 1 /* native */
+#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
+#define DIVEXACT_1_THRESHOLD 0 /* always (native) */
+#define BMOD_1_TO_MOD_1_THRESHOLD 42
+
+#define MUL_TOOM22_THRESHOLD 28
+#define MUL_TOOM33_THRESHOLD 90
+#define MUL_TOOM44_THRESHOLD 147
+#define MUL_TOOM6H_THRESHOLD 274
+#define MUL_TOOM8H_THRESHOLD 454
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD 89
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 122
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 93
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD 113
+
+#define SQR_BASECASE_THRESHOLD 0 /* always (native) */
+#define SQR_TOOM2_THRESHOLD 38
+#define SQR_TOOM3_THRESHOLD 89
+#define SQR_TOOM4_THRESHOLD 220
+#define SQR_TOOM6_THRESHOLD 303
+#define SQR_TOOM8_THRESHOLD 454
+
+#define MULMID_TOOM42_THRESHOLD 76
+
+#define MULMOD_BNM1_THRESHOLD 19
+#define SQRMOD_BNM1_THRESHOLD 23
+
+#define POWM_SEC_TABLE 4,14,290,357,2178
+
+#define MUL_FFT_MODF_THRESHOLD 888 /* k = 6 */
+#define MUL_FFT_TABLE3 \
+ { { 888, 6}, { 25, 7}, { 13, 6}, { 27, 7}, \
+ { 15, 6}, { 33, 7}, { 17, 6}, { 35, 7}, \
+ { 19, 6}, { 39, 7}, { 23, 6}, { 47, 7}, \
+ { 27, 8}, { 15, 7}, { 31, 6}, { 63, 7}, \
+ { 35, 8}, { 19, 7}, { 41, 8}, { 23, 7}, \
+ { 49, 8}, { 31, 7}, { 63, 8}, { 39, 7}, \
+ { 79, 8}, { 43, 9}, { 23, 8}, { 51, 9}, \
+ { 31, 8}, { 67, 9}, { 39, 8}, { 79, 9}, \
+ { 47, 8}, { 95, 9}, { 55,10}, { 31, 9}, \
+ { 63, 8}, { 127, 9}, { 79,10}, { 47, 9}, \
+ { 95,11}, { 31,10}, { 63, 9}, { 135,10}, \
+ { 79, 9}, { 159,10}, { 95, 9}, { 191,11}, \
+ { 63,10}, { 127, 9}, { 255,10}, { 159,11}, \
+ { 95,10}, { 191,12}, { 63,11}, { 127,10}, \
+ { 255, 9}, { 511,10}, { 271, 9}, { 543,11}, \
+ { 159,10}, { 319, 9}, { 671,11}, { 191,10}, \
+ { 383, 9}, { 767,12}, { 4096,13}, { 8192,14}, \
+ { 16384,15}, { 32768,16} }
+#define MUL_FFT_TABLE3_SIZE 70
+#define MUL_FFT_THRESHOLD 7552
+
+#define SQR_FFT_MODF_THRESHOLD 723 /* k = 5 */
+#define SQR_FFT_TABLE3 \
+ { { 723, 5}, { 25, 6}, { 13, 5}, { 28, 6}, \
+ { 15, 5}, { 31, 6}, { 27, 7}, { 15, 6}, \
+ { 33, 7}, { 17, 6}, { 35, 7}, { 19, 6}, \
+ { 39, 7}, { 23, 6}, { 47, 7}, { 27, 8}, \
+ { 15, 7}, { 31, 6}, { 63, 7}, { 35, 8}, \
+ { 19, 7}, { 41, 8}, { 23, 7}, { 47, 8}, \
+ { 31, 7}, { 63, 8}, { 43, 9}, { 23, 8}, \
+ { 47, 7}, { 95, 8}, { 51, 9}, { 31, 8}, \
+ { 67, 9}, { 39, 8}, { 79, 9}, { 47, 8}, \
+ { 95,10}, { 31, 9}, { 63, 8}, { 127, 9}, \
+ { 79,10}, { 47, 9}, { 95,11}, { 31,10}, \
+ { 63, 9}, { 135,10}, { 79, 9}, { 159,10}, \
+ { 95, 9}, { 191,11}, { 63,10}, { 127, 9}, \
+ { 255,10}, { 159,11}, { 95,10}, { 191,12}, \
+ { 63,11}, { 127,10}, { 255, 9}, { 543,11}, \
+ { 159, 9}, { 671,11}, { 191,10}, { 383, 9}, \
+ { 799,12}, { 4096,13}, { 8192,14}, { 16384,15}, \
+ { 32768,16} }
+#define SQR_FFT_TABLE3_SIZE 69
+#define SQR_FFT_THRESHOLD 5760
+
+#define MULLO_BASECASE_THRESHOLD 5
+#define MULLO_DC_THRESHOLD 45
+#define MULLO_MUL_N_THRESHOLD 13463
+
+#define DC_DIV_QR_THRESHOLD 75
+#define DC_DIVAPPR_Q_THRESHOLD 216
+#define DC_BDIV_QR_THRESHOLD 67
+#define DC_BDIV_Q_THRESHOLD 143
+
+#define INV_MULMOD_BNM1_THRESHOLD 75
+#define INV_NEWTON_THRESHOLD 244
+#define INV_APPR_THRESHOLD 228
+
+#define BINV_NEWTON_THRESHOLD 276
+#define REDC_1_TO_REDC_N_THRESHOLD 71
+
+#define MU_DIV_QR_THRESHOLD 1858
+#define MU_DIVAPPR_Q_THRESHOLD 1822
+#define MUPI_DIV_QR_THRESHOLD 122
+#define MU_BDIV_QR_THRESHOLD 1787
+#define MU_BDIV_Q_THRESHOLD 1787
+
+#define MATRIX22_STRASSEN_THRESHOLD 19
+#define HGCD_THRESHOLD 78
+#define HGCD_APPR_THRESHOLD 55
+#define HGCD_REDUCE_THRESHOLD 4633
+#define GCD_DC_THRESHOLD 474
+#define GCDEXT_DC_THRESHOLD 345
+#define JACOBI_BASE_METHOD 4
+
+#define GET_STR_DC_THRESHOLD 14
+#define GET_STR_PRECOMPUTE_THRESHOLD 31
+#define SET_STR_DC_THRESHOLD 270
+#define SET_STR_PRECOMPUTE_THRESHOLD 812
C cycles/limb startup (approx)
-C P5: 1.0 40
-C P6 2.4 70
-C K6 1.0 55
-C K7: 1.3 75
-C P4: 2.6 175
+C P5 1.0 40
+C P6 2.4 70
+C K6 1.0 55
+C K7 1.3 75
+C P4 2.6 175
C
C (Startup time includes some function call overheads.)
C cycles/limb startup (approx)
-C P5: 1.0 35
-C P6 0.75 45
-C K6 1.0 30
-C K7: 1.3 65
-C P4: 1.0 120
+C P5 1.0 35
+C P6 0.75 45
+C K6 1.0 30
+C K7 1.3 65
+C P4 1.0 120
C
C (Startup time includes some function call overheads.)
--- /dev/null
+/* x86/core2 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007,
+2008, 2009, 2010, 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#define GMP_LIMB_BITS 32
+#define BYTES_PER_MP_LIMB 4
+
+/* Generated by tuneup.c, 2011-11-25, gcc 4.2 */
+
+#define MOD_1_NORM_THRESHOLD 4
+#define MOD_1_UNNORM_THRESHOLD 4
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 5
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 4
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 9
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD 0 /* never mpn_mod_1s_2p */
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 7
+#define USE_PREINV_DIVREM_1 1 /* native */
+#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
+#define DIVEXACT_1_THRESHOLD 0 /* always (native) */
+#define BMOD_1_TO_MOD_1_THRESHOLD 19
+
+#define MUL_TOOM22_THRESHOLD 24
+#define MUL_TOOM33_THRESHOLD 93
+#define MUL_TOOM44_THRESHOLD 228
+#define MUL_TOOM6H_THRESHOLD 294
+#define MUL_TOOM8H_THRESHOLD 458
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD 90
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 114
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 89
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD 96
+
+#define SQR_BASECASE_THRESHOLD 0 /* always (native) */
+#define SQR_TOOM2_THRESHOLD 34
+#define SQR_TOOM3_THRESHOLD 116
+#define SQR_TOOM4_THRESHOLD 178
+#define SQR_TOOM6_THRESHOLD 262
+#define SQR_TOOM8_THRESHOLD 597
+
+#define MULMID_TOOM42_THRESHOLD 70
+
+#define MULMOD_BNM1_THRESHOLD 20
+#define SQRMOD_BNM1_THRESHOLD 19
+
+#define POWM_SEC_TABLE 6,26,262,991,2212
+
+#define MUL_FFT_MODF_THRESHOLD 690 /* k = 5 */
+#define MUL_FFT_TABLE3 \
+ { { 690, 5}, { 25, 6}, { 13, 5}, { 27, 6}, \
+ { 15, 5}, { 31, 6}, { 25, 7}, { 13, 6}, \
+ { 27, 7}, { 15, 6}, { 33, 7}, { 17, 6}, \
+ { 35, 7}, { 19, 6}, { 39, 7}, { 23, 6}, \
+ { 47, 7}, { 27, 8}, { 15, 7}, { 35, 8}, \
+ { 19, 7}, { 41, 8}, { 23, 7}, { 47, 8}, \
+ { 31, 7}, { 63, 8}, { 39, 9}, { 23, 8}, \
+ { 51, 9}, { 31, 8}, { 67, 9}, { 39, 8}, \
+ { 79, 9}, { 47, 8}, { 95,10}, { 31, 9}, \
+ { 63, 8}, { 127, 9}, { 79,10}, { 47, 9}, \
+ { 95,11}, { 31,10}, { 63, 9}, { 135,10}, \
+ { 79, 9}, { 159,10}, { 95, 9}, { 191,11}, \
+ { 63,10}, { 127, 9}, { 255,10}, { 159,11}, \
+ { 95,10}, { 191,12}, { 63,11}, { 127,10}, \
+ { 271, 9}, { 543,10}, { 287,11}, { 159,10}, \
+ { 319, 9}, { 639,11}, { 191,10}, { 383, 9}, \
+ { 799,11}, { 223,12}, { 4096,13}, { 8192,14}, \
+ { 16384,15}, { 32768,16} }
+#define MUL_FFT_TABLE3_SIZE 70
+#define MUL_FFT_THRESHOLD 7552
+
+#define SQR_FFT_MODF_THRESHOLD 630 /* k = 5 */
+#define SQR_FFT_TABLE3 \
+ { { 630, 5}, { 25, 6}, { 13, 5}, { 28, 6}, \
+ { 15, 5}, { 31, 6}, { 25, 7}, { 13, 6}, \
+ { 27, 7}, { 15, 6}, { 33, 7}, { 17, 6}, \
+ { 35, 7}, { 19, 6}, { 39, 7}, { 23, 6}, \
+ { 47, 7}, { 27, 8}, { 15, 7}, { 35, 8}, \
+ { 19, 7}, { 41, 8}, { 23, 7}, { 49, 8}, \
+ { 27, 9}, { 15, 8}, { 31, 7}, { 63, 8}, \
+ { 39, 9}, { 23, 8}, { 51, 9}, { 31, 8}, \
+ { 67, 9}, { 39, 8}, { 79, 9}, { 47, 8}, \
+ { 95, 9}, { 55,10}, { 31, 9}, { 79,10}, \
+ { 47, 9}, { 95,11}, { 31,10}, { 63, 9}, \
+ { 127,10}, { 79, 9}, { 159,10}, { 95,11}, \
+ { 63,10}, { 159,11}, { 95,10}, { 191,12}, \
+ { 63,11}, { 127,10}, { 271, 9}, { 543,11}, \
+ { 159,10}, { 319, 9}, { 671, 8}, { 1343,11}, \
+ { 191,10}, { 383, 9}, { 799,12}, { 4096,13}, \
+ { 8192,14}, { 16384,15}, { 32768,16} }
+#define SQR_FFT_TABLE3_SIZE 67
+#define SQR_FFT_THRESHOLD 5760
+
+#define MULLO_BASECASE_THRESHOLD 0 /* always */
+#define MULLO_DC_THRESHOLD 30
+#define MULLO_MUL_N_THRESHOLD 13463
+
+#define DC_DIV_QR_THRESHOLD 15
+#define DC_DIVAPPR_Q_THRESHOLD 49
+#define DC_BDIV_QR_THRESHOLD 76
+#define DC_BDIV_Q_THRESHOLD 190
+
+#define INV_MULMOD_BNM1_THRESHOLD 46
+#define INV_NEWTON_THRESHOLD 35
+#define INV_APPR_THRESHOLD 35
+
+#define BINV_NEWTON_THRESHOLD 324
+#define REDC_1_TO_REDC_N_THRESHOLD 83
+
+#define MU_DIV_QR_THRESHOLD 1442
+#define MU_DIVAPPR_Q_THRESHOLD 1099
+#define MUPI_DIV_QR_THRESHOLD 0 /* always */
+#define MU_BDIV_QR_THRESHOLD 1589
+#define MU_BDIV_Q_THRESHOLD 1718
+
+#define MATRIX22_STRASSEN_THRESHOLD 31
+#define HGCD_THRESHOLD 118
+#define HGCD_APPR_THRESHOLD 149
+#define HGCD_REDUCE_THRESHOLD 3524
+#define GCD_DC_THRESHOLD 351
+#define GCDEXT_DC_THRESHOLD 309
+#define JACOBI_BASE_METHOD 4
+
+#define GET_STR_DC_THRESHOLD 13
+#define GET_STR_PRECOMPUTE_THRESHOLD 26
+#define SET_STR_DC_THRESHOLD 517
+#define SET_STR_PRECOMPUTE_THRESHOLD 1402
--- /dev/null
+/* x86/coreinhm gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007,
+2008, 2009, 2010, 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#define GMP_LIMB_BITS 32
+#define BYTES_PER_MP_LIMB 4
+
+/* Generated by tuneup.c, 2011-11-25, gcc 4.5 */
+
+#define MOD_1_NORM_THRESHOLD 24
+#define MOD_1_UNNORM_THRESHOLD 15
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 8
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 7
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 11
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD 0 /* never mpn_mod_1s_2p */
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 5
+#define USE_PREINV_DIVREM_1 1 /* native */
+#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
+#define DIVEXACT_1_THRESHOLD 0 /* always (native) */
+#define BMOD_1_TO_MOD_1_THRESHOLD 16
+
+#define MUL_TOOM22_THRESHOLD 28
+#define MUL_TOOM33_THRESHOLD 81
+#define MUL_TOOM44_THRESHOLD 214
+#define MUL_TOOM6H_THRESHOLD 306
+#define MUL_TOOM8H_THRESHOLD 454
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD 137
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 148
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 132
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD 131
+
+#define SQR_BASECASE_THRESHOLD 0 /* always (native) */
+#define SQR_TOOM2_THRESHOLD 42
+#define SQR_TOOM3_THRESHOLD 149
+#define SQR_TOOM4_THRESHOLD 226
+#define SQR_TOOM6_THRESHOLD 333
+#define SQR_TOOM8_THRESHOLD 494
+
+#define MULMID_TOOM42_THRESHOLD 78
+
+#define MULMOD_BNM1_THRESHOLD 17
+#define SQRMOD_BNM1_THRESHOLD 21
+
+#define POWM_SEC_TABLE 2,33,294,1298,2870
+
+#define MUL_FFT_MODF_THRESHOLD 606 /* k = 5 */
+#define MUL_FFT_TABLE3 \
+ { { 606, 5}, { 28, 6}, { 15, 5}, { 33, 6}, \
+ { 29, 7}, { 15, 6}, { 33, 7}, { 17, 6}, \
+ { 36, 7}, { 19, 6}, { 39, 7}, { 23, 6}, \
+ { 47, 7}, { 29, 8}, { 15, 7}, { 37, 8}, \
+ { 19, 7}, { 41, 8}, { 23, 7}, { 47, 8}, \
+ { 31, 7}, { 63, 8}, { 43, 9}, { 23, 8}, \
+ { 51, 9}, { 31, 8}, { 67, 9}, { 39, 8}, \
+ { 79, 9}, { 47, 8}, { 95,10}, { 31, 9}, \
+ { 79,10}, { 47, 9}, { 95,11}, { 31,10}, \
+ { 63, 9}, { 135,10}, { 79, 9}, { 159,10}, \
+ { 95, 9}, { 191,11}, { 63,10}, { 159,11}, \
+ { 95,10}, { 191,12}, { 63,11}, { 127,10}, \
+ { 255, 9}, { 511,10}, { 271,11}, { 159,10}, \
+ { 319, 9}, { 639,10}, { 335,11}, { 191,10}, \
+ { 383, 9}, { 767,10}, { 399,12}, { 4096,13}, \
+ { 8192,14}, { 16384,15}, { 32768,16} }
+#define MUL_FFT_TABLE3_SIZE 63
+#define MUL_FFT_THRESHOLD 6784
+
+#define SQR_FFT_MODF_THRESHOLD 505 /* k = 5 */
+#define SQR_FFT_TABLE3 \
+ { { 505, 5}, { 28, 6}, { 15, 5}, { 33, 6}, \
+ { 17, 5}, { 35, 6}, { 29, 7}, { 15, 6}, \
+ { 33, 7}, { 17, 6}, { 36, 7}, { 19, 6}, \
+ { 39, 7}, { 23, 6}, { 47, 7}, { 29, 8}, \
+ { 15, 7}, { 35, 8}, { 19, 7}, { 41, 8}, \
+ { 23, 7}, { 47, 8}, { 27, 7}, { 55, 8}, \
+ { 31, 7}, { 63, 8}, { 43, 9}, { 23, 8}, \
+ { 55, 9}, { 31, 8}, { 67, 9}, { 39, 8}, \
+ { 79, 9}, { 47, 8}, { 95, 9}, { 55,10}, \
+ { 31, 9}, { 79,10}, { 47, 9}, { 95,11}, \
+ { 31,10}, { 63, 9}, { 135,10}, { 79, 9}, \
+ { 159,10}, { 95,11}, { 63,10}, { 143, 9}, \
+ { 287,10}, { 159,11}, { 95,12}, { 63,11}, \
+ { 127,10}, { 255, 9}, { 511,10}, { 271, 9}, \
+ { 543,10}, { 287,11}, { 159,10}, { 319, 9}, \
+ { 639,10}, { 335, 9}, { 671,10}, { 351,11}, \
+ { 191,10}, { 383, 9}, { 767,10}, { 399, 9}, \
+ { 799,10}, { 415,12}, { 4096,13}, { 8192,14}, \
+ { 16384,15}, { 32768,16} }
+#define SQR_FFT_TABLE3_SIZE 74
+#define SQR_FFT_THRESHOLD 4800
+
+#define MULLO_BASECASE_THRESHOLD 0 /* always */
+#define MULLO_DC_THRESHOLD 35
+#define MULLO_MUL_N_THRESHOLD 13463
+
+#define DC_DIV_QR_THRESHOLD 21
+#define DC_DIVAPPR_Q_THRESHOLD 42
+#define DC_BDIV_QR_THRESHOLD 84
+#define DC_BDIV_Q_THRESHOLD 156
+
+#define INV_MULMOD_BNM1_THRESHOLD 54
+#define INV_NEWTON_THRESHOLD 17
+#define INV_APPR_THRESHOLD 17
+
+#define BINV_NEWTON_THRESHOLD 348
+#define REDC_1_TO_REDC_N_THRESHOLD 83
+
+#define MU_DIV_QR_THRESHOLD 979
+#define MU_DIVAPPR_Q_THRESHOLD 501
+#define MUPI_DIV_QR_THRESHOLD 0 /* always */
+#define MU_BDIV_QR_THRESHOLD 1589
+#define MU_BDIV_Q_THRESHOLD 1787
+
+#define MATRIX22_STRASSEN_THRESHOLD 20
+#define HGCD_THRESHOLD 57
+#define HGCD_APPR_THRESHOLD 50
+#define HGCD_REDUCE_THRESHOLD 3524
+#define GCD_DC_THRESHOLD 253
+#define GCDEXT_DC_THRESHOLD 233
+#define JACOBI_BASE_METHOD 4
+
+#define GET_STR_DC_THRESHOLD 12
+#define GET_STR_PRECOMPUTE_THRESHOLD 20
+#define SET_STR_DC_THRESHOLD 127
+#define SET_STR_PRECOMPUTE_THRESHOLD 646
--- /dev/null
+/* x86/coreisbr gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007,
+2008, 2009, 2010, 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#define GMP_LIMB_BITS 32
+#define BYTES_PER_MP_LIMB 4
+
+/* Generated by tuneup.c, 2011-11-24, gcc 4.2 */
+
+#define MOD_1_NORM_THRESHOLD 24
+#define MOD_1_UNNORM_THRESHOLD 25
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 7
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 7
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 11
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD 0 /* never mpn_mod_1s_2p */
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 3
+#define USE_PREINV_DIVREM_1 1 /* native */
+#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
+#define DIVEXACT_1_THRESHOLD 0 /* always (native) */
+#define BMOD_1_TO_MOD_1_THRESHOLD 18
+
+#define MUL_TOOM22_THRESHOLD 28
+#define MUL_TOOM33_THRESHOLD 101
+#define MUL_TOOM44_THRESHOLD 244
+#define MUL_TOOM6H_THRESHOLD 351
+#define MUL_TOOM8H_THRESHOLD 547
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD 109
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 183
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 109
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD 109
+
+#define SQR_BASECASE_THRESHOLD 0 /* always (native) */
+#define SQR_TOOM2_THRESHOLD 48
+#define SQR_TOOM3_THRESHOLD 165
+#define SQR_TOOM4_THRESHOLD 276
+#define SQR_TOOM6_THRESHOLD 366
+#define SQR_TOOM8_THRESHOLD 572
+
+#define MULMID_TOOM42_THRESHOLD 98
+
+#define MULMOD_BNM1_THRESHOLD 20
+#define SQRMOD_BNM1_THRESHOLD 23
+
+#define POWM_SEC_TABLE 2,27,258,1052
+
+#define MUL_FFT_MODF_THRESHOLD 716 /* k = 5 */
+#define MUL_FFT_TABLE3 \
+ { { 716, 5}, { 27, 6}, { 15, 5}, { 31, 6}, \
+ { 27, 7}, { 15, 6}, { 33, 7}, { 17, 6}, \
+ { 35, 7}, { 19, 6}, { 39, 7}, { 23, 6}, \
+ { 47, 7}, { 27, 8}, { 15, 7}, { 31, 6}, \
+ { 63, 7}, { 35, 8}, { 19, 7}, { 41, 8}, \
+ { 23, 7}, { 51, 8}, { 31, 7}, { 63, 8}, \
+ { 43, 9}, { 23, 8}, { 55, 9}, { 31, 8}, \
+ { 71, 9}, { 39, 8}, { 79, 9}, { 47, 8}, \
+ { 95, 9}, { 55,10}, { 31, 9}, { 63, 8}, \
+ { 127, 9}, { 79,10}, { 47, 9}, { 95,11}, \
+ { 31,10}, { 63, 9}, { 135,10}, { 79, 9}, \
+ { 159,10}, { 95, 9}, { 191,11}, { 63,10}, \
+ { 127, 9}, { 255,10}, { 159,11}, { 95,10}, \
+ { 191,12}, { 63,11}, { 127,10}, { 255, 9}, \
+ { 511,10}, { 271,11}, { 159,10}, { 319, 9}, \
+ { 639,11}, { 191,10}, { 383, 9}, { 767,11}, \
+ { 223,12}, { 4096,13}, { 8192,14}, { 16384,15}, \
+ { 32768,16} }
+#define MUL_FFT_TABLE3_SIZE 69
+#define MUL_FFT_THRESHOLD 7552
+
+#define SQR_FFT_MODF_THRESHOLD 595 /* k = 5 */
+#define SQR_FFT_TABLE3 \
+ { { 595, 5}, { 28, 6}, { 15, 5}, { 31, 6}, \
+ { 29, 7}, { 15, 6}, { 33, 7}, { 17, 6}, \
+ { 35, 7}, { 19, 6}, { 39, 7}, { 23, 6}, \
+ { 47, 7}, { 35, 8}, { 19, 7}, { 43, 8}, \
+ { 23, 7}, { 49, 8}, { 31, 7}, { 63, 8}, \
+ { 43, 9}, { 23, 8}, { 55, 9}, { 31, 8}, \
+ { 67, 9}, { 39, 8}, { 79, 9}, { 47, 8}, \
+ { 95, 9}, { 55,10}, { 31, 9}, { 63, 8}, \
+ { 127, 9}, { 79,10}, { 47, 9}, { 95,11}, \
+ { 31,10}, { 63, 9}, { 135,10}, { 79, 9}, \
+ { 159,10}, { 95,11}, { 63,10}, { 159,11}, \
+ { 95,10}, { 191,12}, { 63,11}, { 127,10}, \
+ { 255, 9}, { 511,10}, { 271, 9}, { 543,11}, \
+ { 159,10}, { 319, 9}, { 671,11}, { 191,10}, \
+ { 383, 9}, { 767,10}, { 399,12}, { 4096,13}, \
+ { 8192,14}, { 16384,15}, { 32768,16} }
+#define SQR_FFT_TABLE3_SIZE 63
+#define SQR_FFT_THRESHOLD 5760
+
+#define MULLO_BASECASE_THRESHOLD 0 /* always */
+#define MULLO_DC_THRESHOLD 100
+#define MULLO_MUL_N_THRESHOLD 14379
+
+#define DC_DIV_QR_THRESHOLD 22
+#define DC_DIVAPPR_Q_THRESHOLD 30
+#define DC_BDIV_QR_THRESHOLD 120
+#define DC_BDIV_Q_THRESHOLD 268
+
+#define INV_MULMOD_BNM1_THRESHOLD 54
+#define INV_NEWTON_THRESHOLD 12
+#define INV_APPR_THRESHOLD 13
+
+#define BINV_NEWTON_THRESHOLD 410
+#define REDC_1_TO_REDC_N_THRESHOLD 100
+
+#define MU_DIV_QR_THRESHOLD 1037
+#define MU_DIVAPPR_Q_THRESHOLD 889
+#define MUPI_DIV_QR_THRESHOLD 0 /* always */
+#define MU_BDIV_QR_THRESHOLD 1858
+#define MU_BDIV_Q_THRESHOLD 2172
+
+#define MATRIX22_STRASSEN_THRESHOLD 21
+#define HGCD_THRESHOLD 59
+#define HGCD_APPR_THRESHOLD 56
+#define HGCD_REDUCE_THRESHOLD 4818
+#define GCD_DC_THRESHOLD 278
+#define GCDEXT_DC_THRESHOLD 298
+#define JACOBI_BASE_METHOD 4
+
+#define GET_STR_DC_THRESHOLD 11
+#define GET_STR_PRECOMPUTE_THRESHOLD 23
+#define SET_STR_DC_THRESHOLD 438
+#define SET_STR_PRECOMPUTE_THRESHOLD 1206
divert(-1)
-dnl Copyright 2007 Free Software Foundation, Inc.
+dnl Copyright 2007, 2011, 2012 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
define(`DARWIN')
+
dnl Usage LEA(symbol,reg)
dnl
-dnl FIXME: Only handles one symbol per assembly file because of the
-dnl way EPILOGUE_cpu is handled.
+dnl We maintain lists of stuff to append in load_eip and darwin_bd. The
+dnl `index' stuff is needed to suppress repeated definitions. To avoid
+dnl getting fooled by "var" and "var1", we add 'bol ' (the end of
+dnl 'indirect_symbol') at the beginning and a newline at the end. This
+dnl might be a bit fragile.
-define(`LEA',`
-define(`EPILOGUE_cpu',
-` L(movl_eip_`'substr($2,1)):
+define(`LEA',
+m4_assert_numargs(2)
+`ifdef(`PIC',`
+ifelse(index(defn(`load_eip'), `$2'),-1,
+`m4append(`load_eip',
+`L(movl_eip_`'substr($2,1)):
movl (%esp), $2
ret_internal
- .section __IMPORT,__pointers,non_lazy_symbol_pointers
+')')
+ifelse(index(defn(`darwin_bd'), `bol $1
+'),-1,
+`m4append(`darwin_bd',
+` .section __IMPORT,__pointers,non_lazy_symbol_pointers
L($1`'$non_lazy_ptr):
.indirect_symbol $1
.long 0
-')
+')')
call L(movl_eip_`'substr($2,1))
movl L($1`'$non_lazy_ptr)-.($2), $2
-')
+',`
+ movl `$'$1, $2
+')')
+
+
+dnl EPILOGUE_cpu
+
+define(`EPILOGUE_cpu',`load_eip`'darwin_bd')
+
+define(`load_eip', `') dnl updated in LEA
+define(`darwin_bd', `') dnl updated in LEA
+
+
+dnl Usage: CALL(funcname)
+dnl
+
+define(`CALL',
+m4_assert_numargs(1)
+`call GSYM_PREFIX`'$1')
+
+undefine(`PIC_WITH_EBX')
divert`'dnl
seta %dl
cmp 20(%esp), %ebp
setae %al
- orb %dl, %al
+ orb %dl, %al C "orb" form to placate Sun tools
jne L(35)
L(8):
mov 60(%esp), %esi C fn
L(fix): seta %dl
cmp 20(%esp), %ebp
setae %al
- orb %dl, %al
+ orb %dl, %al C "orb" form to placate Sun tools
je L(bck)
inc %edi
sub 20(%esp), %ebp
--- /dev/null
+/* Fat binary fallback mpn_com.
+
+Copyright 2003, 2009, 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+
+#include "mpn/generic/com.c"
+++ /dev/null
-/* Fat binary fallback mpn_divexact_by3c.
-
-Copyright 2003, 2009 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
-
-
-#include "mpn/generic/diveby3.c"
THEY'RE ALMOST CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR
COMPLETELY IN FUTURE GNU MP RELEASES.
-Copyright 2003, 2004, 2011 Free Software Foundation, Inc.
+Copyright 2003, 2004, 2011, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
/* fat_entry.asm */
-long __gmpn_cpuid __GMP_PROTO ((char dst[12], int id));
-int __gmpn_cpuid_available __GMP_PROTO ((void));
+long __gmpn_cpuid (char [12], int);
+int __gmpn_cpuid_available (void);
#if WANT_FAKE_CPUID
{ "pentiumpro", "GenuineIntel", MAKE_FMS (6, 0) },
{ "pentium2", "GenuineIntel", MAKE_FMS (6, 2) },
{ "pentium3", "GenuineIntel", MAKE_FMS (6, 7) },
- { "pentium4", "GenuineIntel", MAKE_FMS (7, 0) },
+ { "pentium4", "GenuineIntel", MAKE_FMS (15, 2) },
+ { "prescott", "GenuineIntel", MAKE_FMS (15, 3) },
+ { "nocona", "GenuineIntel", MAKE_FMS (15, 4) },
+ { "core2", "GenuineIntel", MAKE_FMS (6, 0xf) },
+ { "coreinhm", "GenuineIntel", MAKE_FMS (6, 0x1a) },
+ { "coreiwsm", "GenuineIntel", MAKE_FMS (6, 0x25) },
+ { "coreisbr", "GenuineIntel", MAKE_FMS (6, 0x2a) },
+ { "atom", "GenuineIntel", MAKE_FMS (6, 0x1c) },
{ "k5", "AuthenticAMD", MAKE_FMS (5, 0) },
{ "k6", "AuthenticAMD", MAKE_FMS (5, 3) },
{ "k62", "AuthenticAMD", MAKE_FMS (5, 8) },
{ "k63", "AuthenticAMD", MAKE_FMS (5, 9) },
{ "athlon", "AuthenticAMD", MAKE_FMS (6, 0) },
- { "x86_64", "AuthenticAMD", MAKE_FMS (15, 0) },
+ { "k8", "AuthenticAMD", MAKE_FMS (15, 0) },
+ { "k10", "AuthenticAMD", MAKE_FMS (16, 0) },
+ { "bobcat", "AuthenticAMD", MAKE_FMS (20, 1) },
+ { "bulldozer", "AuthenticAMD", MAKE_FMS (21, 1) },
{ "viac3", "CentaurHauls", MAKE_FMS (6, 0) },
{ "viac32", "CentaurHauls", MAKE_FMS (6, 9) },
+ { "nano", "CentaurHauls", MAKE_FMS (6, 15) },
};
static int
struct cpuvec_t __gmpn_cpuvec = {
__MPN(add_n_init),
+ 0,
+ 0,
__MPN(addmul_1_init),
+ 0,
+ __MPN(bdiv_dbm1c_init),
+ __MPN(com_init),
__MPN(copyd_init),
__MPN(copyi_init),
__MPN(divexact_1_init),
- __MPN(divexact_by3c_init),
__MPN(divrem_1_init),
__MPN(gcd_1_init),
__MPN(lshift_init),
+ __MPN(lshiftc_init),
__MPN(mod_1_init),
+ __MPN(mod_1_1p_init),
+ __MPN(mod_1_1p_cps_init),
+ __MPN(mod_1s_2p_init),
+ __MPN(mod_1s_2p_cps_init),
+ __MPN(mod_1s_4p_init),
+ __MPN(mod_1s_4p_cps_init),
__MPN(mod_34lsub1_init),
__MPN(modexact_1c_odd_init),
__MPN(mul_1_init),
__MPN(mul_basecase_init),
+ __MPN(mullo_basecase_init),
__MPN(preinv_divrem_1_init),
__MPN(preinv_mod_1_init),
+ __MPN(redc_1_init),
+ __MPN(redc_2_init),
__MPN(rshift_init),
__MPN(sqr_basecase_init),
__MPN(sub_n_init),
+ 0,
__MPN(submul_1_init),
0
};
+int __gmpn_cpuvec_initialized = 0;
/* The following setups start with generic x86, then overwrite with
specifics for a chip, and higher versions of that chip.
case 6:
TRACE (printf (" p6\n"));
CPUVEC_SETUP_p6;
- if (model >= 2)
- {
- TRACE (printf (" pentium2\n"));
+ switch (model)
+ {
+ case 0x00:
+ case 0x01:
+ TRACE (printf (" pentiumpro\n"));
+ break;
+
+ case 0x02:
+ case 0x03:
+ case 0x04:
+ case 0x05:
+ case 0x06:
+ TRACE (printf (" pentium2\n"));
+ CPUVEC_SETUP_p6_mmx;
+ break;
+
+ case 0x07:
+ case 0x08:
+ case 0x0a:
+ case 0x0b:
+ case 0x0c:
+ TRACE (printf (" pentium3\n"));
+ CPUVEC_SETUP_p6_mmx;
+ CPUVEC_SETUP_p6_p3mmx;
+ break;
+
+ case 0x09: /* Banias */
+ case 0x0d: /* Dothan */
+ case 0x0e: /* Yonah */
+ TRACE (printf (" Banias/Bothan/Yonah\n"));
CPUVEC_SETUP_p6_mmx;
- }
- if (model >= 7)
- {
- TRACE (printf (" pentium3\n"));
CPUVEC_SETUP_p6_p3mmx;
- }
- if (model >= 0xD || model == 9)
- {
- TRACE (printf (" p6 with sse2\n"));
CPUVEC_SETUP_p6_sse2;
- }
+ break;
+
+ case 0x0f: /* Conroe Merom Kentsfield Allendale */
+ case 0x10:
+ case 0x11:
+ case 0x12:
+ case 0x13:
+ case 0x14:
+ case 0x15:
+ case 0x16:
+ case 0x17: /* PNR Wolfdale Yorkfield */
+ case 0x18:
+ case 0x19:
+ case 0x1d: /* PNR Dunnington */
+ TRACE (printf (" Conroe\n"));
+ CPUVEC_SETUP_p6_mmx;
+ CPUVEC_SETUP_p6_p3mmx;
+ CPUVEC_SETUP_p6_sse2;
+ CPUVEC_SETUP_core2;
+ break;
+
+ case 0x1c: /* Atom Silverthorne */
+ case 0x26: /* Atom Lincroft */
+ case 0x27: /* Atom Saltwell */
+ case 0x36: /* Atom Cedarview/Saltwell */
+ TRACE (printf (" atom\n"));
+ CPUVEC_SETUP_atom;
+ CPUVEC_SETUP_atom_mmx;
+ CPUVEC_SETUP_atom_sse2;
+ break;
+
+ case 0x1a: /* NHM Gainestown */
+ case 0x1b:
+ case 0x1e: /* NHM Lynnfield/Jasper */
+ case 0x1f:
+ case 0x20:
+ case 0x21:
+ case 0x22:
+ case 0x23:
+ case 0x24:
+ case 0x25: /* WSM Clarkdale/Arrandale */
+ case 0x28:
+ case 0x29:
+ case 0x2b:
+ case 0x2c: /* WSM Gulftown */
+ case 0x2e: /* NHM Beckton */
+ case 0x2f: /* WSM Eagleton */
+ TRACE (printf (" nehalem/westmere\n"));
+ CPUVEC_SETUP_p6_mmx;
+ CPUVEC_SETUP_p6_p3mmx;
+ CPUVEC_SETUP_p6_sse2;
+ CPUVEC_SETUP_core2;
+ CPUVEC_SETUP_coreinhm;
+ break;
+
+ case 0x2a: /* SBR */
+ case 0x2d: /* SBR-EP */
+ case 0x3a: /* IBR */
+ case 0x3c: /* Haswell */
+ TRACE (printf (" sandybridge\n"));
+ CPUVEC_SETUP_p6_mmx;
+ CPUVEC_SETUP_p6_p3mmx;
+ CPUVEC_SETUP_p6_sse2;
+ CPUVEC_SETUP_core2;
+ CPUVEC_SETUP_coreinhm;
+ CPUVEC_SETUP_coreisbr;
+ break;
+ }
break;
case 15:
break;
case 6:
TRACE (printf (" athlon\n"));
- athlon:
CPUVEC_SETUP_k7;
CPUVEC_SETUP_k7_mmx;
break;
- case 15:
- TRACE (printf (" x86_64\n"));
- goto athlon;
+
+ case 0x0f: /* k8 */
+ case 0x11: /* "fam 11h", mix of k8 and k10 */
+ case 0x13: /* unknown, conservatively assume k8 */
+ case 0x16: /* unknown, conservatively assume k8 */
+ case 0x17: /* unknown, conservatively assume k8 */
+ TRACE (printf (" k8\n"));
+ CPUVEC_SETUP_k7;
+ CPUVEC_SETUP_k7_mmx;
+ CPUVEC_SETUP_k8;
+ break;
+
+ case 0x10: /* k10 */
+ case 0x12: /* k10 (llano) */
+ TRACE (printf (" k10\n"));
+ CPUVEC_SETUP_k7;
+ CPUVEC_SETUP_k7_mmx;
+ break;
+
+ case 0x14: /* bobcat */
+ TRACE (printf (" bobcat\n"));
+ CPUVEC_SETUP_k7;
+ CPUVEC_SETUP_k7_mmx;
+ CPUVEC_SETUP_bobcat;
+ break;
+
+ case 0x15: /* bulldozer */
+ TRACE (printf (" bulldozer\n"));
+ CPUVEC_SETUP_k7;
+ CPUVEC_SETUP_k7_mmx;
+ break;
}
}
else if (strcmp (vendor_string, "CentaurHauls") == 0)
{
TRACE (printf (" viac32\n"));
}
+ if (model >= 15)
+ {
+ TRACE (printf (" nano\n"));
+ CPUVEC_SETUP_nano;
+ }
break;
}
}
/* Set this once the threshold fields are ready.
Use volatile to prevent it getting moved. */
- ((volatile struct cpuvec_t *) &__gmpn_cpuvec)->initialized = 1;
+ *((volatile int *) &__gmpn_cpuvec_initialized) = 1;
}
dnl x86 fat binary entrypoints.
-dnl Copyright 2003 Free Software Foundation, Inc.
+dnl Copyright 2003, 2012 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
L(fat_init):
C al __gmpn_cpuvec byte offset
- movsbl %al, %eax
+ movzbl %al, %eax
pushl %eax
ifdef(`PIC',`
/* Fat binary x86 gmp-mparam.h -- Compiler/machine parameter header file.
-Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2003 Free Software Foundation,
-Inc.
+Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2003, 2011 Free Software
+Foundation, Inc.
This file is part of the GNU MP Library.
preinv. */
#define USE_PREINV_DIVREM_1 1
+#define BMOD_1_TO_MOD_1_THRESHOLD 20
+
/* mpn_sqr_basecase is faster than mpn_mul_basecase at all sizes, no need
for mpn_sqr to call the latter. */
#define SQR_BASECASE_THRESHOLD 0
--- /dev/null
+/* Fat binary fallback mpn_lshiftc.
+
+Copyright 2003, 2009, 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+
+#include "mpn/generic/lshiftc.c"
--- /dev/null
+/* Fat binary fallback mpn_mod_1_1p.
+
+Copyright 2003, 2009, 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+/*
+PROLOGUE(mpn_mod_1_1p_cps)
+*/
+
+#define OPERATION_mod_1_1_cps 1
+#include "mpn/generic/mod_1_1.c"
--- /dev/null
+/* Fat binary fallback mpn_mod_1s_2p.
+
+Copyright 2003, 2009, 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+/*
+PROLOGUE(mpn_mod_1s_2p_cps)
+*/
+
+#define OPERATION_mod_1_2_cps 1
+#include "mpn/generic/mod_1_2.c"
--- /dev/null
+/* Fat binary fallback mpn_mod_1s_4p.
+
+Copyright 2003, 2009, 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+/*
+PROLOGUE(mpn_mod_1s_4p_cps)
+*/
+
+#define OPERATION_mod_1_4_cps 1
+#include "mpn/generic/mod_1_4.c"
--- /dev/null
+/* Fat binary fallback mpn_mullo_basecase.
+
+Copyright 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+
+#include "mpn/generic/mullo_basecase.c"
--- /dev/null
+/* Fat binary fallback mpn_redc_1.
+
+Copyright 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+
+#include "mpn/generic/redc_1.c"
--- /dev/null
+/* Fat binary fallback mpn_redc_2.
+
+Copyright 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+
+#include "mpn/generic/redc_2.c"
--- /dev/null
+/* Generic x86 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2011 Free Software Foundation,
+Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#define GMP_LIMB_BITS 32
+#define BYTES_PER_MP_LIMB 4
+
+/* Generated by tuneup.c, 2011-01-30, gcc 3.4 */
+
+#define MOD_1_NORM_THRESHOLD 6
+#define MOD_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 17
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 9
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 0 /* never mpn_mod_1_1p */
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD 14
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD MP_SIZE_T_MAX /* never */
+#define USE_PREINV_DIVREM_1 0
+#define DIVEXACT_1_THRESHOLD 0 /* always (native) */
+#define BMOD_1_TO_MOD_1_THRESHOLD 42
+
+#define MUL_TOOM22_THRESHOLD 18
+#define MUL_TOOM33_THRESHOLD 66
+#define MUL_TOOM44_THRESHOLD 105
+#define MUL_TOOM6H_THRESHOLD 141
+#define MUL_TOOM8H_THRESHOLD 212
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD 62
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 69
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 65
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD 67
+
+#define SQR_BASECASE_THRESHOLD 0 /* always (native) */
+#define SQR_TOOM2_THRESHOLD 33
+#define SQR_TOOM3_THRESHOLD 60
+#define SQR_TOOM4_THRESHOLD 136
+#define SQR_TOOM6_THRESHOLD 196
+#define SQR_TOOM8_THRESHOLD 292
+
+#define MULMOD_BNM1_THRESHOLD 14
+#define SQRMOD_BNM1_THRESHOLD 16
+
+#define MUL_FFT_MODF_THRESHOLD 468 /* k = 5 */
+#define MUL_FFT_TABLE3 \
+ { { 468, 5}, { 17, 6}, { 9, 5}, { 19, 6}, \
+ { 11, 5}, { 23, 6}, { 21, 7}, { 11, 6}, \
+ { 25, 7}, { 13, 6}, { 27, 7}, { 15, 6}, \
+ { 31, 7}, { 21, 8}, { 11, 7}, { 27, 8}, \
+ { 15, 7}, { 33, 8}, { 19, 7}, { 39, 8}, \
+ { 23, 7}, { 47, 8}, { 27, 9}, { 15, 8}, \
+ { 39, 9}, { 23, 8}, { 47,10}, { 15, 9}, \
+ { 31, 8}, { 67, 9}, { 39, 8}, { 79, 9}, \
+ { 47, 8}, { 95, 9}, { 55,10}, { 31, 9}, \
+ { 63, 8}, { 127, 9}, { 79,10}, { 47, 9}, \
+ { 95,11}, { 31,10}, { 63, 9}, { 135,10}, \
+ { 79, 9}, { 159,10}, { 95, 9}, { 191,11}, \
+ { 63,10}, { 127, 9}, { 255,10}, { 143, 9}, \
+ { 287,10}, { 159,11}, { 95,10}, { 191, 9}, \
+ { 383,12}, { 4096,13}, { 8192,14}, { 16384,15}, \
+ { 32768,16} }
+#define MUL_FFT_TABLE3_SIZE 61
+#define MUL_FFT_THRESHOLD 5504
+
+#define SQR_FFT_MODF_THRESHOLD 396 /* k = 5 */
+#define SQR_FFT_TABLE3 \
+ { { 396, 5}, { 21, 6}, { 11, 5}, { 23, 6}, \
+ { 21, 7}, { 11, 6}, { 24, 7}, { 13, 6}, \
+ { 27, 7}, { 15, 6}, { 31, 7}, { 21, 8}, \
+ { 11, 7}, { 27, 8}, { 15, 7}, { 33, 8}, \
+ { 19, 7}, { 39, 8}, { 23, 7}, { 47, 8}, \
+ { 27, 9}, { 15, 8}, { 39, 9}, { 23, 8}, \
+ { 51,10}, { 15, 9}, { 31, 8}, { 67, 9}, \
+ { 39, 8}, { 79, 9}, { 47, 8}, { 95, 9}, \
+ { 55,10}, { 31, 9}, { 79,10}, { 47, 9}, \
+ { 95,11}, { 31,10}, { 63, 9}, { 127, 8}, \
+ { 255, 9}, { 135,10}, { 79, 9}, { 159, 8}, \
+ { 319,10}, { 95, 9}, { 191,11}, { 63,10}, \
+ { 127, 9}, { 255, 8}, { 511,10}, { 143, 9}, \
+ { 287, 8}, { 575,10}, { 159,11}, { 95,10}, \
+ { 191,12}, { 4096,13}, { 8192,14}, { 16384,15}, \
+ { 32768,16} }
+#define SQR_FFT_TABLE3_SIZE 61
+#define SQR_FFT_THRESHOLD 3712
+
+#define MULLO_BASECASE_THRESHOLD 3
+#define MULLO_DC_THRESHOLD 37
+#define MULLO_MUL_N_THRESHOLD 10950
+
+#define DC_DIV_QR_THRESHOLD 59
+#define DC_DIVAPPR_Q_THRESHOLD 189
+#define DC_BDIV_QR_THRESHOLD 55
+#define DC_BDIV_Q_THRESHOLD 136
+
+#define INV_MULMOD_BNM1_THRESHOLD 50
+#define INV_NEWTON_THRESHOLD 183
+#define INV_APPR_THRESHOLD 181
+
+#define BINV_NEWTON_THRESHOLD 204
+#define REDC_1_TO_REDC_N_THRESHOLD 54
+
+#define MU_DIV_QR_THRESHOLD 1142
+#define MU_DIVAPPR_Q_THRESHOLD 1142
+#define MUPI_DIV_QR_THRESHOLD 81
+#define MU_BDIV_QR_THRESHOLD 889
+#define MU_BDIV_Q_THRESHOLD 998
+
+#define MATRIX22_STRASSEN_THRESHOLD 13
+#define HGCD_THRESHOLD 133
+#define GCD_DC_THRESHOLD 451
+#define GCDEXT_DC_THRESHOLD 318
+#define JACOBI_BASE_METHOD 1
+
+#define GET_STR_DC_THRESHOLD 15
+#define GET_STR_PRECOMPUTE_THRESHOLD 30
+#define SET_STR_DC_THRESHOLD 547
+#define SET_STR_PRECOMPUTE_THRESHOLD 1049
+++ /dev/null
-dnl x86 mpn_invert_limb
-
-dnl Contributed to the GNU project by Niels Möller
-
-dnl Copyright 2009, 2011 Free Software Foundation, Inc.
-dnl
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or
-dnl modify it under the terms of the GNU Lesser General Public License as
-dnl published by the Free Software Foundation; either version 3 of the
-dnl License, or (at your option) any later version.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful,
-dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
-dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-dnl Lesser General Public License for more details.
-dnl
-dnl You should have received a copy of the GNU Lesser General Public License
-dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles (approx) div
-C K7: 46 53
-
-C Register usage:
-C Input D in %edi
-C Current approximation is in %eax and/or %ecx
-C %ebx and %edx are temporaries
-C %esi and %ebp are unused
-
-defframe(PARAM_DIVISOR,4)
-
-ASM_START()
-
-C Make approx_tab global to work around Apple relocation bug.
-ifdef(`DARWIN',`
- define(`approx_tab', MPN(invert_limb_tab))
- GLOBL approx_tab')
-
- TEXT
- ALIGN(16)
-PROLOGUE(mpn_invert_limb)
-deflit(`FRAME', 0)
- C Adding the unnecessary push of %ebp and the corresponding pop seems
- C to *reduce* running time from 46 to 43 cycles on K7. Don't know if
- C this is a benchmark artefact or some alignment issue.
-
- push %ebx FRAME_pushl()
- C push %ebp FRAME_pushl()
- push %edi FRAME_pushl()
-
- mov PARAM_DIVISOR, %edi
- mov %edi, %eax
- shr $22, %eax
-ifdef(`PIC',`
- LEA( approx_tab, %ebx)
- movzwl -1024(%ebx, %eax, 2), %eax
-',`
- movzwl -1024+approx_tab`'(%eax, %eax), %eax C %eax = v0
-')
-
- C v1 = (v0 << 4) - ((v0*v0*d_21) >> 32) - 1
- mov %eax, %ecx
- imul %eax, %eax
- mov %edi, %ebx
- shr $11, %ebx
- inc %ebx
- mul %ebx
- mov %edi, %ebx C Prepare
- shr %ebx
- sbb %eax, %eax
- sub %eax, %ebx C %ebx = d_31, %eax = mask
- shl $4, %ecx
- dec %ecx
- sub %edx, %ecx C %ecx = v1
-
- C v_2 = (v1 << 15) + ((v1 *(2^48 - v1 * d31 + (v1 >> 1) & mask)) >> 33)
- imul %ecx, %ebx
- and %ecx, %eax
- shr %eax
- sub %ebx, %eax
- mul %ecx
- mov %edi, %eax C Prepare for next mul
- shl $15, %ecx
- shr %edx
- add %edx, %ecx C %ecx = v2
-
- mul %ecx
- add %edi, %eax
- mov %ecx, %eax
- adc %edi, %edx
- sub %edx, %eax C %eax = v3
-
- pop %edi
- C pop %ebp
- pop %ebx
-
- ret
-
-EPILOGUE()
-
-DEF_OBJECT(approx_tab,2)
- .value 0x7fe1,0x7fa1,0x7f61,0x7f22,0x7ee3,0x7ea4,0x7e65,0x7e27
- .value 0x7de9,0x7dab,0x7d6d,0x7d30,0x7cf3,0x7cb6,0x7c79,0x7c3d
- .value 0x7c00,0x7bc4,0x7b89,0x7b4d,0x7b12,0x7ad7,0x7a9c,0x7a61
- .value 0x7a27,0x79ec,0x79b2,0x7979,0x793f,0x7906,0x78cc,0x7894
- .value 0x785b,0x7822,0x77ea,0x77b2,0x777a,0x7742,0x770b,0x76d3
- .value 0x769c,0x7665,0x762f,0x75f8,0x75c2,0x758c,0x7556,0x7520
- .value 0x74ea,0x74b5,0x7480,0x744b,0x7416,0x73e2,0x73ad,0x7379
- .value 0x7345,0x7311,0x72dd,0x72aa,0x7277,0x7243,0x7210,0x71de
- .value 0x71ab,0x7179,0x7146,0x7114,0x70e2,0x70b1,0x707f,0x704e
- .value 0x701c,0x6feb,0x6fba,0x6f8a,0x6f59,0x6f29,0x6ef9,0x6ec8
- .value 0x6e99,0x6e69,0x6e39,0x6e0a,0x6ddb,0x6dab,0x6d7d,0x6d4e
- .value 0x6d1f,0x6cf1,0x6cc2,0x6c94,0x6c66,0x6c38,0x6c0a,0x6bdd
- .value 0x6bb0,0x6b82,0x6b55,0x6b28,0x6afb,0x6acf,0x6aa2,0x6a76
- .value 0x6a49,0x6a1d,0x69f1,0x69c6,0x699a,0x696e,0x6943,0x6918
- .value 0x68ed,0x68c2,0x6897,0x686c,0x6842,0x6817,0x67ed,0x67c3
- .value 0x6799,0x676f,0x6745,0x671b,0x66f2,0x66c8,0x669f,0x6676
- .value 0x664d,0x6624,0x65fc,0x65d3,0x65aa,0x6582,0x655a,0x6532
- .value 0x650a,0x64e2,0x64ba,0x6493,0x646b,0x6444,0x641c,0x63f5
- .value 0x63ce,0x63a7,0x6381,0x635a,0x6333,0x630d,0x62e7,0x62c1
- .value 0x629a,0x6275,0x624f,0x6229,0x6203,0x61de,0x61b8,0x6193
- .value 0x616e,0x6149,0x6124,0x60ff,0x60da,0x60b6,0x6091,0x606d
- .value 0x6049,0x6024,0x6000,0x5fdc,0x5fb8,0x5f95,0x5f71,0x5f4d
- .value 0x5f2a,0x5f07,0x5ee3,0x5ec0,0x5e9d,0x5e7a,0x5e57,0x5e35
- .value 0x5e12,0x5def,0x5dcd,0x5dab,0x5d88,0x5d66,0x5d44,0x5d22
- .value 0x5d00,0x5cde,0x5cbd,0x5c9b,0x5c7a,0x5c58,0x5c37,0x5c16
- .value 0x5bf5,0x5bd4,0x5bb3,0x5b92,0x5b71,0x5b51,0x5b30,0x5b10
- .value 0x5aef,0x5acf,0x5aaf,0x5a8f,0x5a6f,0x5a4f,0x5a2f,0x5a0f
- .value 0x59ef,0x59d0,0x59b0,0x5991,0x5972,0x5952,0x5933,0x5914
- .value 0x58f5,0x58d6,0x58b7,0x5899,0x587a,0x585b,0x583d,0x581f
- .value 0x5800,0x57e2,0x57c4,0x57a6,0x5788,0x576a,0x574c,0x572e
- .value 0x5711,0x56f3,0x56d5,0x56b8,0x569b,0x567d,0x5660,0x5643
- .value 0x5626,0x5609,0x55ec,0x55cf,0x55b2,0x5596,0x5579,0x555d
- .value 0x5540,0x5524,0x5507,0x54eb,0x54cf,0x54b3,0x5497,0x547b
- .value 0x545f,0x5443,0x5428,0x540c,0x53f0,0x53d5,0x53b9,0x539e
- .value 0x5383,0x5368,0x534c,0x5331,0x5316,0x52fb,0x52e0,0x52c6
- .value 0x52ab,0x5290,0x5276,0x525b,0x5240,0x5226,0x520c,0x51f1
- .value 0x51d7,0x51bd,0x51a3,0x5189,0x516f,0x5155,0x513b,0x5121
- .value 0x5108,0x50ee,0x50d5,0x50bb,0x50a2,0x5088,0x506f,0x5056
- .value 0x503c,0x5023,0x500a,0x4ff1,0x4fd8,0x4fbf,0x4fa6,0x4f8e
- .value 0x4f75,0x4f5c,0x4f44,0x4f2b,0x4f13,0x4efa,0x4ee2,0x4eca
- .value 0x4eb1,0x4e99,0x4e81,0x4e69,0x4e51,0x4e39,0x4e21,0x4e09
- .value 0x4df1,0x4dda,0x4dc2,0x4daa,0x4d93,0x4d7b,0x4d64,0x4d4d
- .value 0x4d35,0x4d1e,0x4d07,0x4cf0,0x4cd8,0x4cc1,0x4caa,0x4c93
- .value 0x4c7d,0x4c66,0x4c4f,0x4c38,0x4c21,0x4c0b,0x4bf4,0x4bde
- .value 0x4bc7,0x4bb1,0x4b9a,0x4b84,0x4b6e,0x4b58,0x4b41,0x4b2b
- .value 0x4b15,0x4aff,0x4ae9,0x4ad3,0x4abd,0x4aa8,0x4a92,0x4a7c
- .value 0x4a66,0x4a51,0x4a3b,0x4a26,0x4a10,0x49fb,0x49e5,0x49d0
- .value 0x49bb,0x49a6,0x4990,0x497b,0x4966,0x4951,0x493c,0x4927
- .value 0x4912,0x48fe,0x48e9,0x48d4,0x48bf,0x48ab,0x4896,0x4881
- .value 0x486d,0x4858,0x4844,0x482f,0x481b,0x4807,0x47f3,0x47de
- .value 0x47ca,0x47b6,0x47a2,0x478e,0x477a,0x4766,0x4752,0x473e
- .value 0x472a,0x4717,0x4703,0x46ef,0x46db,0x46c8,0x46b4,0x46a1
- .value 0x468d,0x467a,0x4666,0x4653,0x4640,0x462c,0x4619,0x4606
- .value 0x45f3,0x45e0,0x45cd,0x45ba,0x45a7,0x4594,0x4581,0x456e
- .value 0x455b,0x4548,0x4536,0x4523,0x4510,0x44fe,0x44eb,0x44d8
- .value 0x44c6,0x44b3,0x44a1,0x448f,0x447c,0x446a,0x4458,0x4445
- .value 0x4433,0x4421,0x440f,0x43fd,0x43eb,0x43d9,0x43c7,0x43b5
- .value 0x43a3,0x4391,0x437f,0x436d,0x435c,0x434a,0x4338,0x4327
- .value 0x4315,0x4303,0x42f2,0x42e0,0x42cf,0x42bd,0x42ac,0x429b
- .value 0x4289,0x4278,0x4267,0x4256,0x4244,0x4233,0x4222,0x4211
- .value 0x4200,0x41ef,0x41de,0x41cd,0x41bc,0x41ab,0x419a,0x418a
- .value 0x4179,0x4168,0x4157,0x4147,0x4136,0x4125,0x4115,0x4104
- .value 0x40f4,0x40e3,0x40d3,0x40c2,0x40b2,0x40a2,0x4091,0x4081
- .value 0x4071,0x4061,0x4050,0x4040,0x4030,0x4020,0x4010,0x4000
-END_OBJECT(approx_tab)
--- /dev/null
+/* x86/k10 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007,
+2008, 2009, 2010, 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#define GMP_LIMB_BITS 32
+#define BYTES_PER_MP_LIMB 4
+
+/* Generated by tuneup.c, 2011-11-25, gcc 4.2 */
+
+#define MOD_1_NORM_THRESHOLD 0 /* always */
+#define MOD_1_UNNORM_THRESHOLD 0 /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 12
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 5
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 6
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD 13
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 15
+#define USE_PREINV_DIVREM_1 1 /* native */
+#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
+#define DIVEXACT_1_THRESHOLD 0 /* always (native) */
+#define BMOD_1_TO_MOD_1_THRESHOLD 31
+
+#define MUL_TOOM22_THRESHOLD 26
+#define MUL_TOOM33_THRESHOLD 85
+#define MUL_TOOM44_THRESHOLD 130
+#define MUL_TOOM6H_THRESHOLD 206
+#define MUL_TOOM8H_THRESHOLD 309
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD 80
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 91
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 89
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD 90
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD 112
+
+#define SQR_BASECASE_THRESHOLD 0 /* always (native) */
+#define SQR_TOOM2_THRESHOLD 40
+#define SQR_TOOM3_THRESHOLD 81
+#define SQR_TOOM4_THRESHOLD 178
+#define SQR_TOOM6_THRESHOLD 266
+#define SQR_TOOM8_THRESHOLD 357
+
+#define MULMID_TOOM42_THRESHOLD 54
+
+#define MULMOD_BNM1_THRESHOLD 17
+#define SQRMOD_BNM1_THRESHOLD 17
+
+#define MUL_FFT_MODF_THRESHOLD 606 /* k = 5 */
+#define MUL_FFT_TABLE3 \
+ { { 786, 5}, { 25, 6}, { 13, 5}, { 27, 6}, \
+ { 25, 7}, { 13, 6}, { 27, 7}, { 15, 6}, \
+ { 33, 7}, { 17, 6}, { 35, 7}, { 19, 6}, \
+ { 39, 7}, { 23, 6}, { 47, 7}, { 27, 8}, \
+ { 15, 7}, { 31, 6}, { 63, 7}, { 35, 8}, \
+ { 19, 7}, { 41, 8}, { 23, 7}, { 47, 8}, \
+ { 27, 9}, { 15, 8}, { 31, 7}, { 63, 8}, \
+ { 39, 9}, { 23, 8}, { 51, 9}, { 31, 8}, \
+ { 63, 9}, { 39, 8}, { 83, 9}, { 47,10}, \
+ { 31, 9}, { 63, 8}, { 127, 9}, { 79,10}, \
+ { 47, 9}, { 95,11}, { 31,10}, { 63, 9}, \
+ { 135,10}, { 79, 9}, { 159,10}, { 95, 9}, \
+ { 191,10}, { 111,11}, { 63,10}, { 127, 9}, \
+ { 255, 7}, { 1023, 8}, { 543, 9}, { 279,10}, \
+ { 159,11}, { 95,10}, { 191,12}, { 63,11}, \
+ { 127,10}, { 255, 9}, { 511,10}, { 271, 9}, \
+ { 543, 8}, { 1087,10}, { 287,11}, { 159, 9}, \
+ { 671,11}, { 191,10}, { 399, 9}, { 799,12}, \
+ { 4096,13}, { 8192,14}, { 16384,15}, { 32768,16} }
+#define MUL_FFT_TABLE3_SIZE 76
+#define MUL_FFT_THRESHOLD 6784
+
+#define SQR_FFT_MODF_THRESHOLD 505 /* k = 5 */
+#define SQR_FFT_TABLE3 \
+ { { 660, 5}, { 25, 6}, { 13, 5}, { 28, 6}, \
+ { 25, 7}, { 13, 6}, { 28, 7}, { 15, 6}, \
+ { 31, 7}, { 17, 6}, { 35, 7}, { 19, 6}, \
+ { 39, 7}, { 23, 6}, { 47, 7}, { 27, 8}, \
+ { 15, 7}, { 35, 8}, { 19, 7}, { 39, 8}, \
+ { 23, 7}, { 47, 8}, { 31, 7}, { 63, 8}, \
+ { 35, 7}, { 71, 8}, { 39, 9}, { 23, 8}, \
+ { 55,10}, { 15, 9}, { 31, 8}, { 63, 9}, \
+ { 39, 8}, { 79, 9}, { 47, 8}, { 95, 9}, \
+ { 55,10}, { 31, 9}, { 79,10}, { 47, 9}, \
+ { 95,11}, { 31,10}, { 63, 9}, { 135,10}, \
+ { 79, 9}, { 167,10}, { 95,11}, { 63,10}, \
+ { 159,11}, { 95, 8}, { 799,12}, { 63,11}, \
+ { 127,10}, { 255, 9}, { 543,11}, { 159, 9}, \
+ { 639,10}, { 367,11}, { 191,10}, { 383, 9}, \
+ { 799,10}, { 415,11}, { 223,12}, { 4096,13}, \
+ { 8192,14}, { 16384,15}, { 32768,16} }
+#define SQR_FFT_TABLE3_SIZE 67
+#define SQR_FFT_THRESHOLD 4736
+
+#define MULLO_BASECASE_THRESHOLD 7
+#define MULLO_DC_THRESHOLD 42
+#define MULLO_MUL_N_THRESHOLD 13463
+
+#define DC_DIV_QR_THRESHOLD 56
+#define DC_DIVAPPR_Q_THRESHOLD 270
+#define DC_BDIV_QR_THRESHOLD 55
+#define DC_BDIV_Q_THRESHOLD 182
+
+#define INV_MULMOD_BNM1_THRESHOLD 62
+#define INV_NEWTON_THRESHOLD 260
+#define INV_APPR_THRESHOLD 270
+
+#define BINV_NEWTON_THRESHOLD 276
+#define REDC_1_TO_REDC_N_THRESHOLD 71
+
+#define MU_DIV_QR_THRESHOLD 1652
+#define MU_DIVAPPR_Q_THRESHOLD 1652
+#define MUPI_DIV_QR_THRESHOLD 130
+#define MU_BDIV_QR_THRESHOLD 1499
+#define MU_BDIV_Q_THRESHOLD 1528
+
+#define POWM_SEC_TABLE 4,23,228,947
+
+#define MATRIX22_STRASSEN_THRESHOLD 16
+#define HGCD_THRESHOLD 132
+#define HGCD_APPR_THRESHOLD 180
+#define HGCD_REDUCE_THRESHOLD 3134
+#define GCD_DC_THRESHOLD 630
+#define GCDEXT_DC_THRESHOLD 432
+#define JACOBI_BASE_METHOD 4
+
+#define GET_STR_DC_THRESHOLD 12
+#define GET_STR_PRECOMPUTE_THRESHOLD 23
+#define SET_STR_DC_THRESHOLD 208
+#define SET_STR_PRECOMPUTE_THRESHOLD 1254
+
+#define FAC_DSC_THRESHOLD 208
+#define FAC_ODD_THRESHOLD 29
include(`../config.m4')
-C cycles/limb
-C P5:
-C P6 model 0-8,10-12) 5.94
-C P6 model 9 (Banias)
-C P6 model 13 (Dothan) 5.57
+C cycles/limb
+C P5
+C P6 model 0-8,10-12 5.94
+C P6 model 9 (Banias) 5.51
+C P6 model 13 (Dothan) 5.57
C P4 model 0 (Willamette)
C P4 model 1 (?)
C P4 model 2 (Northwood)
C P4 model 3 (Prescott)
C P4 model 4 (Nocona)
-C K6: 7.65-8.5 (data dependent)
-C K7:
-C K8:
+C AMD K6 7.65-8.5 (data dependent)
+C AMD K7
+C AMD K8
dnl K6: large multipliers small multipliers
#define MOD_1_NORM_THRESHOLD 12
#define MOD_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD 28
-#define MOD_1U_TO_MOD_1_1_THRESHOLD 18
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD 0
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD MP_SIZE_T_MAX
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 82
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 41
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 32
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 3
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD 0
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 128
#define USE_PREINV_DIVREM_1 0
#define DIVEXACT_1_THRESHOLD 0 /* always (native) */
#define BMOD_1_TO_MOD_1_THRESHOLD MP_SIZE_T_MAX /* never */
include(`../config.m4')
-C cycles/limb
-C P5:
-C P6 model 0-8,10-12) 5.5
+C cycles/limb
+C P5
+C P6 model 0-8,10-12 5.5
C P6 model 9 (Banias)
-C P6 model 13 (Dothan) 4.87
+C P6 model 13 (Dothan) 4.87
C P4 model 0 (Willamette)
C P4 model 1 (?)
C P4 model 2 (Northwood)
C P4 model 3 (Prescott)
C P4 model 4 (Nocona)
-C K6: 6.25
-C K7:
-C K8:
+C AMD K6 6.25
+C AMD K7
+C AMD K8
C mp_limb_t mpn_mul_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
--- /dev/null
+dnl AMD K7 mpn_addlsh1_n -- rp[] = up[] + (vp[] << 1)
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+
+dnl Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C This is an attempt at an addlsh1_n for x86-32, not relying on sse2 insns.
+C The inner loop is 2*3-way unrolled, which is the best we can do with the
+C available registers. It seems tricky to use the same structure for rsblsh1_n,
+C since we cannot feed carry between operations there.
+
+C cycles/limb
+C P5
+C P6 model 0-8,10-12
+C P6 model 9 (Banias)
+C P6 model 13 (Dothan) 5.4 (worse than add_n + lshift)
+C P4 model 0 (Willamette)
+C P4 model 1 (?)
+C P4 model 2 (Northwood)
+C P4 model 3 (Prescott)
+C P4 model 4 (Nocona)
+C Intel Atom 6
+C AMD K6 ?
+C AMD K7 2.5
+C AMD K8
+
+C This is a basic addlsh1_n for k7, atom, and perhaps some other x86-32
+C processors. It uses 2*3-way unrolling, for good reasons. Unfortunately,
+C that means we need an initial magic multiply.
+C
+C It is not clear how to do sublsh1_n or rsblsh1_n using the same pattern. We
+C cannot do rsblsh1_n since we feed carry from the shift blocks to the
+C add/subtract blocks, which is right for addition but reversed for
+C subtraction. We could perhaps do sublsh1_n, with some extra move insns,
+C without losing any time, since we are limited by carry recurrency latency,
+C not by instruction issue.
+C
+C Breaking carry recurrency might be a good idea. We would then need separate
+C registers for the shift carry and add/subtract carry, which in turn would
+C force us to 2*2-way unrolling.
+
+defframe(PARAM_SIZE, 16)
+defframe(PARAM_DBLD, 12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+
+dnl re-use parameter space
+define(VAR_COUNT,`PARAM_DST')
+define(VAR_TMP,`PARAM_DBLD')
+
+ASM_START()
+ TEXT
+ ALIGN(8)
+PROLOGUE(mpn_addlsh1_n)
+deflit(`FRAME',0)
+
+define(`rp', `%edi')
+define(`up', `%esi')
+define(`vp', `%ebp')
+
+ mov $0x2aaaaaab, %eax
+
+ push %ebx FRAME_pushl()
+ mov PARAM_SIZE, %ebx C size
+
+ push rp FRAME_pushl()
+ mov PARAM_DST, rp
+
+ mul %ebx
+
+ push up FRAME_pushl()
+ mov PARAM_SRC, up
+
+ not %edx C count = -(size\6)-1
+ mov %edx, VAR_COUNT
+
+ push vp FRAME_pushl()
+ mov PARAM_DBLD, vp
+
+ lea 3(%edx,%edx,2), %ecx C count*3+3 = -(size\6)*3
+ xor %edx, %edx
+ lea (%ebx,%ecx,2), %ebx C size + (count*3+3)*2 = size % 6
+ or %ebx, %ebx
+ jz L(exact)
+
+L(oop):
+ifdef(`CPU_P6',`
+ shr %edx ') C restore 2nd saved carry bit
+ mov (vp), %eax
+ adc %eax, %eax
+ rcr %edx C restore 1st saved carry bit
+ lea 4(vp), vp
+ adc (up), %eax
+ lea 4(up), up
+ adc %edx, %edx C save a carry bit in edx
+ifdef(`CPU_P6',`
+ adc %edx, %edx ') C save another carry bit in edx
+ dec %ebx
+ mov %eax, (rp)
+ lea 4(rp), rp
+ jnz L(oop)
+ mov vp, VAR_TMP
+L(exact):
+ incl VAR_COUNT
+ jz L(end)
+
+ ALIGN(16)
+L(top):
+ifdef(`CPU_P6',`
+ shr %edx ') C restore 2nd saved carry bit
+ mov (vp), %eax
+ adc %eax, %eax
+ mov 4(vp), %ebx
+ adc %ebx, %ebx
+ mov 8(vp), %ecx
+ adc %ecx, %ecx
+
+ rcr %edx C restore 1st saved carry bit
+
+ adc (up), %eax
+ mov %eax, (rp)
+ adc 4(up), %ebx
+ mov %ebx, 4(rp)
+ adc 8(up), %ecx
+ mov %ecx, 8(rp)
+
+ mov 12(vp), %eax
+ adc %eax, %eax
+ mov 16(vp), %ebx
+ adc %ebx, %ebx
+ mov 20(vp), %ecx
+ adc %ecx, %ecx
+
+ lea 24(vp), vp
+ adc %edx, %edx C save a carry bit in edx
+
+ adc 12(up), %eax
+ mov %eax, 12(rp)
+ adc 16(up), %ebx
+ mov %ebx, 16(rp)
+ adc 20(up), %ecx
+
+ lea 24(up), up
+
+ifdef(`CPU_P6',`
+ adc %edx, %edx ') C save another carry bit in edx
+ mov %ecx, 20(rp)
+ incl VAR_COUNT
+ lea 24(rp), rp
+ jne L(top)
+
+L(end):
+ pop vp FRAME_popl()
+ pop up FRAME_popl()
+
+ifdef(`CPU_P6',`
+ xor %eax, %eax
+ shr $1, %edx
+ adc %edx, %eax
+',`
+ adc $0, %edx
+ mov %edx, %eax
+')
+ pop rp FRAME_popl()
+ pop %ebx FRAME_popl()
+ ret
+EPILOGUE()
+ASM_END()
include(`../config.m4')
-C cycles/limb
-C P5:
-C P6 model 0-8,10-12)
-C P6 model 9 (Banias)
+C cycles/limb
+C P5
+C P6 model 0-8,10-12
+C P6 model 9 (Banias) 6.5
C P6 model 13 (Dothan)
C P4 model 0 (Willamette)
C P4 model 1 (?)
C P4 model 2 (Northwood)
C P4 model 3 (Prescott)
C P4 model 4 (Nocona)
-C K6:
-C K7: 3.75
-C K8:
+C AMD K6
+C AMD K7 3.75
+C AMD K8
C TODO
C * Improve feed-in and wind-down code. We beat the old code for all n != 1,
--- /dev/null
+dnl AMD K7 mpn_bdiv_q_1 -- mpn by limb exact division.
+
+dnl Copyright 2001, 2002, 2004, 2007, 2011 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl Rearranged from mpn/x86/k7/dive_1.asm by Marco Bodrato.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C cycles/limb
+C Athlon: 11.0
+C Hammer: 9.0
+
+
+C void mpn_divexact_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C mp_limb_t divisor);
+C
+C The dependent chain is mul+imul+sub for 11 cycles and that speed is
+C achieved with no special effort. The load and shrld latencies are hidden
+C by out of order execution.
+C
+C It's a touch faster on size==1 to use the mul-by-inverse than divl.
+
+defframe(PARAM_SHIFT, 24)
+defframe(PARAM_INVERSE,20)
+defframe(PARAM_DIVISOR,16)
+defframe(PARAM_SIZE, 12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+
+defframe(SAVE_EBX, -4)
+defframe(SAVE_ESI, -8)
+defframe(SAVE_EDI, -12)
+defframe(SAVE_EBP, -16)
+defframe(VAR_INVERSE, -20)
+defframe(VAR_DST_END, -24)
+
+deflit(STACK_SPACE, 24)
+
+ TEXT
+
+C mp_limb_t
+C mpn_pi1_bdiv_q_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_limb_t divisor,
+C mp_limb_t inverse, int shift)
+ ALIGN(16)
+PROLOGUE(mpn_pi1_bdiv_q_1)
+deflit(`FRAME',0)
+
+ subl $STACK_SPACE, %esp deflit(`FRAME',STACK_SPACE)
+ movl PARAM_SHIFT, %ecx C shift count
+
+ movl %ebp, SAVE_EBP
+ movl PARAM_SIZE, %ebp
+
+ movl %esi, SAVE_ESI
+ movl PARAM_SRC, %esi
+
+ movl %edi, SAVE_EDI
+ movl PARAM_DST, %edi
+
+ movl %ebx, SAVE_EBX
+
+ leal (%esi,%ebp,4), %esi C src end
+ leal (%edi,%ebp,4), %edi C dst end
+ negl %ebp C -size
+
+ movl PARAM_INVERSE, %eax C inv
+
+L(common):
+ movl %eax, VAR_INVERSE
+ movl (%esi,%ebp,4), %eax C src[0]
+
+ incl %ebp
+ jz L(one)
+
+ movl (%esi,%ebp,4), %edx C src[1]
+
+ shrdl( %cl, %edx, %eax)
+
+ movl %edi, VAR_DST_END
+ xorl %ebx, %ebx
+ jmp L(entry)
+
+ ALIGN(8)
+L(top):
+ C eax q
+ C ebx carry bit, 0 or 1
+ C ecx shift
+ C edx
+ C esi src end
+ C edi dst end
+ C ebp counter, limbs, negative
+
+ mull PARAM_DIVISOR C carry limb in edx
+
+ movl -4(%esi,%ebp,4), %eax
+ movl (%esi,%ebp,4), %edi
+
+ shrdl( %cl, %edi, %eax)
+
+ subl %ebx, %eax C apply carry bit
+ setc %bl
+ movl VAR_DST_END, %edi
+
+ subl %edx, %eax C apply carry limb
+ adcl $0, %ebx
+
+L(entry):
+ imull VAR_INVERSE, %eax
+
+ movl %eax, -4(%edi,%ebp,4)
+ incl %ebp
+ jnz L(top)
+
+
+ mull PARAM_DIVISOR C carry limb in edx
+
+ movl -4(%esi), %eax C src high limb
+ shrl %cl, %eax
+ movl SAVE_ESI, %esi
+
+ subl %ebx, %eax C apply carry bit
+ movl SAVE_EBX, %ebx
+ movl SAVE_EBP, %ebp
+
+ subl %edx, %eax C apply carry limb
+
+ imull VAR_INVERSE, %eax
+
+ movl %eax, -4(%edi)
+ movl SAVE_EDI, %edi
+ addl $STACK_SPACE, %esp
+
+ ret
+
+L(one):
+ shrl %cl, %eax
+ movl SAVE_ESI, %esi
+ movl SAVE_EBX, %ebx
+
+ imull VAR_INVERSE, %eax
+
+ movl SAVE_EBP, %ebp
+
+ movl %eax, -4(%edi)
+ movl SAVE_EDI, %edi
+ addl $STACK_SPACE, %esp
+
+ ret
+EPILOGUE()
+
+C mp_limb_t mpn_bdiv_q_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C mp_limb_t divisor);
+C
+
+ ALIGN(16)
+PROLOGUE(mpn_bdiv_q_1)
+deflit(`FRAME',0)
+
+ movl PARAM_DIVISOR, %eax
+ subl $STACK_SPACE, %esp deflit(`FRAME',STACK_SPACE)
+ movl $-1, %ecx C shift count
+
+ movl %ebp, SAVE_EBP
+ movl PARAM_SIZE, %ebp
+
+ movl %esi, SAVE_ESI
+ movl %edi, SAVE_EDI
+
+ C If there's usually only one or two trailing zero bits then this
+ C should be faster than bsfl.
+L(strip_twos):
+ incl %ecx
+ shrl %eax
+ jnc L(strip_twos)
+
+ movl %ebx, SAVE_EBX
+ leal 1(%eax,%eax), %ebx C d without twos
+ andl $127, %eax C d/2, 7 bits
+
+ifdef(`PIC',`
+ LEA( binvert_limb_table, %edx)
+ movzbl (%eax,%edx), %eax C inv 8 bits
+',`
+ movzbl binvert_limb_table(%eax), %eax C inv 8 bits
+')
+
+ leal (%eax,%eax), %edx C 2*inv
+ movl %ebx, PARAM_DIVISOR C d without twos
+
+ imull %eax, %eax C inv*inv
+
+ movl PARAM_SRC, %esi
+ movl PARAM_DST, %edi
+
+ imull %ebx, %eax C inv*inv*d
+
+ subl %eax, %edx C inv = 2*inv - inv*inv*d
+ leal (%edx,%edx), %eax C 2*inv
+
+ imull %edx, %edx C inv*inv
+
+ leal (%esi,%ebp,4), %esi C src end
+ leal (%edi,%ebp,4), %edi C dst end
+ negl %ebp C -size
+
+ imull %ebx, %edx C inv*inv*d
+
+ subl %edx, %eax C inv = 2*inv - inv*inv*d
+
+ ASSERT(e,` C expect d*inv == 1 mod 2^GMP_LIMB_BITS
+ pushl %eax FRAME_pushl()
+ imull PARAM_DIVISOR, %eax
+ cmpl $1, %eax
+ popl %eax FRAME_popl()')
+
+ jmp L(common)
+EPILOGUE()
-dnl AMD K7 mpn_gcd_1 -- mpn by 1 gcd.
+dnl x86 mpn_gcd_1 optimised for AMD K7.
-dnl Copyright 2000, 2001, 2002, 2009 Free Software Foundation, Inc.
-dnl
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or
-dnl modify it under the terms of the GNU Lesser General Public License as
-dnl published by the Free Software Foundation; either version 3 of the
-dnl License, or (at your option) any later version.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful,
-dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
-dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-dnl Lesser General Public License for more details.
-dnl
-dnl You should have received a copy of the GNU Lesser General Public License
-dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+dnl Contributed to the GNU project by Kevin Ryde. Rehacked by Torbjorn
+dnl Granlund.
-include(`../config.m4')
+dnl Copyright 2000, 2001, 2002, 2005, 2009, 2011, 2012 Free Software
+dnl Foundation, Inc.
+dnl This file is part of the GNU MP Library.
-C K7: 6.75 cycles/bit (approx) 1x1 gcd
-C 11.0 cycles/limb Nx1 reduction (modexact_1_odd)
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
-dnl Reduce using x%y if x is more than DIV_THRESHOLD bits bigger than y,
-dnl where x is the larger of the two. See tune/README for more.
-dnl
-dnl divl at 40 cycles compared to the gcd at about 7 cycles/bitpair
-dnl suggests 40/7*2=11.4 but 7 seems to be about right.
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
-deflit(DIV_THRESHOLD, 7)
+include(`../config.m4')
-C table[n] is the number of trailing zeros on n, or MAXSHIFT if n==0.
-C
-C This is mixed in with the code, but as per the k7 optimization manual it's
-C a full cache line and suitably aligned so it won't get swapped between
-C code and data. Having it in TEXT rather than RODATA saves needing a GOT
-C entry when PIC.
-C
-C Actually, there doesn't seem to be a measurable difference between this in
-C it's own cache line or plonked in the middle of the code. Presumably
-C since TEXT is read-only there's no worries about coherency.
+C cycles/bit (approx)
+C AMD K7 5.31
+C AMD K8,K9 5.33
+C AMD K10 5.30
+C AMD bd1 ?
+C AMD bobcat 7.02
+C Intel P4-2 10.1
+C Intel P4-3/4 10.0
+C Intel P6/13 5.88
+C Intel core2 6.26
+C Intel NHM 6.83
+C Intel SBR 8.50
+C Intel atom 8.90
+C VIA nano ?
+C Numbers measured with: speed -CD -s16-32 -t16 mpn_gcd_1
+
+C TODO
+C * Tune overhead, this takes 2-3 cycles more than old code when v0 is tiny.
+C * Stream things better through registers, avoiding some copying.
+
+C ctz_table[n] is the number of trailing zeros on n, or MAXSHIFT if n==0.
deflit(MAXSHIFT, 6)
deflit(MASK, eval((m4_lshift(1,MAXSHIFT))-1))
- TEXT
- ALIGN(64)
-L(table):
+DEF_OBJECT(ctz_table,64)
.byte MAXSHIFT
forloop(i,1,MASK,
` .byte m4_count_trailing_zeros(i)
')
+END_OBJECT(ctz_table)
+C Threshold of when to call bmod when U is one limb. Should be about
+C (time_in_cycles(bmod_1,1) + call_overhead) / (cycles/bit).
+define(`DIV_THRES_LOG2', 7)
-C mp_limb_t mpn_gcd_1 (mp_srcptr src, mp_size_t size, mp_limb_t limb);
-C
-defframe(PARAM_LIMB, 12)
-defframe(PARAM_SIZE, 8)
-defframe(PARAM_SRC, 4)
+define(`up', `%edi')
+define(`n', `%esi')
+define(`v0', `%edx')
-defframe(SAVE_EBX, -4)
-defframe(SAVE_ESI, -8)
-defframe(SAVE_EDI, -12)
-defframe(SAVE_EBP, -16)
-defframe(CALL_DIVISOR,-20)
-defframe(CALL_SIZE, -24)
-defframe(CALL_SRC, -28)
-
-deflit(STACK_SPACE, 28)
+ASM_START()
TEXT
ALIGN(16)
-
PROLOGUE(mpn_gcd_1)
-deflit(`FRAME',0)
-
- ASSERT(ne, `cmpl $0, PARAM_LIMB') C y!=0
- ASSERT(ae, `cmpl $1, PARAM_SIZE') C size>=1
+ push %edi
+ push %esi
- mov PARAM_SRC, %eax
- mov PARAM_LIMB, %edx
- sub $STACK_SPACE, %esp deflit(`FRAME',STACK_SPACE)
+ mov 12(%esp), up
+ mov 16(%esp), n
+ mov 20(%esp), v0
- mov %esi, SAVE_ESI
- mov %ebx, SAVE_EBX
-
- mov (%eax), %esi C src low limb
-
-ifdef(`PIC',`
- mov %edi, SAVE_EDI
- call L(movl_eip_to_edi)
-L(here):
- add $L(table)-L(here), %edi
-')
-
- mov %esi, %ebx
- or %edx, %esi C x|y
+ mov (up), %eax C U low limb
+ or v0, %eax C x | y
mov $-1, %ecx
L(twos):
inc %ecx
- shr %esi
- jnc L(twos) C 3/4 chance of x or y odd already
-
- shr %cl, %ebx
- shr %cl, %edx
- mov %ecx, %esi C common twos
-
- mov PARAM_SIZE, %ecx
- cmp $1, %ecx
- ja L(divide)
-
-
- C eax
- C ebx x
- C ecx
- C edx y
- C esi common twos
- C edi [PIC] L(table)
- C ebp
-
- mov %edx, %eax
- cmp %ebx, %edx
-
- cmovb( %ebx, %eax) C swap to make x bigger than y
- cmovb( %edx, %ebx)
+ shr %eax
+ jnc L(twos)
+ shr %cl, v0
+ mov %ecx, %eax C common twos
-L(strip_y):
- C eax x
- C ebx y
- C ecx
- C edx
- C esi common twos
- C edi [PIC] L(table)
- C ebp
-
- ASSERT(nz,`orl %ebx,%ebx')
- shr %ebx
- jnc L(strip_y)
- rcl %ebx
-
+L(divide_strip_y):
+ shr v0
+ jnc L(divide_strip_y)
+ adc v0, v0
- C eax x
- C ebx y (odd)
- C ecx
- C edx
- C esi common twos
- C edi [PIC] L(table)
- C ebp
+ push %eax
+ push v0
- mov %eax, %ecx
- mov %ebx, %edx
- shr $DIV_THRESHOLD, %eax
+ cmp $1, n
+ jnz L(reduce_nby1)
- cmp %eax, %ebx
+C Both U and V are single limbs, reduce with bmod if u0 >> v0.
+ mov (up), %ecx
mov %ecx, %eax
- ja L(strip_x_entry) C do x%y if x much bigger than y
-
+ shr $DIV_THRES_LOG2, %ecx
+ cmp %ecx, v0
+ ja L(reduced)
+ mov v0, %esi
xor %edx, %edx
+ div %esi
+ mov %edx, %eax
+ jmp L(reduced)
- div %ebx
-
- or %edx, %edx
- mov %edx, %ecx C remainder -> x
- mov %ebx, %edx C y
-
- jz L(done_ebx)
- jmp L(strip_x)
-
-
- C Offset 0x9D here for non-PIC. About 0.4 cycles/bit is saved by
- C ensuring the end of the jnz at the end of this loop doesn't cross
- C into the next cache line at 0xC0.
- C
- C PIC on the other hand is offset 0xAC here and extends to 0xC9, so
- C it crosses but doesn't suffer any measurable slowdown.
-
-L(top):
- C eax x
- C ebx y-x
- C ecx x-y
- C edx y
- C esi twos, for use at end
- C edi [PIC] L(table)
-
- cmovc( %ebx, %ecx) C if x-y gave carry, use x and y-x
- cmovc( %eax, %edx)
-
-L(strip_x):
- mov %ecx, %eax
-L(strip_x_entry):
- and $MASK, %ecx
-
- ASSERT(nz, `orl %eax, %eax')
-
-ifdef(`PIC',`
- mov (%ecx,%edi), %cl
-',`
- mov L(table) (%ecx), %cl
+L(reduce_nby1):
+ifdef(`PIC_WITH_EBX',`
+ push %ebx
+ call L(movl_eip_to_ebx)
+ add $_GLOBAL_OFFSET_TABLE_, %ebx
')
-
- shr %cl, %eax
- cmp $MAXSHIFT, %cl
-
- mov %eax, %ecx
- mov %edx, %ebx
- je L(strip_x)
-
- ASSERT(nz, `test $1, %eax') C both odd
- ASSERT(nz, `test $1, %edx')
-
- sub %eax, %ebx
- sub %edx, %ecx
- jnz L(top)
-
-
-L(done):
- mov %esi, %ecx
- mov SAVE_ESI, %esi
-ifdef(`PIC',`
- mov SAVE_EDI, %edi
+ push v0 C param 3
+ push n C param 2
+ push up C param 1
+ cmp $BMOD_1_TO_MOD_1_THRESHOLD, n
+ jl L(bmod)
+ CALL( mpn_mod_1)
+ jmp L(called)
+L(bmod):
+ CALL( mpn_modexact_1_odd)
+
+L(called):
+ add $12, %esp C deallocate params
+ifdef(`PIC_WITH_EBX',`
+ pop %ebx
')
+L(reduced):
+ pop %edx
+ LEA( ctz_table, %esi)
+ test %eax, %eax
+ mov %eax, %ecx
+ jnz L(mid)
+ jmp L(end)
+
+ ALIGN(16) C K8 BC P4 NHM SBR
+L(top): cmovc( %ecx, %eax) C if x-y < 0 0
+ cmovc( %edi, %edx) C use x,y-x 0
+L(mid): and $MASK, %ecx C 0
+ movzbl (%esi,%ecx), %ecx C 1
+ jz L(shift_alot) C 1
+ shr %cl, %eax C 3
+ mov %eax, %edi C 4
+ mov %edx, %ecx C 3
+ sub %eax, %ecx C 4
+ sub %edx, %eax C 4
+ jnz L(top) C 5
+
+L(end): pop %ecx
+ mov %edx, %eax
shl %cl, %eax
- mov SAVE_EBX, %ebx
- add $FRAME, %esp
-
+ pop %esi
+ pop %edi
ret
-
-
-C -----------------------------------------------------------------------------
-C two or more limbs
-
-dnl MODEXACT_THRESHOLD is the size at which it's better to call
-dnl mpn_modexact_1_odd than do an inline loop.
-
-deflit(MODEXACT_THRESHOLD, ifdef(`PIC',6,5))
-
-L(divide):
- C eax src
- C ebx
- C ecx size
- C edx y
- C esi common twos
- C edi [PIC] L(table)
- C ebp
-
-L(divide_strip_y):
- ASSERT(nz,`or %edx,%edx')
- shr %edx
- jnc L(divide_strip_y)
- lea 1(%edx,%edx), %ebx C y now odd
-
- mov %ebp, SAVE_EBP
- mov %eax, %ebp
- mov -4(%eax,%ecx,4), %eax C src high limb
-
- cmp $MODEXACT_THRESHOLD, %ecx
- jae L(modexact)
-
- cmp %ebx, %eax C high cmp divisor
- mov $0, %edx
-
- cmovc( %eax, %edx) C skip a div if high<divisor
- sbb $0, %ecx
-
-
-L(divide_top):
- C eax scratch (quotient)
- C ebx y
- C ecx counter (size to 1, inclusive)
- C edx carry (remainder)
- C esi common twos
- C edi [PIC] L(table)
- C ebp src
-
- mov -4(%ebp,%ecx,4), %eax
-
- div %ebx
-
- dec %ecx
- jnz L(divide_top)
-
-
- C eax
- C ebx y (odd)
- C ecx
- C edx x
- C esi common twos
- C edi [PIC] L(table)
- C ebp
-
- or %edx, %edx
- mov SAVE_EBP, %ebp
- mov %edx, %eax
-
- mov %edx, %ecx
- mov %ebx, %edx
- jnz L(strip_x_entry)
-
-
-L(done_ebx):
- mov %ebx, %eax
- jmp L(done)
-
-
-
-L(modexact):
- C eax
- C ebx y
- C ecx size
- C edx
- C esi common twos
- C edi [PIC] L(table)
- C ebp src
-
-ifdef(`PIC',`
- mov %ebp, CALL_SRC
- mov %ebx, %ebp C y
- mov %edi, %ebx C L(table)
-
- add $_GLOBAL_OFFSET_TABLE_+[.-L(table)], %ebx
- mov %ebp, CALL_DIVISOR
- mov %ecx, CALL_SIZE
-
- call GSYM_PREFIX`'mpn_modexact_1_odd@PLT
-',`
-dnl non-PIC
- mov %ebx, CALL_DIVISOR
- mov %ebp, CALL_SRC
- mov %ecx, CALL_SIZE
-
- call GSYM_PREFIX`'mpn_modexact_1_odd
-')
-
- C eax x
- C ebx [non-PIC] y
- C ecx
- C edx
- C esi common twos
- C edi [PIC] L(table)
- C ebp [PIC] y
-
- or %eax, %eax
- mov ifdef(`PIC',`%ebp',`%ebx'), %edx
- mov SAVE_EBP, %ebp
-
+L(shift_alot):
+ shr $MAXSHIFT, %eax
mov %eax, %ecx
- jnz L(strip_x_entry)
+ jmp L(mid)
- mov %edx, %eax
- jmp L(done)
-
-
-ifdef(`PIC', `
-L(movl_eip_to_edi):
- mov (%esp), %edi
- ret_internal
+ifdef(`PIC_WITH_EBX',`
+L(movl_eip_to_ebx):
+ mov (%esp), %ebx
+ ret
')
-
EPILOGUE()
#define MOD_1_NORM_THRESHOLD 0 /* always */
-#define MOD_1_UNNORM_THRESHOLD 4
-#define MOD_1N_TO_MOD_1_1_THRESHOLD 14
-#define MOD_1U_TO_MOD_1_1_THRESHOLD 6
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD 0
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD 20
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 26
+#define MOD_1_UNNORM_THRESHOLD 3
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 7
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 3
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 24
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD 0 /* never mpn_mod_1s_2p */
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 10
#define USE_PREINV_DIVREM_1 1 /* native */
+#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
#define DIVEXACT_1_THRESHOLD 0 /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD 28
+#define BMOD_1_TO_MOD_1_THRESHOLD 24
#define MUL_TOOM22_THRESHOLD 28
#define MUL_TOOM33_THRESHOLD 85
-#define MUL_TOOM44_THRESHOLD 148
-#define MUL_TOOM6H_THRESHOLD 204
+#define MUL_TOOM44_THRESHOLD 142
+#define MUL_TOOM6H_THRESHOLD 258
#define MUL_TOOM8H_THRESHOLD 309
#define MUL_TOOM32_TO_TOOM43_THRESHOLD 85
#define MUL_TOOM32_TO_TOOM53_THRESHOLD 99
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD 93
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD 101
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 97
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD 102
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD 144
#define SQR_BASECASE_THRESHOLD 0 /* always (native) */
#define SQR_TOOM2_THRESHOLD 50
-#define SQR_TOOM3_THRESHOLD 87
-#define SQR_TOOM4_THRESHOLD 208
-#define SQR_TOOM6_THRESHOLD 306
+#define SQR_TOOM3_THRESHOLD 83
+#define SQR_TOOM4_THRESHOLD 216
+#define SQR_TOOM6_THRESHOLD 318
#define SQR_TOOM8_THRESHOLD 430
-#define MULMOD_BNM1_THRESHOLD 18
+#define MULMID_TOOM42_THRESHOLD 56
+
+#define MULMOD_BNM1_THRESHOLD 17
#define SQRMOD_BNM1_THRESHOLD 19
#define MUL_FFT_MODF_THRESHOLD 888 /* k = 6 */
{ 1151,11}, { 2303,12}, { 1215,11}, { 2431,13}, \
{ 8192,14}, { 16384,15}, { 32768,16} }
#define MUL_FFT_TABLE3_SIZE 167
-#define MUL_FFT_THRESHOLD 7808
+#define MUL_FFT_THRESHOLD 7552
-#define SQR_FFT_MODF_THRESHOLD 786 /* k = 6 */
+#define SQR_FFT_MODF_THRESHOLD 666 /* k = 6 */
#define SQR_FFT_TABLE3 \
{ { 786, 6}, { 25, 7}, { 13, 6}, { 27, 7}, \
{ 15, 6}, { 31, 7}, { 17, 6}, { 35, 7}, \
{ 1215,11}, { 2431,13}, { 8192,14}, { 16384,15}, \
{ 32768,16} }
#define SQR_FFT_TABLE3_SIZE 177
-#define SQR_FFT_THRESHOLD 7552
+#define SQR_FFT_THRESHOLD 7040
-#define MULLO_BASECASE_THRESHOLD 10
-#define MULLO_DC_THRESHOLD 50
+#define MULLO_BASECASE_THRESHOLD 11
+#define MULLO_DC_THRESHOLD 35
#define MULLO_MUL_N_THRESHOLD 13463
-#define DC_DIV_QR_THRESHOLD 60
-#define DC_DIVAPPR_Q_THRESHOLD 333
-#define DC_BDIV_QR_THRESHOLD 82
-#define DC_BDIV_Q_THRESHOLD 268
+#define DC_DIV_QR_THRESHOLD 41
+#define DC_DIVAPPR_Q_THRESHOLD 214
+#define DC_BDIV_QR_THRESHOLD 41
+#define DC_BDIV_Q_THRESHOLD 148
-#define INV_MULMOD_BNM1_THRESHOLD 62
-#define INV_NEWTON_THRESHOLD 284
-#define INV_APPR_THRESHOLD 290
+#define INV_MULMOD_BNM1_THRESHOLD 77
+#define INV_NEWTON_THRESHOLD 204
+#define INV_APPR_THRESHOLD 204
-#define BINV_NEWTON_THRESHOLD 264
-#define REDC_1_TO_REDC_N_THRESHOLD 86
+#define BINV_NEWTON_THRESHOLD 230
+#define REDC_1_TO_REDC_N_THRESHOLD 59
-#define MU_DIV_QR_THRESHOLD 1858
-#define MU_DIVAPPR_Q_THRESHOLD 1718
-#define MUPI_DIV_QR_THRESHOLD 114
-#define MU_BDIV_QR_THRESHOLD 1387
+#define MU_DIV_QR_THRESHOLD 1752
+#define MU_DIVAPPR_Q_THRESHOLD 1528
+#define MUPI_DIV_QR_THRESHOLD 82
+#define MU_BDIV_QR_THRESHOLD 1360
#define MU_BDIV_Q_THRESHOLD 1470
-#define MATRIX22_STRASSEN_THRESHOLD 15
-#define HGCD_THRESHOLD 154
-#define GCD_DC_THRESHOLD 599
-#define GCDEXT_DC_THRESHOLD 443
-#define JACOBI_BASE_METHOD 1
+#define POWM_SEC_TABLE 2,17,176,905,2246
+
+#define MATRIX22_STRASSEN_THRESHOLD 16
+#define HGCD_THRESHOLD 125
+#define HGCD_APPR_THRESHOLD 143
+#define HGCD_REDUCE_THRESHOLD 4633
+#define GCD_DC_THRESHOLD 460
+#define GCDEXT_DC_THRESHOLD 330
+#define JACOBI_BASE_METHOD 4
+
+#define GET_STR_DC_THRESHOLD 15
+#define GET_STR_PRECOMPUTE_THRESHOLD 35
+#define SET_STR_DC_THRESHOLD 272
+#define SET_STR_PRECOMPUTE_THRESHOLD 1183
-#define GET_STR_DC_THRESHOLD 17
-#define GET_STR_PRECOMPUTE_THRESHOLD 34
-#define SET_STR_DC_THRESHOLD 542
-#define SET_STR_PRECOMPUTE_THRESHOLD 1615
+#define FAC_DSC_THRESHOLD 336
+#define FAC_ODD_THRESHOLD 29
--- /dev/null
+dnl x86 mpn_invert_limb
+
+dnl Contributed to the GNU project by Niels Möller
+
+dnl Copyright 2009, 2011 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles (approx) div
+C P5 ?
+C P6 model 0-8,10-12 ?
+C P6 model 9 (Banias) ?
+C P6 model 13 (Dothan) ?
+C P4 model 0 (Willamette) ?
+C P4 model 1 (?) ?
+C P4 model 2 (Northwood) ?
+C P4 model 3 (Prescott) ?
+C P4 model 4 (Nocona) ?
+C AMD K6 ?
+C AMD K7 41 53
+C AMD K8 ?
+
+C TODO
+C * These c/l numbers are for a non-PIC build. Consider falling back to using
+C the 'div' instruction for PIC builds.
+C * Perhaps use this file--or at least the algorithm--for more machines than k7.
+
+C Register usage:
+C Input D in %edi
+C Current approximation is in %eax and/or %ecx
+C %ebx and %edx are temporaries
+C %esi and %ebp are unused
+
+defframe(PARAM_DIVISOR,4)
+
+ASM_START()
+
+C Make approx_tab global to work around Apple relocation bug.
+ifdef(`DARWIN',`
+ deflit(`approx_tab', MPN(invert_limb_tab))
+ GLOBL approx_tab')
+
+ TEXT
+ ALIGN(16)
+PROLOGUE(mpn_invert_limb)
+deflit(`FRAME', 0)
+ C Takes the divisor limb as the single stack argument and returns v3 in
+ C %eax.  A table lookup gives the initial approximation v0, which is then
+ C refined in three steps v0 -> v1 -> v2 -> v3.
+ C NOTE(review): the table is indexed by d >> 22 with a bias of -1024 bytes,
+ C i.e. -512 two-byte entries, so the lookup is only in range when the high
+ C bit of the divisor is set -- confirm callers always pass a normalized d.
+ mov PARAM_DIVISOR, %eax
+ C Avoid push/pop on k7.
+ sub $8, %esp FRAME_subl_esp(8)
+ mov %ebx, (%esp)
+ mov %edi, 4(%esp)
+
+ mov %eax, %edi C keep d in %edi for the whole function
+ shr $22, %eax C top 10 bits of d select the table entry
+ifdef(`PIC',`
+ LEA( approx_tab, %ebx)
+ movzwl -1024(%ebx, %eax, 2), %eax C %eax = v0
+',`
+ movzwl -1024+approx_tab(%eax, %eax), %eax C %eax = v0
+')
+
+ C v1 = (v0 << 4) - ((v0*v0*d_21) >> 32) - 1
+ C where d_21 = (d >> 11) + 1
+ mov %eax, %ecx
+ imul %eax, %eax
+ mov %edi, %ebx
+ shr $11, %ebx
+ inc %ebx
+ mul %ebx
+ mov %edi, %ebx C Prepare
+ shr %ebx C CF = low bit of d
+ sbb %eax, %eax C %eax = 0 or -1 according to the low bit of d
+ sub %eax, %ebx C %ebx = d_31, %eax = mask
+ shl $4, %ecx
+ dec %ecx
+ sub %edx, %ecx C %ecx = v1
+
+ C v2 = (v1 << 15) + ((v1 *(2^48 - v1 * d_31 + (v1 >> 1) & mask)) >> 33)
+ imul %ecx, %ebx
+ and %ecx, %eax
+ shr %eax
+ sub %ebx, %eax
+ mul %ecx
+ mov %edi, %eax C Prepare for next mul
+ shl $15, %ecx
+ shr %edx
+ add %edx, %ecx C %ecx = v2
+
+ C v3 = v2 - (((v2 + 1) * d) >> 32) - d, computed modulo 2^32.  The mov
+ C between add and adc leaves the carry flag untouched.
+ mul %ecx
+ add %edi, %eax
+ mov %ecx, %eax
+ adc %edi, %edx
+ sub %edx, %eax C %eax = v3
+
+ C Restore the callee-saved registers and release the two stack slots.
+ mov (%esp), %ebx
+ mov 4(%esp), %edi
+ add $8, %esp
+
+ ret
+
+EPILOGUE()
+
+DEF_OBJECT(approx_tab,2)
+ .value 0x7fe1,0x7fa1,0x7f61,0x7f22,0x7ee3,0x7ea4,0x7e65,0x7e27
+ .value 0x7de9,0x7dab,0x7d6d,0x7d30,0x7cf3,0x7cb6,0x7c79,0x7c3d
+ .value 0x7c00,0x7bc4,0x7b89,0x7b4d,0x7b12,0x7ad7,0x7a9c,0x7a61
+ .value 0x7a27,0x79ec,0x79b2,0x7979,0x793f,0x7906,0x78cc,0x7894
+ .value 0x785b,0x7822,0x77ea,0x77b2,0x777a,0x7742,0x770b,0x76d3
+ .value 0x769c,0x7665,0x762f,0x75f8,0x75c2,0x758c,0x7556,0x7520
+ .value 0x74ea,0x74b5,0x7480,0x744b,0x7416,0x73e2,0x73ad,0x7379
+ .value 0x7345,0x7311,0x72dd,0x72aa,0x7277,0x7243,0x7210,0x71de
+ .value 0x71ab,0x7179,0x7146,0x7114,0x70e2,0x70b1,0x707f,0x704e
+ .value 0x701c,0x6feb,0x6fba,0x6f8a,0x6f59,0x6f29,0x6ef9,0x6ec8
+ .value 0x6e99,0x6e69,0x6e39,0x6e0a,0x6ddb,0x6dab,0x6d7d,0x6d4e
+ .value 0x6d1f,0x6cf1,0x6cc2,0x6c94,0x6c66,0x6c38,0x6c0a,0x6bdd
+ .value 0x6bb0,0x6b82,0x6b55,0x6b28,0x6afb,0x6acf,0x6aa2,0x6a76
+ .value 0x6a49,0x6a1d,0x69f1,0x69c6,0x699a,0x696e,0x6943,0x6918
+ .value 0x68ed,0x68c2,0x6897,0x686c,0x6842,0x6817,0x67ed,0x67c3
+ .value 0x6799,0x676f,0x6745,0x671b,0x66f2,0x66c8,0x669f,0x6676
+ .value 0x664d,0x6624,0x65fc,0x65d3,0x65aa,0x6582,0x655a,0x6532
+ .value 0x650a,0x64e2,0x64ba,0x6493,0x646b,0x6444,0x641c,0x63f5
+ .value 0x63ce,0x63a7,0x6381,0x635a,0x6333,0x630d,0x62e7,0x62c1
+ .value 0x629a,0x6275,0x624f,0x6229,0x6203,0x61de,0x61b8,0x6193
+ .value 0x616e,0x6149,0x6124,0x60ff,0x60da,0x60b6,0x6091,0x606d
+ .value 0x6049,0x6024,0x6000,0x5fdc,0x5fb8,0x5f95,0x5f71,0x5f4d
+ .value 0x5f2a,0x5f07,0x5ee3,0x5ec0,0x5e9d,0x5e7a,0x5e57,0x5e35
+ .value 0x5e12,0x5def,0x5dcd,0x5dab,0x5d88,0x5d66,0x5d44,0x5d22
+ .value 0x5d00,0x5cde,0x5cbd,0x5c9b,0x5c7a,0x5c58,0x5c37,0x5c16
+ .value 0x5bf5,0x5bd4,0x5bb3,0x5b92,0x5b71,0x5b51,0x5b30,0x5b10
+ .value 0x5aef,0x5acf,0x5aaf,0x5a8f,0x5a6f,0x5a4f,0x5a2f,0x5a0f
+ .value 0x59ef,0x59d0,0x59b0,0x5991,0x5972,0x5952,0x5933,0x5914
+ .value 0x58f5,0x58d6,0x58b7,0x5899,0x587a,0x585b,0x583d,0x581f
+ .value 0x5800,0x57e2,0x57c4,0x57a6,0x5788,0x576a,0x574c,0x572e
+ .value 0x5711,0x56f3,0x56d5,0x56b8,0x569b,0x567d,0x5660,0x5643
+ .value 0x5626,0x5609,0x55ec,0x55cf,0x55b2,0x5596,0x5579,0x555d
+ .value 0x5540,0x5524,0x5507,0x54eb,0x54cf,0x54b3,0x5497,0x547b
+ .value 0x545f,0x5443,0x5428,0x540c,0x53f0,0x53d5,0x53b9,0x539e
+ .value 0x5383,0x5368,0x534c,0x5331,0x5316,0x52fb,0x52e0,0x52c6
+ .value 0x52ab,0x5290,0x5276,0x525b,0x5240,0x5226,0x520c,0x51f1
+ .value 0x51d7,0x51bd,0x51a3,0x5189,0x516f,0x5155,0x513b,0x5121
+ .value 0x5108,0x50ee,0x50d5,0x50bb,0x50a2,0x5088,0x506f,0x5056
+ .value 0x503c,0x5023,0x500a,0x4ff1,0x4fd8,0x4fbf,0x4fa6,0x4f8e
+ .value 0x4f75,0x4f5c,0x4f44,0x4f2b,0x4f13,0x4efa,0x4ee2,0x4eca
+ .value 0x4eb1,0x4e99,0x4e81,0x4e69,0x4e51,0x4e39,0x4e21,0x4e09
+ .value 0x4df1,0x4dda,0x4dc2,0x4daa,0x4d93,0x4d7b,0x4d64,0x4d4d
+ .value 0x4d35,0x4d1e,0x4d07,0x4cf0,0x4cd8,0x4cc1,0x4caa,0x4c93
+ .value 0x4c7d,0x4c66,0x4c4f,0x4c38,0x4c21,0x4c0b,0x4bf4,0x4bde
+ .value 0x4bc7,0x4bb1,0x4b9a,0x4b84,0x4b6e,0x4b58,0x4b41,0x4b2b
+ .value 0x4b15,0x4aff,0x4ae9,0x4ad3,0x4abd,0x4aa8,0x4a92,0x4a7c
+ .value 0x4a66,0x4a51,0x4a3b,0x4a26,0x4a10,0x49fb,0x49e5,0x49d0
+ .value 0x49bb,0x49a6,0x4990,0x497b,0x4966,0x4951,0x493c,0x4927
+ .value 0x4912,0x48fe,0x48e9,0x48d4,0x48bf,0x48ab,0x4896,0x4881
+ .value 0x486d,0x4858,0x4844,0x482f,0x481b,0x4807,0x47f3,0x47de
+ .value 0x47ca,0x47b6,0x47a2,0x478e,0x477a,0x4766,0x4752,0x473e
+ .value 0x472a,0x4717,0x4703,0x46ef,0x46db,0x46c8,0x46b4,0x46a1
+ .value 0x468d,0x467a,0x4666,0x4653,0x4640,0x462c,0x4619,0x4606
+ .value 0x45f3,0x45e0,0x45cd,0x45ba,0x45a7,0x4594,0x4581,0x456e
+ .value 0x455b,0x4548,0x4536,0x4523,0x4510,0x44fe,0x44eb,0x44d8
+ .value 0x44c6,0x44b3,0x44a1,0x448f,0x447c,0x446a,0x4458,0x4445
+ .value 0x4433,0x4421,0x440f,0x43fd,0x43eb,0x43d9,0x43c7,0x43b5
+ .value 0x43a3,0x4391,0x437f,0x436d,0x435c,0x434a,0x4338,0x4327
+ .value 0x4315,0x4303,0x42f2,0x42e0,0x42cf,0x42bd,0x42ac,0x429b
+ .value 0x4289,0x4278,0x4267,0x4256,0x4244,0x4233,0x4222,0x4211
+ .value 0x4200,0x41ef,0x41de,0x41cd,0x41bc,0x41ab,0x419a,0x418a
+ .value 0x4179,0x4168,0x4157,0x4147,0x4136,0x4125,0x4115,0x4104
+ .value 0x40f4,0x40e3,0x40d3,0x40c2,0x40b2,0x40a2,0x4091,0x4081
+ .value 0x4071,0x4061,0x4050,0x4040,0x4030,0x4020,0x4010,0x4000
+END_OBJECT(approx_tab)
C rnd() means rounding down to a multiple of d.
C
C m*n2 + b*n2 <= m*(d-1) + b*(d-1)
-C = m*d + b*d - m - b
-C = floor((b(b-d)-1)/d)*d + b*d - m - b
-C = rnd(b(b-d)-1) + b*d - m - b
-C = rnd(b(b-d)-1 + b*d) - m - b
-C = rnd(b*b-1) - m - b
-C <= (b-2)*b
+C = m*d + b*d - m - b
+C = floor((b(b-d)-1)/d)*d + b*d - m - b
+C = rnd(b(b-d)-1) + b*d - m - b
+C = rnd(b(b-d)-1 + b*d) - m - b
+C = rnd(b*b-1) - m - b
+C <= (b-2)*b
C
C Unchanged from the general case is that the final quotient limb q can be
C either q1 or q1+1, and the q1+1 case occurs often. This can be seen from
C popcount hamdist
C P3 generic 6.5 7
-C P3 model 9 (Banias) ? ?
+C P3 model 9 (Banias) 5.7 6.1
C P3 model 13 (Dothan) 5.75 6
C K7 5 6
--- /dev/null
+dnl x86-32 mpn_mod_1_1p, requiring cmov.
+
+dnl Contributed to the GNU project by Niels Möller and Torbjorn Granlund.
+dnl
+dnl Copyright 2010, 2011 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C P5 ?
+C P6 model 0-8,10-12 ?
+C P6 model 9 (Banias) ?
+C P6 model 13 (Dothan) ?
+C P4 model 0 (Willamette) ?
+C P4 model 1 (?) ?
+C P4 model 2 (Northwood) ?
+C P4 model 3 (Prescott) ?
+C P4 model 4 (Nocona) ?
+C AMD K6 ?
+C AMD K7 7
+C AMD K8 ?
+
+define(`B2mb', `%ebx')
+define(`r0', `%esi')
+define(`r2', `%ebp')
+define(`t0', `%edi')
+define(`ap', `%ecx') C Also shift count
+
+C Stack frame
+C pre 36(%esp)
+C b 32(%esp)
+C n 28(%esp)
+C ap 24(%esp)
+C return 20(%esp)
+C %ebp 16(%esp)
+C %edi 12(%esp)
+C %esi 8(%esp)
+C %ebx 4(%esp)
+C B2mod (%esp)
+
+define(`B2modb', `(%esp)')
+define(`n', `28(%esp)')
+define(`b', `32(%esp)')
+define(`pre', `36(%esp)')
+
+C mp_limb_t
+C mpn_mod_1_1p (mp_srcptr ap, mp_size_t n, mp_limb_t b, mp_limb_t pre[4])
+C
+C The pre array contains bi, cnt, B1modb, B2modb
+C Note: This implementation needs B1modb only when cnt > 0
+
+ASM_START()
+ TEXT
+ ALIGN(8)
+PROLOGUE(mpn_mod_1_1p)
+ C Returns in %eax the remainder of the n-limb number at ap modulo b, using
+ C the precomputed values in pre[] = bi, cnt, B1modb, B2modb.
+ push %ebp
+ push %edi
+ push %esi
+ push %ebx
+ mov 32(%esp), %ebp C pre[]
+
+ mov 12(%ebp), %eax C B2modb
+ push %eax C Put it on stack
+
+ mov n, %edx
+ mov 24(%esp), ap
+
+ lea (ap, %edx, 4), ap C ap now points just past the high limb
+ mov -4(ap), %eax C high limb
+ cmp $3, %edx
+ jnc L(first) C n >= 3
+ C n < 3: load the second limb from the top and reduce the pair directly.
+ C NOTE(review): with n = 1 this load would read below the operand --
+ C presumably callers guarantee n >= 2; confirm.
+ mov -8(ap), r0
+ jmp L(reduce_two)
+
+L(first):
+ C First iteration, no r2
+ mull B2modb
+ mov -12(ap), r0
+ add %eax, r0
+ mov -8(ap), %eax
+ adc %edx, %eax
+ sbb r2, r2 C r2 = 0 or -1 according to the carry out
+ sub $3, n
+ lea -16(ap), ap C lea leaves the flags from the sub intact
+ jz L(reduce_three) C n was exactly 3
+
+ mov B2modb, B2mb
+ sub b, B2mb C B2mb = B2modb - b
+ lea (B2mb, r0), t0
+ jmp L(mid)
+
+ ALIGN(16)
+L(top): C Loopmixed to 7 c/l on k7
+ add %eax, r0
+ lea (B2mb, r0), t0
+ mov r2, %eax
+ adc %edx, %eax
+ sbb r2, r2 C r2 = 0 or -1 according to the carry out
+L(mid): mull B2modb
+ and B2modb, r2 C r2 = B2modb if the previous step carried
+ add r0, r2
+ decl n C decl does not touch CF
+ mov (ap), r0
+ cmovc( t0, r2) C uses the carry from the add, not from decl
+ lea -4(ap), ap
+ jnz L(top) C tests the zero flag set by decl
+
+ add %eax, r0
+ mov r2, %eax
+ adc %edx, %eax
+ sbb r2, r2
+
+L(reduce_three):
+ C Eliminate r2
+ and b, r2 C r2 is a mask here, so this yields 0 or b
+ sub r2, %eax
+
+L(reduce_two):
+ mov pre, %ebp
+ movb 4(%ebp), %cl C cnt
+ test %cl, %cl
+ jz L(normalized)
+
+ C Unnormalized, use B1modb to reduce to size < B b
+ mull 8(%ebp)
+ xor t0, t0
+ add %eax, r0
+ adc %edx, t0
+ mov t0, %eax
+
+ C Left-shift to normalize
+ shld %cl, r0, %eax C Always use shld?
+
+ shl %cl, r0
+ jmp L(udiv)
+
+L(normalized):
+ C Conditionally subtract b from the high limb so that it is below b.
+ mov %eax, t0
+ sub b, t0
+ cmovnc( t0, %eax)
+
+L(udiv):
+ C Divide the two-limb value %eax:r0 by b using the precomputed bi, leaving
+ C the remainder in %eax.
+ lea 1(%eax), t0
+ mull (%ebp) C multiply by bi = pre[0]
+ mov b, %ebx C Needed in register for lea
+ add r0, %eax
+ adc t0, %edx
+ imul %ebx, %edx
+ sub %edx, r0
+ cmp r0, %eax
+ lea (%ebx, r0), %eax
+ cmovnc( r0, %eax)
+ cmp %ebx, %eax
+ jnc L(fix)
+L(ok): shr %cl, %eax C undo the normalization shift, cl is still cnt
+
+ add $4, %esp C drop the B2modb stack slot
+ pop %ebx
+ pop %esi
+ pop %edi
+ pop %ebp
+
+ ret
+L(fix): sub %ebx, %eax C remainder was >= b, subtract b once
+ jmp L(ok)
+EPILOGUE()
+
+PROLOGUE(mpn_mod_1_1p_cps)
+ C Precomputation for mpn_mod_1_1p.  The first stack argument is a pointer to
+ C a four-limb output array, the second is the divisor b.  Stores bi, cnt,
+ C B1modb and B2modb at byte offsets 0, 4, 8 and 12 of the array.
+ C NOTE(review): bsr leaves its destination undefined for a zero source, so
+ C b is presumably required to be nonzero -- confirm against callers.
+ C NOTE(review): %ebx is saved and restored below but never written here.
+ push %ebp
+ mov 12(%esp), %ebp C b, second argument
+ push %esi
+ bsr %ebp, %ecx C index of the highest set bit of b
+ push %ebx
+ xor $31, %ecx C cnt = 31 - index = number of leading zeros
+ mov 16(%esp), %esi C output array, first argument
+ sal %cl, %ebp C normalized divisor b << cnt
+ mov %ebp, %edx
+ not %edx
+ mov $-1, %eax C %edx:%eax = B^2 - 1 - B * (b << cnt), B = 2^32
+ div %ebp C On K7, invert_limb would be a few cycles faster.
+ mov %eax, (%esi) C store bi
+ mov %ecx, 4(%esi) C store cnt
+ neg %ebp C %ebp = -(b << cnt)
+ mov $1, %edx
+ shld %cl, %eax, %edx C %edx = (1 << cnt) | (bi >> (32 - cnt))
+ imul %ebp, %edx
+ shr %cl, %edx
+ imul %ebp, %eax
+ mov %edx, 8(%esi) C store B1modb
+ mov %eax, 12(%esi) C store B2modb
+ pop %ebx
+ pop %esi
+ pop %ebp
+ ret
+EPILOGUE()
dnl x86-32 mpn_mod_1s_4p, requiring cmov.
dnl Contributed to the GNU project by Torbjorn Granlund.
-
-dnl Copyright 2009 Free Software Foundation, Inc.
-
+dnl
+dnl Copyright 2009, 2010 Free Software Foundation, Inc.
+dnl
dnl This file is part of the GNU MP Library.
-
+dnl
dnl The GNU MP Library is free software; you can redistribute it and/or modify
dnl it under the terms of the GNU Lesser General Public License as published
dnl by the Free Software Foundation; either version 3 of the License, or (at
dnl your option) any later version.
-
+dnl
dnl The GNU MP Library is distributed in the hope that it will be useful, but
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
dnl License for more details.
-
+dnl
dnl You should have received a copy of the GNU Lesser General Public License
dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
-C cycles/limb
-C P5:
-C P6 model 0-8,10-12)
-C P6 model 9 (Banias)
-C P6 model 13 (Dothan) 6.0
-C P4 model 0 (Willamette)
-C P4 model 1 (?)
+C cycles/limb
+C P5 ?
+C P6 model 0-8,10-12 ?
+C P6 model 9 (Banias) ?
+C P6 model 13 (Dothan) 6
+C P4 model 0 (Willamette) ?
+C P4 model 1 (?) ?
C P4 model 2 (Northwood) 15.5
-C P4 model 3 (Prescott)
-C P4 model 4 (Nocona)
-C K6:
-C K7: 4.75
-C K8:
-
-
-C Ths inner loop was manually written, it ought to be loopmixed.
-C Presumably, we could get to 4 c/l for K7.
-
-C The cps function was compiler generated. It can clearly be optimized.
-
+C P4 model 3 (Prescott) ?
+C P4 model 4 (Nocona) ?
+C AMD K6 ?
+C AMD K7 4.75
+C AMD K8 ?
ASM_START()
TEXT
-
ALIGN(16)
PROLOGUE(mpn_mod_1s_4p)
push %ebp
push %esi
push %ebx
sub $28, %esp
- mov 60(%esp), %edi C cps
+ mov 60(%esp), %edi C cps[]
mov 8(%edi), %eax
mov 12(%edi), %edx
mov 16(%edi), %ecx
lea -4(%esi), %esi
jmp L(m1)
-L(b2): mov 8(%esi), %eax
- mull 4(%esp)
+L(b2): mov 8(%esi), %edi
mov 4(%esi), %ebp
lea -8(%esi), %esi
- jmp L(m0)
+ jmp L(m1)
ALIGN(16)
L(top): mov (%esi), %eax
mov %ebx, %ecx
mov %eax, %ebx
mov %ebp, %eax
+ mov 56(%esp), %ebp
sal %cl, %eax
add %eax, %ebx
adc %esi, %edx
- imul 56(%esp), %edx
- mov 56(%esp), %esi
+ imul %ebp, %edx
sub %edx, %eax
- lea (%eax,%esi), %edx
+ lea (%eax,%ebp), %edx
cmp %eax, %ebx
- cmovb( %edx, %eax)
+ cmovc( %edx, %eax)
mov %eax, %edx
- sub %esi, %eax
- cmovb( %edx, %eax)
+ sub %ebp, %eax
+ cmovc( %edx, %eax)
add $28, %esp
pop %ebx
pop %esi
ALIGN(16)
PROLOGUE(mpn_mod_1s_4p_cps)
- sub $56, %esp
- mov %esi, 44(%esp)
- mov 64(%esp), %esi
- mov %edi, 48(%esp)
- mov %ebx, 40(%esp)
- mov $-1, %ebx
- mov %ebp, 52(%esp)
- bsr %esi, %eax
- xor $31, %eax
- mov %eax, %ecx
- mov %eax, 24(%esp)
- mov %ebx, %eax
- sal %cl, %esi
- mov %esi, %ecx
- mov %esi, %edi
- mov %esi, %ebp
- neg %ecx
- not %edi
- mov %ecx, 20(%esp)
- mov $32, %ecx
- sub 24(%esp), %ecx
- mov %edi, %edx
- mov %edi, 16(%esp)
- mov 20(%esp), %edi
- div %esi
- mov %eax, %ebx
- shr %cl, %eax
- movzbl 24(%esp), %ecx
- mov %eax, 12(%esp)
- mov $1, %eax
- sal %cl, %eax
- or %eax, 12(%esp)
- imul 12(%esp), %edi
+C CAUTION: This is the same code as in pentium4/sse2/mod_1_4.asm
+ push %ebp
+ push %edi
+ push %esi
+ push %ebx
+ mov 20(%esp), %ebp C FIXME: avoid bp for 0-idx
+ mov 24(%esp), %ebx
+ bsr %ebx, %ecx
+ xor $31, %ecx
+ sal %cl, %ebx C b << cnt
+ mov %ebx, %edx
+ not %edx
+ mov $-1, %eax
+ div %ebx
+ xor %edi, %edi
+ sub %ebx, %edi
+ mov $1, %esi
+ mov %eax, (%ebp) C store bi
+ mov %ecx, 4(%ebp) C store cnt
+ shld %cl, %eax, %esi
+ imul %edi, %esi
+ mov %eax, %edi
+ mul %esi
+
+ add %esi, %edx
+ shr %cl, %esi
+ mov %esi, 8(%ebp) C store B1modb
+
+ not %edx
+ imul %ebx, %edx
+ lea (%edx,%ebx), %esi
+ cmp %edx, %eax
+ cmovnc( %edx, %esi)
mov %edi, %eax
- mov %edi, 20(%esp)
- mul %ebx
- mov %eax, %ecx
- lea 1(%edx,%edi), %eax
- neg %eax
- imul %eax, %ebp
- lea (%ebp,%esi), %eax
- cmp %ebp, %ecx
- cmovb( %eax, %ebp)
- mov %ebp, %eax
- mul %ebx
- lea 1(%ebp,%edx), %edi
- mov %eax, %ecx
- neg %edi
- mov %edi, 8(%esp)
- imul %esi, %edi
+ mul %esi
+
+ add %esi, %edx
+ shr %cl, %esi
+ mov %esi, 12(%ebp) C store B2modb
+
+ not %edx
+ imul %ebx, %edx
+ lea (%edx,%ebx), %esi
+ cmp %edx, %eax
+ cmovnc( %edx, %esi)
mov %edi, %eax
- add %esi, %eax
- cmp %edi, %ecx
- cmovae( %edi, %eax)
- mov %eax, 32(%esp)
- mov 32(%esp), %edi
- mul %ebx
- mov %eax, 36(%esp)
- lea 1(%edi,%edx), %eax
- negl %eax
- imul %esi, %eax
- mov %eax, %ecx
- add %esi, %ecx
- cmp %eax, 36(%esp)
- cmovae( %eax, %ecx)
- mov %ecx, (%esp)
- mov %ecx, %eax
- mul %ebx
- mov %eax, %edi
- mov (%esp), %eax
- lea 1(%eax,%edx), %ecx
- mov 60(%esp), %edx
- neg %ecx
- imul %esi, %ecx
- mov %ebx, (%edx)
- add %ecx, %esi
- cmp %ecx, %edi
- cmovae( %ecx, %esi)
- mov 24(%esp), %ecx
- shrl %cl, 20(%esp)
- mov 20(%esp), %edi
- mov %esi, 4(%esp)
- mov %ecx, 4(%edx)
- movzbl 24(%esp), %ecx
- mov %edi, 8(%edx)
- shr %cl, %ebp
- shr %cl, %eax
- mov %ebp, 12(%edx)
- shrl %cl, 32(%esp)
- mov 32(%esp), %edi
- shrl %cl, 4(%esp)
- mov %eax, 20(%edx)
- mov %edi, 16(%edx)
- mov 4(%esp), %edi
- mov %edi, 24(%edx)
- mov 40(%esp), %ebx
- mov 44(%esp), %esi
- mov 48(%esp), %edi
- mov 52(%esp), %ebp
- add $56, %esp
+ mul %esi
+
+ add %esi, %edx
+ shr %cl, %esi
+ mov %esi, 16(%ebp) C store B3modb
+
+ not %edx
+ imul %ebx, %edx
+ lea (%edx,%ebx), %esi
+ cmp %edx, %eax
+ cmovnc( %edx, %esi)
+ mov %edi, %eax
+ mul %esi
+
+ add %esi, %edx
+ shr %cl, %esi
+ mov %esi, 20(%ebp) C store B4modb
+
+ not %edx
+ imul %ebx, %edx
+ add %edx, %ebx
+ cmp %edx, %eax
+ cmovnc( %edx, %ebx)
+
+ shr %cl, %ebx
+ mov %ebx, 24(%ebp) C store B5modb
+
+ pop %ebx
+ pop %esi
+ pop %edi
+ pop %ebp
ret
EPILOGUE()
include(`../config.m4')
-C cycles/limb
-C P5:
+C cycles/limb
+C P5
C P6 model 0-8,10-12)
C P6 model 9 (Banias)
C P6 model 13 (Dothan)
C P4 model 2 (Northwood)
C P4 model 3 (Prescott)
C P4 model 4 (Nocona)
-C K6:
-C K7: 3.25
-C K8:
+C AMD K6
+C AMD K7 3.25
+C AMD K8
C TODO
C * Improve feed-in and wind-down code. We beat the old code for all n != 1,
--- /dev/null
+dnl AMD K7 mpn_sublsh1_n_ip1 -- rp[] = rp[] - (up[] << 1)
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+
+dnl Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C This is an attempt at a sublsh1_n for x86-32, not relying on sse2 insns. The
+C inner loop is 2*4-way unrolled, which is the best we can do with the available
+C registers. It seems tricky to use the same structure for rsblsh1_n, since we
+C cannot feed carry between operations there.
+
+C cycles/limb
+C P5
+C P6 model 0-8,10-12
+C P6 model 9 (Banias)
+C P6 model 13 (Dothan)
+C P4 model 0 (Willamette)
+C P4 model 1 (?)
+C P4 model 2 (Northwood)
+C P4 model 3 (Prescott)
+C P4 model 4 (Nocona)
+C Intel Atom 6.75
+C AMD K6
+C AMD K7
+C AMD K8
+
+C This is a basic sublsh1_n for k7, atom, and perhaps some other x86-32
+C processors. It uses 2*4-way unrolling, for good reasons.
+C
+C Breaking the carry recurrence might be a good idea. We would then need
+C separate registers for the shift carry and add/subtract carry, which in turn
+C would force us to 2*2-way unrolling.
+
+defframe(PARAM_SIZE, 12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+
+dnl re-use parameter space
+define(VAR_COUNT,`PARAM_SIZE')
+define(SAVE_EBX,`PARAM_SRC')
+define(SAVE_EBP,`PARAM_DST')
+
+ASM_START()
+ TEXT
+ ALIGN(8)
+PROLOGUE(mpn_sublsh1_n_ip1)
+deflit(`FRAME',0)
+
+define(`rp', `%edi')
+define(`up', `%esi')
+
+ C In-place rp[] = rp[] - (up[] << 1) over PARAM_SIZE limbs.  The value
+ C returned in %eax is the sum of the bit shifted out of the top limb and
+ C the final subtract borrow.
+ C
+ C Two carry bits are live throughout: the bit shifted out by the adc chain
+ C and the borrow from the sbb chain.  One of them sits in the carry flag
+ C while the other is parked in %edx.  Both are simply added to the
+ C subtrahend of the next limb, so it does not matter which of the two is
+ C fed to the next adc and which to the next sbb.
+ mov PARAM_SIZE, %eax C size
+ push up FRAME_pushl()
+ push rp FRAME_pushl()
+ xor %edx, %edx C no saved carry bits yet
+ mov PARAM_SRC, up
+ mov PARAM_DST, rp
+ mov %ebx, SAVE_EBX C parameter slots are re-used as save area
+ mov %eax, %ebx
+ shr $3, %eax
+
+ not %eax C count = -(size\8)-1
+ and $7, %ebx C size % 8, also clears the carry flag
+ jz L(exact)
+
+ C One limb per iteration for the size % 8 leftover limbs.
+L(oop):
+ifdef(`CPU_P6',`
+ shr %edx ') C restore 2nd saved carry bit
+ mov (up), %ecx
+ adc %ecx, %ecx C shift left by one, carry flag in and out
+ rcr %edx C restore 1st saved carry bit
+ lea 4(up), up
+ sbb %ecx, (rp)
+ lea 4(rp), rp
+ adc %edx, %edx C save a carry bit in edx
+ifdef(`CPU_P6',`
+ adc %edx, %edx ') C save another carry bit in edx
+ dec %ebx C dec does not touch the carry flag
+ jnz L(oop)
+L(exact):
+ inc %eax C zero when there are no blocks of 8 limbs
+ jz L(end)
+ mov %eax, VAR_COUNT
+ mov %ebp, SAVE_EBP
+
+ C Eight limbs per iteration, handled as two groups of four.
+ ALIGN(16)
+L(top):
+ifdef(`CPU_P6',`
+ shr %edx ') C restore 2nd saved carry bit
+ mov (up), %eax
+ adc %eax, %eax
+ mov 4(up), %ebx
+ adc %ebx, %ebx
+ mov 8(up), %ecx
+ adc %ecx, %ecx
+ mov 12(up), %ebp
+ adc %ebp, %ebp
+
+ rcr %edx C restore 1st saved carry bit
+
+ sbb %eax, (rp)
+ sbb %ebx, 4(rp)
+ sbb %ecx, 8(rp)
+ sbb %ebp, 12(rp)
+
+ C The carry out of the sbb chain above is fed straight into the adc chain
+ C below, the two carry bits swap roles here.
+ mov 16(up), %eax
+ adc %eax, %eax
+ mov 20(up), %ebx
+ adc %ebx, %ebx
+ mov 24(up), %ecx
+ adc %ecx, %ecx
+ mov 28(up), %ebp
+ adc %ebp, %ebp
+
+ lea 32(up), up
+ adc %edx, %edx C save a carry bit in edx
+
+ sbb %eax, 16(rp)
+ sbb %ebx, 20(rp)
+ sbb %ecx, 24(rp)
+ sbb %ebp, 28(rp)
+
+ifdef(`CPU_P6',`
+ adc %edx, %edx ') C save another carry bit in edx
+ incl VAR_COUNT C incl does not touch the carry flag
+ lea 32(rp), rp
+ jne L(top)
+
+ mov SAVE_EBP, %ebp
+L(end):
+ mov SAVE_EBX, %ebx
+
+ C Return the sum of the two outstanding carry bits.
+ifdef(`CPU_P6',`
+ xor %eax, %eax
+ shr $1, %edx
+ adc %edx, %eax
+',`
+ adc $0, %edx
+ mov %edx, %eax
+')
+ pop rp FRAME_popl()
+ pop up FRAME_popl()
+ ret
+EPILOGUE()
+ASM_END()
--- /dev/null
+/* x86/k8 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007,
+2008, 2009, 2010, 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#define GMP_LIMB_BITS 32
+#define BYTES_PER_MP_LIMB 4
+
+/* Generated by tuneup.c, 2011-11-25, gcc 4.2 */
+
+#define MOD_1_NORM_THRESHOLD 0 /* always */
+#define MOD_1_UNNORM_THRESHOLD 3
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 10
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 4
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 12
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD 0 /* never mpn_mod_1s_2p */
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 12
+#define USE_PREINV_DIVREM_1 1 /* native */
+#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
+#define DIVEXACT_1_THRESHOLD 0 /* always (native) */
+#define BMOD_1_TO_MOD_1_THRESHOLD 40
+
+#define MUL_TOOM22_THRESHOLD 26
+#define MUL_TOOM33_THRESHOLD 81
+#define MUL_TOOM44_THRESHOLD 136
+#define MUL_TOOM6H_THRESHOLD 270
+#define MUL_TOOM8H_THRESHOLD 430
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD 81
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 91
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 93
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD 96
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD 121
+
+#define SQR_BASECASE_THRESHOLD 0 /* always (native) */
+#define SQR_TOOM2_THRESHOLD 46
+#define SQR_TOOM3_THRESHOLD 81
+#define SQR_TOOM4_THRESHOLD 202
+#define SQR_TOOM6_THRESHOLD 286
+#define SQR_TOOM8_THRESHOLD 430
+
+#define MULMID_TOOM42_THRESHOLD 56
+
+#define MULMOD_BNM1_THRESHOLD 17
+#define SQRMOD_BNM1_THRESHOLD 17
+
+#define MUL_FFT_MODF_THRESHOLD 888 /* k = 6 */
+#define MUL_FFT_TABLE3 \
+ { { 888, 6}, { 15, 5}, { 31, 6}, { 25, 7}, \
+ { 13, 6}, { 27, 7}, { 15, 6}, { 33, 7}, \
+ { 17, 6}, { 35, 7}, { 19, 6}, { 39, 7}, \
+ { 23, 6}, { 47, 7}, { 27, 8}, { 15, 7}, \
+ { 31, 6}, { 63, 7}, { 35, 8}, { 19, 7}, \
+ { 41, 8}, { 23, 7}, { 47, 8}, { 31, 7}, \
+ { 63, 8}, { 39, 7}, { 79, 9}, { 23, 8}, \
+ { 51, 9}, { 31, 8}, { 67, 9}, { 39, 8}, \
+ { 79, 9}, { 47, 8}, { 95, 9}, { 55,10}, \
+ { 31, 9}, { 63, 8}, { 127, 9}, { 79,10}, \
+ { 47, 9}, { 95,11}, { 31,10}, { 63, 9}, \
+ { 135,10}, { 79, 9}, { 167,10}, { 95, 9}, \
+ { 191,10}, { 111,11}, { 63,10}, { 127, 9}, \
+ { 255,10}, { 159,11}, { 95,10}, { 191,12}, \
+ { 63,11}, { 127,10}, { 271, 9}, { 543,10}, \
+ { 287,11}, { 159,10}, { 335,11}, { 191,10}, \
+ { 383, 9}, { 767,10}, { 399, 9}, { 799,11}, \
+ { 223,12}, { 4096,13}, { 8192,14}, { 16384,15}, \
+ { 32768,16} }
+#define MUL_FFT_TABLE3_SIZE 73
+#define MUL_FFT_THRESHOLD 7552
+
+#define SQR_FFT_MODF_THRESHOLD 758 /* k = 5 */
+#define SQR_FFT_TABLE3 \
+ { { 758, 5}, { 25, 6}, { 13, 5}, { 27, 6}, \
+ { 25, 7}, { 13, 6}, { 27, 7}, { 15, 6}, \
+ { 32, 7}, { 17, 6}, { 35, 7}, { 19, 6}, \
+ { 39, 7}, { 23, 6}, { 47, 7}, { 27, 8}, \
+ { 15, 7}, { 35, 8}, { 19, 7}, { 41, 8}, \
+ { 23, 7}, { 47, 8}, { 31, 7}, { 63, 8}, \
+ { 39, 9}, { 23, 8}, { 51, 9}, { 31, 8}, \
+ { 67, 9}, { 39, 8}, { 79, 9}, { 47, 8}, \
+ { 95, 9}, { 55,10}, { 31, 9}, { 63, 8}, \
+ { 127, 9}, { 79,10}, { 47, 9}, { 95,11}, \
+ { 31,10}, { 63, 9}, { 135,10}, { 79, 9}, \
+ { 159,10}, { 95, 9}, { 191,10}, { 111,11}, \
+ { 63,10}, { 127, 9}, { 255,10}, { 159,11}, \
+ { 95,10}, { 191,12}, { 63,11}, { 127,10}, \
+ { 255, 9}, { 511,10}, { 271, 9}, { 543, 8}, \
+ { 1087,10}, { 287,11}, { 159,10}, { 319, 9}, \
+ { 671,11}, { 191,10}, { 383, 9}, { 767,10}, \
+ { 399, 9}, { 799,12}, { 4096,13}, { 8192,14}, \
+ { 16384,15}, { 32768,16} }
+#define SQR_FFT_TABLE3_SIZE 74
+#define SQR_FFT_THRESHOLD 7296
+
+#define MULLO_BASECASE_THRESHOLD 0 /* always */
+#define MULLO_DC_THRESHOLD 28
+#define MULLO_MUL_N_THRESHOLD 13463
+
+#define DC_DIV_QR_THRESHOLD 91
+#define DC_DIVAPPR_Q_THRESHOLD 280
+#define DC_BDIV_QR_THRESHOLD 87
+#define DC_BDIV_Q_THRESHOLD 222
+
+#define INV_MULMOD_BNM1_THRESHOLD 62
+#define INV_NEWTON_THRESHOLD 266
+#define INV_APPR_THRESHOLD 268
+
+#define BINV_NEWTON_THRESHOLD 272
+#define REDC_1_TO_REDC_N_THRESHOLD 79
+
+#define MU_DIV_QR_THRESHOLD 1822
+#define MU_DIVAPPR_Q_THRESHOLD 1652
+#define MUPI_DIV_QR_THRESHOLD 108
+#define MU_BDIV_QR_THRESHOLD 1470
+#define MU_BDIV_Q_THRESHOLD 1470
+
+#define POWM_SEC_TABLE 3,21,195,961,2783
+
+#define MATRIX22_STRASSEN_THRESHOLD 19
+#define HGCD_THRESHOLD 149
+#define HGCD_APPR_THRESHOLD 181
+#define HGCD_REDUCE_THRESHOLD 4633
+#define GCD_DC_THRESHOLD 610
+#define GCDEXT_DC_THRESHOLD 419
+#define JACOBI_BASE_METHOD 4
+
+#define GET_STR_DC_THRESHOLD 14
+#define GET_STR_PRECOMPUTE_THRESHOLD 31
+#define SET_STR_DC_THRESHOLD 272
+#define SET_STR_PRECOMPUTE_THRESHOLD 1330
+
+#define FAC_DSC_THRESHOLD 438
+#define FAC_ODD_THRESHOLD 24
C cycles/limb
-C P54: 7.5
-C P55: 7.0
-C P6: 2.5
-C K6: 4.5
-C K7: 5.0
-C P4: 14.5
+C P54 7.5
+C P55 7.0
+C P6 2.5
+C K6 4.5
+C K7 5.0
+C P4 14.5
C mp_limb_t mpn_lshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
C cycles/limb
-C P5: 3.0
-C P6: 3.66
-C K6: 3.0
-C K7: 1.3
-C P4: 9
+C P5 3.0
+C P6 3.66
+C K6 3.0
+C K7 1.3
+C P4 9
C mp_limb_t mpn_mod_34lsub1 (mp_srcptr src, mp_size_t size)
include(`../config.m4')
-C cycles/limb
-C P5: 12.5
-C P6 model 0-8,10-12) 5.5
+C cycles/limb
+C P5 12.5
+C P6 model 0-8,10-12 5.5
C P6 model 9 (Banias)
-C P6 model 13 (Dothan) 5.25
-C P4 model 0 (Willamette) 19.0
-C P4 model 1 (?) 19.0
-C P4 model 2 (Northwood) 19.0
+C P6 model 13 (Dothan) 5.25
+C P4 model 0 (Willamette) 19.0
+C P4 model 1 (?) 19.0
+C P4 model 2 (Northwood) 19.0
C P4 model 3 (Prescott)
C P4 model 4 (Nocona)
-C K6: 10.5
-C K7: 4.5
-C K8:
+C AMD K6 10.5
+C AMD K7 4.5
+C AMD K8
C mp_limb_t mpn_mul_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
C cycles/crossproduct
-C P5: 15
-C P6: 7.5
-C K6: 12.5
-C K7: 5.5
-C P4: 24
+C P5 15
+C P6 7.5
+C K6 12.5
+C K7 5.5
+C P4 24
C void mpn_mul_basecase (mp_ptr wp,
--- /dev/null
+/* x86/nano gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007,
+2008, 2009, 2010, 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#define GMP_LIMB_BITS 32
+#define BYTES_PER_MP_LIMB 4
+
+/* Generated by tuneup.c, 2011-11-25, gcc 4.2 */
+
+#define MOD_1_1P_METHOD 1
+#define MOD_1_NORM_THRESHOLD 3
+#define MOD_1_UNNORM_THRESHOLD 3
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 10
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 9
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 0 /* never mpn_mod_1_1p */
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD 53
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 12
+#define USE_PREINV_DIVREM_1 1
+#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
+#define DIVEXACT_1_THRESHOLD 0 /* always (native) */
+#define BMOD_1_TO_MOD_1_THRESHOLD 32
+
+#define MUL_TOOM22_THRESHOLD 16
+#define MUL_TOOM33_THRESHOLD 132
+#define MUL_TOOM44_THRESHOLD 195
+#define MUL_TOOM6H_THRESHOLD 270
+#define MUL_TOOM8H_THRESHOLD 478
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD 129
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 138
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 130
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD 135
+
+#define SQR_BASECASE_THRESHOLD 0 /* always (native) */
+#define SQR_TOOM2_THRESHOLD 28
+#define SQR_TOOM3_THRESHOLD 194
+#define SQR_TOOM4_THRESHOLD 502
+#define SQR_TOOM6_THRESHOLD 746
+#define SQR_TOOM8_THRESHOLD 1005
+
+#define MULMID_TOOM42_THRESHOLD 40
+
+#define MULMOD_BNM1_THRESHOLD 14
+#define SQRMOD_BNM1_THRESHOLD 19
+
+#define POWM_SEC_TABLE 4,23,258,828,2246
+
+#define MUL_FFT_MODF_THRESHOLD 308 /* k = 5 */
+#define MUL_FFT_TABLE3 \
+ { { 308, 5}, { 13, 6}, { 7, 5}, { 17, 6}, \
+ { 9, 5}, { 19, 6}, { 11, 5}, { 23, 6}, \
+ { 13, 7}, { 7, 6}, { 17, 7}, { 9, 6}, \
+ { 19, 7}, { 11, 6}, { 24, 7}, { 15, 6}, \
+ { 31, 7}, { 19, 8}, { 11, 7}, { 25, 8}, \
+ { 15, 7}, { 33, 8}, { 19, 7}, { 39, 8}, \
+ { 23, 7}, { 47, 9}, { 15, 8}, { 31, 7}, \
+ { 63, 8}, { 39, 9}, { 23, 8}, { 47,10}, \
+ { 15, 9}, { 31, 8}, { 63, 9}, { 47,10}, \
+ { 31, 9}, { 71,10}, { 47, 9}, { 95,11}, \
+ { 31,10}, { 63, 9}, { 127, 8}, { 255,10}, \
+ { 79, 9}, { 159,10}, { 95, 9}, { 191,11}, \
+ { 63,10}, { 127, 9}, { 255, 8}, { 543, 9}, \
+ { 287, 8}, { 575, 7}, { 1215,10}, { 159,11}, \
+ { 95,10}, { 191,12}, { 63,11}, { 127,10}, \
+ { 255, 9}, { 543, 8}, { 1087,10}, { 287, 9}, \
+ { 607, 8}, { 1215,11}, { 159,10}, { 319, 9}, \
+ { 639,10}, { 351, 9}, { 703, 8}, { 1407, 9}, \
+ { 735, 8}, { 1471,11}, { 191,10}, { 383, 9}, \
+ { 767,10}, { 415, 9}, { 831,11}, { 223,10}, \
+ { 447, 9}, { 895,10}, { 479, 9}, { 959, 8}, \
+ { 1919,12}, { 4096,13}, { 8192,14}, { 16384,15}, \
+ { 32768,16} }
+#define MUL_FFT_TABLE3_SIZE 89
+#define MUL_FFT_THRESHOLD 1856
+
+#define SQR_FFT_MODF_THRESHOLD 396 /* k = 5 */
+#define SQR_FFT_TABLE3 \
+ { { 396, 5}, { 13, 6}, { 7, 5}, { 21, 6}, \
+ { 11, 5}, { 23, 6}, { 21, 7}, { 11, 6}, \
+ { 25, 7}, { 15, 6}, { 31, 7}, { 19, 6}, \
+ { 39, 7}, { 21, 8}, { 11, 7}, { 23, 6}, \
+ { 47, 7}, { 27, 8}, { 15, 7}, { 33, 8}, \
+ { 19, 7}, { 39, 8}, { 23, 7}, { 47, 8}, \
+ { 27, 9}, { 15, 8}, { 31, 7}, { 63, 8}, \
+ { 39, 9}, { 23, 8}, { 47,10}, { 15, 9}, \
+ { 31, 8}, { 63, 9}, { 39, 8}, { 79, 9}, \
+ { 47,10}, { 31, 9}, { 79,10}, { 47, 9}, \
+ { 95,11}, { 31,10}, { 63, 9}, { 127,10}, \
+ { 79, 9}, { 159,10}, { 95,11}, { 63,10}, \
+ { 127, 9}, { 255, 8}, { 543,10}, { 143, 9}, \
+ { 287, 8}, { 607, 7}, { 1215, 6}, { 2431,10}, \
+ { 159, 8}, { 639,11}, { 95,10}, { 191,12}, \
+ { 63,11}, { 127,10}, { 255, 9}, { 543, 8}, \
+ { 1087,10}, { 287, 9}, { 607, 8}, { 1215,11}, \
+ { 159,10}, { 319, 9}, { 671,10}, { 351, 9}, \
+ { 703, 8}, { 1407, 9}, { 735, 8}, { 1471, 7}, \
+ { 2943,11}, { 191,10}, { 383, 9}, { 799,10}, \
+ { 415, 9}, { 895,10}, { 479,12}, { 4096,13}, \
+ { 8192,14}, { 16384,15}, { 32768,16} }
+#define SQR_FFT_TABLE3_SIZE 87
+#define SQR_FFT_THRESHOLD 2368
+
+#define MULLO_BASECASE_THRESHOLD 0 /* always */
+#define MULLO_DC_THRESHOLD 51
+#define MULLO_MUL_N_THRESHOLD 3369
+
+#define DC_DIV_QR_THRESHOLD 56
+#define DC_DIVAPPR_Q_THRESHOLD 183
+#define DC_BDIV_QR_THRESHOLD 55
+#define DC_BDIV_Q_THRESHOLD 118
+
+#define INV_MULMOD_BNM1_THRESHOLD 30
+#define INV_NEWTON_THRESHOLD 266
+#define INV_APPR_THRESHOLD 218
+
+#define BINV_NEWTON_THRESHOLD 268
+#define REDC_1_TO_REDC_N_THRESHOLD 56
+
+#define MU_DIV_QR_THRESHOLD 1308
+#define MU_DIVAPPR_Q_THRESHOLD 1528
+#define MUPI_DIV_QR_THRESHOLD 124
+#define MU_BDIV_QR_THRESHOLD 855
+#define MU_BDIV_Q_THRESHOLD 1334
+
+#define MATRIX22_STRASSEN_THRESHOLD 14
+#define HGCD_THRESHOLD 104
+#define HGCD_APPR_THRESHOLD 139
+#define HGCD_REDUCE_THRESHOLD 2121
+#define GCD_DC_THRESHOLD 456
+#define GCDEXT_DC_THRESHOLD 321
+#define JACOBI_BASE_METHOD 4
+
+#define GET_STR_DC_THRESHOLD 11
+#define GET_STR_PRECOMPUTE_THRESHOLD 25
+#define SET_STR_DC_THRESHOLD 542
+#define SET_STR_PRECOMPUTE_THRESHOLD 840
mpn_mul_basecase 8.2 cycles/crossproduct (approx)
mpn_sqr_basecase 4.0 cycles/crossproduct (approx)
- or 7.75 cycles/triangleproduct (approx)
+ or 7.75 cycles/triangleproduct (approx)
Pentium II and III have MMX and get the following improvements.
C * Avoid indexed addressing, it makes us stall on the two-ported register
C file.
-C cycles/limb
-C P6 model 0-8,10-12) 3.17
-C P6 model 9 (Banias) ?
-C P6 model 13 (Dothan) 2.25
+C cycles/limb
+C P6 model 0-8,10-12 3.17
+C P6 model 9 (Banias) 2.15
+C P6 model 13 (Dothan) 2.25
define(`rp', `%edi')
include(`../config.m4')
-C cycles/limb
-C P5:
-C P6 model 0-8,10-12) 6.44
-C P6 model 9 (Banias)
-C P6 model 13 (Dothan) 6.11
+C cycles/limb
+C P5
+C P6 model 0-8,10-12 6.44
+C P6 model 9 (Banias) 6.15
+C P6 model 13 (Dothan) 6.11
C P4 model 0 (Willamette)
C P4 model 1 (?)
C P4 model 2 (Northwood)
C P4 model 3 (Prescott)
C P4 model 4 (Nocona)
-C K6:
-C K7:
-C K8:
+C AMD K6
+C AMD K7
+C AMD K8
dnl P6 UNROLL_COUNT cycles/limb
--- /dev/null
+dnl Intel P6 mpn_bdiv_q_1, mpn_pi1_bdiv_q_1 -- mpn by limb division using a binary (mod 2^32) inverse, quotient only.
+
+dnl Copyright 2001, 2002, 2007, 2011 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl Rearranged from mpn/x86/p6/dive_1.asm by Marco Bodrato.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C odd even divisor
+C P6: 10.0 12.0 cycles/limb
+
+C MULFUNC_PROLOGUE(mpn_bdiv_q_1 mpn_pi1_bdiv_q_1)
+
+C The odd case is basically the same as mpn_modexact_1_odd, just with an
+C extra store, and it runs at the same 10 cycles which is the dependent
+C chain.
+C
+C The shifts for the even case aren't on the dependent chain so in principle
+C it could run the same too, but nothing running at 10 has been found.
+C Perhaps there are too many uops (an extra 4 over the odd case).
+
+defframe(PARAM_SHIFT, 24)
+defframe(PARAM_INVERSE,20)
+defframe(PARAM_DIVISOR,16)
+defframe(PARAM_SIZE, 12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+
+defframe(SAVE_EBX, -4)
+defframe(SAVE_ESI, -8)
+defframe(SAVE_EDI, -12)
+defframe(SAVE_EBP, -16)
+deflit(STACK_SPACE, 16)
+
+dnl re-use parameter space
+define(VAR_INVERSE,`PARAM_SRC')
+
+ TEXT
+
+C mp_limb_t
+C mpn_pi1_bdiv_q_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_limb_t divisor,
+C mp_limb_t inverse, int shift)
+C
+C Writes size quotient limbs to dst. Each limb is formed as
+C   q = (s - borrow - high(previous q * divisor)) * inverse   mod 2^32
+C where s is the next source limb, taken after shifting {src,size} right
+C by shift bits when shift is nonzero (the top limb is zero filled).
+C
+C NOTE(review): presumably divisor arrives with its trailing zero bits
+C already removed and inverse is its inverse mod 2^32, which is how
+C mpn_bdiv_q_1 sets things up before jumping to L(common) -- confirm for
+C any other caller.
+C NOTE(review): eax on return is simply the last quotient limb computed;
+C whether callers rely on the return value is not visible here.
+
+ ALIGN(16)
+PROLOGUE(mpn_pi1_bdiv_q_1)
+deflit(`FRAME',0)
+
+ subl $STACK_SPACE, %esp FRAME_subl_esp(STACK_SPACE)
+
+ movl %esi, SAVE_ESI
+ movl PARAM_SRC, %esi
+
+ movl %ebx, SAVE_EBX
+ movl PARAM_SIZE, %ebx
+
+ movl %ebp, SAVE_EBP
+ movl PARAM_INVERSE, %ebp
+
+ movl PARAM_SHIFT, %ecx C trailing twos
+
+C mpn_bdiv_q_1 jumps here with esi=src, ebx=size, ebp=inverse, ecx=shift,
+C with esi, ebx and ebp already saved and PARAM_DIVISOR holding the divisor
+C with its twos removed.
+L(common):
+ movl %edi, SAVE_EDI
+ movl PARAM_DST, %edi
+
+ leal (%esi,%ebx,4), %esi C src end
+
+ leal (%edi,%ebx,4), %edi C dst end
+ negl %ebx C -size
+
+ movl (%esi,%ebx,4), %eax C src[0]
+
+ orl %ecx, %ecx C shift == 0 selects the odd loop
+ jz L(odd_entry)
+
+C The even loop uses edi and ebp as scratch, so &dst[size] goes back into
+C its parameter slot and the inverse is spilled to VAR_INVERSE, which is
+C the PARAM_SRC slot (src has already been loaded into esi).
+ movl %edi, PARAM_DST
+ movl %ebp, VAR_INVERSE
+
+L(even):
+ C eax src[0]
+ C ebx counter, limbs, negative
+ C ecx shift
+ C edx
+ C esi &src[size]
+ C edi &dst[size]
+ C ebp inverse (already copied to VAR_INVERSE)
+
+ xorl %ebp, %ebp C initial carry bit
+ xorl %edx, %edx C initial carry limb (for size==1)
+
+ incl %ebx
+ jz L(even_one) C size==1, no second limb to shift in
+
+ movl (%esi,%ebx,4), %edi C src[1]
+
+ shrdl( %cl, %edi, %eax)
+
+ jmp L(even_entry)
+
+
+L(even_top):
+ C eax scratch
+ C ebx counter, limbs, negative
+ C ecx shift
+ C edx scratch
+ C esi &src[size]
+ C edi &dst[size] and scratch
+ C ebp carry bit
+
+ movl (%esi,%ebx,4), %edi C next higher src limb, for the shift
+
+ mull PARAM_DIVISOR C edx = high limb of previous q * divisor
+
+ movl -4(%esi,%ebx,4), %eax
+ shrdl( %cl, %edi, %eax)
+
+ subl %ebp, %eax
+
+ sbbl %ebp, %ebp C ebp = -borrow from the carry bit
+ subl %edx, %eax
+
+ sbbl $0, %ebp C accumulate borrow from the carry limb
+
+L(even_entry):
+ imull VAR_INVERSE, %eax
+
+ movl PARAM_DST, %edi C reload &dst[size], edi was scratch
+ negl %ebp C make the borrow positive for next time
+
+ movl %eax, -4(%edi,%ebx,4)
+ incl %ebx
+ jnz L(even_top)
+
+ mull PARAM_DIVISOR
+
+ movl -4(%esi), %eax C src[size-1]
+
+C Final limb: plain shrl, nothing above to shift in. Register restores are
+C interleaved with the arithmetic.
+L(even_one):
+ shrl %cl, %eax
+ movl SAVE_ESI, %esi
+
+ subl %ebp, %eax
+ movl SAVE_EBP, %ebp
+
+ subl %edx, %eax
+ movl SAVE_EBX, %ebx
+
+ imull VAR_INVERSE, %eax
+
+ movl %eax, -4(%edi) C dst[size-1]
+ movl SAVE_EDI, %edi
+ addl $STACK_SPACE, %esp
+
+ ret
+
+C The dependent chain here is
+C
+C subl %edx, %eax 1
+C imull %ebp, %eax 4
+C mull PARAM_DIVISOR 5
+C ----
+C total 10
+C
+C and this is the measured speed. No special scheduling is necessary, out
+C of order execution hides the load latency.
+
+L(odd_top):
+ C eax scratch (src limb)
+ C ebx counter, limbs, negative
+ C ecx carry bit
+ C edx carry limb, high of last product
+ C esi &src[size]
+ C edi &dst[size]
+ C ebp inverse
+
+ mull PARAM_DIVISOR
+
+ movl (%esi,%ebx,4), %eax
+ subl %ecx, %eax
+
+ sbbl %ecx, %ecx
+ subl %edx, %eax
+
+ sbbl $0, %ecx
+
+C On first entry ecx is zero (the jz above was taken), so it serves directly
+C as the initial carry bit, and eax holds src[0].
+L(odd_entry):
+ imull %ebp, %eax
+
+ movl %eax, (%edi,%ebx,4)
+ negl %ecx
+
+ incl %ebx
+ jnz L(odd_top)
+
+
+ movl SAVE_ESI, %esi
+
+ movl SAVE_EDI, %edi
+
+ movl SAVE_EBP, %ebp
+
+ movl SAVE_EBX, %ebx
+ addl $STACK_SPACE, %esp
+
+ ret
+
+EPILOGUE()
+
+C mp_limb_t mpn_bdiv_q_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C mp_limb_t divisor);
+C
+C Front end for the loops in mpn_pi1_bdiv_q_1. Counts the trailing zero
+C bits of divisor, strips them, computes the inverse of the resulting odd
+C value mod 2^32, then jumps to L(common) with the registers and stack
+C slots arranged as that code expects.
+C
+C NOTE(review): bsfl leaves its destination undefined for a zero source,
+C so divisor is presumably required to be nonzero -- confirm with callers.
+
+ ALIGN(16)
+PROLOGUE(mpn_bdiv_q_1)
+deflit(`FRAME',0)
+
+ movl PARAM_DIVISOR, %eax
+ subl $STACK_SPACE, %esp FRAME_subl_esp(STACK_SPACE)
+
+ movl %esi, SAVE_ESI
+ movl PARAM_SRC, %esi
+
+ movl %ebx, SAVE_EBX
+ movl PARAM_SIZE, %ebx
+
+ bsfl %eax, %ecx C trailing twos
+
+ movl %ebp, SAVE_EBP
+
+ shrl %cl, %eax C d without twos
+
+ movl %eax, %edx
+ shrl %eax C d/2 without twos
+
+ movl %edx, PARAM_DIVISOR C loops multiply by the odd divisor
+ andl $127, %eax C table index, 7 bits of d/2
+
+ifdef(`PIC',`
+ LEA( binvert_limb_table, %ebp)
+ movzbl (%eax,%ebp), %ebp C inv 8 bits
+',`
+ movzbl binvert_limb_table(%eax), %ebp C inv 8 bits
+')
+
+C Two Newton steps inv = 2*inv - inv*inv*d, each doubling the number of
+C valid low bits of the inverse (8 to 16, then 16 to 32).
+ leal (%ebp,%ebp), %eax C 2*inv
+
+ imull %ebp, %ebp C inv*inv
+ imull %edx, %ebp C inv*inv*d
+
+ subl %ebp, %eax C inv = 2*inv - inv*inv*d
+ leal (%eax,%eax), %ebp C 2*inv
+
+ imull %eax, %eax C inv*inv
+ imull %edx, %eax C inv*inv*d
+
+ subl %eax, %ebp C inv = 2*inv - inv*inv*d
+
+C Here esi=src, ebx=size, ebp=inverse, ecx=shift, and edi is still unsaved,
+C matching the state at L(common) in mpn_pi1_bdiv_q_1.
+ jmp L(common)
+
+EPILOGUE()
C imull %ebp, %eax 4
C mull PARAM_DIVISOR 5
C ----
-C total 10
+C total 10
C
C and this is the measured speed. No special scheduling is necessary, out
C of order execution hides the load latency.
--- /dev/null
+dnl x86 mpn_gcd_1 optimised for processors with fast BSF.
+
+dnl Based on the K7 gcd_1.asm, by Kevin Ryde. Rehacked by Torbjorn Granlund.
+
+dnl Copyright 2000, 2001, 2002, 2005, 2009, 2011, 2012 Free Software
+dnl Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C cycles/bit (approx)
+C AMD K7 7.80
+C AMD K8,K9 7.79
+C AMD K10 4.08
+C AMD bd1 ?
+C AMD bobcat 7.82
+C Intel P4-2 14.9
+C Intel P4-3/4 14.0
+C Intel P6/13 5.09
+C Intel core2 4.22
+C Intel NHM 5.00
+C Intel SBR 5.00
+C Intel atom 17.1
+C VIA nano ?
+C Numbers measured with: speed -CD -s16-32 -t16 mpn_gcd_1
+
+C Threshold of when to call bmod when U is one limb. Should be about
+C (time_in_cycles(bmod_1,1) + call_overhead) / (cycles/bit).
+define(`BMOD_THRES_LOG2', 6)
+
+
+define(`up', `%edi')
+define(`n', `%esi')
+define(`v0', `%edx')
+
+
+ASM_START()
+ TEXT
+ ALIGN(16)
+C mpn_gcd_1: the three stack arguments are loaded into up, n and v0.
+C U = {up,n} is first reduced to a single limb with respect to v0 (by
+C mpn_mod_1, by mpn_modexact_1_odd, or not at all), then a binary GCD loop
+C runs on two limbs. The result is shifted left by the number of trailing
+C zero bits common to the low limb of U and v0.
+C
+C NOTE(review): bsf leaves its destination undefined for a zero source, so
+C v0 is presumably required to be nonzero -- confirm with callers.
+PROLOGUE(mpn_gcd_1)
+ push %edi
+ push %esi
+
+C The two pushes above put the arguments at 12, 16 and 20 off esp.
+ mov 12(%esp), up
+ mov 16(%esp), n
+ mov 20(%esp), v0
+
+ mov (up), %eax C U low limb
+ or v0, %eax
+ bsf %eax, %eax C min(ctz(u0),ctz(v0))
+
+ bsf v0, %ecx
+ shr %cl, v0 C v0 with its twos removed
+
+ push %eax C preserve common twos over call
+ push v0 C preserve v0 argument over call
+
+ cmp $1, n
+ jnz L(reduce_nby1)
+
+C Both U and V are single limbs, reduce with bmod if u0 >> v0.
+C That is, skip the call when v0 > (u0 >> BMOD_THRES_LOG2); eax keeps u0
+C for L(reduced) in that case.
+ mov (up), %ecx
+ mov %ecx, %eax
+ shr $BMOD_THRES_LOG2, %ecx
+ cmp %ecx, v0
+ ja L(reduced)
+ jmp L(bmod)
+
+C Multi-limb U: sizes below BMOD_1_TO_MOD_1_THRESHOLD use mpn_modexact_1_odd,
+C larger ones use mpn_mod_1.
+L(reduce_nby1):
+ cmp $BMOD_1_TO_MOD_1_THRESHOLD, n
+ jl L(bmod)
+ifdef(`PIC_WITH_EBX',`
+ push %ebx
+ call L(movl_eip_to_ebx)
+ add $_GLOBAL_OFFSET_TABLE_, %ebx
+')
+ push v0 C param 3
+ push n C param 2
+ push up C param 1
+ CALL( mpn_mod_1)
+ jmp L(called)
+
+L(bmod):
+ifdef(`PIC_WITH_EBX',`dnl
+ push %ebx
+ call L(movl_eip_to_ebx)
+ add $_GLOBAL_OFFSET_TABLE_, %ebx
+')
+ push v0 C param 3
+ push n C param 2
+ push up C param 1
+ CALL( mpn_modexact_1_odd)
+
+L(called):
+ add $12, %esp C deallocate params
+ifdef(`PIC_WITH_EBX',`dnl
+ pop %ebx
+')
+C eax is the reduced U (or u0 itself when no call was made).
+L(reduced):
+ pop %edx C v0 as saved before the call
+
+ bsf %eax, %ecx C also sets ZF when eax is zero
+C test %eax, %eax C FIXME: does this lower latency?
+ jnz L(mid)
+ jmp L(end) C reduced U is zero, result comes from edx
+
+C Binary GCD loop on x=eax, y=edx. Each pass strips the twos from x using
+C the bit count in ecx, forms both y-x and x-y, and counts the twos of the
+C difference for the next pass. The carry from x-y then selects, via the
+C cmovc pair at L(top), the replacement of x by y-x and of y by the old x.
+C The loop ends when x-y is zero, leaving the result in edx.
+ ALIGN(16) C K10 BD C2 NHM SBR
+L(top): cmovc( %esi, %eax) C if x-y < 0 0,3 0,3 0,6 0,5 0,5
+ cmovc( %edi, %edx) C use x,y-x 0,3 0,3 2,8 1,7 1,7
+L(mid): shr %cl, %eax C 1,7 1,6 2,8 2,8 2,8
+ mov %edx, %esi C 1 1 4 3 3
+ sub %eax, %esi C 2 2 5 4 4
+ bsf %esi, %ecx C 3 3 6 5 5
+ mov %eax, %edi C 2 2 3 3 4
+ sub %edx, %eax C 2 2 4 3 4
+ jnz L(top) C
+
+C Restore the common twos pushed earlier and apply them to the result.
+L(end): pop %ecx
+ mov %edx, %eax
+ shl %cl, %eax
+
+ pop %esi
+ pop %edi
+ ret
+
+ifdef(`PIC_WITH_EBX',`dnl
+L(movl_eip_to_ebx):
+ mov (%esp), %ebx
+ ret
+')
+EPILOGUE()
/* Intel P6 gmp-mparam.h -- Compiler/machine parameter header file.
-Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2003 Free Software
-Foundation, Inc.
+Copyright 1991, 1993, 1994, 1999, 2000, 2001, 2002, 2003, 2008, 2009, 2010, 2012
+Free Software Foundation, Inc.
This file is part of the GNU MP Library.
#define BYTES_PER_MP_LIMB 4
-/* NOTE: In a fat binary build SQR_TOOM2_THRESHOLD here cannot be smaller than
- the value in mpn/x86/p6/mmx/gmp-mparam.h. The former is used as a hard
- limit in mpn/x86/p6/sqr_basecase.asm, and that file will be run by the
- p6/mmx cpus (pentium2, pentium3). */
-
-
-/* 200MHz Pentium Pro */
-
-/* Generated by tuneup.c, 2003-02-12, gcc 2.95 */
-
-#define MUL_TOOM22_THRESHOLD 23
-#define MUL_TOOM33_THRESHOLD 140
-
-#define SQR_BASECASE_THRESHOLD 0 /* always */
-#define SQR_TOOM2_THRESHOLD 52
-#define SQR_TOOM3_THRESHOLD 189
-
-#define DIV_SB_PREINV_THRESHOLD 0 /* always */
-#define DIV_DC_THRESHOLD 116
-#define POWM_THRESHOLD 131
-
-#define GCD_ACCEL_THRESHOLD 3
-#define JACOBI_BASE_METHOD 1
-
-#define USE_PREINV_DIVREM_1 0
-#define USE_PREINV_MOD_1 1 /* native */
-#define DIVREM_2_THRESHOLD 0 /* always */
-#define DIVEXACT_1_THRESHOLD 0 /* always */
-#define MODEXACT_1_ODD_THRESHOLD 0 /* always */
-
-#define GET_STR_DC_THRESHOLD 18
-#define GET_STR_PRECOMPUTE_THRESHOLD 23
-#define SET_STR_THRESHOLD 6093
-
-#define MUL_FFT_TABLE { 464, 928, 1920, 3584, 10240, 40960, 0 }
-#define MUL_FFT_MODF_THRESHOLD 360
-#define MUL_FFT_THRESHOLD 2816
-
-#define SQR_FFT_TABLE { 528, 1184, 1920, 4608, 14336, 40960, 0 }
-#define SQR_FFT_MODF_THRESHOLD 440
-#define SQR_FFT_THRESHOLD 2816
+/* NOTE: In a fat binary build SQR_TOOM2_THRESHOLD here cannot be more than the
+ value in mpn/x86/p6/gmp-mparam.h. The latter is used as a hard limit in
+ mpn/x86/p6/sqr_basecase.asm. */
+
+
+/* 1867 MHz P6 model 13 */
+
+#define MOD_1_NORM_THRESHOLD 4
+#define MOD_1_UNNORM_THRESHOLD 4
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 5
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 4
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 11
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD 0 /* never mpn_mod_1s_2p */
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 8
+#define USE_PREINV_DIVREM_1 1 /* native */
+#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
+#define DIVEXACT_1_THRESHOLD 0 /* always (native) */
+#define BMOD_1_TO_MOD_1_THRESHOLD 21
+
+#define MUL_TOOM22_THRESHOLD 20
+#define MUL_TOOM33_THRESHOLD 74
+#define MUL_TOOM44_THRESHOLD 181
+#define MUL_TOOM6H_THRESHOLD 252
+#define MUL_TOOM8H_THRESHOLD 363
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD 73
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 114
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 115
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD 80
+
+#define SQR_BASECASE_THRESHOLD 0 /* always (native) */
+#define SQR_TOOM2_THRESHOLD 30
+#define SQR_TOOM3_THRESHOLD 101
+#define SQR_TOOM4_THRESHOLD 154
+#define SQR_TOOM6_THRESHOLD 222
+#define SQR_TOOM8_THRESHOLD 527
+
+#define MULMID_TOOM42_THRESHOLD 58
+
+#define MULMOD_BNM1_THRESHOLD 13
+#define SQRMOD_BNM1_THRESHOLD 17
+
+#define POWM_SEC_TABLE 4,23,258,768,2388
+
+#define MUL_FFT_MODF_THRESHOLD 565 /* k = 5 */
+#define MUL_FFT_TABLE3 \
+ { { 565, 5}, { 25, 6}, { 13, 5}, { 27, 6}, \
+ { 25, 7}, { 13, 6}, { 28, 7}, { 15, 6}, \
+ { 31, 7}, { 17, 6}, { 35, 7}, { 27, 8}, \
+ { 15, 7}, { 35, 8}, { 19, 7}, { 41, 8}, \
+ { 23, 7}, { 47, 8}, { 27, 9}, { 15, 8}, \
+ { 31, 7}, { 63, 8}, { 39, 9}, { 23, 5}, \
+ { 383, 4}, { 991, 5}, { 511, 6}, { 267, 7}, \
+ { 157, 8}, { 91, 9}, { 47, 8}, { 111, 9}, \
+ { 63, 8}, { 127, 9}, { 79,10}, { 47, 9}, \
+ { 95,11}, { 31,10}, { 63, 9}, { 135,10}, \
+ { 79, 9}, { 159,10}, { 95,11}, { 63,10}, \
+ { 143, 9}, { 287,10}, { 159,11}, { 95,10}, \
+ { 191,12}, { 63,11}, { 127,10}, { 255, 9}, \
+ { 511,10}, { 271, 9}, { 543,10}, { 287,11}, \
+ { 159,10}, { 335, 9}, { 671,11}, { 191,10}, \
+ { 383, 9}, { 767,10}, { 399, 9}, { 799,10}, \
+ { 415,11}, { 223,12}, { 127,11}, { 255,10}, \
+ { 543, 9}, { 1087,11}, { 287,10}, { 607,11}, \
+ { 319,10}, { 671,12}, { 191,11}, { 383,10}, \
+ { 799,11}, { 415,10}, { 831,13}, { 127,12}, \
+ { 255,11}, { 543,10}, { 1087,11}, { 607,10}, \
+ { 1215,12}, { 319,11}, { 671,10}, { 1343,11}, \
+ { 735,10}, { 1471,12}, { 383,11}, { 799,10}, \
+ { 1599,11}, { 863,12}, { 447,11}, { 959,13}, \
+ { 255,12}, { 511,11}, { 1087,12}, { 575,11}, \
+ { 1215,12}, { 639,11}, { 1343,12}, { 703,11}, \
+ { 1471,13}, { 383,12}, { 831,11}, { 1727,12}, \
+ { 959,14}, { 255,13}, { 511,12}, { 1215,13}, \
+ { 639,12}, { 1471,11}, { 2943,13}, { 767,12}, \
+ { 1727,13}, { 895,12}, { 1919,14}, { 511,13}, \
+ { 1023,12}, { 2111,13}, { 1151,12}, { 2431,13}, \
+ { 1407,12}, { 2815,14}, { 767,13}, { 1663,12}, \
+ { 3455,13}, { 8192,14}, { 16384,15}, { 32768,16} }
+#define MUL_FFT_TABLE3_SIZE 132
+#define MUL_FFT_THRESHOLD 6784
+
+#define SQR_FFT_MODF_THRESHOLD 472 /* k = 5 */
+#define SQR_FFT_TABLE3 \
+ { { 472, 5}, { 25, 6}, { 13, 5}, { 27, 6}, \
+ { 25, 7}, { 13, 6}, { 27, 7}, { 15, 6}, \
+ { 31, 7}, { 17, 6}, { 35, 7}, { 27, 8}, \
+ { 15, 7}, { 35, 8}, { 19, 7}, { 41, 8}, \
+ { 23, 7}, { 49, 8}, { 27, 9}, { 15, 8}, \
+ { 39, 9}, { 23, 8}, { 51,10}, { 15, 9}, \
+ { 31, 8}, { 63, 4}, { 1023, 8}, { 67, 9}, \
+ { 39, 5}, { 639, 4}, { 1471, 6}, { 383, 7}, \
+ { 209, 8}, { 119, 9}, { 63, 7}, { 255, 8}, \
+ { 139, 9}, { 71, 8}, { 143, 9}, { 79,10}, \
+ { 47, 9}, { 95,11}, { 31,10}, { 63, 9}, \
+ { 135,10}, { 79, 9}, { 159, 8}, { 319, 9}, \
+ { 167,10}, { 95,11}, { 63,10}, { 143, 9}, \
+ { 287,10}, { 159,11}, { 95,10}, { 191,12}, \
+ { 63,11}, { 127,10}, { 255, 9}, { 543, 8}, \
+ { 1087,10}, { 287, 9}, { 575,11}, { 159,10}, \
+ { 319, 9}, { 639,10}, { 335, 9}, { 671,10}, \
+ { 351, 9}, { 703,11}, { 191,10}, { 383, 9}, \
+ { 767,10}, { 399, 9}, { 799,10}, { 415, 9}, \
+ { 831,11}, { 223,12}, { 127,11}, { 255,10}, \
+ { 543, 9}, { 1087,11}, { 287,10}, { 607, 9}, \
+ { 1215,11}, { 319,10}, { 671, 9}, { 1343,11}, \
+ { 351,10}, { 703,12}, { 191,11}, { 383,10}, \
+ { 799,11}, { 415,10}, { 831,13}, { 127,12}, \
+ { 255,11}, { 543,10}, { 1087,11}, { 607,12}, \
+ { 319,11}, { 671,10}, { 1343,11}, { 735,12}, \
+ { 383,11}, { 799,10}, { 1599,11}, { 863,12}, \
+ { 447,11}, { 959,13}, { 255,12}, { 511,11}, \
+ { 1087,12}, { 575,11}, { 1215,12}, { 639,11}, \
+ { 1343,12}, { 703,11}, { 1471,13}, { 383,12}, \
+ { 767,11}, { 1599,12}, { 831,11}, { 1727,12}, \
+ { 959,14}, { 255,13}, { 511,12}, { 1215,13}, \
+ { 639,12}, { 1471,13}, { 767,12}, { 1727,13}, \
+ { 895,12}, { 1919,14}, { 511,13}, { 1023,12}, \
+ { 2111,13}, { 1151,12}, { 2431,13}, { 1407,14}, \
+ { 767,13}, { 1663,12}, { 3455,13}, { 8192,14}, \
+ { 16384,15}, { 32768,16} }
+#define SQR_FFT_TABLE3_SIZE 146
+#define SQR_FFT_THRESHOLD 5760
+
+#define MULLO_BASECASE_THRESHOLD 0 /* always */
+#define MULLO_DC_THRESHOLD 33
+#define MULLO_MUL_N_THRESHOLD 13463
+
+#define DC_DIV_QR_THRESHOLD 20
+#define DC_DIVAPPR_Q_THRESHOLD 56
+#define DC_BDIV_QR_THRESHOLD 60
+#define DC_BDIV_Q_THRESHOLD 134
+
+#define INV_MULMOD_BNM1_THRESHOLD 38
+#define INV_NEWTON_THRESHOLD 66
+#define INV_APPR_THRESHOLD 63
+
+#define BINV_NEWTON_THRESHOLD 250
+#define REDC_1_TO_REDC_N_THRESHOLD 63
+
+#define MU_DIV_QR_THRESHOLD 1164
+#define MU_DIVAPPR_Q_THRESHOLD 979
+#define MUPI_DIV_QR_THRESHOLD 38
+#define MU_BDIV_QR_THRESHOLD 1442
+#define MU_BDIV_Q_THRESHOLD 1470
+
+#define MATRIX22_STRASSEN_THRESHOLD 17
+#define HGCD_THRESHOLD 64
+#define HGCD_APPR_THRESHOLD 105
+#define HGCD_REDUCE_THRESHOLD 3524
+#define GCD_DC_THRESHOLD 386
+#define GCDEXT_DC_THRESHOLD 309
+#define JACOBI_BASE_METHOD 1
+
+#define GET_STR_DC_THRESHOLD 13
+#define GET_STR_PRECOMPUTE_THRESHOLD 26
+#define SET_STR_DC_THRESHOLD 587
+#define SET_STR_PRECOMPUTE_THRESHOLD 1104
/* 800 MHz P6 model 8 */
#define MOD_1_NORM_THRESHOLD 4
-#define MOD_1_UNNORM_THRESHOLD 5
-#define MOD_1N_TO_MOD_1_1_THRESHOLD 8
+#define MOD_1_UNNORM_THRESHOLD 4
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 9
#define MOD_1U_TO_MOD_1_1_THRESHOLD 7
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD 0
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD 12
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 15
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 8
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD 10
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 17
#define USE_PREINV_DIVREM_1 1 /* native */
#define DIVEXACT_1_THRESHOLD 0 /* always (native) */
#define BMOD_1_TO_MOD_1_THRESHOLD 49
#define MUL_TOOM42_TO_TOOM63_THRESHOLD 80
#define SQR_BASECASE_THRESHOLD 0 /* always (native) */
-#define SQR_TOOM2_THRESHOLD 48
+#define SQR_TOOM2_THRESHOLD 30
#define SQR_TOOM3_THRESHOLD 81
#define SQR_TOOM4_THRESHOLD 142
#define SQR_TOOM6_THRESHOLD 258
#define DC_BDIV_QR_THRESHOLD 76
#define DC_BDIV_Q_THRESHOLD 175
-#define INV_MULMOD_BNM1_THRESHOLD 82
+#define INV_MULMOD_BNM1_THRESHOLD 42
#define INV_NEWTON_THRESHOLD 268
#define INV_APPR_THRESHOLD 250
#define HGCD_THRESHOLD 121
#define GCD_DC_THRESHOLD 478
#define GCDEXT_DC_THRESHOLD 361
-#define JACOBI_BASE_METHOD 1
+#define JACOBI_BASE_METHOD 4
#define GET_STR_DC_THRESHOLD 13
#define GET_STR_PRECOMPUTE_THRESHOLD 26
C imull %edi, %eax 4
C mull PARAM_DIVISOR 5
C ----
-C total 10
+C total 10
C
C and this is the measured speed. No special scheduling is necessary, out
C of order execution hides the load latency.
/* 1867 MHz P6 model 13 */
#define MOD_1_NORM_THRESHOLD 4
-#define MOD_1_UNNORM_THRESHOLD 6
-#define MOD_1N_TO_MOD_1_1_THRESHOLD 9
-#define MOD_1U_TO_MOD_1_1_THRESHOLD 6
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD 0 /* never mpn_mod_1_1p */
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD 8
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 15
+#define MOD_1_UNNORM_THRESHOLD 4
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 5
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 4
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 11
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD 0 /* never mpn_mod_1s_2p */
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 8
#define USE_PREINV_DIVREM_1 1 /* native */
+#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
#define DIVEXACT_1_THRESHOLD 0 /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD 22
+#define BMOD_1_TO_MOD_1_THRESHOLD 21
#define MUL_TOOM22_THRESHOLD 20
#define MUL_TOOM33_THRESHOLD 77
-#define MUL_TOOM44_THRESHOLD 182
-#define MUL_TOOM6H_THRESHOLD 252
+#define MUL_TOOM44_THRESHOLD 169
+#define MUL_TOOM6H_THRESHOLD 246
#define MUL_TOOM8H_THRESHOLD 381
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD 75
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD 122
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD 115
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD 79
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD 73
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 114
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 97
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD 80
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD 106
#define SQR_BASECASE_THRESHOLD 0 /* always (native) */
#define SQR_TOOM2_THRESHOLD 30
#define SQR_TOOM3_THRESHOLD 101
#define SQR_TOOM4_THRESHOLD 154
#define SQR_TOOM6_THRESHOLD 222
-#define SQR_TOOM8_THRESHOLD 547
+#define SQR_TOOM8_THRESHOLD 527
+
+#define MULMID_TOOM42_THRESHOLD 58
#define MULMOD_BNM1_THRESHOLD 13
-#define SQRMOD_BNM1_THRESHOLD 18
+#define SQRMOD_BNM1_THRESHOLD 17
-#define MUL_FFT_MODF_THRESHOLD 565 /* k = 5 */
+#define MUL_FFT_MODF_THRESHOLD 690 /* k = 5 */
#define MUL_FFT_TABLE3 \
{ { 565, 5}, { 25, 6}, { 13, 5}, { 27, 6}, \
{ 25, 7}, { 13, 6}, { 28, 7}, { 15, 6}, \
{ 1407,12}, { 2815,14}, { 767,13}, { 1663,12}, \
{ 3455,13}, { 8192,14}, { 16384,15}, { 32768,16} }
#define MUL_FFT_TABLE3_SIZE 132
-#define MUL_FFT_THRESHOLD 6784
+#define MUL_FFT_THRESHOLD 7424
-#define SQR_FFT_MODF_THRESHOLD 472 /* k = 5 */
+#define SQR_FFT_MODF_THRESHOLD 565 /* k = 5 */
#define SQR_FFT_TABLE3 \
{ { 472, 5}, { 25, 6}, { 13, 5}, { 27, 6}, \
{ 25, 7}, { 13, 6}, { 27, 7}, { 15, 6}, \
#define SQR_FFT_THRESHOLD 5760
#define MULLO_BASECASE_THRESHOLD 0 /* always */
-#define MULLO_DC_THRESHOLD 34
+#define MULLO_DC_THRESHOLD 31
#define MULLO_MUL_N_THRESHOLD 13463
-#define DC_DIV_QR_THRESHOLD 19
-#define DC_DIVAPPR_Q_THRESHOLD 56
+#define DC_DIV_QR_THRESHOLD 25
+#define DC_DIVAPPR_Q_THRESHOLD 55
#define DC_BDIV_QR_THRESHOLD 60
#define DC_BDIV_Q_THRESHOLD 132
#define INV_MULMOD_BNM1_THRESHOLD 38
-#define INV_NEWTON_THRESHOLD 69
+#define INV_NEWTON_THRESHOLD 65
#define INV_APPR_THRESHOLD 65
-#define BINV_NEWTON_THRESHOLD 276
-#define REDC_1_TO_REDC_N_THRESHOLD 63
+#define BINV_NEWTON_THRESHOLD 252
+#define REDC_1_TO_REDC_N_THRESHOLD 62
-#define MU_DIV_QR_THRESHOLD 1308
-#define MU_DIVAPPR_Q_THRESHOLD 998
-#define MUPI_DIV_QR_THRESHOLD 62
-#define MU_BDIV_QR_THRESHOLD 1442
+#define MU_DIV_QR_THRESHOLD 1164
+#define MU_DIVAPPR_Q_THRESHOLD 748
+#define MUPI_DIV_QR_THRESHOLD 38
+#define MU_BDIV_QR_THRESHOLD 1360
#define MU_BDIV_Q_THRESHOLD 1470
+#define POWM_SEC_TABLE 2,23,258,879,2246
+
#define MATRIX22_STRASSEN_THRESHOLD 17
-#define HGCD_THRESHOLD 60
-#define GCD_DC_THRESHOLD 393
+#define HGCD_THRESHOLD 69
+#define HGCD_APPR_THRESHOLD 112
+#define HGCD_REDUCE_THRESHOLD 3389
+#define GCD_DC_THRESHOLD 386
#define GCDEXT_DC_THRESHOLD 303
#define JACOBI_BASE_METHOD 1
#define GET_STR_DC_THRESHOLD 13
-#define GET_STR_PRECOMPUTE_THRESHOLD 22
-#define SET_STR_DC_THRESHOLD 587
-#define SET_STR_PRECOMPUTE_THRESHOLD 983
+#define GET_STR_PRECOMPUTE_THRESHOLD 25
+#define SET_STR_DC_THRESHOLD 582
+#define SET_STR_PRECOMPUTE_THRESHOLD 1118
+
+#define FAC_DSC_THRESHOLD 178
+#define FAC_ODD_THRESHOLD 34
--- /dev/null
+dnl Intel P6/SSE2 mpn_mod_1_1.
+
+dnl Copyright 2009, 2011 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+MULFUNC_PROLOGUE(mpn_mod_1_1p)
+include_mpn(`x86/pentium4/sse2/mod_1_1.asm')
pushl %edx
FRAME_pushl()
movl PARAM_CARRY,%eax
- shrl $1,%eax C shift bit 0 into carry
+ shrl %eax C shift bit 0 into carry
jmp L(oop)
L(endgo):
deflit(`FRAME',16)
movl PARAM_CARRY,%eax
- shrl $1,%eax C shift bit 0 into carry
+ shrl %eax C shift bit 0 into carry
jmp L(end)
EPILOGUE()
--- /dev/null
+dnl Intel Pentium mpn_bdiv_q_1, mpn_pi1_bdiv_q_1 -- mpn by limb exact division.
+
+dnl Copyright 2001, 2002, 2011 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl Rearranged from mpn/x86/pentium/dive_1.asm by Marco Bodrato.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C divisor
+C odd even
+C P54: 24.5 30.5 cycles/limb
+C P55: 23.0 28.0
+
+MULFUNC_PROLOGUE(mpn_bdiv_q_1 mpn_pi1_bdiv_q_1)
+
+C The P55 speeds noted above, 23 cycles odd or 28 cycles even, are as
+C expected. On P54 in the even case the shrdl pairing nonsense (see
+C mpn/x86/pentium/README) costs 1 cycle, but it's not clear why there's a
+C further 1.5 slowdown for both odd and even.
+
+defframe(PARAM_SHIFT, 24)
+defframe(PARAM_INVERSE,20)
+defframe(PARAM_DIVISOR,16)
+defframe(PARAM_SIZE, 12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+
+dnl re-use parameter space
+define(VAR_INVERSE,`PARAM_DST')
+
+ TEXT
+
+ ALIGN(32)
+C mp_limb_t mpn_bdiv_q_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C mp_limb_t divisor);
+C
+C Entry point that takes only the divisor.  It derives the values that
+C mpn_pi1_bdiv_q_1 receives as arguments and then joins that function at
+C L(common) with
+C   ecx  shift    number of trailing zero bits of divisor
+C   edx  d        divisor with those zero bits shifted out, also written
+C                 back to PARAM_DIVISOR
+C   eax  inverse  the ASSERT below expects d*inv == 1 mod 2^GMP_LIMB_BITS
+C   ebx  size
+C ebx and ebp are pushed here, which matches what mpn_pi1_bdiv_q_1 has done
+C by the time it reaches L(common).
+C
+PROLOGUE(mpn_bdiv_q_1)
+deflit(`FRAME',0)
+
+ movl $-1, %ecx C count starts at -1, it is incremented before each test
+ movl PARAM_DIVISOR, %eax
+
+L(strip_twos):
+ ASSERT(nz, `orl %eax, %eax')
+ shrl %eax
+ incl %ecx C shift count
+
+ C incl leaves the carry flag alone, so jnc tests the bit shifted out by shrl
+ jnc L(strip_twos)
+
+ leal 1(%eax,%eax), %edx C d
+ andl $127, %eax C d/2, 7 bits
+
+ pushl %ebx FRAME_pushl()
+ pushl %ebp FRAME_pushl()
+
+ifdef(`PIC',`
+ call L(here)
+L(here):
+ popl %ebp C eip
+
+ addl $_GLOBAL_OFFSET_TABLE_+[.-L(here)], %ebp
+ C AGI
+ movl binvert_limb_table@GOT(%ebp), %ebp
+ C AGI
+ movzbl (%eax,%ebp), %eax
+',`
+
+dnl non-PIC
+ movzbl binvert_limb_table(%eax), %eax C inv 8 bits
+')
+
+ C Two refinement steps inv = 2*inv - inv*inv*d follow, starting from the
+ C table value.
+ movl %eax, %ebp C inv
+ addl %eax, %eax C 2*inv
+
+ imull %ebp, %ebp C inv*inv
+
+ imull %edx, %ebp C inv*inv*d
+
+ subl %ebp, %eax C inv = 2*inv - inv*inv*d
+ movl PARAM_SIZE, %ebx C size, kept in ebx for L(common)
+
+ movl %eax, %ebp C inv
+ addl %eax, %eax C 2*inv
+
+ imull %ebp, %ebp C inv*inv
+
+ imull %edx, %ebp C inv*inv*d
+
+ subl %ebp, %eax C inv = 2*inv - inv*inv*d
+ movl %edx, PARAM_DIVISOR C d without twos
+
+ ASSERT(e,` C expect d*inv == 1 mod 2^GMP_LIMB_BITS
+ pushl %eax FRAME_pushl()
+ imull PARAM_DIVISOR, %eax
+ cmpl $1, %eax
+ popl %eax FRAME_popl()')
+
+ jmp L(common) C continue inside mpn_pi1_bdiv_q_1
+EPILOGUE()
+
+C mp_limb_t
+C mpn_pi1_bdiv_q_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_limb_t divisor,
+C mp_limb_t inverse, int shift)
+C
+C Writes size quotient limbs to dst.  Each quotient limb is the low limb of
+C      (source limb - carry) * inverse
+C where the source limb is first shifted right by shift bits when shift is
+C non-zero, taking its top bits from the limb above.  The carry for the next
+C step is the high limb of quotient limb * divisor, plus the borrow of the
+C subtraction just made.
+C
+C mpn_bdiv_q_1 joins at L(common) with eax = inverse, ebx = size, ecx = shift
+C and with ebx and ebp already pushed.
+ ALIGN(32)
+PROLOGUE(mpn_pi1_bdiv_q_1)
+deflit(`FRAME',0)
+
+ movl PARAM_SHIFT, %ecx
+
+ pushl %ebx FRAME_pushl()
+ pushl %ebp FRAME_pushl()
+
+ movl PARAM_SIZE, %ebx
+ movl PARAM_INVERSE, %eax
+
+L(common):
+ C eax inverse, ebx size, ecx shift
+ pushl %esi FRAME_pushl()
+ push %edi FRAME_pushl()
+
+ movl PARAM_SRC, %esi
+ movl PARAM_DST, %edi
+ movl %eax, VAR_INVERSE C reuses the PARAM_DST slot, dst is already in edi
+
+ leal (%esi,%ebx,4), %esi C src end
+ leal (%edi,%ebx,4), %edi C dst end
+
+ negl %ebx C -size
+
+ xorl %ebp, %ebp C initial carry bit
+
+ orl %ecx, %ecx C shift
+ movl (%esi,%ebx,4), %eax C src low limb
+ jz L(odd_entry) C shift == 0
+
+ xorl %edx, %edx C initial carry limb (for even, if one)
+ incl %ebx
+ jz L(one) C size == 1
+
+ movl (%esi,%ebx,4), %edx C src second limb (for even)
+ shrdl( %cl, %edx, %eax)
+
+ jmp L(even_entry)
+
+
+ ALIGN(8)
+L(odd_top):
+ C eax scratch
+ C ebx counter, limbs, negative
+ C ecx
+ C edx
+ C esi src end
+ C edi dst end
+ C ebp carry bit, 0 or -1
+
+ mull PARAM_DIVISOR C edx = high limb of previous quotient limb * divisor
+
+ movl (%esi,%ebx,4), %eax
+ subl %ebp, %edx C ebp is 0 or -1, so this adds the carry bit
+
+ subl %edx, %eax C src limb - carry limb
+
+ sbbl %ebp, %ebp C new carry bit, 0 or -1
+
+L(odd_entry):
+ imull VAR_INVERSE, %eax C quotient limb
+
+ movl %eax, (%edi,%ebx,4)
+
+ incl %ebx
+ jnz L(odd_top)
+
+ popl %edi
+ popl %esi
+
+ popl %ebp
+ popl %ebx
+
+ ret
+
+L(even_top):
+ C eax scratch
+ C ebx counter, limbs, negative
+ C ecx twos
+ C edx
+ C esi src end
+ C edi dst end
+ C ebp carry bit, 0 or -1
+
+ mull PARAM_DIVISOR C edx = high limb of previous quotient limb * divisor
+
+ subl %ebp, %edx C carry bit
+ movl -4(%esi,%ebx,4), %eax C src limb
+
+ movl (%esi,%ebx,4), %ebp C and one above it
+
+ shrdl( %cl, %ebp, %eax)
+
+ subl %edx, %eax C carry limb
+
+ sbbl %ebp, %ebp C new carry bit, 0 or -1
+
+L(even_entry):
+ imull VAR_INVERSE, %eax C quotient limb
+
+ movl %eax, -4(%edi,%ebx,4)
+ incl %ebx
+
+ jnz L(even_top)
+
+ C Last limb of the even case: there is no limb above it, so it is
+ C shifted with a plain shrl at L(one).
+ mull PARAM_DIVISOR
+
+ movl -4(%esi), %eax C src high limb
+ subl %ebp, %edx
+
+L(one):
+ shrl %cl, %eax
+
+ subl %edx, %eax C no carry if division is exact
+
+ imull VAR_INVERSE, %eax
+
+ movl %eax, -4(%edi) C dst high limb
+ nop C protect against cache bank clash
+
+ popl %edi
+ popl %esi
+
+ popl %ebp
+ popl %ebx
+
+ ret
+
+EPILOGUE()
include(`../config.m4')
-C P4 Willamette, Northwood: 4.0 cycles/limb if dst!=src1 and dst!=src2
-C 6.0 cycles/limb if dst==src1 or dst==src2
-C P4 Prescott: >= 5 cycles/limb
-
-C mp_limb_t mpn_add_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
-C mp_size_t size);
-C mp_limb_t mpn_add_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
-C mp_size_t size, mp_limb_t carry);
-C
-C The 4 c/l achieved here isn't particularly good, but is better than 9 c/l
-C for a basic adc loop.
+C cycles/limb
+C dst!=src1,2 dst==src1 dst==src2
+C P6 model 0-8,10-12 -
+C P6 model 9 (Banias) ?
+C P6 model 13 (Dothan) ?
+C P4 model 0-1 (Willamette) ?
+C P4 model 2 (Northwood) 4 6 6
+C P4 model 3-4 (Prescott) 4.25 7.5 7.5
defframe(PARAM_CARRY,20)
defframe(PARAM_SIZE, 16)
PROLOGUE(mpn_add_nc)
deflit(`FRAME',0)
-
movd PARAM_CARRY, %mm0
jmp L(start_nc)
-
EPILOGUE()
ALIGN(8)
PROLOGUE(mpn_add_n)
deflit(`FRAME',0)
-
pxor %mm0, %mm0
-
L(start_nc):
- movl PARAM_SRC1, %eax
- movl %ebx, SAVE_EBX
- movl PARAM_SRC2, %ebx
- movl PARAM_DST, %edx
- movl PARAM_SIZE, %ecx
+ mov PARAM_SRC1, %eax
+ mov %ebx, SAVE_EBX
+ mov PARAM_SRC2, %ebx
+ mov PARAM_DST, %edx
+ mov PARAM_SIZE, %ecx
- leal (%eax,%ecx,4), %eax C src1 end
- leal (%ebx,%ecx,4), %ebx C src2 end
- leal (%edx,%ecx,4), %edx C dst end
- negl %ecx C -size
+ lea (%eax,%ecx,4), %eax C src1 end
+ lea (%ebx,%ecx,4), %ebx C src2 end
+ lea (%edx,%ecx,4), %edx C dst end
+ neg %ecx C -size
L(top):
C eax src1 end
psrlq $32, %mm0
- addl $1, %ecx
+ add $1, %ecx
jnz L(top)
-
movd %mm0, %eax
- movl SAVE_EBX, %ebx
+ mov SAVE_EBX, %ebx
emms
ret
include(`../config.m4')
-C cycles/limb (approx)
-C dst!=src1,2 dst==src1 dst==src2
-C P4 m2: 4.5 ?7.25 ?6.75
-C P4 m3: 5.3 ? ?
+C cycles/limb
+C dst!=src1,2 dst==src1 dst==src2
+C P6 model 0-8,10-12 -
+C P6 model 9 (Banias) ?
+C P6 model 13 (Dothan) ?
+C P4 model 0-1 (Willamette) ?
+C P4 model 2 (Northwood) 4.25 6 6
+C P4 model 3-4 (Prescott) 5 8.5 8.5
-C mp_limb_t mpn_addlsh1_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
-C mp_size_t size);
-C
C The slightly strange combination of indexing and pointer incrementing
C that's used seems to work best. Not sure why, but %ecx,4 with src1 and/or
C src2 is a slowdown.
PROLOGUE(mpn_addlsh1_n)
deflit(`FRAME',0)
- movl PARAM_SRC1, %eax
- movl %ebx, SAVE_EBX
+ mov PARAM_SRC1, %eax
+ mov %ebx, SAVE_EBX
- movl PARAM_SRC2, %ebx
+ mov PARAM_SRC2, %ebx
pxor %mm0, %mm0 C initial carry
- movl PARAM_DST, %edx
+ mov PARAM_DST, %edx
- movl PARAM_SIZE, %ecx
+ mov PARAM_SIZE, %ecx
- leal (%edx,%ecx,4), %edx C dst end
- negl %ecx C -size
+ lea (%edx,%ecx,4), %edx C dst end
+ neg %ecx C -size
L(top):
C eax src1 end
C edx dst end
C mm0 carry
- movd (%eax), %mm1
movd (%ebx), %mm2
+ movd (%eax), %mm1
psrlq $32, %mm0
- leal 4(%eax), %eax
- leal 4(%ebx), %ebx
+ lea 4(%eax), %eax
+ lea 4(%ebx), %ebx
- paddq %mm2, %mm1
+ psllq $1, %mm2
paddq %mm2, %mm1
paddq %mm1, %mm0
movd %mm0, (%edx,%ecx,4)
- addl $1, %ecx
+ add $1, %ecx
jnz L(top)
psrlq $32, %mm0
- movl SAVE_EBX, %ebx
+ mov SAVE_EBX, %ebx
movd %mm0, %eax
emms
ret
dnl mpn_addmul_1 for Pentium 4 and P6 models with SSE2 (i.e., 9,D,E,F).
-dnl Copyright 2005, 2007 Free Software Foundation, Inc.
+dnl Copyright 2005, 2007, 2011 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
include(`../config.m4')
+C cycles/limb
+C P6 model 0-8,10-12 -
+C P6 model 9 (Banias) 5.24
+C P6 model 13 (Dothan) 5.24
+C P4 model 0-1 (Willamette) 5
+C P4 model 2 (Northwood) 5
+C P4 model 3-4 (Prescott) 5
+
C TODO:
C * Tweak eax/edx offsets in loop as to save some lea's
C * Perhaps software pipeline small-case code
-C cycles/limb
-C P6 model 0-8,10-12) -
-C P6 model 9 (Banias) ?
-C P6 model 13 (Dothan) 5.24
-C P4 model 0-1 (Willamette): 5
-C P4 model 2 (Northwood): 5
-C P4 model 3-4 (Prescott): 5
-
C INPUT PARAMETERS
C rp sp + 4
C up sp + 8
TEXT
ALIGN(16)
-PROLOGUE(mpn_addmul_1c)
- mov 4(%esp), %edx
- mov 8(%esp), %eax
- mov 12(%esp), %ecx
- movd 16(%esp), %mm7
- movd 20(%esp), %mm6
- jmp L(ent)
-EPILOGUE()
- ALIGN(16)
PROLOGUE(mpn_addmul_1)
- mov 4(%esp), %edx
+ pxor %mm6, %mm6
+L(ent): mov 4(%esp), %edx
mov 8(%esp), %eax
mov 12(%esp), %ecx
movd 16(%esp), %mm7
- pxor %mm6, %mm6
-L(ent): cmp $4, %ecx
+ cmp $4, %ecx
jnc L(big)
L(lp0): movd (%eax), %mm0
emms
ret
EPILOGUE()
+PROLOGUE(mpn_addmul_1c)
+ movd 20(%esp), %mm6
+ jmp L(ent)
+EPILOGUE()
--- /dev/null
+dnl Intel Atom mpn_bdiv_dbm1c.
+
+dnl Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
+dnl
+dnl Copyright 2011 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C P5 -
+C P6 model 0-8,10-12 -
+C P6 model 9 (Banias) 9.75
+C P6 model 13 (Dothan)
+C P4 model 0 (Willamette)
+C P4 model 1 (?)
+C P4 model 2 (Northwood) 8.25
+C P4 model 3 (Prescott)
+C P4 model 4 (Nocona)
+C Intel Atom 8
+C AMD K6 -
+C AMD K7 -
+C AMD K8
+C AMD K10
+
+C TODO: This code was optimised for atom-32, consider moving it back to atom
+C dir(atom currently grabs this code), and write a 4-way version(7c/l).
+
+defframe(PARAM_CARRY,20)
+defframe(PARAM_MUL, 16)
+defframe(PARAM_SIZE, 12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+
+dnl re-use parameter space
+define(SAVE_RP,`PARAM_MUL')
+define(SAVE_UP,`PARAM_SIZE')
+
+define(`rp', `%edi')
+define(`up', `%esi')
+define(`n', `%ecx')
+define(`reg', `%edx')
+define(`cy', `%eax') C contains the return value
+
+ASM_START()
+ TEXT
+ ALIGN(16)
+deflit(`FRAME',0)
+
+C Arguments, per the defframe offsets: PARAM_DST, PARAM_SRC, PARAM_SIZE,
+C PARAM_MUL, PARAM_CARRY.
+C
+C cy starts as PARAM_CARRY.  For each source limb the 64-bit product
+C limb * PARAM_MUL is formed with pmuludq, and then
+C      cy -= low half of the product
+C      store cy to the destination
+C      cy -= high half of the product + borrow
+C The final cy is left in eax as the return value.
+C
+C shr n halves the count and leaves the low bit of size in the carry flag.
+C The loop at L(top) handles two limbs per pass and the last limb is always
+C handled at L(eq1).  Even sizes consume one limb ahead of the loop, odd
+C sizes enter the loop at L(odd), and size 1 goes straight to L(eq1).
+PROLOGUE(mpn_bdiv_dbm1c)
+ mov PARAM_SIZE, n C size
+ mov up, SAVE_UP C SAVE_UP reuses the PARAM_SIZE slot, size is already in n
+ mov PARAM_SRC, up
+ movd PARAM_MUL, %mm7
+ mov rp, SAVE_RP C SAVE_RP reuses the PARAM_MUL slot, multiplier is in mm7
+ mov PARAM_DST, rp
+
+ movd (up), %mm0
+ pmuludq %mm7, %mm0
+ shr n C n = size/2, carry flag = low bit of size
+ mov PARAM_CARRY, cy C mov leaves the flags from shr intact
+ jz L(eq1) C size/2 == 0
+
+ movd 4(up), %mm1
+ jc L(odd) C size is odd
+
+ lea 4(up), up
+ pmuludq %mm7, %mm1
+ movd %mm0, reg C low half of product
+ psrlq $32, %mm0
+ sub reg, cy
+ movd %mm0, reg C high half, subtracted by the sbb at L(top) or L(end)
+ movq %mm1, %mm0
+ dec n C dec leaves the carry flag from sub intact
+ mov cy, (rp)
+ lea 4(rp), rp
+ jz L(end)
+
+C ALIGN(16)
+L(top): movd 4(up), %mm1
+ sbb reg, cy
+L(odd): movd %mm0, reg
+ psrlq $32, %mm0
+ pmuludq %mm7, %mm1
+ sub reg, cy
+ lea 8(up), up
+ movd %mm0, reg
+ movd (up), %mm0
+ mov cy, (rp)
+ sbb reg, cy
+ movd %mm1, reg
+ psrlq $32, %mm1
+ sub reg, cy
+ movd %mm1, reg
+ pmuludq %mm7, %mm0
+ dec n C dec leaves the carry flag from sub intact
+ mov cy, 4(rp)
+ lea 8(rp), rp
+ jnz L(top)
+
+L(end): sbb reg, cy
+
+L(eq1): movd %mm0, reg
+ psrlq $32, %mm0
+ mov SAVE_UP, up C restore the register saved on entry
+ sub reg, cy
+ movd %mm0, reg
+ emms
+ mov cy, (rp)
+ sbb reg, cy
+
+ mov SAVE_RP, rp C restore the register saved on entry
+ ret
+EPILOGUE()
+ASM_END()
--- /dev/null
+dnl Intel Pentium-4 mpn_bdiv_q_1, mpn_pi1_bdiv_q_1 -- mpn by limb exact division.
+
+dnl Copyright 2001, 2002, 2007, 2011 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl Rearranged from mpn/x86/pentium4/sse2/dive_1.asm by Marco Bodrato.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C P4: 19.0 cycles/limb
+
+C Pairs of movd's are used to avoid unaligned loads. Despite the loads not
+C being on the dependent chain and there being plenty of cycles available,
+C using an unaligned movq on every second iteration measured about 23 c/l.
+C
+
+defframe(PARAM_SHIFT, 24)
+defframe(PARAM_INVERSE,20)
+defframe(PARAM_DIVISOR,16)
+defframe(PARAM_SIZE, 12)
+defframe(PARAM_SRC, 8)
+defframe(PARAM_DST, 4)
+
+ TEXT
+
+C mp_limb_t
+C mpn_pi1_bdiv_q_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_limb_t divisor,
+C mp_limb_t inverse, int shift)
+C
+C The divisor, shift count and inverse are used exactly as passed in.  The
+C loop at L(top) runs size-1 times.  Each pass combines two adjacent source
+C limbs so that the shifted limb takes its high bits from the limb above.
+C The last limb is handled at L(done), which reads one source limb only.
+C mpn_bdiv_q_1 joins at L(entry) once it has set up the same registers.
+ ALIGN(32)
+PROLOGUE(mpn_pi1_bdiv_q_1)
+deflit(`FRAME',0)
+
+ movl PARAM_SIZE, %edx
+
+ movl PARAM_SRC, %eax
+
+ movl PARAM_DIVISOR, %ecx
+
+ movd %ecx, %mm6 C divisor
+ movl PARAM_SHIFT, %ecx
+
+ movd %ecx, %mm7 C shift
+
+ C
+
+ movl PARAM_INVERSE, %ecx
+ movd %ecx, %mm5 C inv
+
+ movl PARAM_DST, %ecx
+ pxor %mm1, %mm1 C initial carry limb
+ pxor %mm0, %mm0 C initial carry bit
+
+ subl $1, %edx
+ jz L(done) C size == 1
+
+ pcmpeqd %mm4, %mm4
+ psrlq $32, %mm4 C 0x00000000FFFFFFFF
+
+C The dependent chain here is as follows.
+C
+C latency
+C psubq s = (src-cbit) - climb 2
+C pmuludq q = s*inverse 8
+C pmuludq prod = q*divisor 8
+C psrlq climb = high(prod) 2
+C --
+C 20
+C
+C Yet the loop measures 19.0 c/l, so obviously there's something gained
+C there over a straight reading of the chip documentation.
+
+L(top):
+ C eax src, incrementing
+ C ebx
+ C ecx dst, incrementing
+ C edx counter, size-1 iterations
+ C
+ C mm0 carry bit
+ C mm1 carry limb
+ C mm4 0x00000000FFFFFFFF
+ C mm5 inverse
+ C mm6 divisor
+ C mm7 shift
+
+ movd (%eax), %mm2
+ movd 4(%eax), %mm3
+ addl $4, %eax
+ punpckldq %mm3, %mm2 C limb above goes in the high half of mm2
+
+ psrlq %mm7, %mm2
+ pand %mm4, %mm2 C src
+ psubq %mm0, %mm2 C src - cbit
+
+ psubq %mm1, %mm2 C src - cbit - climb
+ movq %mm2, %mm0
+ psrlq $63, %mm0 C new cbit
+
+ pmuludq %mm5, %mm2 C s*inverse
+ movd %mm2, (%ecx) C q
+ addl $4, %ecx
+
+ movq %mm6, %mm1
+ pmuludq %mm2, %mm1 C q*divisor
+ psrlq $32, %mm1 C new climb
+
+L(entry):
+ C mpn_bdiv_q_1 enters here with edx = size
+ subl $1, %edx
+ jnz L(top)
+
+L(done):
+ movd (%eax), %mm2
+ psrlq %mm7, %mm2 C src
+ psubq %mm0, %mm2 C src - cbit
+
+ psubq %mm1, %mm2 C src - cbit - climb
+
+ pmuludq %mm5, %mm2 C s*inverse
+ movd %mm2, (%ecx) C q
+
+ emms
+ ret
+
+EPILOGUE()
+
+ ALIGN(16)
+C mp_limb_t mpn_bdiv_q_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
+C mp_limb_t divisor);
+C
+C Entry point that takes only the divisor.  It sets up the registers that
+C the loop in mpn_pi1_bdiv_q_1 uses and then joins it at L(entry):
+C   mm7  shift    bsfl of the divisor
+C   mm6  d        divisor shifted right by that count
+C   mm5  inverse  the ASSERT below expects d*inv == 1 mod 2^GMP_LIMB_BITS
+C   mm4  0x00000000FFFFFFFF
+C   mm0, mm1      zero carry bit and carry limb
+C   eax, ecx      src and dst pointers
+C   edx           size
+C
+PROLOGUE(mpn_bdiv_q_1)
+deflit(`FRAME',0)
+
+ movl PARAM_SIZE, %edx
+
+ movl PARAM_DIVISOR, %ecx
+
+ C eax scratch for now, the src pointer is loaded further down
+ C ebx
+ C ecx divisor
+ C edx size, decremented at L(entry)
+
+ movl %ecx, %eax
+ bsfl %ecx, %ecx C trailing twos
+
+ shrl %cl, %eax C d = divisor without twos
+ movd %eax, %mm6
+ movd %ecx, %mm7 C shift
+
+ shrl %eax C d/2
+
+ andl $127, %eax C d/2, 7 bits
+
+ifdef(`PIC',`
+ LEA( binvert_limb_table, %ecx)
+ movzbl (%eax,%ecx), %eax C inv 8 bits
+',`
+ movzbl binvert_limb_table(%eax), %eax C inv 8 bits
+')
+
+ C Two refinement steps inv = 2*inv - inv*inv*d follow, starting from the
+ C table value.
+
+ movd %eax, %mm5 C inv
+
+ movd %eax, %mm0 C inv
+
+ pmuludq %mm5, %mm5 C inv*inv
+
+ C
+
+ pmuludq %mm6, %mm5 C inv*inv*d
+ paddd %mm0, %mm0 C 2*inv
+
+ C
+
+ psubd %mm5, %mm0 C inv = 2*inv - inv*inv*d
+ pxor %mm5, %mm5
+
+ paddd %mm0, %mm5 C mm5 = copy of inv
+ pmuludq %mm0, %mm0 C inv*inv
+
+ pcmpeqd %mm4, %mm4
+ psrlq $32, %mm4 C 0x00000000FFFFFFFF
+
+ C
+
+ pmuludq %mm6, %mm0 C inv*inv*d
+ paddd %mm5, %mm5 C 2*inv
+
+ movl PARAM_SRC, %eax
+ movl PARAM_DST, %ecx
+ pxor %mm1, %mm1 C initial carry limb
+
+ C
+
+ psubd %mm0, %mm5 C inv = 2*inv - inv*inv*d
+
+ ASSERT(e,` C expect d*inv == 1 mod 2^GMP_LIMB_BITS
+ pushl %eax FRAME_pushl()
+ movq %mm6, %mm0
+ pmuludq %mm5, %mm0
+ movd %mm0, %eax
+ cmpl $1, %eax
+ popl %eax FRAME_popl()')
+
+ pxor %mm0, %mm0 C initial carry bit
+ jmp L(entry) C continue inside mpn_pi1_bdiv_q_1
+
+EPILOGUE()
C The dependent chain here is as follows.
C
-C latency
-C psubq s = (src-cbit) - climb 2
-C pmuludq q = s*inverse 8
-C pmuludq prod = q*divisor 8
-C psrlq climb = high(prod) 2
-C --
-C 20
+C latency
+C psubq s = (src-cbit) - climb 2
+C pmuludq q = s*inverse 8
+C pmuludq prod = q*divisor 8
+C psrlq climb = high(prod) 2
+C --
+C 20
C
C Yet the loop measures 19.0 c/l, so obviously there's something gained
C there over a straight reading of the chip documentation.
#define MOD_1_NORM_THRESHOLD 24
#define MOD_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD 26
-#define MOD_1U_TO_MOD_1_1_THRESHOLD 9
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD 0 /* never mpn_mod_1_1p */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 6
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 5
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 13
#define MOD_1_2_TO_MOD_1_4_THRESHOLD 0 /* never mpn_mod_1s_2p */
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 34
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 2
#define USE_PREINV_DIVREM_1 1 /* native */
+#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
#define DIVEXACT_1_THRESHOLD 0 /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD 22
+#define BMOD_1_TO_MOD_1_THRESHOLD 20
-#define MUL_TOOM22_THRESHOLD 30
-#define MUL_TOOM33_THRESHOLD 120
-#define MUL_TOOM44_THRESHOLD 296
-#define MUL_TOOM6H_THRESHOLD 414
-#define MUL_TOOM8H_THRESHOLD 620
+#define MUL_TOOM22_THRESHOLD 29
+#define MUL_TOOM33_THRESHOLD 107
+#define MUL_TOOM44_THRESHOLD 276
+#define MUL_TOOM6H_THRESHOLD 422
+#define MUL_TOOM8H_THRESHOLD 587
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD 198
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD 216
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD 194
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD 209
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD 117
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 207
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 193
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD 184
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD 164
#define SQR_BASECASE_THRESHOLD 0 /* always (native) */
#define SQR_TOOM2_THRESHOLD 48
-#define SQR_TOOM3_THRESHOLD 170
-#define SQR_TOOM4_THRESHOLD 454
-#define SQR_TOOM6_THRESHOLD 454
+#define SQR_TOOM3_THRESHOLD 173
+#define SQR_TOOM4_THRESHOLD 264
+#define SQR_TOOM6_THRESHOLD 354
#define SQR_TOOM8_THRESHOLD 915
+#define MULMID_TOOM42_THRESHOLD 66
+
#define MULMOD_BNM1_THRESHOLD 19
-#define SQRMOD_BNM1_THRESHOLD 24
+#define SQRMOD_BNM1_THRESHOLD 19
-#define MUL_FFT_MODF_THRESHOLD 904 /* k = 6 */
+#define MUL_FFT_MODF_THRESHOLD 1103 /* k = 5 */
#define MUL_FFT_TABLE3 \
{ { 904, 6}, { 15, 5}, { 32, 6}, { 17, 5}, \
- { 35, 6}, { 19, 5}, { 39, 6}, { 28, 7}, \
+ { 35, 6}, { 19, 5}, { 39, 6}, { 29, 7}, \
{ 15, 6}, { 33, 7}, { 17, 6}, { 35, 7}, \
{ 19, 6}, { 41, 7}, { 21, 6}, { 43, 7}, \
- { 23, 6}, { 47, 7}, { 27, 6}, { 55, 8}, \
- { 15, 7}, { 31, 6}, { 63, 7}, { 35, 8}, \
- { 19, 7}, { 43, 8}, { 23, 7}, { 51, 8}, \
- { 27, 7}, { 55, 8}, { 31, 7}, { 63, 8}, \
- { 43, 9}, { 23, 8}, { 55, 9}, { 31, 8}, \
- { 71, 9}, { 39, 8}, { 79, 9}, { 47, 8}, \
- { 95, 9}, { 55,10}, { 31, 9}, { 63, 8}, \
- { 127, 9}, { 79,10}, { 47, 9}, { 103,11}, \
- { 31,10}, { 63, 9}, { 135,10}, { 79, 9}, \
- { 159,10}, { 95,11}, { 63,10}, { 127, 9}, \
- { 263,10}, { 143, 9}, { 287,10}, { 159,11}, \
- { 95,10}, { 207,12}, { 63,11}, { 127,10}, \
- { 271,11}, { 159,10}, { 319,11}, { 191,10}, \
- { 383,11}, { 223,12}, { 127,11}, { 287,10}, \
- { 607,11}, { 319,12}, { 191,11}, { 383,10}, \
- { 767,13}, { 127,12}, { 255,11}, { 511,10}, \
- { 1055,11}, { 543,10}, { 1119, 9}, { 2239,11}, \
- { 607,12}, { 319,11}, { 671,10}, { 1407,11}, \
- { 735,10}, { 1471, 9}, { 2943,12}, { 383,11}, \
- { 799,10}, { 1663,11}, { 863,10}, { 1727,12}, \
- { 447,13}, { 255,12}, { 511,11}, { 1055,10}, \
- { 2111,11}, { 1119,10}, { 2239, 9}, { 4479,12}, \
- { 575,11}, { 1247,10}, { 2495, 9}, { 4991,12}, \
- { 639,11}, { 1471,10}, { 2943,13}, { 383,12}, \
- { 767,11}, { 1599,12}, { 831,11}, { 1727,10}, \
- { 3455,14}, { 255,13}, { 511,12}, { 1023,11}, \
- { 2111,12}, { 1087,11}, { 2239,10}, { 4479,12}, \
- { 1215,11}, { 2495,10}, { 4991,13}, { 639,12}, \
- { 1471,11}, { 2943,10}, { 5887,11}, { 3007,13}, \
- { 767,12}, { 1727,11}, { 3455,13}, { 895,11}, \
- { 3839,12}, { 4096,13}, { 8192,14}, { 16384,15}, \
+ { 23, 6}, { 47, 7}, { 27, 6}, { 55, 7}, \
+ { 31, 6}, { 63, 7}, { 43, 8}, { 23, 7}, \
+ { 51, 8}, { 27, 7}, { 55, 8}, { 31, 7}, \
+ { 63, 8}, { 39, 7}, { 79, 8}, { 43, 9}, \
+ { 23, 8}, { 55, 9}, { 31, 8}, { 71, 9}, \
+ { 39, 8}, { 79, 9}, { 47, 8}, { 95, 9}, \
+ { 55,10}, { 31, 9}, { 63, 8}, { 127, 9}, \
+ { 79,10}, { 47, 9}, { 111,11}, { 31,10}, \
+ { 63, 9}, { 143,10}, { 79, 9}, { 167,10}, \
+ { 95, 9}, { 191,10}, { 111,11}, { 63,10}, \
+ { 127, 9}, { 255,10}, { 159, 9}, { 319,10}, \
+ { 175,11}, { 95,10}, { 207,12}, { 63,11}, \
+ { 127,10}, { 287,11}, { 159,10}, { 319, 9}, \
+ { 639,10}, { 351,11}, { 191,10}, { 383,11}, \
+ { 223,12}, { 4096,13}, { 8192,14}, { 16384,15}, \
{ 32768,16} }
-#define MUL_FFT_TABLE3_SIZE 141
-#define MUL_FFT_THRESHOLD 7552
+#define MUL_FFT_TABLE3_SIZE 77
+#define MUL_FFT_THRESHOLD 7808
-#define SQR_FFT_MODF_THRESHOLD 793 /* k = 5 */
+#define SQR_FFT_MODF_THRESHOLD 824 /* k = 5 */
#define SQR_FFT_TABLE3 \
{ { 793, 5}, { 28, 6}, { 15, 5}, { 33, 6}, \
- { 17, 5}, { 35, 6}, { 19, 5}, { 39, 6}, \
- { 29, 7}, { 15, 6}, { 33, 7}, { 17, 6}, \
- { 35, 7}, { 19, 6}, { 41, 7}, { 23, 6}, \
- { 47, 7}, { 27, 6}, { 55, 7}, { 31, 6}, \
- { 63, 7}, { 37, 8}, { 19, 7}, { 43, 8}, \
- { 23, 7}, { 49, 8}, { 31, 7}, { 63, 8}, \
- { 39, 7}, { 79, 8}, { 43, 9}, { 23, 8}, \
- { 55, 9}, { 31, 8}, { 71, 9}, { 39, 8}, \
- { 79, 9}, { 47, 8}, { 95, 9}, { 55,10}, \
- { 31, 9}, { 79,10}, { 47, 9}, { 103,11}, \
- { 31,10}, { 63, 9}, { 135,10}, { 79, 9}, \
- { 159,10}, { 95, 9}, { 191,11}, { 63,10}, \
- { 159,11}, { 95,10}, { 191,12}, { 63,11}, \
- { 127,10}, { 255, 9}, { 511,10}, { 271,11}, \
- { 159,10}, { 335,11}, { 191,10}, { 383, 9}, \
- { 767,10}, { 399, 9}, { 799,11}, { 223,12}, \
- { 127,11}, { 255,10}, { 527, 9}, { 1055,10}, \
- { 543,11}, { 287,10}, { 607, 9}, { 1215,11}, \
- { 319,12}, { 191,11}, { 383,10}, { 799,13}, \
- { 127,12}, { 255,11}, { 511,10}, { 1055,11}, \
- { 543,10}, { 1119, 9}, { 2239,11}, { 607,10}, \
- { 1215,12}, { 319,11}, { 671,10}, { 1407,11}, \
- { 735,10}, { 1471, 9}, { 2943,10}, { 1503,12}, \
- { 383,11}, { 799,10}, { 1599,11}, { 863,10}, \
- { 1727,12}, { 447,11}, { 991,13}, { 255,12}, \
- { 511,11}, { 1055,10}, { 2111,11}, { 1119,10}, \
- { 2239,12}, { 575,11}, { 1247,10}, { 2495,12}, \
- { 639,11}, { 1471,10}, { 2943,13}, { 383,12}, \
- { 767,11}, { 1599,12}, { 831,11}, { 1727,10}, \
- { 3455,12}, { 959,11}, { 1919,14}, { 255,13}, \
- { 511,12}, { 1023,11}, { 2111,12}, { 1087,11}, \
- { 2239,10}, { 4479,12}, { 1215,11}, { 2495,13}, \
- { 639,12}, { 1471,11}, { 2943,10}, { 5887,13}, \
- { 767,12}, { 1727,11}, { 3455,13}, { 895,12}, \
- { 1791,11}, { 3711,12}, { 1919,11}, { 3839,12}, \
+ { 17, 5}, { 35, 6}, { 28, 7}, { 15, 6}, \
+ { 33, 7}, { 17, 6}, { 35, 7}, { 19, 6}, \
+ { 41, 7}, { 23, 6}, { 47, 7}, { 27, 6}, \
+ { 55, 8}, { 15, 7}, { 31, 6}, { 63, 7}, \
+ { 37, 8}, { 19, 7}, { 43, 8}, { 23, 7}, \
+ { 51, 8}, { 31, 7}, { 63, 8}, { 39, 7}, \
+ { 79, 8}, { 43, 9}, { 23, 8}, { 55, 9}, \
+ { 31, 8}, { 71, 9}, { 39, 8}, { 79, 9}, \
+ { 47, 8}, { 95, 9}, { 55,10}, { 31, 9}, \
+ { 79,10}, { 47, 9}, { 95,11}, { 31,10}, \
+ { 63, 9}, { 135,10}, { 79, 9}, { 159,10}, \
+ { 95, 9}, { 191,10}, { 111,11}, { 63,10}, \
+ { 127, 9}, { 255,10}, { 159,11}, { 95,10}, \
+ { 191,12}, { 63,11}, { 127,10}, { 255, 9}, \
+ { 511,10}, { 271,11}, { 159,10}, { 319, 9}, \
+ { 639,11}, { 191,10}, { 399, 9}, { 799,12}, \
{ 4096,13}, { 8192,14}, { 16384,15}, { 32768,16} }
-#define SQR_FFT_TABLE3_SIZE 148
-#define SQR_FFT_THRESHOLD 5760
+#define SQR_FFT_TABLE3_SIZE 72
+#define SQR_FFT_THRESHOLD 7296
-#define MULLO_BASECASE_THRESHOLD 12
-#define MULLO_DC_THRESHOLD 51
-#define MULLO_MUL_N_THRESHOLD 13463
+#define MULLO_BASECASE_THRESHOLD 13
+#define MULLO_DC_THRESHOLD 48
+#define MULLO_MUL_N_THRESHOLD 14709
-#define DC_DIV_QR_THRESHOLD 28
-#define DC_DIVAPPR_Q_THRESHOLD 61
-#define DC_BDIV_QR_THRESHOLD 55
-#define DC_BDIV_Q_THRESHOLD 82
+#define DC_DIV_QR_THRESHOLD 38
+#define DC_DIVAPPR_Q_THRESHOLD 77
+#define DC_BDIV_QR_THRESHOLD 54
+#define DC_BDIV_Q_THRESHOLD 97
-#define INV_MULMOD_BNM1_THRESHOLD 60
-#define INV_NEWTON_THRESHOLD 94
-#define INV_APPR_THRESHOLD 78
+#define INV_MULMOD_BNM1_THRESHOLD 57
+#define INV_NEWTON_THRESHOLD 202
+#define INV_APPR_THRESHOLD 116
#define BINV_NEWTON_THRESHOLD 327
-#define REDC_1_TO_REDC_N_THRESHOLD 63
+#define REDC_1_TO_REDC_N_THRESHOLD 34
#define MU_DIV_QR_THRESHOLD 2350
-#define MU_DIVAPPR_Q_THRESHOLD 2089
-#define MUPI_DIV_QR_THRESHOLD 7
-#define MU_BDIV_QR_THRESHOLD 2089
-#define MU_BDIV_Q_THRESHOLD 2089
-
-#define MATRIX22_STRASSEN_THRESHOLD 34
-#define HGCD_THRESHOLD 74
-#define GCD_DC_THRESHOLD 321
-#define GCDEXT_DC_THRESHOLD 209
-#define JACOBI_BASE_METHOD 1
-
-#define GET_STR_DC_THRESHOLD 12
-#define GET_STR_PRECOMPUTE_THRESHOLD 28
-#define SET_STR_DC_THRESHOLD 123
-#define SET_STR_PRECOMPUTE_THRESHOLD 1265
+#define MU_DIVAPPR_Q_THRESHOLD 2172
+#define MUPI_DIV_QR_THRESHOLD 66
+#define MU_BDIV_QR_THRESHOLD 1787
+#define MU_BDIV_Q_THRESHOLD 2350
+
+#define POWM_SEC_TABLE 2,35,164,1068,2500
+
+#define MATRIX22_STRASSEN_THRESHOLD 30
+#define HGCD_THRESHOLD 85
+#define HGCD_APPR_THRESHOLD 95
+#define HGCD_REDUCE_THRESHOLD 5010
+#define GCD_DC_THRESHOLD 393
+#define GCDEXT_DC_THRESHOLD 253
+#define JACOBI_BASE_METHOD 4
+
+#define GET_STR_DC_THRESHOLD 11
+#define GET_STR_PRECOMPUTE_THRESHOLD 24
+#define SET_STR_DC_THRESHOLD 119
+#define SET_STR_PRECOMPUTE_THRESHOLD 1084
+
+#define FAC_DSC_THRESHOLD 342
+#define FAC_ODD_THRESHOLD 27
--- /dev/null
+dnl x86-32 mpn_mod_1_1p for Pentium 4 and P6 models with SSE2 (i.e., 9,D,E,F).
+
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
+dnl Copyright 2009, 2010 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C TODO:
+C * Optimize. The present code was written quite straightforwardly.
+C * Optimize post-loop reduction code; it is from mod_1s_4p, thus overkill.
+C * Write a cps function that uses sse2 insns.
+
+C cycles/limb
+C P6 model 0-8,10-12 -
+C P6 model 9 (Banias) ?
+C P6 model 13 (Dothan) ?
+C P4 model 0-1 (Willamette) ?
+C P4 model 2 (Northwood) 16
+C P4 model 3-4 (Prescott) 18
+
+C INPUT PARAMETERS
+C ap sp + 4
+C n sp + 8
+C b sp + 12
+C cps sp + 16
+
+define(`B1modb', `%mm1')
+define(`B2modb', `%mm2')
+define(`ap', `%edx')
+define(`n', `%eax')
+
+ TEXT
+ ALIGN(16)
+C mpn_mod_1_1p: compute the remainder of the n-limb operand at ap with
+C respect to b, using the table filled in by mpn_mod_1_1p_cps in this file
+C (bi, cnt, B1modb, B2modb at byte offsets 0, 4, 8, 12 from cps).  The result
+C is returned in eax.
+C NOTE(review): two limbs are read unconditionally before n is tested, so the
+C code appears to require n >= 2 -- confirm against the callers.
+C NOTE(review): the tail compares against b first and only then shifts right
+C by cnt, which suggests b is passed already shifted left by cnt -- confirm.
+PROLOGUE(mpn_mod_1_1p)
+ push %ebx
+C The push above moves the arguments up by 4 bytes: ap is now at 8(esp),
+C n at 12, b at 16 and cps at 20.
+ mov 8(%esp), ap
+ mov 12(%esp), n
+ mov 20(%esp), %ecx
+ movd 8(%ecx), B1modb
+ movd 12(%ecx), B2modb
+
+C Point ap at the most significant limb, i.e. at limb index n-1.
+ lea -4(ap,n,4), ap
+
+C FIXME: See comment in generic/mod_1_1.c.
+C Seed the accumulator from the two top limbs:
+C mm7 = top limb * B1modb + next lower limb.
+ movd (ap), %mm7
+ movd -4(ap), %mm4
+ pmuludq B1modb, %mm7
+ paddq %mm4, %mm7
+C Two limbs are consumed; skip the loop when none remain.
+ add $-2, n
+ jz L(end)
+
+C Loop invariant: mm7 holds a 64-bit value rh:rl.  Each pass takes the next
+C lower limb x and forms rh * B2modb + rl * B1modb + x.  pmuludq multiplies
+C only the low 32-bit halves, which is why the unshifted copy in mm6
+C contributes rl.  The MMX adds do not touch EFLAGS, so the jnz at the bottom
+C still tests the result of the add on n.
+ ALIGN(8)
+L(top): movq %mm7, %mm6
+ psrlq $32, %mm7 C rh
+ movd -8(ap), %mm0
+ add $-4, ap
+ pmuludq B2modb, %mm7
+ pmuludq B1modb, %mm6
+ add $-1, n
+ paddq %mm0, %mm7
+ paddq %mm6, %mm7
+ jnz L(top)
+
+C Post-loop reduction.  First fold once more to rh * B1modb + rl, building
+C the low-half mask in mm4, then shift left by cnt and form a quotient
+C estimate with the precomputed bi.
+L(end): pcmpeqd %mm4, %mm4
+ psrlq $32, %mm4 C 0x00000000FFFFFFFF
+ pand %mm7, %mm4 C rl
+ psrlq $32, %mm7 C rh
+ pmuludq B1modb, %mm7 C rh,cl
+ paddq %mm4, %mm7 C rh,rl
+ movd 4(%ecx), %mm4 C cnt
+ psllq %mm4, %mm7 C rh,rl normalized
+ movq %mm7, %mm2 C rl in low half
+ psrlq $32, %mm7 C rh
+ movd (%ecx), %mm1 C bi
+ pmuludq %mm7, %mm1 C qh,ql
+ paddq %mm2, %mm1 C qh-1,ql
+ movd %mm1, %ecx C ql
+ psrlq $32, %mm1 C qh-1
+ movd 16(%esp), %mm3 C b
+ pmuludq %mm1, %mm3 C (qh-1) * b
+ psubq %mm3, %mm2 C r in low half (could use psubd)
+ movd %mm2, %eax C r
+C First adjustment: eax = r - b, and b is added back (via edx) when
+C ql < eax as an unsigned compare.
+ mov 16(%esp), %ebx
+ sub %ebx, %eax C r
+ cmp %eax, %ecx
+ lea (%eax,%ebx), %edx
+ cmovc( %edx, %eax)
+ movd %mm4, %ecx C cnt
+C Second adjustment: subtract b once more if the value is still >= b.
+ cmp %ebx, %eax
+ jae L(fix)
+C Common exit sequence (duplicated below): leave MMX state, restore ebx and
+C undo the left shift by cnt.
+ emms
+ pop %ebx
+ shr %cl, %eax
+ ret
+
+L(fix): sub %ebx, %eax
+ emms
+ pop %ebx
+ shr %cl, %eax
+ ret
+EPILOGUE()
+
+C mpn_mod_1_1p_cps: fill the four-word table at cps (first argument) for the
+C divisor b (second argument).  Words stored, in order: bi, cnt, B1modb,
+C B2modb.  These are the values mpn_mod_1_1p in this file reads back.
+PROLOGUE(mpn_mod_1_1p_cps)
+C CAUTION: This is the same code as in k7/mod_1_1.asm
+ push %ebp
+C One register pushed so far, so 12(esp) is the second argument, b.
+ mov 12(%esp), %ebp
+ push %esi
+C bsr gives the index of the highest set bit; xor with 31 below turns that
+C into the number of leading zero bits, cnt.
+C NOTE(review): bsr leaves its destination undefined for a zero source, so
+C b is presumably nonzero -- confirm.
+ bsr %ebp, %ecx
+ push %ebx
+ xor $31, %ecx
+C Three registers pushed, so 16(esp) is the first argument, cps.
+ mov 16(%esp), %esi
+C ebp = d = b << cnt, i.e. b shifted so that its top bit is set.
+ sal %cl, %ebp
+C Divide the 64-bit value (~d):0xFFFFFFFF by d; the quotient in eax is bi.
+ mov %ebp, %edx
+ not %edx
+ mov $-1, %eax
+ div %ebp
+ mov %eax, (%esi) C store bi
+ mov %ecx, 4(%esi) C store cnt
+C ebx = -d.  shld shifts edx = 1 left by cnt, pulling in the top cnt bits of
+C bi; the low 32 bits of the product with -d are B1modb before the final
+C right shift by cnt.
+ xor %ebx, %ebx
+ sub %ebp, %ebx
+ mov $1, %edx
+ shld %cl, %eax, %edx
+ imul %edx, %ebx
+C edx:eax = bi * ebx.  The candidate is ~(high word + ebx) * d, and d is
+C added to it (through ebp) when the low product word is below the candidate
+C as an unsigned compare.
+ mul %ebx
+ add %ebx, %edx
+ not %edx
+ imul %ebp, %edx
+ add %edx, %ebp
+ cmp %edx, %eax
+ cmovc( %ebp, %edx)
+C Both constants are stored shifted right by cnt.
+ shr %cl, %ebx
+ mov %ebx, 8(%esi) C store B1modb
+ shr %cl, %edx
+ mov %edx, 12(%esi) C store B2modb
+ pop %ebx
+ pop %esi
+ pop %ebp
+ ret
+EPILOGUE()
-dnl mpn_mod_1_4 for Pentium 4 and P6 models with SSE2 (i.e., 9,D,E,F).
+dnl x86-32 mpn_mod_1s_4p for Pentium 4 and P6 models with SSE2 (i.e. 9,D,E,F).
dnl Contributed to the GNU project by Torbjorn Granlund.
-dnl Copyright 2009 Free Software Foundation, Inc.
+dnl Copyright 2009, 2010 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
C TODO:
C * Optimize. The present code was written quite straightforwardly.
C * Optimize post-loop reduction code.
+C * Write a cps function that uses sse2 insns.
-C cycles/limb
-C P6 model 0-8,10-12) -
-C P6 model 9 (Banias) ?
-C P6 model 13 (Dothan) 3.4
-C P4 model 0-1 (Willamette): ?
-C P4 model 2 (Northwood): 4
-C P4 model 3-4 (Prescott): ?
+C cycles/limb
+C P6 model 0-8,10-12 -
+C P6 model 9 (Banias) ?
+C P6 model 13 (Dothan) 3.4
+C P4 model 0-1 (Willamette) ?
+C P4 model 2 (Northwood) 4
+C P4 model 3-4 (Prescott) 4.5
C INPUT PARAMETERS
C ap sp + 4
define(`B3modb', `%mm3')
define(`B4modb', `%mm4')
define(`B5modb', `%mm5')
-define(`ap', `%edx')
-define(`n', `%eax')
+define(`ap', `%edx')
+define(`n', `%eax')
+ASM_START()
TEXT
ALIGN(16)
PROLOGUE(mpn_mod_1s_4p)
jz L(x)
jmp L(top)
-L(b2): movd (ap), %mm7
- pmuludq B1modb, %mm7
- movd -4(ap), %mm6
- paddq %mm6, %mm7
+L(b2): movd -4(ap), %mm7 C rl
+ punpckldq (ap), %mm7 C rh
lea -20(ap), ap
add $-2, n
jz L(end)
add $-16, ap
add $-4, n
jnz L(top)
-L(end):
- pcmpeqd %mm4, %mm4
+L(end): pcmpeqd %mm4, %mm4
psrlq $32, %mm4 C 0x00000000FFFFFFFF
pand %mm7, %mm4 C rl
psrlq $32, %mm7 C rh
pmuludq B1modb, %mm7 C rh,cl
paddq %mm4, %mm7 C rh,rl
-
L(x): movd 4(%ecx), %mm4 C cnt
psllq %mm4, %mm7 C rh,rl normalized
movq %mm7, %mm2 C rl in low half
ret
EPILOGUE()
+ ALIGN(16)
PROLOGUE(mpn_mod_1s_4p_cps)
+C CAUTION: This is the same code as in k7/mod_1_4.asm
push %ebp
push %edi
push %esi
push %ebx
- sub $12, %esp
- mov 36(%esp), %ebx
+ mov 20(%esp), %ebp C FIXME: avoid bp for 0-idx
+ mov 24(%esp), %ebx
bsr %ebx, %ecx
xor $31, %ecx
- mov %ecx, 4(%esp)
- sal %cl, %ebx
+ sal %cl, %ebx C b << cnt
mov %ebx, %edx
not %edx
mov $-1, %eax
div %ebx
- mov %eax, %esi
- mov $1, %ebp
- sal %cl, %ebp
- neg %ecx
- shr %cl, %eax
- or %eax, %ebp
- mov %ebx, %eax
- neg %eax
- imul %ebp, %eax
- mov %esi, %ecx
- mov %eax, 8(%esp)
- mul %ecx
- mov %edx, %esi
- not %esi
- sub 8(%esp), %esi
- imul %ebx, %esi
- lea (%esi,%ebx), %edx
- cmp %esi, %eax
- cmovb( %edx, %esi)
- mov %esi, %eax
- mul %ecx
- lea (%esi,%edx), %edi
- not %edi
- imul %ebx, %edi
- lea (%edi,%ebx), %edx
- cmp %edi, %eax
- cmovb( %edx, %edi)
+ xor %edi, %edi
+ sub %ebx, %edi
+ mov $1, %esi
+ mov %eax, (%ebp) C store bi
+ mov %ecx, 4(%ebp) C store cnt
+ shld %cl, %eax, %esi
+ imul %edi, %esi
+ mov %eax, %edi
+ mul %esi
+
+ add %esi, %edx
+ shr %cl, %esi
+ mov %esi, 8(%ebp) C store B1modb
+
+ not %edx
+ imul %ebx, %edx
+ lea (%edx,%ebx), %esi
+ cmp %edx, %eax
+ cmovnc( %edx, %esi)
+ mov %edi, %eax
+ mul %esi
+
+ add %esi, %edx
+ shr %cl, %esi
+ mov %esi, 12(%ebp) C store B2modb
+
+ not %edx
+ imul %ebx, %edx
+ lea (%edx,%ebx), %esi
+ cmp %edx, %eax
+ cmovnc( %edx, %esi)
+ mov %edi, %eax
+ mul %esi
+
+ add %esi, %edx
+ shr %cl, %esi
+ mov %esi, 16(%ebp) C store B3modb
+
+ not %edx
+ imul %ebx, %edx
+ lea (%edx,%ebx), %esi
+ cmp %edx, %eax
+ cmovnc( %edx, %esi)
mov %edi, %eax
- mul %ecx
- lea (%edi,%edx), %ebp
- not %ebp
- imul %ebx, %ebp
- lea (%ebp,%ebx), %edx
- cmp %ebp, %eax
- cmovb( %edx, %ebp)
- mov %ebp, %eax
- mul %ecx
- add %ebp, %edx
+ mul %esi
+
+ add %esi, %edx
+ shr %cl, %esi
+ mov %esi, 20(%ebp) C store B4modb
+
not %edx
imul %ebx, %edx
add %edx, %ebx
cmp %edx, %eax
- cmovb( %ebx, %edx)
- mov 32(%esp), %eax
- mov %ecx, (%eax)
- mov 4(%esp), %ecx
- mov %ecx, 4(%eax)
- mov 8(%esp), %ebx
+ cmovnc( %edx, %ebx)
+
shr %cl, %ebx
- mov %ebx, 8(%eax)
- shr %cl, %esi
- mov %esi, 12(%eax)
- shr %cl, %edi
- mov %edi, 16(%eax)
- shr %cl, %ebp
- mov %ebp, 20(%eax)
- shr %cl, %edx
- mov %edx, 24(%eax)
- add $12, %esp
+ mov %ebx, 24(%ebp) C store B5modb
+
pop %ebx
pop %esi
pop %edi
C The dependent chain here is as follows.
C
-C latency
-C psubq s = (src-cbit) - climb 2
-C pmuludq q = s*inverse 8
-C pmuludq prod = q*divisor 8
-C psrlq climb = high(prod) 2
-C --
-C 20
+C latency
+C psubq s = (src-cbit) - climb 2
+C pmuludq q = s*inverse 8
+C pmuludq prod = q*divisor 8
+C psrlq climb = high(prod) 2
+C --
+C 20
C
C Yet the loop measures 19.0 c/l, so obviously there's something gained
C there over a straight reading of the chip documentation.
dnl mpn_mul_1 for Pentium 4 and P6 models with SSE2 (i.e., 9,D,E,F).
-dnl Copyright 2005, 2007 Free Software Foundation, Inc.
+dnl Copyright 2005, 2007, 2011 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
include(`../config.m4')
+C cycles/limb
+C P6 model 0-8,10-12 -
+C P6 model 9 (Banias) 4.17
+C P6 model 13 (Dothan) 4.17
+C P4 model 0-1 (Willamette) 4
+C P4 model 2 (Northwood) 4
+C P4 model 3-4 (Prescott) 4.55
+
C TODO:
C * Tweak eax/edx offsets in loop as to save some lea's
C * Perhaps software pipeline small-case code
-C cycles/limb
-C P6 model 0-8,10-12) -
-C P6 model 9 (Banias) ?
-C P6 model 13 (Dothan) 4.17
-C P4 model 0-1 (Willamette): 4
-C P4 model 2 (Northwood): 4
-C P4 model 3-4 (Prescott): 4.55
-
C INPUT PARAMETERS
C rp sp + 4
C up sp + 8
TEXT
ALIGN(16)
-PROLOGUE(mpn_mul_1c)
- mov 4(%esp), %edx
- mov 8(%esp), %eax
- mov 12(%esp), %ecx
- movd 16(%esp), %mm7
- movd 20(%esp), %mm6
- jmp L(ent)
-EPILOGUE()
- ALIGN(16)
PROLOGUE(mpn_mul_1)
- mov 4(%esp), %edx
+ pxor %mm6, %mm6
+L(ent): mov 4(%esp), %edx
mov 8(%esp), %eax
mov 12(%esp), %ecx
movd 16(%esp), %mm7
- pxor %mm6, %mm6
-L(ent): cmp $4, %ecx
+ cmp $4, %ecx
jnc L(big)
L(lp0): movd (%eax), %mm0
emms
ret
EPILOGUE()
+PROLOGUE(mpn_mul_1c)
+ movd 20(%esp), %mm6
+ jmp L(ent)
+EPILOGUE()
dnl X86-32 and X86-64 mpn_popcount using SSE2.
-dnl Copyright 2006, 2007 Free Software Foundation, Inc.
+dnl Copyright 2006, 2007, 2011 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
include(`../config.m4')
-C 32-bit popcount hamdist
-C cycles/limb cycles/limb
-C P5: -
-C P6 model 0-8,10-12) -
-C P6 model 9 (Banias) ?
-C P6 model 13 (Dothan) 4
-C P4 model 0 (Willamette) ?
-C P4 model 1 (?) ?
-C P4 model 2 (Northwood) 3.9
-C P4 model 3 (Prescott) ?
-C P4 model 4 (Nocona) ?
-C K6: -
-C K7: -
-C K8: ?
-
-C 64-bit popcount hamdist
-C cycles/limb cycles/limb
-C P4 model 4 (Nocona): 8
-C K8: 7.5
-C K10: 3.5
-C P6 core2: 3.68
-C P6 corei7: 3.15
+C 32-bit popcount hamdist
+C cycles/limb cycles/limb
+C P5 -
+C P6 model 0-8,10-12 -
+C P6 model 9 (Banias) ?
+C P6 model 13 (Dothan) 4
+C P4 model 0 (Willamette) ?
+C P4 model 1 (?) ?
+C P4 model 2 (Northwood) 3.9
+C P4 model 3 (Prescott) ?
+C P4 model 4 (Nocona) ?
+C AMD K6 -
+C AMD K7 -
+C AMD K8 ?
+
+C 64-bit popcount hamdist
+C cycles/limb cycles/limb
+C P4 model 4 (Nocona) 8
+C AMD K8,K9 7.5
+C AMD K10 3.5
+C Intel core2 3.68
+C Intel corei 3.15
+C Intel atom 10.8
+C VIA nano 6.5
C TODO
C * Make a mpn_hamdist based on this. Alignment could either be handled by
include(`../config.m4')
-C P4 Willamette, Northwood: 4.0 cycles/limb if dst!=src1 and dst!=src2
-C 6.0 cycles/limb if dst==src1 or dst==src2
-C P4 Prescott: >= 5 cycles/limb
-
-
-C mp_limb_t mpn_sub_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
-C mp_size_t size);
-C mp_limb_t mpn_sub_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
-C mp_size_t size, mp_limb_t carry);
-C
-C The main loop code is 2x unrolled so that the carry bit can alternate
-C between mm0 and mm1.
+C cycles/limb
+C dst!=src1,2 dst==src1 dst==src2
+C P6 model 0-8,10-12 -
+C P6 model 9 (Banias) ?
+C P6 model 13 (Dothan) ?
+C P4 model 0-1 (Willamette) ?
+C P4 model 2 (Northwood) 4 6 6
+C P4 model 3-4 (Prescott) 4.25 7.5 7.5
defframe(PARAM_CARRY,20)
defframe(PARAM_SIZE, 16)
PROLOGUE(mpn_sub_nc)
deflit(`FRAME',0)
-
movd PARAM_CARRY, %mm0
jmp L(start_nc)
-
EPILOGUE()
ALIGN(8)
deflit(`FRAME',0)
pxor %mm0, %mm0
L(start_nc):
- movl PARAM_SRC1, %eax
- movl %ebx, SAVE_EBX
- movl PARAM_SRC2, %ebx
- movl PARAM_DST, %edx
- movl PARAM_SIZE, %ecx
+ mov PARAM_SRC1, %eax
+ mov %ebx, SAVE_EBX
+ mov PARAM_SRC2, %ebx
+ mov PARAM_DST, %edx
+ mov PARAM_SIZE, %ecx
- leal (%eax,%ecx,4), %eax C src1 end
- leal (%ebx,%ecx,4), %ebx C src2 end
- leal (%edx,%ecx,4), %edx C dst end
- negl %ecx C -size
+ lea (%eax,%ecx,4), %eax C src1 end
+ lea (%ebx,%ecx,4), %ebx C src2 end
+ lea (%edx,%ecx,4), %edx C dst end
+ neg %ecx C -size
L(top):
C eax src1 end
psrlq $63, %mm1
- addl $1, %ecx
+ add $1, %ecx
jz L(done_mm1)
movd (%eax,%ecx,4), %mm0
psrlq $63, %mm0
- addl $1, %ecx
+ add $1, %ecx
jnz L(top)
-
movd %mm0, %eax
- movl SAVE_EBX, %ebx
+ mov SAVE_EBX, %ebx
emms
ret
L(done_mm1):
movd %mm1, %eax
- movl SAVE_EBX, %ebx
+ mov SAVE_EBX, %ebx
emms
ret
dnl Intel Pentium-4 mpn_submul_1 -- Multiply a limb vector with a limb and
dnl subtract the result from a second limb vector.
-dnl Copyright 2001, 2002 Free Software Foundation, Inc.
+dnl Copyright 2001, 2002, 2008, 2010 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
include(`../config.m4')
-C P4: 7 cycles/limb, unstable timing, at least on early Pentium4 silicon
-C (stepping 10).
+C cycles/limb
+C P6 model 0-8,10-12 -
+C P6 model 9 (Banias) 6.8
+C P6 model 13 (Dothan) 6.9
+C P4 model 0-1 (Willamette) ?
+C P4 model 2 (Northwood) 5.87
+C P4 model 3-4 (Prescott) 6.5
-
-C mp_limb_t mpn_submul_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
-C mp_limb_t mult);
-C mp_limb_t mpn_submul_1c (mp_ptr dst, mp_srcptr src, mp_size_t size,
-C mp_limb_t mult, mp_limb_t carry);
-C
-C This code is not particularly good at 7 c/l. The dependent chain is only
-C 4 c/l and there's only 4 MMX unit instructions, so it's not clear why that
-C speed isn't achieved.
+C This code represents a step forwards compared to the code available before
+C GMP 5.1, but it is not carefully tuned for either P6 or P4. In fact, it is
+C not good for P6. For P4 it saved a bit over 1 c/l for both Northwood and
+C Prescott compared to the old code.
C
C The arrangements made here to get a two instruction dependent chain are
-C slightly subtle. In the loop the carry (or borrow rather) is a negative
-C so that a paddq can be used to give a low limb ready to store, and a high
-C limb ready to become the new carry after a psrlq.
+C slightly subtle. In the loop the carry (or borrow rather) is a negative so
+C that a paddq can be used to give a low limb ready to store, and a high limb
+C ready to become the new carry after a psrlq.
C
-C If the carry was a simple twos complement negative then the psrlq shift
-C would need to bring in 0 bits or 1 bits according to whether the high was
-C zero or non-zero, since a non-zero value would represent a negative
-C needing sign extension. That wouldn't be particularly easy to arrange and
-C certainly would add an instruction to the dependent chain, so instead an
-C offset is applied so that the high limb will be 0xFFFFFFFF+c. With c in
-C the range -0xFFFFFFFF to 0, the value 0xFFFFFFFF+c is in the range 0 to
-C 0xFFFFFFFF and is therefore always positive and can always have 0 bits
-C shifted in, which is what psrlq does.
+C If the carry was a simple twos complement negative then the psrlq shift would
+C need to bring in 0 bits or 1 bits according to whether the high was zero or
+C non-zero, since a non-zero value would represent a negative needing sign
+C extension. That wouldn't be particularly easy to arrange and certainly would
+C add an instruction to the dependent chain, so instead an offset is applied so
+C that the high limb will be 0xFFFFFFFF+c. With c in the range -0xFFFFFFFF to
+C 0, the value 0xFFFFFFFF+c is in the range 0 to 0xFFFFFFFF and is therefore
+C always positive and can always have 0 bits shifted in, which is what psrlq
+C does.
C
C The extra 0xFFFFFFFF must be subtracted before c is used, but that can be
C done off the dependent chain. The total adjustment then is to add
-C 0xFFFFFFFF00000000 to offset the new carry, and subtract
-C 0x00000000FFFFFFFF to remove the offset from the current carry, for a net
-C add of 0xFFFFFFFE00000001. In the code this is applied to the destination
-C limb when fetched.
+C 0xFFFFFFFF00000000 to offset the new carry, and subtract 0x00000000FFFFFFFF
+C to remove the offset from the current carry, for a net add of
+C 0xFFFFFFFE00000001. In the code this is applied to the destination limb when
+C fetched.
C
C It's also possible to view the 0xFFFFFFFF adjustment as a ones-complement
C negative, which is how it's undone for the return value, but that doesn't
pxor %mm1, %mm1 C initial borrow
L(start_1c):
- movl PARAM_SRC, %eax
+ mov PARAM_SRC, %eax
pcmpeqd %mm0, %mm0
movd PARAM_MULTIPLIER, %mm7
pcmpeqd %mm6, %mm6
- movl PARAM_DST, %edx
+ mov PARAM_DST, %edx
psrlq $32, %mm0 C 0x00000000FFFFFFFF
- movl PARAM_SIZE, %ecx
+ mov PARAM_SIZE, %ecx
psllq $32, %mm6 C 0xFFFFFFFF00000000
psubq %mm0, %mm6 C 0xFFFFFFFE00000001
psubq %mm1, %mm0 C 0xFFFFFFFF - borrow
- C eax src, incrementing
- C ebx
- C ecx loop counter, decrementing
- C edx dst, incrementing
- C
- C mm0 0xFFFFFFFF - borrow
- C mm6 0xFFFFFFFE00000001
- C mm7 multiplier
-
-L(loop):
- movd (%eax), %mm1 C src
- leal 4(%eax), %eax
- movd (%edx), %mm2 C dst
- paddq %mm6, %mm2 C add 0xFFFFFFFE00000001
+ movd (%eax), %mm3 C up
+ movd (%edx), %mm4 C rp
+
+ add $-1, %ecx
+ paddq %mm6, %mm4 C add 0xFFFFFFFE00000001
+ pmuludq %mm7, %mm3
+ jnz L(gt1)
+ psubq %mm3, %mm4 C prod
+ paddq %mm4, %mm0 C borrow
+ movd %mm0, (%edx) C result
+ jmp L(rt)
+
+L(gt1): movd 4(%eax), %mm1 C up
+ movd 4(%edx), %mm2 C rp
+
+ add $-1, %ecx
+ jz L(eev)
+
+ ALIGN(16)
+L(top): paddq %mm6, %mm2 C add 0xFFFFFFFE00000001
pmuludq %mm7, %mm1
+ psubq %mm3, %mm4 C prod
+ movd 8(%eax), %mm3 C up
+ paddq %mm4, %mm0 C borrow
+ movd 8(%edx), %mm4 C rp
+ movd %mm0, (%edx) C result
+ psrlq $32, %mm0
+
+ add $-1, %ecx
+ jz L(eod)
+
+ paddq %mm6, %mm4 C add 0xFFFFFFFE00000001
+ pmuludq %mm7, %mm3
psubq %mm1, %mm2 C prod
+ movd 12(%eax), %mm1 C up
paddq %mm2, %mm0 C borrow
- subl $1, %ecx
- movd %mm0, (%edx) C result
+ movd 12(%edx), %mm2 C rp
+ movd %mm0, 4(%edx) C result
psrlq $32, %mm0
- leal 4(%edx), %edx
- jnz L(loop)
+ lea 8(%eax), %eax
+ lea 8(%edx), %edx
+ add $-1, %ecx
+ jnz L(top)
+
+
+L(eev): paddq %mm6, %mm2 C add 0xFFFFFFFE00000001
+ pmuludq %mm7, %mm1
+ psubq %mm3, %mm4 C prod
+ paddq %mm4, %mm0 C borrow
+ movd %mm0, (%edx) C result
+ psrlq $32, %mm0
+ psubq %mm1, %mm2 C prod
+ paddq %mm2, %mm0 C borrow
+ movd %mm0, 4(%edx) C result
+L(rt): psrlq $32, %mm0
movd %mm0, %eax
- notl %eax
+ not %eax
emms
ret
+L(eod): paddq %mm6, %mm4 C add 0xFFFFFFFE00000001
+ pmuludq %mm7, %mm3
+ psubq %mm1, %mm2 C prod
+ paddq %mm2, %mm0 C borrow
+ movd %mm0, 4(%edx) C result
+ psrlq $32, %mm0
+ psubq %mm3, %mm4 C prod
+ paddq %mm4, %mm0 C borrow
+ movd %mm0, 8(%edx) C result
+ jmp L(rt)
EPILOGUE()
C cycles/limb
-C P54: 7.5
-C P55: 7.0
-C P6: 2.5
-C K6: 4.5
-C K7: 5.0
-C P4: 16.5
+C P54 7.5
+C P55 7.0
+C P6 2.5
+C K6 4.5
+C K7 5.0
+C P4 16.5
C mp_limb_t mpn_rshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
C cycles/crossproduct cycles/triangleproduct
-C P5:
-C P6:
-C K6:
-C K7:
-C P4:
+C P5
+C P6
+C K6
+C K7
+C P4
C void mpn_sqr_basecase (mp_ptr dst, mp_srcptr src, mp_size_t size);
--- /dev/null
+dnl x86 mpn_tabselect.
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C cycles/limb
+C P5 ?
+C P6 model 0-8,10-12 ?
+C P6 model 9 (Banias) ?
+C P6 model 13 (Dothan) ?
+C P4 model 0 (Willamette) ?
+C P4 model 1 (?) ?
+C P4 model 2 (Northwood) 4.5
+C P4 model 3 (Prescott) ?
+C P4 model 4 (Nocona) ?
+C Intel Atom ?
+C AMD K6 ?
+C AMD K7 3.4
+C AMD K8 ?
+C AMD K10 ?
+
+C NOTES
+C * This has not been tuned for any specific processor. Its speed should not
+C be too bad, though.
+C * Using SSE2 could result in many-fold speedup.
+
+C mpn_tabselect (mp_limb_t *rp, mp_limb_t *tp, mp_size_t n, mp_size_t nents, mp_size_t which)
+define(`rp', `%edi')
+define(`tp', `%esi')
+define(`n', `%ebx')
+define(`nents', `%ecx')
+define(`which', `36(%esp)')
+
+define(`i', `%ebp')
+define(`maskp', `20(%esp)')
+define(`maskn', `32(%esp)')
+
+ASM_START()
+ TEXT
+ ALIGN(16)
+C mpn_tabselect: tp is a table of nents entries of n limbs each.  Every entry
+C is read in turn and merged into rp under a mask, so that rp ends up holding
+C entry number which (counting from 0).  If which is outside 0..nents-1 no
+C mask ever selects an entry and rp keeps its old contents.
+C NOTE(review): both loops test their condition at the bottom, so n >= 1 and
+C nents >= 1 appear to be required -- confirm against the callers.
+PROLOGUE(mpn_tabselect)
+ push %edi
+ push %esi
+ push %ebx
+ push %ebp
+C Four registers pushed, so the arguments start at 20(esp).
+ mov 20(%esp), rp
+ mov 24(%esp), tp
+ mov 28(%esp), n
+ mov 32(%esp), nents
+
+C Point rp and tp at the end of their n-limb blocks so the inner loop can use
+C a negative index that counts up to zero.
+ lea (rp,n,4), rp
+ lea (tp,n,4), tp
+C Bias the which stack slot by -nents.  Adding the remaining count back in
+C the outer loop then gives zero exactly at the selected entry.
+ sub nents, which
+L(outer):
+ mov which, %eax
+ add nents, %eax
+ neg %eax C set CF iff 'which' != k
+C sbb turns CF into an all-ones or all-zeros word.  maskn is all ones when
+C this entry is not the selected one (keep rp); maskp is its complement (take
+C tp).  The masks live in the stack slots of the rp and nents arguments, which
+C were already loaded into registers above.
+ sbb %eax, %eax
+ mov %eax, maskn
+ not %eax
+ mov %eax, maskp
+
+C i runs from -n up to -1.
+ mov n, i
+ neg i
+
+C Inner loop: rp limb = (tp limb AND maskp) OR (rp limb AND maskn).
+ ALIGN(16)
+L(top): mov (tp,i,4), %eax
+ and maskp, %eax
+ mov (rp,i,4), %edx
+ and maskn, %edx
+ or %edx, %eax
+ mov %eax, (rp,i,4)
+ inc i
+ js L(top)
+
+C Advance tp by n limbs to the next table entry; rp stays put.
+L(end): mov n, %eax
+ lea (tp,%eax,4), tp
+ dec nents
+ jne L(outer)
+
+L(outer_end):
+ pop %ebp
+ pop %ebx
+ pop %esi
+ pop %edi
+ ret
+EPILOGUE()
dnl m4 macros for x86 assembler.
-dnl Copyright 1999, 2000, 2001, 2002, 2003, 2007 Free Software Foundation,
-dnl Inc.
+dnl Copyright 1999, 2000, 2001, 2002, 2003, 2007, 2010, 2012 Free Software
+dnl Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
define(CPUVEC_FUNCS_LIST,
``add_n',
+`addlsh1_n',
+`addlsh2_n',
`addmul_1',
+`addmul_2',
+`bdiv_dbm1c',
+`com',
`copyd',
`copyi',
`divexact_1',
-`divexact_by3c',
`divrem_1',
`gcd_1',
`lshift',
+`lshiftc',
`mod_1',
+`mod_1_1p',
+`mod_1_1p_cps',
+`mod_1s_2p',
+`mod_1s_2p_cps',
+`mod_1s_4p',
+`mod_1s_4p_cps',
`mod_34lsub1',
`modexact_1c_odd',
`mul_1',
`mul_basecase',
+`mullo_basecase',
`preinv_divrem_1',
`preinv_mod_1',
+`redc_1',
+`redc_2',
`rshift',
`sqr_basecase',
`sub_n',
+`sublsh1_n',
`submul_1'')
dnl Usage LEA(symbol,reg)
-define(`LEA',`
+define(`LEA',
+m4_assert_numargs(2)
+`ifdef(`PIC',`
define(`EPILOGUE_cpu',
`
L(movl_eip_`'substr($2,1)):
ret_internal
SIZE($'`1, .-$'`1)')
- call L(movl_eip_`'substr($2,1))
- addl $_GLOBAL_OFFSET_TABLE_, $2
- movl $1@GOT($2), $2
-')
-
+ call L(movl_eip_`'substr($2,1))
+ addl $_GLOBAL_OFFSET_TABLE_, $2
+ movl $1@GOT($2), $2
+',`
+ movl `$'$1, $2
+')')
define(`DEF_OBJECT',
m4_assert_numargs_range(1,2)
m4_assert_numargs(1)
` SIZE(`$1',.-`$1')')
+dnl Usage: CALL(funcname)
+dnl
+dnl Expands to a call instruction whose target is GSYM_PREFIX followed by
+dnl funcname.  When PIC is defined the target carries an @PLT suffix;
+dnl otherwise the plain symbol is called.  Exactly one argument is accepted.
+dnl
+
+define(`CALL',
+m4_assert_numargs(1)
+`ifdef(`PIC',
+ `call GSYM_PREFIX`'$1@PLT',
+ `call GSYM_PREFIX`'$1')')
+
+dnl Keep PIC_WITH_EBX in step with PIC: it is defined (with an empty
+dnl expansion) exactly when PIC is defined, and undefined otherwise.
+ifdef(`PIC',
+`define(`PIC_WITH_EBX')',
+`undefine(`PIC_WITH_EBX')')
+
divert`'dnl
include(`../config.m4')
C cycles/limb
-C K8: 2.167
-C P4: 12.0
-C P6-15: 4.0
+C AMD K8,K9 2.167
+C AMD K10 2.167
+C Intel P4 12.0
+C Intel core2 4.0
+C Intel corei ?
+C Intel atom ?
+C VIA nano ?
C TODO
C * Perhaps handle various n mod 3 sizes better. The code now is too large.
mul %r8
add %rax, %r10
mov -16(bp,n,8), %rax
- mov $0, %r11d
+ mov $0, R32(%r11)
adc %rdx, %r11
mul %r9
add %rax, %r10
mul %r8
add %rax, %r11
mov -8(bp,n,8), %rax
- mov $0, %r12d
+ mov $0, R32(%r12)
adc %rdx, %r12
mul %r9
add %rax, %r11
add %rax, %r12
mov %r11, -8(rp,n,8)
mov (bp,n,8), %rax
- mov $0, %r10d
+ mov $0, R32(%r10)
adc %rdx, %r10
add $3, n
js L(top)
mul %r8
add %rax, %r10
mov -16(bp), %rax
- mov $0, %r11d
+ mov $0, R32(%r11)
adc %rdx, %r11
mul %r9
add %rax, %r10
mul %r8
add %rax, %r11
mov -8(bp), %rax
- mov $0, %r12d
+ mov $0, R32(%r12)
adc %rdx, %r12
mul %r9
add %rax, %r11
mul %r8
add %rax, %r10
mov -8(bp), %rax
- mov $0, %r11d
+ mov $0, R32(%r11)
adc %rdx, %r11
mul %r9
add %rax, %r10
dnl AMD64 mpn_addmul_2 -- Multiply an n-limb vector with a 2-limb vector and
dnl add the result to a third limb vector.
-dnl Copyright 2008 Free Software Foundation, Inc.
+dnl Copyright 2008, 2011, 2012 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
include(`../config.m4')
C cycles/limb
-C K8,K9: 2.375
-C K10: 2.375
-C P4: ?
-C P6 core2: 4.45
-C P6 corei7: 4.35
+C AMD K8,K9 2.375
+C AMD K10 2.375
+C Intel P4 15-16
+C Intel core2 4.45
+C Intel NHM 4.32
+C Intel SBR 3.4
+C Intel atom ?
+C VIA nano 4.4
C This code is the result of running a code generation and optimization tool
C suite written by David Harvey and Torbjorn Granlund.
C TODO
-C * Work on feed-in and wind-down code.
-C * Convert "mov $0" to "xor".
-C * Adjust initial lea to save some bytes.
-C * Perhaps adjust n from n_param&3 value?
+C * Tune feed-in and wind-down code.
C INPUT PARAMETERS
define(`rp', `%rdi')
define(`w3', `%r10')
define(`n', `%r11')
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
ASM_START()
TEXT
ALIGN(16)
PROLOGUE(mpn_addmul_2)
+ FUNC_ENTRY(4)
+ mov n_param, n
push %rbx
push %rbp
- mov (vp), v0
+ mov 0(vp), v0
mov 8(vp), v1
- mov n_param, n
- neg n
- lea -32(up,n_param,8), up
- lea -32(rp,n_param,8), rp
-
- and $3, R32(n_param)
- jz L(am2p0)
- cmp $2, R32(n_param)
- jc L(am2p1)
- jz L(am2p2)
-L(am2p3):
- mov 32(up,n,8), %rax
+ mov R32(n_param), R32(%rbx)
+ mov (up), %rax
+ lea -8(up,n_param,8), up
+ lea -8(rp,n_param,8), rp
mul v0
- mov %rax, w1
- mov 32(up,n,8), %rax
+ neg n
+ and $3, R32(%rbx)
+ jz L(b0)
+ cmp $2, R32(%rbx)
+ jc L(b1)
+ jz L(b2)
+
+L(b3): mov %rax, w1
mov %rdx, w2
xor R32(w3), R32(w3)
- add $2, n
- jmp L(am3)
-L(am2p0):
- mov 32(up,n,8), %rax
- mul v0
- mov %rax, w0
- mov 32(up,n,8), %rax
- mov %rdx, w1
- xor R32(w2), R32(w2)
- add $3, n
- jmp L(am0)
-L(am2p1):
- mov 32(up,n,8), %rax
- mul v0
- mov %rax, w3
- mov 32(up,n,8), %rax
- mov %rdx, w0
- xor R32(w1), R32(w1)
- jmp L(am1)
-L(am2p2):
- mov 32(up,n,8), %rax
- mul v0
- mov %rax, w2
- mov 32(up,n,8), %rax
+ mov 8(up,n,8), %rax
+ dec n
+ jmp L(lo3)
+
+L(b2): mov %rax, w2
+ mov 8(up,n,8), %rax
mov %rdx, w3
xor R32(w0), R32(w0)
+ add $-2, n
+ jmp L(lo2)
+
+L(b1): mov %rax, w3
+ mov 8(up,n,8), %rax
+ mov %rdx, w0
xor R32(w1), R32(w1)
- add $1, n
- jmp L(am2)
+ inc n
+ jmp L(lo1)
- ALIGN(32)
-L(top):
- add w3, (rp,n,8) C 0 21
- adc %rax, w0 C 1 24
+L(b0): mov $0, R32(w3)
+ mov %rax, w0
mov 8(up,n,8), %rax
- adc %rdx, w1 C 3 26
+ mov %rdx, w1
+ xor R32(w2), R32(w2)
+ jmp L(lo0)
+
+ ALIGN(32)
+L(top): mov $0, R32(w1)
+ mul v0
+ add %rax, w3
+ mov (up,n,8), %rax
+ adc %rdx, w0
+ adc $0, R32(w1)
+L(lo1): mul v1
+ add w3, (rp,n,8)
+ mov $0, R32(w3)
+ adc %rax, w0
mov $0, R32(w2)
+ mov 8(up,n,8), %rax
+ adc %rdx, w1
mul v0
- add %rax, w0 C 2 26
+ add %rax, w0
mov 8(up,n,8), %rax
- adc %rdx, w1 C 4 28
- adc $0, R32(w2) C 6 30
-L(am0): mul v1
- add w0, 8(rp,n,8) C 3 27
- adc %rax, w1 C 6 30
- adc %rdx, w2 C 8 32
+ adc %rdx, w1
+ adc $0, R32(w2)
+L(lo0): mul v1
+ add w0, 8(rp,n,8)
+ adc %rax, w1
+ adc %rdx, w2
mov 16(up,n,8), %rax
- mov $0, R32(w3)
mul v0
- add %rax, w1 C 8
+ add %rax, w1
+ adc %rdx, w2
+ adc $0, R32(w3)
mov 16(up,n,8), %rax
- adc %rdx, w2 C 10
- adc $0, R32(w3) C 12
-L(am3): mul v1
- add w1, 16(rp,n,8) C 9
- adc %rax, w2 C 12
+L(lo3): mul v1
+ add w1, 16(rp,n,8)
+ adc %rax, w2
+ adc %rdx, w3
+ xor R32(w0), R32(w0)
mov 24(up,n,8), %rax
- adc %rdx, w3 C 14
mul v0
- mov $0, R32(w0)
- add %rax, w2 C 14
- adc %rdx, w3 C 16
- mov $0, R32(w1)
+ add %rax, w2
mov 24(up,n,8), %rax
- adc $0, R32(w0) C 18
-L(am2): mul v1
- add w2, 24(rp,n,8) C 15
- adc %rax, w3 C 18
- adc %rdx, w0 C 20
- mov 32(up,n,8), %rax
- mul v0
- add %rax, w3 C 20
+ adc %rdx, w3
+ adc $0, R32(w0)
+L(lo2): mul v1
+ add w2, 24(rp,n,8)
+ adc %rax, w3
+ adc %rdx, w0
mov 32(up,n,8), %rax
- adc %rdx, w0 C 22
- adc $0, R32(w1) C 24
-L(am1): mul v1
add $4, n
js L(top)
- add w3, (rp,n,8)
+L(end): xor R32(w1), R32(w1)
+ mul v0
+ add %rax, w3
+ mov (up), %rax
+ adc %rdx, w0
+ adc R32(w1), R32(w1)
+ mul v1
+ add w3, (rp)
adc %rax, w0
adc %rdx, w1
- mov w0, 8(rp,n,8)
+ mov w0, 8(rp)
mov w1, %rax
pop %rbp
pop %rbx
+ FUNC_EXIT()
ret
EPILOGUE()
dnl AMD64 mpn_addlsh1_n -- rp[] = up[] + (vp[] << 1)
dnl AMD64 mpn_rsblsh1_n -- rp[] = (vp[] << 1) - up[]
-dnl Copyright 2003, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
+dnl Copyright 2003, 2005, 2006, 2007, 2008, 2009, 2011, 2012 Free Software
+dnl Foundation, Inc.
dnl This file is part of the GNU MP Library.
C cycles/limb
-C K8,K9: 2
-C K10: 2
-C P4: 13
-C P6 core2: 3.45
-C P6 corei7: 3.45
-C P6 atom: ?
+C AMD K8,K9 2
+C AMD K10 2
+C Intel P4 13
+C Intel core2 3.45
+C Intel corei 3.45
+C Intel atom ?
+C VIA nano ?
C Sometimes speed degenerates, supposedly related to that some operand
define(`n', `%rcx')
ifdef(`OPERATION_addlsh1_n', `
- define(ADDSUB, add)
- define(ADCSBB, adc)
- define(func, mpn_addlsh1_n)')
+ define(ADDSUB, add)
+ define(ADCSBB, adc)
+ define(func, mpn_addlsh1_n)')
ifdef(`OPERATION_rsblsh1_n', `
- define(ADDSUB, sub)
- define(ADCSBB, sbb)
- define(func, mpn_rsblsh1_n)')
+ define(ADDSUB, sub)
+ define(ADCSBB, sbb)
+ define(func, mpn_rsblsh1_n)')
MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_rsblsh1_n)
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
ASM_START()
TEXT
ALIGN(16)
PROLOGUE(func)
+ FUNC_ENTRY(4)
push %rbp
mov (vp), %r8
movslq R32(%rbp), %rax')
pop %rbp
+ FUNC_EXIT()
ret
EPILOGUE()
-dnl AMD64 mpn_addlsh2_n and mpn_rsblsh2_n. R = 2*V +- U.
-dnl ("rsb" means reversed subtract, name mandated by mpn_sublsh2_n which
-dnl subtacts the shifted operand from the unshifted operand.)
+dnl AMD64 mpn_addlsh2_n -- rp[] = up[] + (vp[] << 2)
+dnl AMD64 mpn_rsblsh2_n -- rp[] = (vp[] << 2) - up[]
-dnl Copyright 2009 Free Software Foundation, Inc.
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
+dnl Copyright 2009, 2010, 2011 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
include(`../config.m4')
-
-C cycles/limb
-C K8,K9: 2
-C K10: 2
-C P4: ?
-C P6 core2: 3
-C P6 corei7: 2.75
-C P6 atom: ?
-
-C INPUT PARAMETERS
-define(`rp', `%rdi')
-define(`up', `%rsi')
-define(`vp', `%rdx')
-define(`n', `%rcx')
+define(LSH, 2)
+define(RSH, 62)
ifdef(`OPERATION_addlsh2_n',`
- define(ADDSUB, `add')
- define(ADCSBB, `adc')
- define(func, mpn_addlsh2_n)')
+ define(ADDSUB, add)
+ define(ADCSBB, adc)
+ define(func, mpn_addlsh2_n)')
ifdef(`OPERATION_rsblsh2_n',`
- define(ADDSUB, `sub')
- define(ADCSBB, `sbb')
- define(func, mpn_rsblsh2_n)')
+ define(ADDSUB, sub)
+ define(ADCSBB, sbb)
+ define(func, mpn_rsblsh2_n)')
MULFUNC_PROLOGUE(mpn_addlsh2_n mpn_rsblsh2_n)
-ASM_START()
- TEXT
- ALIGN(16)
-PROLOGUE(func)
- push %r12
- push %r13
- push %r14
- push %r15
-
- mov (vp), %r8
- lea (,%r8,4), %r12
- shr $62, %r8
-
- mov R32(n), R32(%rax)
- lea (rp,n,8), rp
- lea (up,n,8), up
- lea (vp,n,8), vp
- neg n
- and $3, R8(%rax)
- je L(b00)
- cmp $2, R8(%rax)
- jc L(b01)
- je L(b10)
-
-L(b11): mov 8(vp,n,8), %r10
- lea (%r8,%r10,4), %r14
- shr $62, %r10
- mov 16(vp,n,8), %r11
- lea (%r10,%r11,4), %r15
- shr $62, %r11
- ADDSUB (up,n,8), %r12
- ADCSBB 8(up,n,8), %r14
- ADCSBB 16(up,n,8), %r15
- sbb R32(%rax), R32(%rax) C save carry for next
- mov %r12, (rp,n,8)
- mov %r14, 8(rp,n,8)
- mov %r15, 16(rp,n,8)
- add $3, n
- js L(top)
- jmp L(end)
-
-L(b01): mov %r8, %r11
- ADDSUB (up,n,8), %r12
- sbb R32(%rax), R32(%rax) C save carry for next
- mov %r12, (rp,n,8)
- add $1, n
- js L(top)
- jmp L(end)
-
-L(b10): mov 8(vp,n,8), %r11
- lea (%r8,%r11,4), %r15
- shr $62, %r11
- ADDSUB (up,n,8), %r12
- ADCSBB 8(up,n,8), %r15
- sbb R32(%rax), R32(%rax) C save carry for next
- mov %r12, (rp,n,8)
- mov %r15, 8(rp,n,8)
- add $2, n
- js L(top)
- jmp L(end)
-
-L(b00): mov 8(vp,n,8), %r9
- mov 16(vp,n,8), %r10
- jmp L(e00)
-
- ALIGN(16)
-L(top): mov 16(vp,n,8), %r10
- mov (vp,n,8), %r8
- mov 8(vp,n,8), %r9
- lea (%r11,%r8,4), %r12
- shr $62, %r8
-L(e00): lea (%r8,%r9,4), %r13
- shr $62, %r9
- mov 24(vp,n,8), %r11
- lea (%r9,%r10,4), %r14
- shr $62, %r10
- lea (%r10,%r11,4), %r15
- shr $62, %r11
- add R32(%rax), R32(%rax) C restore carry
- ADCSBB (up,n,8), %r12
- ADCSBB 8(up,n,8), %r13
- ADCSBB 16(up,n,8), %r14
- ADCSBB 24(up,n,8), %r15
- mov %r12, (rp,n,8)
- mov %r13, 8(rp,n,8)
- mov %r14, 16(rp,n,8)
- sbb R32(%rax), R32(%rax) C save carry for next
- mov %r15, 24(rp,n,8)
- add $4, n
- js L(top)
-L(end):
-
-ifdef(`OPERATION_addlsh2_n',`
- sub R32(%r11), R32(%rax)
- neg R32(%rax)')
-ifdef(`OPERATION_rsblsh2_n',`
- add R32(%r11), R32(%rax)
- movslq R32(%rax), %rax')
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
- pop %r15
- pop %r14
- pop %r13
- pop %r12
- ret
-EPILOGUE()
+include_mpn(`x86_64/aorrlshC_n.asm')
--- /dev/null
+dnl AMD64 mpn_addlshC_n -- rp[] = up[] + (vp[] << C)
+dnl AMD64 mpn_rsblshC_n -- rp[] = (vp[] << C) - up[]
+
+dnl Copyright 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+
+C cycles/limb
+C AMD K8,K9 2
+C AMD K10 2
+C Intel P4 ?
+C Intel core2 3
+C Intel NHM 2.75
+C Intel SBR 2.55
+C Intel atom ?
+C VIA nano ?
+
+C INPUT PARAMETERS
+define(`rp', `%rdi')
+define(`up', `%rsi')
+define(`vp', `%rdx')
+define(`n', `%rcx')
+
+define(M, eval(m4_lshift(1,LSH)))
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+ TEXT
+ ALIGN(16)
+C Computes rp[] = up[] + (vp[] << LSH) when ADDSUB is add, otherwise
+C rp[] = (vp[] << LSH) - up[], over n limbs.  M = 2^LSH is the lea scale.
+C LSH and RSH are not defined in this file; presumably the including file
+C sets them with RSH = 64-LSH (TODO confirm).
+C Register roles: r8-r11 hold v limbs and afterwards their shifted-out high
+C bits, r12-r15 hold the shifted limbs, eax holds the saved carry (0 or -1).
+PROLOGUE(func)
+ FUNC_ENTRY(4)
+ push %r12 C r12-r15 are scratch here and are restored before ret
+ push %r13
+ push %r14
+ push %r15
+
+ mov (vp), %r8 C first v limb
+ lea (,%r8,M), %r12 C r12 = v[0] * M
+ shr $RSH, %r8 C r8 = high bits of v[0], carried into the next limb
+
+ mov R32(n), R32(%rax)
+ lea (rp,n,8), rp C point rp, up and vp just past their last limb ...
+ lea (up,n,8), up
+ lea (vp,n,8), vp
+ neg n C ... and index with a negative n that counts up to 0
+ and $3, R8(%rax) C dispatch on n mod 4 (only al is masked)
+ je L(b00)
+ cmp $2, R8(%rax)
+ jc L(b01)
+ je L(b10)
+
+L(b11): mov 8(vp,n,8), %r10 C n mod 4 = 3: do three limbs before the loop
+ lea (%r8,%r10,M), %r14
+ shr $RSH, %r10
+ mov 16(vp,n,8), %r11
+ lea (%r10,%r11,M), %r15
+ shr $RSH, %r11 C r11 = pending high bits for the next limb
+ ADDSUB (up,n,8), %r12
+ ADCSBB 8(up,n,8), %r14
+ ADCSBB 16(up,n,8), %r15
+ sbb R32(%rax), R32(%rax) C save carry for next
+ mov %r12, (rp,n,8)
+ mov %r14, 8(rp,n,8)
+ mov %r15, 16(rp,n,8)
+ add $3, n
+ js L(top) C n still negative: more limbs remain
+ jmp L(end)
+
+L(b01): mov %r8, %r11 C n mod 4 = 1: high bits of v[0] become the pending bits
+ ADDSUB (up,n,8), %r12
+ sbb R32(%rax), R32(%rax) C save carry for next
+ mov %r12, (rp,n,8)
+ add $1, n
+ js L(top)
+ jmp L(end)
+
+L(b10): mov 8(vp,n,8), %r11 C n mod 4 = 2: do two limbs before the loop
+ lea (%r8,%r11,M), %r15
+ shr $RSH, %r11
+ ADDSUB (up,n,8), %r12
+ ADCSBB 8(up,n,8), %r15
+ sbb R32(%rax), R32(%rax) C save carry for next
+ mov %r12, (rp,n,8)
+ mov %r15, 8(rp,n,8)
+ add $2, n
+ js L(top)
+ jmp L(end)
+
+L(b00): mov 8(vp,n,8), %r9 C n mod 4 = 0: enter the loop at e00, r12 is already set
+ mov 16(vp,n,8), %r10 C al is 0 here but eax bits 8-31 still hold n, so the
+ jmp L(e00) C restore-carry add gives CF = 0 only if bit 31 of n is clear
+
+ ALIGN(16)
+L(top): mov 16(vp,n,8), %r10 C four limbs per iteration
+ mov (vp,n,8), %r8
+ mov 8(vp,n,8), %r9
+ lea (%r11,%r8,M), %r12 C r11 = high bits left over from the previous limb
+ shr $RSH, %r8
+L(e00): lea (%r8,%r9,M), %r13
+ shr $RSH, %r9
+ mov 24(vp,n,8), %r11
+ lea (%r9,%r10,M), %r14
+ shr $RSH, %r10
+ lea (%r10,%r11,M), %r15
+ shr $RSH, %r11
+ add R32(%rax), R32(%rax) C restore carry
+ ADCSBB (up,n,8), %r12
+ ADCSBB 8(up,n,8), %r13
+ ADCSBB 16(up,n,8), %r14
+ ADCSBB 24(up,n,8), %r15
+ mov %r12, (rp,n,8)
+ mov %r13, 8(rp,n,8)
+ mov %r14, 16(rp,n,8)
+ sbb R32(%rax), R32(%rax) C save carry for next
+ mov %r15, 24(rp,n,8)
+ add $4, n
+ js L(top)
+L(end): C eax = 0 or -1 (saved carry), r11 = bits shifted out of the last v limb
+
+C Return value: add form gives rax = r11 + carry; the other form gives
+C rax = r11 - borrow, sign-extended from 32 to 64 bits.
+ifelse(ADDSUB,add,`
+ sub R32(%r11), R32(%rax)
+ neg R32(%rax)
+',`
+ add R32(%r11), R32(%rax)
+ movslq R32(%rax), %rax
+')
+ pop %r15
+ pop %r14
+ pop %r13
+ pop %r12
+ FUNC_EXIT()
+ ret
+EPILOGUE()
dnl AMD64 mpn_addlsh_n and mpn_rsblsh_n. R = V2^k +- U.
-dnl ("rsb" means reversed subtract, name mandated by mpn_sublsh1_n which
-dnl subtacts the shifted operand from the unshifted operand.)
-dnl Copyright 2006 Free Software Foundation, Inc.
+dnl Copyright 2006, 2010, 2011, 2012 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
C cycles/limb
-C K8,K9: 3.25 (mpn_lshift + mpn_add_n costs about 4.1 c/l)
-C K10: 3.25 (mpn_lshift + mpn_add_n costs about 4.1 c/l)
-C P4: 14
-C P6-15: 4
+C AMD K8,K9 3.1 < 3.85 for lshift + add_n
+C AMD K10 3.1 < 3.85 for lshift + add_n
+C Intel P4 14.6 > 7.33 for lshift + add_n
+C Intel core2 3.87 > 3.27 for lshift + add_n
+C Intel NHM 4 > 3.75 for lshift + add_n
+C Intel SBR (5.8) > 3.46 for lshift + add_n
+C Intel atom (7.75) < 8.75 for lshift + add_n
+C VIA nano 4.7 < 6.25 for lshift + add_n
C This was written quickly and not optimized at all. Surely one could get
C closer to 3 c/l or perhaps even under 3 c/l. Ideas:
define(`cnt', `%r8')
ifdef(`OPERATION_addlsh_n',`
- define(ADDSUBC, `adc')
+ define(ADCSBB, `adc')
define(func, mpn_addlsh_n)
')
ifdef(`OPERATION_rsblsh_n',`
- define(ADDSUBC, `sbb')
+ define(ADCSBB, `sbb')
define(func, mpn_rsblsh_n)
')
MULFUNC_PROLOGUE(mpn_addlsh_n mpn_rsblsh_n)
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
ASM_START()
TEXT
ALIGN(16)
PROLOGUE(func)
-
+ FUNC_ENTRY(4)
+IFDOS(` mov 56(%rsp), %r8d ')
push %r12
push %r13
push %r14
- push %r15
+ push %rbp
push %rbx
mov n, %rax
- xor %ebx, %ebx C clear carry save register
- mov %r8d, %ecx C shift count
- xor %r15d, %r15d C limb carry
+ xor R32(%rbx), R32(%rbx) C clear carry save register
+ mov R32(%r8), R32(%rcx) C shift count
+ xor R32(%rbp), R32(%rbp) C limb carry
- mov %eax, %r11d
- and $3, %r11d
+ mov R32(%rax), R32(%r11)
+ and $3, R32(%r11)
je L(4)
- sub $1, %r11d
+ sub $1, R32(%r11)
-L(oopette):
- mov 0(vp), %r8
+L(012): mov (vp), %r8
mov %r8, %r12
- shl %cl, %r8
- or %r15, %r8
- neg %cl
- mov %r12, %r15
- shr %cl, %r15
- neg %cl
- add %ebx, %ebx
- ADDSUBC 0(up), %r8
- mov %r8, 0(rp)
- sbb %ebx, %ebx
+ shl R8(%rcx), %r8
+ or %rbp, %r8
+ neg R8(%rcx)
+ mov %r12, %rbp
+ shr R8(%rcx), %rbp
+ neg R8(%rcx)
+ add R32(%rbx), R32(%rbx)
+ ADCSBB (up), %r8
+ mov %r8, (rp)
+ sbb R32(%rbx), R32(%rbx)
lea 8(up), up
lea 8(vp), vp
lea 8(rp), rp
- sub $1, %r11d
- jnc L(oopette)
+ sub $1, R32(%r11)
+ jnc L(012)
-L(4):
- sub $4, %rax
+L(4): sub $4, %rax
jc L(end)
-L(oop):
- mov 0(vp), %r8
+ ALIGN(16)
+L(top): mov (vp), %r8
mov %r8, %r12
mov 8(vp), %r9
mov %r9, %r13
mov %r10, %r14
mov 24(vp), %r11
- shl %cl, %r8
- shl %cl, %r9
- shl %cl, %r10
- or %r15, %r8
- mov %r11, %r15
- shl %cl, %r11
+ shl R8(%rcx), %r8
+ shl R8(%rcx), %r9
+ shl R8(%rcx), %r10
+ or %rbp, %r8
+ mov %r11, %rbp
+ shl R8(%rcx), %r11
- neg %cl
+ neg R8(%rcx)
- shr %cl, %r12
- shr %cl, %r13
- shr %cl, %r14
- shr %cl, %r15 C used next loop
+ shr R8(%rcx), %r12
+ shr R8(%rcx), %r13
+ shr R8(%rcx), %r14
+ shr R8(%rcx), %rbp C used next iteration
or %r12, %r9
or %r13, %r10
or %r14, %r11
- neg %cl
+ neg R8(%rcx)
- add %ebx, %ebx C restore carry flag
+ add R32(%rbx), R32(%rbx) C restore carry flag
- ADDSUBC 0(up), %r8
- ADDSUBC 8(up), %r9
- ADDSUBC 16(up), %r10
- ADDSUBC 24(up), %r11
+ ADCSBB (up), %r8
+ ADCSBB 8(up), %r9
+ ADCSBB 16(up), %r10
+ ADCSBB 24(up), %r11
- mov %r8, 0(rp)
+ mov %r8, (rp)
mov %r9, 8(rp)
mov %r10, 16(rp)
mov %r11, 24(rp)
- sbb %ebx, %ebx C save carry flag
+ sbb R32(%rbx), R32(%rbx) C save carry flag
lea 32(up), up
lea 32(vp), vp
lea 32(rp), rp
sub $4, %rax
- jnc L(oop)
-L(end):
- add %ebx, %ebx
- ADDSUBC $0, %r15
- mov %r15, %rax
+ jnc L(top)
+
+L(end): add R32(%rbx), R32(%rbx)
+ ADCSBB $0, %rbp
+ mov %rbp, %rax
pop %rbx
- pop %r15
+ pop %rbp
pop %r14
pop %r13
pop %r12
-
+ FUNC_EXIT()
ret
EPILOGUE()
--- /dev/null
+dnl AMD64 mpn_add_err1_n, mpn_sub_err1_n
+
+dnl Contributed by David Harvey.
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C AMD K8,K9 2.75 (most alignments, degenerates to 3 c/l for some alignments)
+C AMD K10 ?
+C Intel P4 ?
+C Intel core2 ?
+C Intel corei ?
+C Intel atom ?
+C VIA nano ?
+
+
+C INPUT PARAMETERS
+define(`rp', `%rdi')
+define(`up', `%rsi')
+define(`vp', `%rdx')
+define(`ep', `%rcx')
+define(`yp', `%r8')
+define(`n', `%r9')
+define(`cy_param', `8(%rsp)')
+
+define(`el', `%rbx')
+define(`eh', `%rbp')
+define(`t0', `%r10')
+define(`t1', `%r11')
+define(`t2', `%r12')
+define(`t3', `%r13')
+define(`w0', `%r14')
+define(`w1', `%r15')
+
+ifdef(`OPERATION_add_err1_n', `
+ define(ADCSBB, adc)
+ define(func, mpn_add_err1_n)')
+ifdef(`OPERATION_sub_err1_n', `
+ define(ADCSBB, sbb)
+ define(func, mpn_sub_err1_n)')
+
+MULFUNC_PROLOGUE(mpn_add_err1_n mpn_sub_err1_n)
+
+
+ASM_START()
+ TEXT
+ ALIGN(16)
+C Computes rp[] = up[] ADCSBB vp[] over n limbs with carry-in cy_param.
+C For each limb position i that produces a carry (borrow) out, the limb
+C yp[n-1-i] is added into the two-limb accumulator eh:el, which is stored
+C to ep[0] (low) and ep[1] (high) at the end.  The running carry is parked
+C in al between groups of limbs; al holds the final carry at return.
+PROLOGUE(func)
+ mov cy_param, %rax C read the stack argument before the pushes move rsp
+
+ push %rbx
+ push %rbp
+ push %r12
+ push %r13
+ push %r14
+ push %r15
+
+ lea (up,n,8), up C point up, vp and rp just past their last limb
+ lea (vp,n,8), vp
+ lea (rp,n,8), rp
+
+ mov R32(n), R32(%r10) C dispatch on n mod 4 (r10 is t0, reused below)
+ and $3, R32(%r10)
+ jz L(0mod4)
+ cmp $2, R32(%r10)
+ jc L(1mod4)
+ jz L(2mod4)
+L(3mod4):
+ xor R32(el), R32(el) C clear the accumulator and the two temporaries
+ xor R32(eh), R32(eh)
+ xor R32(t0), R32(t0)
+ xor R32(t1), R32(t1)
+ lea -24(yp,n,8), yp C yp now addresses the y limb paired with limb 2
+ neg n C negative index counting up to 0
+
+ shr $1, %al C restore carry
+ mov (up,n,8), w0
+ mov 8(up,n,8), w1
+ ADCSBB (vp,n,8), w0
+ mov w0, (rp,n,8)
+ cmovc 16(yp), el C el was 0, so this adds the y limb iff carry out
+ ADCSBB 8(vp,n,8), w1
+ mov w1, 8(rp,n,8)
+ cmovc 8(yp), t0
+ mov 16(up,n,8), w0
+ ADCSBB 16(vp,n,8), w0
+ mov w0, 16(rp,n,8)
+ cmovc (yp), t1
+ setc %al C save carry
+ add t0, el C fold the selected y limbs into eh:el
+ adc $0, eh
+ add t1, el
+ adc $0, eh
+
+ add $3, n
+ jnz L(loop)
+ jmp L(end)
+
+ ALIGN(16)
+L(0mod4):
+ xor R32(el), R32(el)
+ xor R32(eh), R32(eh)
+ lea (yp,n,8), yp C loop reads y limbs at negative offsets from yp
+ neg n
+ jmp L(loop)
+
+ ALIGN(16)
+L(1mod4):
+ xor R32(el), R32(el)
+ xor R32(eh), R32(eh)
+ lea -8(yp,n,8), yp C yp now addresses the last y limb
+ neg n
+
+ shr $1, %al C restore carry
+ mov (up,n,8), w0
+ ADCSBB (vp,n,8), w0
+ mov w0, (rp,n,8)
+ cmovc (yp), el C el was 0, so this adds the y limb iff carry out
+ setc %al C save carry
+
+ add $1, n
+ jnz L(loop)
+ jmp L(end)
+
+ ALIGN(16)
+L(2mod4):
+ xor R32(el), R32(el)
+ xor R32(eh), R32(eh)
+ xor R32(t0), R32(t0)
+ lea -16(yp,n,8), yp C yp now addresses the y limb paired with limb 1
+ neg n
+
+ shr $1, %al C restore carry
+ mov (up,n,8), w0
+ mov 8(up,n,8), w1
+ ADCSBB (vp,n,8), w0
+ mov w0, (rp,n,8)
+ cmovc 8(yp), el
+ ADCSBB 8(vp,n,8), w1
+ mov w1, 8(rp,n,8)
+ cmovc (yp), t0
+ setc %al C save carry
+ add t0, el
+ adc $0, eh
+
+ add $2, n
+ jnz L(loop)
+ jmp L(end)
+
+ ALIGN(32)
+L(loop): C four limbs per iteration; yp steps down by 32 bytes
+ shr $1, %al C restore carry
+ mov -8(yp), t0 C preload y limb; zeroed below if no carry out
+ mov $0, R32(t3) C t3 = 0 via mov, which leaves CF intact
+ mov (up,n,8), w0
+ mov 8(up,n,8), w1
+ ADCSBB (vp,n,8), w0
+ cmovnc t3, t0 C no carry out of limb 0: contribute 0
+ ADCSBB 8(vp,n,8), w1
+ mov -16(yp), t1
+ mov w0, (rp,n,8)
+ mov 16(up,n,8), w0
+ mov w1, 8(rp,n,8)
+ cmovnc t3, t1 C no carry out of limb 1: contribute 0
+ mov -24(yp), t2
+ ADCSBB 16(vp,n,8), w0
+ cmovnc t3, t2 C no carry out of limb 2: contribute 0
+ mov 24(up,n,8), w1
+ ADCSBB 24(vp,n,8), w1
+ cmovc -32(yp), t3 C t3 is still 0 here; take the y limb iff carry out
+ setc %al C save carry
+ add t0, el C accumulate the four contributions into eh:el
+ adc $0, eh
+ add t1, el
+ adc $0, eh
+ add t2, el
+ adc $0, eh
+ mov w0, 16(rp,n,8)
+ add t3, el
+ lea -32(yp), yp C lea leaves the carry of the add above intact
+ adc $0, eh
+ mov w1, 24(rp,n,8)
+ add $4, n
+ jnz L(loop)
+
+L(end):
+ mov el, (ep) C store the accumulator, low limb first
+ mov eh, 8(ep)
+
+ pop %r15
+ pop %r14
+ pop %r13
+ pop %r12
+ pop %rbp
+ pop %rbx
+ ret
+EPILOGUE()
--- /dev/null
+dnl AMD64 mpn_add_err2_n, mpn_sub_err2_n
+
+dnl Contributed by David Harvey.
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C AMD K8,K9 4.5
+C AMD K10 ?
+C Intel P4 ?
+C Intel core2 6.9
+C Intel corei ?
+C Intel atom ?
+C VIA nano ?
+
+
+C INPUT PARAMETERS
+define(`rp', `%rdi')
+define(`up', `%rsi')
+define(`vp', `%rdx')
+define(`ep', `%rcx')
+define(`yp1', `%r8')
+define(`yp2', `%r9')
+define(`n_param', `8(%rsp)')
+define(`cy_param', `16(%rsp)')
+
+define(`cy1', `%r14')
+define(`cy2', `%rax')
+
+define(`n', `%r10')
+
+define(`w', `%rbx')
+define(`e1l', `%rbp')
+define(`e1h', `%r11')
+define(`e2l', `%r12')
+define(`e2h', `%r13')
+
+
+ifdef(`OPERATION_add_err2_n', `
+ define(ADCSBB, adc)
+ define(func, mpn_add_err2_n)')
+ifdef(`OPERATION_sub_err2_n', `
+ define(ADCSBB, sbb)
+ define(func, mpn_sub_err2_n)')
+
+MULFUNC_PROLOGUE(mpn_add_err2_n mpn_sub_err2_n)
+
+
+ASM_START()
+ TEXT
+ ALIGN(16)
+C Computes rp[] = up[] ADCSBB vp[] over n limbs with carry-in cy_param.
+C For each limb position i with a carry (borrow) out, yp1[n-1-i] is added
+C into e1h:e1l and yp2[n-1-i] into e2h:e2l.  The four accumulator limbs are
+C stored to ep[0..3] and the final carry (0 or 1) is returned in eax.
+C The loop handles two limbs per iteration; an odd n peels one limb first.
+PROLOGUE(func)
+ mov cy_param, cy2 C read both stack arguments before the pushes move rsp
+ mov n_param, n
+
+ push %rbx
+ push %rbp
+ push %r12
+ push %r13
+ push %r14
+
+ xor R32(e1l), R32(e1l) C clear both two-limb accumulators
+ xor R32(e1h), R32(e1h)
+ xor R32(e2l), R32(e2l)
+ xor R32(e2h), R32(e2h)
+
+ sub yp1, yp2 C yp2 becomes an offset: (yp1,yp2) addresses the yp2 limb
+
+ lea (rp,n,8), rp C point rp, up and vp just past their last limb
+ lea (up,n,8), up
+ lea (vp,n,8), vp
+
+ test $1, n
+ jnz L(odd)
+
+ lea -8(yp1,n,8), yp1 C even n: yp1 addresses its last limb
+ neg n
+ jmp L(top)
+
+ ALIGN(16)
+L(odd):
+ lea -16(yp1,n,8), yp1 C odd n: 8(yp1) is the last limb, (yp1) the one before
+ neg n
+ shr $1, cy2 C restore carry from bit 0 of the carry-in
+ mov (up,n,8), w
+ ADCSBB (vp,n,8), w
+ cmovc 8(yp1), e1l C accumulators are 0, so a conditional move acts as add
+ cmovc 8(yp1,yp2), e2l
+ mov w, (rp,n,8)
+ sbb cy2, cy2 C save carry as 0 or -1
+ inc n
+ jz L(end) C n was 1: nothing left for the loop
+
+ ALIGN(16)
+L(top):
+ mov (up,n,8), w
+ shr $1, cy2 C restore carry
+ ADCSBB (vp,n,8), w
+ mov w, (rp,n,8)
+ sbb cy1, cy1 C generate mask, preserve CF
+
+ mov 8(up,n,8), w
+ ADCSBB 8(vp,n,8), w
+ mov w, 8(rp,n,8)
+ sbb cy2, cy2 C generate mask, preserve CF
+
+ mov (yp1), w C (e1h:e1l) += cy1 * yp1 limb
+ and cy1, w
+ add w, e1l
+ adc $0, e1h
+
+ and (yp1,yp2), cy1 C (e2h:e2l) += cy1 * yp2 limb
+ add cy1, e2l
+ adc $0, e2h
+
+ mov -8(yp1), w C (e1h:e1l) += cy2 * next yp1 limb
+ and cy2, w
+ add w, e1l
+ adc $0, e1h
+
+ mov -8(yp1,yp2), w C (e2h:e2l) += cy2 * next yp2 limb
+ and cy2, w
+ add w, e2l
+ adc $0, e2h
+
+ add $2, n
+ lea -16(yp1), yp1 C lea leaves ZF from the add above intact for jnz
+ jnz L(top)
+L(end):
+
+ mov e1l, (ep) C store both accumulators, low limb first
+ mov e1h, 8(ep)
+ mov e2l, 16(ep)
+ mov e2h, 24(ep)
+
+ and $1, %eax C return carry
+
+ pop %r14
+ pop %r13
+ pop %r12
+ pop %rbp
+ pop %rbx
+ ret
+EPILOGUE()
--- /dev/null
+dnl AMD64 mpn_add_err3_n, mpn_sub_err3_n
+
+dnl Contributed by David Harvey.
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C AMD K8,K9 7.0
+C AMD K10 ?
+C Intel P4 ?
+C Intel core2 ?
+C Intel corei ?
+C Intel atom ?
+C VIA nano ?
+
+C INPUT PARAMETERS
+define(`rp', `%rdi')
+define(`up', `%rsi')
+define(`vp', `%rdx')
+define(`ep', `%rcx')
+define(`yp1', `%r8')
+define(`yp2', `%r9')
+define(`yp3_param', `8(%rsp)')
+define(`n_param', `16(%rsp)')
+define(`cy_param', `24(%rsp)')
+
+define(`n', `%r10')
+define(`yp3', `%rcx')
+define(`t', `%rbx')
+
+define(`e1l', `%rbp')
+define(`e1h', `%r11')
+define(`e2l', `%r12')
+define(`e2h', `%r13')
+define(`e3l', `%r14')
+define(`e3h', `%r15')
+
+
+
+ifdef(`OPERATION_add_err3_n', `
+ define(ADCSBB, adc)
+ define(func, mpn_add_err3_n)')
+ifdef(`OPERATION_sub_err3_n', `
+ define(ADCSBB, sbb)
+ define(func, mpn_sub_err3_n)')
+
+MULFUNC_PROLOGUE(mpn_add_err3_n mpn_sub_err3_n)
+
+
+ASM_START()
+ TEXT
+ ALIGN(16)
+C Computes rp[] = up[] ADCSBB vp[] over n limbs with carry-in cy_param.
+C For each limb position i with a carry (borrow) out, yp1[n-1-i], yp2[n-1-i]
+C and yp3[n-1-i] are added into e1h:e1l, e2h:e2l and e3h:e3l respectively.
+C The six accumulator limbs are stored to ep[0..5] and the final carry
+C (0 or 1) is returned in eax.  One limb is processed per iteration.
+PROLOGUE(func)
+ mov cy_param, %rax C read stack arguments before the pushes move rsp
+ mov n_param, n
+
+ push %rbx
+ push %rbp
+ push %r12
+ push %r13
+ push %r14
+ push %r15
+
+ push ep C ep and yp3 share rcx, so ep is kept on the stack
+ mov 64(%rsp), yp3 C load from yp3_param (8(%rsp) at entry, 7 pushes later)
+
+ xor R32(e1l), R32(e1l) C clear the three two-limb accumulators
+ xor R32(e1h), R32(e1h)
+ xor R32(e2l), R32(e2l)
+ xor R32(e2h), R32(e2h)
+ xor R32(e3l), R32(e3l)
+ xor R32(e3h), R32(e3h)
+
+ sub yp1, yp2 C yp2 and yp3 become offsets relative to yp1
+ sub yp1, yp3
+
+ lea -8(yp1,n,8), yp1 C yp1 addresses its last limb and steps downwards
+ lea (rp,n,8), rp C rp, up and vp point just past their last limb
+ lea (up,n,8), up
+ lea (vp,n,8), vp
+ neg n C negative index counting up to 0
+
+ ALIGN(16)
+L(top):
+ shr $1, %rax C restore carry
+ mov (up,n,8), %rax C rax is reused for the limb; mov leaves CF intact
+ ADCSBB (vp,n,8), %rax
+ mov %rax, (rp,n,8)
+ sbb %rax, %rax C save carry and generate mask
+
+ mov (yp1), t C (e1h:e1l) += mask & yp1 limb
+ and %rax, t
+ add t, e1l
+ adc $0, e1h
+
+ mov (yp1,yp2), t C (e2h:e2l) += mask & yp2 limb
+ and %rax, t
+ add t, e2l
+ adc $0, e2h
+
+ mov (yp1,yp3), t C (e3h:e3l) += mask & yp3 limb
+ and %rax, t
+ add t, e3l
+ adc $0, e3h
+
+ lea -8(yp1), yp1
+ inc n
+ jnz L(top)
+
+L(end):
+ and $1, %eax C turn the 0 or -1 mask into the returned carry
+ pop ep C recover ep, which was displaced by yp3
+
+ mov e1l, (ep)
+ mov e1h, 8(ep)
+ mov e2l, 16(ep)
+ mov e2h, 24(ep)
+ mov e3l, 32(ep)
+ mov e3h, 40(ep)
+
+ pop %r15
+ pop %r14
+ pop %r13
+ pop %r12
+ pop %rbp
+ pop %rbx
+ ret
+EPILOGUE()
dnl AMD64 mpn_add_n, mpn_sub_n
-dnl Copyright 2003, 2004, 2005, 2007, 2008, 2010 Free Software Foundation,
-dnl Inc.
+dnl Copyright 2003, 2004, 2005, 2007, 2008, 2010, 2011, 2012 Free Software
+dnl Foundation, Inc.
dnl This file is part of the GNU MP Library.
C AMD K8,K9 1.5
C AMD K10 1.5
C Intel P4 ?
-C Intel core2 4.9
-C Intel corei ?
+C Intel core2 4.9
+C Intel NHM 5.5
+C Intel SBR 1.59
C Intel atom 4
C VIA nano 3.25
-C The inner loop of this code is the result of running a code generation and
+C The loop of this code is the result of running a code generation and
C optimization tool suite written by David Harvey and Torbjorn Granlund.
C INPUT PARAMETERS
-define(`rp', `%rdi')
-define(`up', `%rsi')
-define(`vp', `%rdx')
-define(`n', `%rcx')
-define(`cy', `%r8') C (only for mpn_add_nc)
+define(`rp', `%rdi') C rcx
+define(`up', `%rsi') C rdx
+define(`vp', `%rdx') C r8
+define(`n', `%rcx') C r9
+define(`cy', `%r8') C rsp+40 (only for mpn_add_nc)
ifdef(`OPERATION_add_n', `
define(ADCSBB, adc)
MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
ASM_START()
TEXT
ALIGN(16)
PROLOGUE(func_nc)
+ FUNC_ENTRY(4)
+IFDOS(` mov 56(%rsp), %r8 ')
mov R32(n), R32(%rax)
shr $2, n
and $3, R32(%rax)
EPILOGUE()
ALIGN(16)
PROLOGUE(func)
+ FUNC_ENTRY(4)
mov R32(n), R32(%rax)
shr $2, n
and $3, R32(%rax)
jnz L(2)
ADCSBB (vp), %r8
mov %r8, (rp)
- adc %eax, %eax
+ adc R32(%rax), R32(%rax)
+ FUNC_EXIT()
ret
L(2): dec R32(%rax)
ADCSBB 8(vp), %r9
mov %r8, (rp)
mov %r9, 8(rp)
- adc %eax, %eax
+ adc R32(%rax), R32(%rax)
+ FUNC_EXIT()
ret
L(3): mov 16(up), %r10
mov %r9, 8(rp)
mov %r10, 16(rp)
setc R8(%rax)
+ FUNC_EXIT()
ret
ALIGN(16)
inc R32(%rax)
dec R32(%rax)
jnz L(lt4)
- adc %eax, %eax
+ adc R32(%rax), R32(%rax)
+ FUNC_EXIT()
ret
EPILOGUE()
--- /dev/null
+dnl AMD64 mpn_addcnd_n, mpn_subcnd_n
+
+dnl Copyright 2011, 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C AMD K8,K9 2.25
+C AMD K10 2
+C AMD bd1 3.55
+C AMD bobcat 2.5
+C Intel P4 13
+C Intel core2 2.9
+C Intel NHM 2.9
+C Intel SBR 2.4
+C Intel atom 6.5
+C VIA nano 3
+
+C NOTES
+C * It might seem natural to use the cmov insn here, but since this function
+C is supposed to have the exact same execution pattern for cnd true and
+C false, and since cmov's documentation is not clear about whether it
+C actually reads both source operands and writes the register for a false
+C condition, we cannot use it.
+C * Two cases could be optimised: (1) addcnd_n could use ADCSBB-from-memory
+C to save one insn/limb, and (2) when up=rp addcnd_n and subcnd_n could use
+C ADCSBB-to-memory, again saving 1 insn/limb.
+C * This runs optimally at decoder bandwidth on K10. It has not been tuned
+C for any other processor.
+
+C INPUT PARAMETERS
+define(`rp', `%rdi')
+define(`up', `%rsi')
+define(`vp', `%rdx')
+define(`n', `%rcx')
+define(`cnd', `%r8')
+
+ifdef(`OPERATION_addcnd_n', `
+ define(ADDSUB, add)
+ define(ADCSBB, adc)
+ define(func, mpn_addcnd_n)')
+ifdef(`OPERATION_subcnd_n', `
+ define(ADDSUB, sub)
+ define(ADCSBB, sbb)
+ define(func, mpn_subcnd_n)')
+
+MULFUNC_PROLOGUE(mpn_addcnd_n mpn_subcnd_n)
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+ TEXT
+ ALIGN(16)
+C Computes rp[] = up[] ADDSUB (vp[] & mask) over n limbs, where mask is all
+C ones when cnd is nonzero and zero otherwise.  Returns the carry (borrow)
+C out, 0 or 1, in eax.  No branch depends on cnd: the v limbs are always
+C loaded and masked, so the same instructions run for either value.
+PROLOGUE(func)
+ FUNC_ENTRY(4)
+IFDOS(` mov 56(%rsp), %r8 ')
+ push %rbx
+ push %rbp
+ push %r12
+ push %r13
+ push %r14
+
+ neg cnd C CF = (cnd != 0)
+ sbb cnd, cnd C make cnd mask
+
+ lea (vp,n,8), vp C point vp, up and rp just past their last limb
+ lea (up,n,8), up
+ lea (rp,n,8), rp
+
+ mov R32(n), R32(%rax)
+ neg n C negative index counting up to 0
+ and $3, R32(%rax) C dispatch on n mod 4
+ jz L(top) C carry-save reg rax = 0 in this arc
+ cmp $2, R32(%rax)
+ jc L(b1)
+ jz L(b2)
+
+L(b3): mov (vp,n,8), %r12 C n mod 4 = 3: three limbs before the loop
+ mov 8(vp,n,8), %r13
+ mov 16(vp,n,8), %r14
+ mov (up,n,8), %r10
+ mov 8(up,n,8), %rbx
+ mov 16(up,n,8), %rbp
+ and cnd, %r12 C keep or zero each v limb
+ and cnd, %r13
+ and cnd, %r14
+ ADDSUB %r12, %r10
+ ADCSBB %r13, %rbx
+ ADCSBB %r14, %rbp
+ sbb R32(%rax), R32(%rax) C save carry
+ mov %r10, (rp,n,8)
+ mov %rbx, 8(rp,n,8)
+ mov %rbp, 16(rp,n,8)
+ add $3, n
+ js L(top) C n still negative: more limbs remain
+ jmp L(end)
+
+L(b2): mov (vp,n,8), %r12 C n mod 4 = 2: two limbs before the loop
+ mov 8(vp,n,8), %r13
+ mov (up,n,8), %r10
+ mov 8(up,n,8), %rbx
+ and cnd, %r12
+ and cnd, %r13
+ ADDSUB %r12, %r10
+ ADCSBB %r13, %rbx
+ sbb R32(%rax), R32(%rax) C save carry
+ mov %r10, (rp,n,8)
+ mov %rbx, 8(rp,n,8)
+ add $2, n
+ js L(top)
+ jmp L(end)
+
+L(b1): mov (vp,n,8), %r12 C n mod 4 = 1: one limb before the loop
+ mov (up,n,8), %r10
+ and cnd, %r12
+ ADDSUB %r12, %r10
+ sbb R32(%rax), R32(%rax) C save carry
+ mov %r10, (rp,n,8)
+ add $1, n
+ jns L(end) C otherwise fall through into the loop
+
+ ALIGN(16)
+L(top): mov (vp,n,8), %r12 C four limbs per iteration
+ mov 8(vp,n,8), %r13
+ mov 16(vp,n,8), %r14
+ mov 24(vp,n,8), %r11
+ mov (up,n,8), %r10
+ mov 8(up,n,8), %rbx
+ mov 16(up,n,8), %rbp
+ mov 24(up,n,8), %r9
+ and cnd, %r12
+ and cnd, %r13
+ and cnd, %r14
+ and cnd, %r11
+ add R32(%rax), R32(%rax) C restore carry
+ ADCSBB %r12, %r10
+ ADCSBB %r13, %rbx
+ ADCSBB %r14, %rbp
+ ADCSBB %r11, %r9
+ sbb R32(%rax), R32(%rax) C save carry
+ mov %r10, (rp,n,8)
+ mov %rbx, 8(rp,n,8)
+ mov %rbp, 16(rp,n,8)
+ mov %r9, 24(rp,n,8)
+ add $4, n
+ js L(top)
+
+L(end): neg R32(%rax) C saved carry 0 or -1 becomes return value 0 or 1
+ pop %r14
+ pop %r13
+ pop %r12
+ pop %rbp
+ pop %rbx
+ FUNC_EXIT()
+ ret
+EPILOGUE()
dnl AMD64 mpn_addmul_1 and mpn_submul_1.
-dnl Copyright 2003, 2004, 2005, 2007, 2008 Free Software Foundation, Inc.
+dnl Copyright 2003, 2004, 2005, 2007, 2008, 2011, 2012 Free Software
+dnl Foundation, Inc.
dnl This file is part of the GNU MP Library.
include(`../config.m4')
C cycles/limb
-C K8,K9: 2.5
-C K10: 2.5
-C P4: 14.9
-C P6 core2: 5.09
-C P6 corei7:
-C P6 atom: 21.3
-
-C The inner loop of this code is the result of running a code generation and
+C AMD K8,K9 2.5
+C AMD K10 2.5
+C AMD bd1 5.0
+C AMD bobcat 6.17
+C Intel P4 14.9
+C Intel core2 5.09
+C Intel NHM 4.9
+C Intel SBR 4.0
+C Intel atom 21.3
+C VIA nano 5.0
+
+C The loop of this code is the result of running a code generation and
C optimization tool suite written by David Harvey and Torbjorn Granlund.
-C TODO:
-C * The inner loop is great, but the prologue and epilogue code was
-C quickly written. Tune it!
+C TODO
+C * The loop is great, but the prologue and epilogue code was quickly written.
+C Tune it!
-C INPUT PARAMETERS
-define(`rp', `%rdi')
-define(`up', `%rsi')
-define(`n_param',`%rdx')
-define(`vl', `%rcx')
+define(`rp', `%rdi') C rcx
+define(`up', `%rsi') C rdx
+define(`n_param', `%rdx') C r8
+define(`vl', `%rcx') C r9
-define(`n', `%r11')
+define(`n', `%r11')
ifdef(`OPERATION_addmul_1',`
define(`ADDSUB', `add')
define(`func', `mpn_submul_1')
')
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1)
+IFDOS(` define(`up', ``%rsi'') ') dnl
+IFDOS(` define(`rp', ``%rcx'') ') dnl
+IFDOS(` define(`vl', ``%r9'') ') dnl
+IFDOS(` define(`r9', ``rdi'') ') dnl
+IFDOS(` define(`n', ``%r8'') ') dnl
+IFDOS(` define(`r8', ``r11'') ') dnl
+
ASM_START()
TEXT
ALIGN(16)
PROLOGUE(func)
+
+IFDOS(``push %rsi '')
+IFDOS(``push %rdi '')
+IFDOS(``mov %rdx, %rsi '')
+
mov (up), %rax C read first u limb early
push %rbx
- mov n_param, %rbx C move away n from rdx, mul uses it
+IFSTD(` mov n_param, %rbx ') C move away n from rdx, mul uses it
+IFDOS(` mov n, %rbx ')
mul vl
- mov %rbx, %r11
+IFSTD(` mov %rbx, n ')
and $3, R32(%rbx)
jz L(b0)
adc %rax, %r9
mov (up,n,8), %rax
adc %rdx, %r8
- mov $0, %r10d
+ mov $0, R32(%r10)
L(L1): mul vl
ADDSUB %r9, 8(rp,n,8)
adc %rax, %r8
L(L3): mov 16(up,n,8), %rax
mul vl
ADDSUB %rbx, 24(rp,n,8)
- mov $0, %r8d # zero
- mov %r8, %rbx # zero
+ mov $0, R32(%r8) C zero
+ mov %r8, %rbx C zero
adc %rax, %r10
mov 24(up,n,8), %rax
- mov %r8, %r9 # zero
+ mov %r8, %r9 C zero
adc %rdx, %r9
L(L2): mul vl
add $4, n
mov %rdx, %rax
pop %rbx
+IFDOS(``pop %rdi '')
+IFDOS(``pop %rsi '')
ret
EPILOGUE()
--- /dev/null
+dnl AMD64 mpn_addlsh1_n -- rp[] = up[] + (vp[] << 1)
+dnl AMD64 mpn_rsblsh1_n -- rp[] = (vp[] << 1) - up[]
+dnl Optimised for Intel Atom.
+
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
+dnl Copyright 2011, 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C TODO
+C * This code is slightly large at 433 bytes.
+C * sublsh1_n.asm and this file use the same basic pattern.
+
+C cycles/limb
+C AMD K8,K9 ?
+C AMD K10 ?
+C Intel P4 ?
+C Intel core2 ?
+C Intel NHM ?
+C Intel SBR ?
+C Intel atom 4.875 (4.75 is probably possible)
+C VIA nano ?
+
+C INPUT PARAMETERS
+define(`rp', `%rdi')
+define(`up', `%rsi')
+define(`vp', `%rdx')
+define(`n', `%rcx')
+define(`cy', `%r8')
+
+ifdef(`OPERATION_addlsh1_n', `
+ define(ADDSUB, add)
+ define(ADCSBB, adc)
+ define(func_n, mpn_addlsh1_n)
+ define(func_nc, mpn_addlsh1_nc)')
+ifdef(`OPERATION_rsblsh1_n', `
+ define(ADDSUB, sub)
+ define(ADCSBB, sbb)
+ define(func_n, mpn_rsblsh1_n)
+ define(func_nc, mpn_rsblsh1_nc)')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_addlsh1_nc mpn_rsblsh1_n mpn_rsblsh1_nc)
+
+ASM_START()
+ TEXT
+ ALIGN(16)
+C Computes rp[] = up[] + (vp[] << 1) (addlsh1) or rp[] = (vp[] << 1) - up[]
+C (rsblsh1) over n limbs.  Two carries are tracked, each as 0 or -1: scy in
+C eax for the doubling chain and acy in ebp for the add/subtract chain.
+C n mod 4 limbs are handled first, then four more if bit 2 of n is set, then
+C an 8-way loop.  func_nc jumps to L(ent) with acy preset from its carry-in.
+PROLOGUE(func_n)
+ FUNC_ENTRY(4)
+ push %rbp
+ xor R32(%rbp), R32(%rbp) C acy = 0: no carry-in for this entry point
+L(ent): mov R32(n), R32(%rax)
+ and $3, R32(%rax) C dispatch on n mod 4; eax = 0 also serves as scy = 0
+ jz L(b0)
+ cmp $2, R32(%rax)
+ jz L(b2)
+ jg L(b3)
+
+L(b1): mov (vp), %r8 C n mod 4 = 1
+ add %r8, %r8 C double the limb; CF = bit shifted out
+ lea 8(vp), vp
+ sbb R32(%rax), R32(%rax) C save scy
+ add R32(%rbp), R32(%rbp) C restore acy
+ ADCSBB (up), %r8
+ mov %r8, (rp)
+ sbb R32(%rbp), R32(%rbp) C save acy
+ lea 8(up), up
+ lea 8(rp), rp
+ jmp L(b0)
+
+L(b2): mov (vp), %r8 C n mod 4 = 2
+ add %r8, %r8
+ mov 8(vp), %r9
+ adc %r9, %r9 C double with the bit shifted out of the previous limb
+ lea 16(vp), vp
+ sbb R32(%rax), R32(%rax) C save scy
+ add R32(%rbp), R32(%rbp) C restore acy
+ ADCSBB (up), %r8
+ mov %r8, (rp)
+ ADCSBB 8(up), %r9
+ mov %r9, 8(rp)
+ sbb R32(%rbp), R32(%rbp) C save acy
+ lea 16(up), up
+ lea 16(rp), rp
+ jmp L(b0)
+
+L(b3): mov (vp), %r8 C n mod 4 = 3
+ add %r8, %r8
+ mov 8(vp), %r9
+ adc %r9, %r9
+ mov 16(vp), %r10
+ adc %r10, %r10
+ lea 24(vp), vp
+ sbb R32(%rax), R32(%rax) C save scy
+ add R32(%rbp), R32(%rbp) C restore acy
+ ADCSBB (up), %r8
+ mov %r8, (rp)
+ ADCSBB 8(up), %r9
+ mov %r9, 8(rp)
+ ADCSBB 16(up), %r10
+ mov %r10, 16(rp)
+ sbb R32(%rbp), R32(%rbp) C save acy
+ lea 24(up), up
+ lea 24(rp), rp
+
+L(b0): test $4, R8(n) C bit 2 of n set: do one group of four limbs
+ jz L(skp)
+ add R32(%rax), R32(%rax) C restore scy
+ mov (vp), %r8
+ adc %r8, %r8
+ mov 8(vp), %r9
+ adc %r9, %r9
+ mov 16(vp), %r10
+ adc %r10, %r10
+ mov 24(vp), %r11
+ adc %r11, %r11
+ lea 32(vp), vp
+ sbb R32(%rax), R32(%rax) C save scy
+ add R32(%rbp), R32(%rbp) C restore acy
+ ADCSBB (up), %r8
+ mov %r8, (rp)
+ ADCSBB 8(up), %r9
+ mov %r9, 8(rp)
+ ADCSBB 16(up), %r10
+ mov %r10, 16(rp)
+ ADCSBB 24(up), %r11
+ mov %r11, 24(rp)
+ lea 32(up), up
+ lea 32(rp), rp
+ sbb R32(%rbp), R32(%rbp) C save acy
+
+L(skp): cmp $8, n C fewer than 8 limbs in total: no loop iterations
+ jl L(rtn)
+
+ push %r12 C extra scratch registers are needed only by the loop
+ push %r13
+ push %r14
+ push %rbx
+ lea -64(rp), rp C compensate for the lea 64(rp) at the top of the loop
+ jmp L(x)
+
+ ALIGN(16)
+L(top): add R32(%rax), R32(%rax) C restore scy
+ lea 64(rp), rp
+ mov (vp), %r8
+ adc %r8, %r8
+ mov 8(vp), %r9
+ adc %r9, %r9
+ mov 16(vp), %r10
+ adc %r10, %r10
+ mov 24(vp), %r11
+ adc %r11, %r11
+ mov 32(vp), %r12
+ adc %r12, %r12
+ mov 40(vp), %r13
+ adc %r13, %r13
+ mov 48(vp), %r14
+ adc %r14, %r14
+ mov 56(vp), %rbx
+ adc %rbx, %rbx
+ lea 64(vp), vp
+ sbb R32(%rax), R32(%rax) C save scy
+ add R32(%rbp), R32(%rbp) C restore acy
+ ADCSBB (up), %r8
+ mov %r8, (rp)
+ ADCSBB 8(up), %r9
+ mov %r9, 8(rp)
+ ADCSBB 16(up), %r10
+ mov %r10, 16(rp)
+ ADCSBB 24(up), %r11
+ mov %r11, 24(rp)
+ ADCSBB 32(up), %r12
+ mov %r12, 32(rp)
+ ADCSBB 40(up), %r13
+ mov %r13, 40(rp)
+ ADCSBB 48(up), %r14
+ mov %r14, 48(rp)
+ ADCSBB 56(up), %rbx
+ mov %rbx, 56(rp)
+ sbb R32(%rbp), R32(%rbp) C save acy
+ lea 64(up), up
+L(x): sub $8, n C one iteration per full group of 8 limbs
+ jge L(top)
+
+L(end): pop %rbx
+ pop %r14
+ pop %r13
+ pop %r12
+L(rtn):
+C Return value from scy (eax) and acy (ebp), each 0 or -1:
+C addlsh1 returns 0, 1 or 2; rsblsh1 returns -1, 0 or 1 sign-extended.
+ifdef(`OPERATION_addlsh1_n',`
+ add R32(%rbp), R32(%rax)
+ neg R32(%rax)')
+ifdef(`OPERATION_rsblsh1_n',`
+ sub R32(%rax), R32(%rbp)
+ movslq R32(%rbp), %rax')
+
+ pop %rbp
+ FUNC_EXIT()
+ ret
+EPILOGUE()
+C Carry-in entry point: converts the carry argument in r8 into the saved
+C add/subtract carry (ebp = 0 or -1) and then shares the body of func_n
+C from L(ent) onwards.
+PROLOGUE(func_nc)
+ FUNC_ENTRY(4)
+IFDOS(` mov 56(%rsp), %r8 ')
+ push %rbp C matches the push in func_n; the shared exit pops it
+ neg %r8 C set CF
+ sbb R32(%rbp), R32(%rbp) C save acy
+ jmp L(ent)
+EPILOGUE()
--- /dev/null
+dnl AMD64 mpn_addlsh2_n -- rp[] = up[] + (vp[] << 2)
+dnl AMD64 mpn_rsblsh2_n -- rp[] = (vp[] << 2) - up[]
+dnl Optimised for Intel Atom.
+
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
+dnl Copyright 2011, 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C AMD K8,K9 ?
+C AMD K10 ?
+C Intel P4 ?
+C Intel core2 ?
+C Intel NHM ?
+C Intel SBR ?
+C Intel atom 5.75
+C VIA nano ?
+
+C INPUT PARAMETERS
+define(`rp', `%rdi')
+define(`up', `%rsi')
+define(`vp', `%rdx')
+define(`n', `%rcx')
+
+define(`LSH', 2)
+define(`RSH', 62)
+define(M, eval(m4_lshift(1,LSH)))
+
+ifdef(`OPERATION_addlsh2_n', `
+ define(ADDSUB, add)
+ define(ADCSBB, adc)
+ define(func_n, mpn_addlsh2_n)
+ define(func_nc, mpn_addlsh2_nc)')
+ifdef(`OPERATION_rsblsh2_n', `
+ define(ADDSUB, sub)
+ define(ADCSBB, sbb)
+ define(func_n, mpn_rsblsh2_n)
+ define(func_nc, mpn_rsblsh2_nc)')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+MULFUNC_PROLOGUE(mpn_addlsh2_n mpn_rsblsh2_n)
+
+ASM_START()
+ TEXT
+ ALIGN(16)
+C func_n computes, over n limbs, rp[] = up[] + (vp[] << 2) when ADCSBB is adc
+C (addlsh2_n) or rp[] = (vp[] << 2) - up[] when ADCSBB is sbb (rsblsh2_n).
+C Each shifted limb is built with lea as (bits carried out of the previous
+C limb) + M * (current limb); the following shr by RSH leaves the bits that
+C carry into the next limb.  Between groups of ADCSBB the carry flag is
+C parked in rax as 0 or -1 (sbb rax, rax) and recovered with add rax, rax.
+C The loop handles four limbs per pass; n mod 4 picks the feed-in path.
+PROLOGUE(func_n)
+ FUNC_ENTRY(4)
+ push %rbx   C rbx and rbp are used as limb registers below
+ push %rbp
+
+ mov R32(n), R32(%rax)
+ and $3, R32(%rax)   C rax = n mod 4
+ jz L(b0) C we rely on rax = 0 at target
+ cmp $2, R32(%rax)
+ mov $0, R32(%rax)   C clear rax; mov leaves the cmp flags intact
+ jz L(b2)
+ jg L(b3)
+
+L(b1): mov (vp), %r9   C n mod 4 = 1
+ lea (%rax,%r9,M), %rbp   C rax is 0 here, so rbp = M * limb 0
+ shr $RSH, %r9
+ sub $1, n
+ lea -8(up), up   C lea does not touch the flags set by sub
+ lea -8(rp), rp
+ jz L(cj1)   C n was 1: only the final limb remains
+ mov 8(vp), %r10
+ lea (%r9,%r10,M), %r9
+ shr $RSH, %r10
+ mov 16(vp), %r11
+ lea 24(vp), vp
+ mov (vp), %r8
+ lea (%r10,%r11,M), %r10
+ shr $RSH, %r11
+ add R32(%rax), R32(%rax)   C rax is 0, so this clears CF
+ jmp L(L1)
+
+L(b2): lea -32(rp), rp   C n mod 4 = 2
+ mov (vp), %r8
+ lea -32(up), up
+ lea (%rax,%r8,M), %rbx
+ shr $RSH, %r8
+ mov 8(vp), %r9
+ sub $2, n
+ jle L(end)   C n was 2: go straight to the wind-down
+ jmp L(top)
+
+L(b3): lea -24(up), up   C n mod 4 = 3
+ mov (vp), %r11
+ lea -24(rp), rp
+ mov 8(vp), %r8
+ lea (%rax,%r11,M), %r10
+ shr $RSH, %r11
+ lea 8(vp), vp
+ lea (%r11,%r8,M), %rbx
+ add $1, n   C bias n for the sub $4 at the loop bottom; also clears CF
+ jmp L(L3)
+
+L(b0): lea -16(up), up   C n mod 4 = 0
+ mov (vp), %r10
+ lea (%rax,%r10,M), %r9
+ shr $RSH, %r10
+ mov 8(vp), %r11
+ lea -16(rp), rp
+ mov 16(vp), %r8
+ lea (%r10,%r11,M), %r10
+ shr $RSH, %r11
+ add R32(%rax), R32(%rax)   C rax is 0, so this clears CF
+ lea 16(vp), vp
+ jmp L(L0)
+
+ ALIGN(16)
+L(top): lea (%r8,%r9,M), %rbp
+ shr $RSH, %r9
+ lea 32(up), up
+ mov 16(vp), %r10
+ lea (%r9,%r10,M), %r9
+ shr $RSH, %r10
+ mov 24(vp), %r11
+ lea 32(rp), rp
+ lea 32(vp), vp
+ mov (vp), %r8
+ lea (%r10,%r11,M), %r10
+ shr $RSH, %r11
+ add R32(%rax), R32(%rax)   C restore carry from the rax mask
+ ADCSBB (up), %rbx
+ mov %rbx, (rp)
+L(L1): ADCSBB 8(up), %rbp
+ mov %rbp, 8(rp)
+L(L0): ADCSBB 16(up), %r9
+ lea (%r11,%r8,M), %rbx   C lea keeps CF intact between the ADCSBB pair
+ mov %r9, 16(rp)
+L(L3): ADCSBB 24(up), %r10
+ sbb R32(%rax), R32(%rax)   C save carry as 0 or -1
+L(L2): shr $RSH, %r8
+ mov 8(vp), %r9
+ mov %r10, 24(rp)
+ sub $4, n
+ jg L(top)
+
+L(end): lea (%r8,%r9,M), %rbp   C wind-down: the last two limbs
+ shr $RSH, %r9   C r9 = bits shifted out of the top limb of vp
+ lea 32(up), up
+ lea 32(rp), rp
+ add R32(%rax), R32(%rax)   C restore carry from the rax mask
+ ADCSBB (up), %rbx
+ mov %rbx, (rp)
+L(cj1): ADCSBB 8(up), %rbp
+ mov %rbp, 8(rp)
+
+ifdef(`OPERATION_addlsh2_n',`
+ mov R32(n), R32(%rax) C zero rax
+ adc %r9, %rax')
+ifdef(`OPERATION_rsblsh2_n',`
+ sbb n, %r9 C subtract 0
+ mov %r9, %rax')
+
+ pop %rbp   C restore in reverse order of the pushes at entry
+ pop %rbx
+ FUNC_EXIT()
+ ret
+EPILOGUE()
dnl X86-64 mpn_add_n, mpn_sub_n, optimized for Intel Atom.
-dnl Copyright 2003, 2004, 2005, 2007, 2008, 2010 Free Software Foundation, Inc.
+dnl Copyright 2003, 2004, 2005, 2007, 2008, 2010, 2011, 2012 Free Software
+dnl Foundation, Inc.
dnl This file is part of the GNU MP Library.
include(`../config.m4')
-
-C cycles/limb
-C K8,K9: 1.85
-C K10: ?
-C P4: ?
-C P6-15 (Core2): ?
-C P6-28 (Atom): 3
-
-C INPUT PARAMETERS
-define(`rp', `%rdi')
-define(`up', `%rsi')
-define(`vp', `%rdx')
-define(`n', `%rcx')
-define(`cy', `%r8') C (only for mpn_add_nc)
-
-ifdef(`OPERATION_add_n', `
- define(ADCSBB, adc)
- define(func, mpn_add_n)
- define(func_nc, mpn_add_nc)')
-ifdef(`OPERATION_sub_n', `
- define(ADCSBB, sbb)
- define(func, mpn_sub_n)
- define(func_nc, mpn_sub_nc)')
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
-
-ASM_START()
- TEXT
- ALIGN(16)
-PROLOGUE(func_nc)
- jmp L(ent)
-EPILOGUE()
-PROLOGUE(func)
- xor %r8, %r8
-L(ent):
- mov R32(%rcx), R32(%rax)
- shr $2, %rcx
- and $3, R32(%rax)
- jz L(b0)
- cmp $2, R32(%rax)
- jz L(b2)
- jg L(b3)
-
-L(b1): mov (%rsi), %r10
- test %rcx, %rcx
- jnz L(gt1)
- shr R32(%r8) C Set CF from argument
- ADCSBB (%rdx), %r10
- mov %r10, (%rdi)
- mov R32(%rcx), R32(%rax) C zero rax
- adc R32(%rax), R32(%rax)
- ret
-L(gt1): shr R32(%r8)
- ADCSBB (%rdx), %r10
- mov 8(%rsi), %r11
- lea 16(%rsi), %rsi
- lea -16(%rdx), %rdx
- lea -16(%rdi), %rdi
- jmp L(m1)
-
-L(b2): mov (%rsi), %r9
- mov 8(%rsi), %r10
- lea -8(%rdx), %rdx
- test %rcx, %rcx
- jnz L(gt2)
- shr R32(%r8)
- lea -40(%rdi), %rdi
- jmp L(e2)
-L(gt2): shr R32(%r8)
- ADCSBB 8(%rdx), %r9
- mov 16(%rsi), %r11
- lea -8(%rsi), %rsi
- lea -8(%rdi), %rdi
- jmp L(m2)
-
-L(b3): mov (%rsi), %rax
- mov 8(%rsi), %r9
- mov 16(%rsi), %r10
- test %rcx, %rcx
- jnz L(gt3)
- shr R32(%r8)
- lea -32(%rdi), %rdi
- jmp L(e3)
-L(gt3): shr R32(%r8)
- ADCSBB (%rdx), %rax
- jmp L(m3)
-
-L(b0): mov (%rsi), %r11
- neg R32(%r8)
- lea -24(%rdx), %rdx
- lea -24(%rdi), %rdi
- lea 8(%rsi), %rsi
- jmp L(m0)
-
- ALIGN(8)
-L(top): mov %r11, 24(%rdi)
- ADCSBB (%rdx), %rax
- lea 32(%rdi), %rdi
-L(m3): mov %rax, (%rdi)
- ADCSBB 8(%rdx), %r9
- mov 24(%rsi), %r11
-L(m2): mov %r9, 8(%rdi)
- ADCSBB 16(%rdx), %r10
- lea 32(%rsi), %rsi
-L(m1): mov %r10, 16(%rdi)
-L(m0): ADCSBB 24(%rdx), %r11
- mov (%rsi), %rax
- mov 8(%rsi), %r9
- lea 32(%rdx), %rdx
- dec %rcx
- mov 16(%rsi), %r10
- jnz L(top)
-
- mov %r11, 24(%rdi)
-L(e3): ADCSBB (%rdx), %rax
- mov %rax, 32(%rdi)
-L(e2): ADCSBB 8(%rdx), %r9
- mov %r9, 40(%rdi)
-L(e1): ADCSBB 16(%rdx), %r10
- mov %r10, 48(%rdi)
- mov R32(%rcx), R32(%rax) C zero rax
- adc R32(%rax), R32(%rax)
- ret
-EPILOGUE()
+include_mpn(`x86_64/coreisbr/aors_n.asm')
--- /dev/null
+dnl X86-64 mpn_copyd optimised for Intel Sandy Bridge.
+
+dnl Copyright 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+MULFUNC_PROLOGUE(mpn_copyd)
+include_mpn(`x86_64/fastsse/copyd-palignr.asm')
--- /dev/null
+dnl X86-64 mpn_copyi optimised for Intel Sandy Bridge.
+
+dnl Copyright 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+MULFUNC_PROLOGUE(mpn_copyi)
+include_mpn(`x86_64/fastsse/copyi-palignr.asm')
--- /dev/null
+dnl AMD64 mpn_divexact_1 -- mpn by limb exact division.
+
+dnl Copyright 2011, 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+MULFUNC_PROLOGUE(mpn_divexact_1)
+include_mpn(`x86_64/nano/dive_1.asm')
/* Intel Atom/64 gmp-mparam.h -- Compiler/machine parameter header file.
Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007,
-2008, 2009, 2010 Free Software Foundation, Inc.
+2008, 2009, 2010, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
#define GMP_LIMB_BITS 64
#define BYTES_PER_MP_LIMB 8
+#define SHLD_SLOW 1
+#define SHRD_SLOW 1
+
+/* These routines exist for all x86_64 chips, but they are slower on Atom
+ than separate add/sub and shift. Make sure they are not really used. */
+#undef HAVE_NATIVE_mpn_rsh1add_n
+#undef HAVE_NATIVE_mpn_rsh1sub_n
+
#define MOD_1_NORM_THRESHOLD 0 /* always */
#define MOD_1_UNNORM_THRESHOLD 0 /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD 37
-#define MOD_1U_TO_MOD_1_1_THRESHOLD 8
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD 0 /* never mpn_mod_1_1p */
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD 14
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 69
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 4
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 3
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD MP_SIZE_T_MAX
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD 0
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 10
#define USE_PREINV_DIVREM_1 1 /* native */
+#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
#define DIVEXACT_1_THRESHOLD 0 /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD 32
+#define BMOD_1_TO_MOD_1_THRESHOLD 15
#define MUL_TOOM22_THRESHOLD 10
#define MUL_TOOM33_THRESHOLD 66
#define MUL_TOOM8H_THRESHOLD 236
#define MUL_TOOM32_TO_TOOM43_THRESHOLD 65
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD 76
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD 73
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD 66
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 131
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 81
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD 78
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD 88
#define SQR_BASECASE_THRESHOLD 0 /* always (native) */
#define SQR_TOOM2_THRESHOLD 16
#define SQR_TOOM3_THRESHOLD 65
#define SQR_TOOM4_THRESHOLD 166
-#define SQR_TOOM6_THRESHOLD 226
+#define SQR_TOOM6_THRESHOLD 222
#define SQR_TOOM8_THRESHOLD 333
-#define MULMOD_BNM1_THRESHOLD 9
-#define SQRMOD_BNM1_THRESHOLD 9
+#define MULMID_TOOM42_THRESHOLD 14
+
+#define MULMOD_BNM1_THRESHOLD 7
+#define SQRMOD_BNM1_THRESHOLD 10
-#define MUL_FFT_MODF_THRESHOLD 208 /* k = 5 */
+#define MUL_FFT_MODF_THRESHOLD 212 /* k = 5 */
#define MUL_FFT_TABLE3 \
- { { 208, 5}, { 7, 4}, { 15, 5}, { 11, 6}, \
- { 6, 5}, { 13, 6}, { 7, 5}, { 15, 6}, \
- { 13, 7}, { 7, 6}, { 15, 7}, { 9, 6}, \
- { 19, 7}, { 13, 8}, { 7, 7}, { 16, 8}, \
- { 9, 7}, { 19, 8}, { 11, 7}, { 23, 8}, \
- { 13, 9}, { 7, 8}, { 15, 7}, { 31, 8}, \
- { 19, 9}, { 11, 8}, { 25,10}, { 7, 9}, \
- { 15, 8}, { 33, 9}, { 19, 8}, { 39, 9}, \
- { 23,10}, { 15, 9}, { 39,10}, { 23, 9}, \
- { 47,11}, { 15,10}, { 31, 9}, { 63, 8}, \
- { 127, 9}, { 67,10}, { 39, 9}, { 79, 8}, \
- { 159,10}, { 47,11}, { 31,10}, { 63, 9}, \
- { 127, 8}, { 255, 7}, { 511,10}, { 71, 9}, \
- { 143, 8}, { 287, 7}, { 575,10}, { 79, 9}, \
- { 159, 8}, { 319,11}, { 47, 9}, { 191,12}, \
- { 31,11}, { 63,10}, { 127, 9}, { 255, 8}, \
- { 511,10}, { 143, 9}, { 287, 8}, { 575,11}, \
- { 79,10}, { 159, 9}, { 319,10}, { 175, 9}, \
- { 351, 8}, { 703, 7}, { 1407,10}, { 191, 9}, \
- { 415,11}, { 111,10}, { 223, 9}, { 447,12}, \
- { 63,11}, { 127,10}, { 255, 9}, { 511,11}, \
- { 143,10}, { 287, 9}, { 575, 8}, { 1151,10}, \
- { 319,11}, { 175,10}, { 351, 9}, { 703, 8}, \
- { 1407,11}, { 191,10}, { 383, 9}, { 767,10}, \
- { 415,11}, { 223,10}, { 447, 9}, { 895,13}, \
- { 63,12}, { 127,11}, { 255,10}, { 511,11}, \
- { 287,10}, { 575, 9}, { 1151,12}, { 159,11}, \
- { 319,10}, { 639,11}, { 351,10}, { 703, 9}, \
- { 1407,12}, { 191,11}, { 383,10}, { 767,11}, \
- { 415,12}, { 223,11}, { 447,10}, { 895,11}, \
- { 479,13}, { 127,12}, { 255,11}, { 511,12}, \
- { 287,11}, { 575,10}, { 1151,12}, { 319,11}, \
- { 639,12}, { 351,11}, { 703,10}, { 1407,13}, \
- { 191,12}, { 383,11}, { 767,12}, { 415,11}, \
- { 831,10}, { 1663,12}, { 447,11}, { 895,14}, \
- { 127,13}, { 255,12}, { 511,11}, { 1023,12}, \
- { 575,11}, { 1151,13}, { 319,12}, { 703,11}, \
- { 1407,13}, { 383,12}, { 831,13}, { 447,12}, \
- { 895,14}, { 255,13}, { 511,12}, { 1023,13}, \
- { 575,12}, { 1151,13}, { 703,12}, { 1407,14}, \
- { 383,13}, { 831,12}, { 1663,13}, { 895,15}, \
- { 255,14}, { 511,13}, { 1023,12}, { 2175,13}, \
- { 1151,14}, { 639,13}, { 1407,12}, { 2815,14}, \
- { 767,13}, { 1663,14}, { 895,13}, { 1919,15}, \
- { 511,14}, { 1023,13}, { 2047,14}, { 1151,13}, \
- { 2431,14}, { 1407,13}, { 2815,15}, { 767,14}, \
- { 1663,16}, { 65536,17}, { 131072,18}, { 262144,19}, \
+ { { 220, 5}, { 7, 4}, { 15, 5}, { 13, 6}, \
+ { 7, 5}, { 15, 6}, { 13, 7}, { 7, 6}, \
+ { 15, 7}, { 8, 6}, { 17, 7}, { 13, 8}, \
+ { 7, 7}, { 17, 8}, { 9, 7}, { 19, 8}, \
+ { 11, 7}, { 23, 8}, { 13, 9}, { 7, 8}, \
+ { 15, 7}, { 31, 8}, { 19, 9}, { 11, 8}, \
+ { 25,10}, { 7, 9}, { 15, 8}, { 33, 9}, \
+ { 19, 8}, { 39, 9}, { 23,10}, { 15, 9}, \
+ { 39,10}, { 23, 9}, { 47,11}, { 15,10}, \
+ { 31, 9}, { 67,10}, { 39, 9}, { 79, 8}, \
+ { 159,10}, { 47, 9}, { 95, 8}, { 191, 7}, \
+ { 383,10}, { 55,11}, { 31,10}, { 63, 9}, \
+ { 127, 8}, { 255,10}, { 71, 9}, { 143,10}, \
+ { 79, 9}, { 159,11}, { 47,10}, { 95, 9}, \
+ { 191, 8}, { 383,12}, { 31,11}, { 63, 9}, \
+ { 255,10}, { 143, 9}, { 287,11}, { 79,10}, \
+ { 159, 9}, { 319,10}, { 175, 9}, { 351, 8}, \
+ { 703,11}, { 95,10}, { 191, 9}, { 383,10}, \
+ { 207, 9}, { 415,11}, { 111,10}, { 223, 9}, \
+ { 447,12}, { 63,11}, { 127,10}, { 255, 9}, \
+ { 511,11}, { 143, 9}, { 575,10}, { 319,11}, \
+ { 175,10}, { 351, 9}, { 703,11}, { 191,10}, \
+ { 383,11}, { 223,13}, { 8192,14}, { 16384,15}, \
+ { 32768,16}, { 65536,17}, { 131072,18}, { 262144,19}, \
{ 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \
{8388608,24} }
-#define MUL_FFT_TABLE3_SIZE 193
-#define MUL_FFT_THRESHOLD 1728
+#define MUL_FFT_TABLE3_SIZE 101
+#define MUL_FFT_THRESHOLD 2112
-#define SQR_FFT_MODF_THRESHOLD 208 /* k = 5 */
+#define SQR_FFT_MODF_THRESHOLD 184 /* k = 5 */
#define SQR_FFT_TABLE3 \
- { { 208, 5}, { 7, 4}, { 15, 5}, { 11, 6}, \
- { 6, 5}, { 13, 6}, { 7, 5}, { 15, 6}, \
- { 13, 7}, { 7, 6}, { 15, 7}, { 8, 6}, \
- { 17, 7}, { 17, 8}, { 9, 7}, { 19, 8}, \
- { 11, 7}, { 23, 8}, { 13, 9}, { 7, 8}, \
- { 19, 9}, { 11, 8}, { 25,10}, { 7, 9}, \
- { 15, 8}, { 31, 9}, { 19, 8}, { 39, 9}, \
- { 23,10}, { 15, 9}, { 39,10}, { 23, 9}, \
- { 47,11}, { 15,10}, { 31, 9}, { 67,10}, \
- { 39, 9}, { 79, 8}, { 159,10}, { 47,11}, \
- { 31,10}, { 63, 9}, { 127, 8}, { 255,10}, \
- { 71, 9}, { 143, 8}, { 287, 7}, { 575, 9}, \
- { 159, 8}, { 319,11}, { 47, 9}, { 191,12}, \
- { 31,11}, { 63,10}, { 127, 9}, { 255, 8}, \
- { 511,10}, { 143, 9}, { 287, 8}, { 575,10}, \
- { 159, 9}, { 319, 8}, { 639, 9}, { 351, 8}, \
- { 703,10}, { 191, 9}, { 383,10}, { 207, 9}, \
- { 415,11}, { 111,10}, { 223,12}, { 63,11}, \
- { 127,10}, { 255, 9}, { 511,11}, { 143,10}, \
- { 287, 9}, { 575,11}, { 159,10}, { 319, 9}, \
- { 639,11}, { 175,10}, { 351, 9}, { 703,11}, \
- { 191,10}, { 383,11}, { 207,10}, { 415,11}, \
- { 223,10}, { 447,13}, { 63,12}, { 127,11}, \
- { 255,10}, { 511,11}, { 287,10}, { 575,12}, \
- { 159,11}, { 319,10}, { 639,11}, { 351,10}, \
- { 703,12}, { 191,11}, { 383,10}, { 767,11}, \
- { 415,12}, { 223,11}, { 447,13}, { 127,12}, \
- { 255,11}, { 543,12}, { 287,11}, { 575,12}, \
- { 319,11}, { 639,12}, { 351,13}, { 191,12}, \
- { 383,11}, { 767,12}, { 415,11}, { 831,12}, \
- { 479,13}, { 255,10}, { 2047,12}, { 575,13}, \
- { 319,11}, { 1279,12}, { 703,13}, { 383,12}, \
- { 831,13}, { 447,12}, { 895,14}, { 255,13}, \
- { 511,12}, { 1023,13}, { 575,12}, { 1151,13}, \
- { 703,14}, { 383,13}, { 831,12}, { 1663,13}, \
- { 895,15}, { 255,14}, { 511,13}, { 1151,14}, \
- { 639,13}, { 1407,12}, { 2815,14}, { 767,13}, \
- { 1663,14}, { 895,13}, { 1791,15}, { 32768,16}, \
- { 65536,17}, { 131072,18}, { 262144,19}, { 524288,20}, \
- {1048576,21}, {2097152,22}, {4194304,23}, {8388608,24} }
-#define SQR_FFT_TABLE3_SIZE 160
+ { { 188, 5}, { 6, 4}, { 13, 5}, { 7, 4}, \
+ { 15, 5}, { 11, 6}, { 6, 5}, { 13, 6}, \
+ { 7, 5}, { 15, 6}, { 13, 7}, { 7, 6}, \
+ { 15, 7}, { 13, 8}, { 7, 7}, { 17, 8}, \
+ { 9, 7}, { 19, 8}, { 11, 7}, { 23, 8}, \
+ { 13, 9}, { 7, 8}, { 19, 9}, { 11, 8}, \
+ { 25,10}, { 7, 9}, { 15, 8}, { 33, 9}, \
+ { 19, 8}, { 39, 9}, { 23,10}, { 15, 9}, \
+ { 39,10}, { 23, 9}, { 47,11}, { 15,10}, \
+ { 31, 9}, { 67,10}, { 39, 9}, { 79,10}, \
+ { 47, 7}, { 383, 9}, { 103,11}, { 31,10}, \
+ { 63, 9}, { 127, 8}, { 255, 7}, { 511, 8}, \
+ { 287, 7}, { 575,10}, { 79, 9}, { 159, 8}, \
+ { 319,11}, { 47, 9}, { 191,12}, { 31,11}, \
+ { 63,10}, { 127, 9}, { 255,10}, { 143, 7}, \
+ { 1151, 9}, { 351,11}, { 95,10}, { 191,11}, \
+ { 111,10}, { 223,12}, { 63, 9}, { 511,11}, \
+ { 143,10}, { 287, 9}, { 575,11}, { 159,10}, \
+ { 319, 9}, { 639,11}, { 175,10}, { 351, 9}, \
+ { 703,12}, { 95,11}, { 191,10}, { 383,11}, \
+ { 207,10}, { 415,11}, { 223,10}, { 447,13}, \
+ { 8192,14}, { 16384,15}, { 32768,16}, { 65536,17}, \
+ { 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \
+ {2097152,22}, {4194304,23}, {8388608,24} }
+#define SQR_FFT_TABLE3_SIZE 95
#define SQR_FFT_THRESHOLD 1600
-#define MULLO_BASECASE_THRESHOLD 0
-#define MULLO_DC_THRESHOLD 22
-#define MULLO_MUL_N_THRESHOLD 3176
+#define MULLO_BASECASE_THRESHOLD 0 /* always */
+#define MULLO_DC_THRESHOLD 33
+#define MULLO_MUL_N_THRESHOLD 4141
-#define DC_DIV_QR_THRESHOLD 26
-#define DC_DIVAPPR_Q_THRESHOLD 93
+#define DC_DIV_QR_THRESHOLD 27
+#define DC_DIVAPPR_Q_THRESHOLD 94
#define DC_BDIV_QR_THRESHOLD 27
-#define DC_BDIV_Q_THRESHOLD 62
+#define DC_BDIV_Q_THRESHOLD 54
#define INV_MULMOD_BNM1_THRESHOLD 18
#define INV_NEWTON_THRESHOLD 131
-#define INV_APPR_THRESHOLD 110
+#define INV_APPR_THRESHOLD 106
#define BINV_NEWTON_THRESHOLD 165
-#define REDC_1_TO_REDC_2_THRESHOLD 12
+#define REDC_1_TO_REDC_2_THRESHOLD 14
#define REDC_2_TO_REDC_N_THRESHOLD 36
#define MU_DIV_QR_THRESHOLD 792
#define MU_DIVAPPR_Q_THRESHOLD 807
#define MUPI_DIV_QR_THRESHOLD 67
#define MU_BDIV_QR_THRESHOLD 654
-#define MU_BDIV_Q_THRESHOLD 792
+#define MU_BDIV_Q_THRESHOLD 748
-#define MATRIX22_STRASSEN_THRESHOLD 13
-#define HGCD_THRESHOLD 83
-#define GCD_DC_THRESHOLD 198
-#define GCDEXT_DC_THRESHOLD 198
+#define POWM_SEC_TABLE 4,32,204,724,1926
+
+#define MATRIX22_STRASSEN_THRESHOLD 15
+#define HGCD_THRESHOLD 84
+#define HGCD_APPR_THRESHOLD 87
+#define HGCD_REDUCE_THRESHOLD 1182
+#define GCD_DC_THRESHOLD 195
+#define GCDEXT_DC_THRESHOLD 180
#define JACOBI_BASE_METHOD 2
-#define GET_STR_DC_THRESHOLD 15
-#define GET_STR_PRECOMPUTE_THRESHOLD 27
-#define SET_STR_DC_THRESHOLD 254
-#define SET_STR_PRECOMPUTE_THRESHOLD 1122
+#define GET_STR_DC_THRESHOLD 18
+#define GET_STR_PRECOMPUTE_THRESHOLD 32
+#define SET_STR_DC_THRESHOLD 256
+#define SET_STR_PRECOMPUTE_THRESHOLD 1151
+
+#define FAC_DSC_THRESHOLD 1065
+#define FAC_ODD_THRESHOLD 0 /* always */
--- /dev/null
+dnl AMD64 mpn_lshift -- mpn left shift, optimised for Atom.
+
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
+dnl Copyright 2011, 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C AMD K8,K9 ?
+C AMD K10 ?
+C Intel P4 ?
+C Intel core2 ?
+C Intel NHM ?
+C Intel SBR ?
+C Intel atom 4.5
+C VIA nano ?
+
+C TODO
+C * Consider using 4-way unrolling. We reach 4 c/l, but the code is 2.5 times
+C larger.
+
+C INPUT PARAMETERS
+define(`rp', `%rdi')
+define(`up', `%rsi')
+define(`n', `%rdx')
+define(`cnt', `%rcx')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+ TEXT
+ ALIGN(16)
+C mpn_lshift shifts the n limbs at up left by cnt bits and stores them at rp,
+C working from the most significant limb downward, two limbs per loop pass.
+C rax is loaded with the top source limb shifted right by the negated count,
+C that is, the bits pushed out at the top.
+C The count register cl is flipped between cnt and -cnt with neg; a 64-bit
+C shift uses only the low 6 bits of cl, so -cnt acts as 64-cnt.
+C NOTE(review): this presumably requires 1 <= cnt <= 63 and n >= 1 - confirm
+C against the callers.
+PROLOGUE(mpn_lshift)
+ FUNC_ENTRY(4)
+ lea -8(up,n,8), up   C up -> most significant source limb
+ lea -8(rp,n,8), rp   C rp -> most significant result limb
+ shr R32(n)   C n = n/2, CF = low bit of the old n
+ mov (up), %rax   C mov keeps CF for the jnc
+ jnc L(evn)
+
+ mov %rax, %r11   C odd n
+ shl R8(%rcx), %r11   C high part of the top result limb
+ neg R8(%rcx)   C cl = -cnt
+ shr R8(%rcx), %rax   C rax = bits shifted out of the top limb
+ test n, n
+ jnz L(gt1)
+ mov %r11, (rp)   C n was 1: single result limb
+ FUNC_EXIT()
+ ret
+
+L(gt1): mov -8(up), %r8
+ mov %r8, %r10   C keep a copy for the left shift done later
+ shr R8(%rcx), %r8
+ jmp L(lo1)
+
+L(evn): mov %rax, %r10   C even n
+ neg R8(%rcx)   C cl = -cnt
+ shr R8(%rcx), %rax   C rax = bits shifted out of the top limb
+ mov -8(up), %r9
+ mov %r9, %r11
+ shr R8(%rcx), %r9
+ neg R8(%rcx)   C cl = cnt again
+ dec n
+ lea 8(rp), rp   C lea does not touch the flags set by dec
+ lea -8(up), up
+ jz L(end)   C n was 2: only the wind-down remains
+
+ ALIGN(8)
+L(top): shl R8(%rcx), %r10
+ or %r10, %r9   C combine the two parts of one result limb
+ shl R8(%rcx), %r11
+ neg R8(%rcx)   C cl = -cnt
+ mov -8(up), %r8
+ mov %r8, %r10
+ mov %r9, -8(rp)
+ shr R8(%rcx), %r8
+ lea -16(rp), rp
+L(lo1): mov -16(up), %r9
+ or %r11, %r8   C combine the two parts of the next result limb
+ mov %r9, %r11
+ shr R8(%rcx), %r9
+ lea -16(up), up
+ neg R8(%rcx)   C cl = cnt
+ mov %r8, (rp)
+ dec n
+ jg L(top)
+
+L(end): shl R8(%rcx), %r10   C wind-down: the two least significant limbs
+ or %r10, %r9
+ shl R8(%rcx), %r11   C lowest limb gets zeros shifted in
+ mov %r9, -8(rp)
+ mov %r11, -16(rp)
+ FUNC_EXIT()
+ ret
+EPILOGUE()
--- /dev/null
+dnl AMD64 mpn_lshiftc -- mpn left shift with complement, optimised for Atom.
+
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
+dnl Copyright 2011, 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C AMD K8,K9 ?
+C AMD K10 ?
+C Intel P4 ?
+C Intel core2 ?
+C Intel NHM ?
+C Intel SBR ?
+C Intel atom 5
+C VIA nano ?
+
+C TODO
+C * Consider using 4-way unrolling. We reach 4.5 c/l, but the code is 2.5
+C times larger.
+
+C INPUT PARAMETERS
+define(`rp', `%rdi')
+define(`up', `%rsi')
+define(`n', `%rdx')
+define(`cnt', `%rcx')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+ TEXT
+ ALIGN(16)
+C mpn_lshiftc shifts the n limbs at up left by cnt bits and stores the
+C bitwise complement of each shifted limb at rp, working from the most
+C significant limb downward, two limbs per loop pass.
+C rax is loaded with the top source limb shifted right by the negated count;
+C no complement is applied to rax.
+C The count register cl is flipped between cnt and -cnt with neg; a 64-bit
+C shift uses only the low 6 bits of cl, so -cnt acts as 64-cnt.
+C NOTE(review): this presumably requires 1 <= cnt <= 63 and n >= 1 - confirm
+C against the callers.
+PROLOGUE(mpn_lshiftc)
+ FUNC_ENTRY(4)
+ lea -8(up,n,8), up   C up -> most significant source limb
+ lea -8(rp,n,8), rp   C rp -> most significant result limb
+ shr R32(n)   C n = n/2, CF = low bit of the old n
+ mov (up), %rax   C mov keeps CF for the jnc
+ jnc L(evn)
+
+ mov %rax, %r11   C odd n
+ shl R8(%rcx), %r11
+ neg R8(%rcx)   C cl = -cnt
+ shr R8(%rcx), %rax   C rax = bits shifted out of the top limb
+ test n, n
+ jnz L(gt1)
+ not %r11   C n was 1: complement and store the single limb
+ mov %r11, (rp)
+ FUNC_EXIT()
+ ret
+
+L(gt1): mov -8(up), %r8
+ mov %r8, %r10   C keep a copy for the left shift done later
+ shr R8(%rcx), %r8
+ jmp L(lo1)
+
+L(evn): mov %rax, %r10   C even n
+ neg R8(%rcx)   C cl = -cnt
+ shr R8(%rcx), %rax   C rax = bits shifted out of the top limb
+ mov -8(up), %r9
+ mov %r9, %r11
+ shr R8(%rcx), %r9
+ neg R8(%rcx)   C cl = cnt again
+ lea 8(rp), rp
+ lea -8(up), up
+ jmp L(lo0)   C join at the loop test
+
+C ALIGN(16)
+L(top): shl R8(%rcx), %r10
+ or %r10, %r9   C combine the two parts of one result limb
+ shl R8(%rcx), %r11
+ not %r9   C complement before the store
+ neg R8(%rcx)   C cl = -cnt
+ mov -8(up), %r8
+ lea -16(rp), rp
+ mov %r8, %r10
+ shr R8(%rcx), %r8
+ mov %r9, 8(rp)
+L(lo1): or %r11, %r8   C combine the two parts of the next result limb
+ mov -16(up), %r9
+ mov %r9, %r11
+ shr R8(%rcx), %r9
+ lea -16(up), up
+ neg R8(%rcx)   C cl = cnt
+ not %r8   C complement before the store
+ mov %r8, (rp)
+L(lo0): dec n
+ jg L(top)
+
+L(end): shl R8(%rcx), %r10   C wind-down: the two least significant limbs
+ or %r10, %r9
+ not %r9
+ shl R8(%rcx), %r11
+ not %r11   C the zeros shifted into the lowest limb become ones
+ mov %r9, -8(rp)
+ mov %r11, -16(rp)
+ FUNC_EXIT()
+ ret
+EPILOGUE()
--- /dev/null
+dnl x86-64 mpn_popcount.
+
+dnl Copyright 2007, 2011 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+MULFUNC_PROLOGUE(mpn_popcount)
+include_mpn(`x86/pentium4/sse2/popcount.asm')
--- /dev/null
+dnl x86-64 mpn_rsh1add_n/mpn_rsh1sub_n.
+
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
+dnl Copyright 2011, 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C TODO
+C * Schedule loop less. It is now almost surely overscheduled, resulting in
+C large feed-in and wind-down code.
+
+C cycles/limb
+C AMD K8,K9 ?
+C AMD K10 ?
+C Intel P4 ?
+C Intel core2 ?
+C Intel NHM ?
+C Intel SBR ?
+C Intel atom 5.25
+C VIA nano ?
+
+C INPUT PARAMETERS
+define(`rp',`%rdi')
+define(`up',`%rsi')
+define(`vp',`%rdx')
+define(`n',`%rcx')
+
+ifdef(`OPERATION_rsh1add_n', `
+ define(ADDSUB, add)
+ define(ADCSBB, adc)
+ define(func_n, mpn_rsh1add_n)
+ define(func_nc, mpn_rsh1add_nc)')
+ifdef(`OPERATION_rsh1sub_n', `
+ define(ADDSUB, sub)
+ define(ADCSBB, sbb)
+ define(func_n, mpn_rsh1sub_n)
+ define(func_nc, mpn_rsh1sub_nc)')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+MULFUNC_PROLOGUE(mpn_rsh1add_n mpn_rsh1sub_n)
+
+ASM_START()
+ TEXT
+ ALIGN(16)
+C func_n computes rp[] = (up[] ADDSUB vp[]) >> 1 over n limbs, where ADDSUB
+C is add for rsh1add_n and sub for rsh1sub_n.
+C rax receives the bit shifted out of the least significant sum limb.  The
+C carry or borrow out of the most significant limb is moved into the top bit
+C of the most significant result limb (shl $63 of the rbx mask, then add).
+C rbx parks the carry flag as 0 or -1 between ADCSBB groups (sbb rbx, rbx)
+C and add rbx, rbx recovers it.  Each result limb is the OR of a sum limb
+C shifted right by 1 and the next sum limb shifted left by 63.
+C The loop handles four limbs per pass; n mod 4 picks the feed-in path.
+PROLOGUE(func_n)
+ FUNC_ENTRY(4)
+ push %rbx   C six registers saved; popped in reverse order at L(cj1)
+ push %rbp
+ push %r12
+ push %r13
+ push %r14
+ push %r15
+
+ mov (up), %r15
+ ADDSUB (vp), %r15   C sum limb 0
+ sbb R32(%rbx), R32(%rbx)   C save carry as 0 or -1
+ xor R32(%rax), R32(%rax)
+ shr %r15   C CF = low bit of sum limb 0
+ adc R32(%rax), R32(%rax) C return value
+
+ mov R32(n), R32(%rbp)
+ and $3, R32(%rbp)   C rbp = n mod 4
+ jz L(b0)
+ cmp $2, R32(%rbp)
+ jae L(b23)
+
+L(b1): dec n   C n mod 4 = 1
+ jnz L(gt1)
+ shl $63, %rbx   C n was 1: carry becomes the top bit of the only limb
+ add %rbx, %r15
+ mov %r15, (rp)
+ jmp L(cj1)
+L(gt1): lea 24(up), up
+ lea 24(vp), vp
+ mov -16(up), %r9
+ add R32(%rbx), R32(%rbx)   C restore carry
+ mov -8(up), %r10
+ lea 24(rp), rp
+ mov (up), %r11
+ ADCSBB -16(vp), %r9
+ ADCSBB -8(vp), %r10
+ mov %r15, %r12
+ ADCSBB (vp), %r11
+ mov %r9, %r13
+ sbb R32(%rbx), R32(%rbx)   C save carry
+ mov %r11, %r15
+ mov %r10, %r14
+ shl $63, %r11
+ shl $63, %r10
+ shl $63, %r9
+ or %r9, %r12
+ shr %r13
+ mov 8(up), %r8
+ shr %r14
+ or %r10, %r13
+ shr %r15
+ or %r11, %r14
+ sub $4, n
+ jz L(cj5)   C n was 5: go to the wind-down
+L(gt5): mov 16(up), %r9
+ add R32(%rbx), R32(%rbx)   C restore carry
+ mov 24(up), %r10
+ ADCSBB 8(vp), %r8
+ mov %r15, %rbp
+ mov 32(up), %r11
+ jmp L(lo1)
+
+L(b23): jnz L(b3)   C flags still come from the cmp $2 above
+ mov 8(up), %r8   C n mod 4 = 2
+ sub $2, n
+ jnz L(gt2)
+ add R32(%rbx), R32(%rbx)   C n was 2: restore carry, add the last limb
+ ADCSBB 8(vp), %r8
+ mov %r8, %r12
+ jmp L(cj2)
+L(gt2): mov 16(up), %r9
+ add R32(%rbx), R32(%rbx)   C restore carry
+ mov 24(up), %r10
+ ADCSBB 8(vp), %r8
+ mov %r15, %rbp
+ mov 32(up), %r11
+ ADCSBB 16(vp), %r9
+ lea 32(up), up
+ ADCSBB 24(vp), %r10
+ mov %r9, %r13
+ ADCSBB 32(vp), %r11
+ mov %r8, %r12
+ jmp L(lo2)
+
+L(b3): lea 40(up), up   C n mod 4 = 3
+ lea 8(vp), vp
+ mov %r15, %r14
+ add R32(%rbx), R32(%rbx)   C restore carry
+ mov -32(up), %r11
+ ADCSBB 0(vp), %r11
+ lea 8(rp), rp
+ sbb R32(%rbx), R32(%rbx)   C save carry
+ mov %r11, %r15
+ shl $63, %r11
+ mov -24(up), %r8
+ shr %r15
+ or %r11, %r14
+ sub $3, n
+ jnz L(gt3)
+ add R32(%rbx), R32(%rbx)   C n was 3: restore carry, add the last limb
+ ADCSBB 8(vp), %r8
+ jmp L(cj3)
+L(gt3): mov -16(up), %r9
+ add R32(%rbx), R32(%rbx)   C restore carry
+ mov -8(up), %r10
+ ADCSBB 8(vp), %r8
+ mov %r15, %rbp
+ mov (up), %r11
+ ADCSBB 16(vp), %r9
+ ADCSBB 24(vp), %r10
+ mov %r8, %r12
+ jmp L(lo3)
+
+L(b0): lea 48(up), up   C n mod 4 = 0
+ lea 16(vp), vp
+ add R32(%rbx), R32(%rbx)   C restore carry
+ mov -40(up), %r10
+ lea 16(rp), rp
+ mov -32(up), %r11
+ ADCSBB -8(vp), %r10
+ mov %r15, %r13
+ ADCSBB (vp), %r11
+ sbb R32(%rbx), R32(%rbx)   C save carry
+ mov %r11, %r15
+ mov %r10, %r14
+ shl $63, %r11
+ shl $63, %r10
+ mov -24(up), %r8
+ shr %r14
+ or %r10, %r13
+ shr %r15
+ or %r11, %r14
+ sub $4, n
+ jnz L(gt4)
+ add R32(%rbx), R32(%rbx)   C n was 4: restore carry, add the last limb
+ ADCSBB 8(vp), %r8
+ jmp L(cj4)
+L(gt4): mov -16(up), %r9
+ add R32(%rbx), R32(%rbx)   C restore carry
+ mov -8(up), %r10
+ ADCSBB 8(vp), %r8
+ mov %r15, %rbp
+ mov (up), %r11
+ ADCSBB 16(vp), %r9
+ jmp L(lo0)
+
+ ALIGN(8)
+L(top): mov 16(up), %r9
+ shr %r14
+ or %r10, %r13
+ shr %r15
+ or %r11, %r14
+ add R32(%rbx), R32(%rbx)   C restore carry
+ mov 24(up), %r10
+ mov %rbp, (rp)
+ ADCSBB 8(vp), %r8
+ mov %r15, %rbp
+ lea 32(rp), rp
+ mov 32(up), %r11
+L(lo1): ADCSBB 16(vp), %r9
+ lea 32(up), up   C lea and mov keep CF intact between the ADCSBB steps
+ mov %r12, -24(rp)
+L(lo0): ADCSBB 24(vp), %r10
+ mov %r8, %r12
+ mov %r13, -16(rp)
+L(lo3): ADCSBB 32(vp), %r11
+ mov %r9, %r13
+ mov %r14, -8(rp)
+L(lo2): sbb R32(%rbx), R32(%rbx)   C save carry
+ shl $63, %r8
+ mov %r11, %r15
+ shr %r12
+ mov %r10, %r14
+ shl $63, %r9
+ lea 32(vp), vp
+ shl $63, %r10
+ or %r8, %rbp
+ shl $63, %r11
+ or %r9, %r12
+ shr %r13
+ mov 8(up), %r8
+ sub $4, n
+ jg L(top)
+
+L(end): shr %r14   C wind-down: finish the limbs still in registers
+ or %r10, %r13
+ shr %r15
+ or %r11, %r14
+ mov %rbp, (rp)
+ lea 32(rp), rp
+L(cj5): add R32(%rbx), R32(%rbx)   C restore carry
+ ADCSBB 8(vp), %r8   C most significant sum limb
+ mov %r12, -24(rp)
+L(cj4): mov %r13, -16(rp)
+L(cj3): mov %r8, %r12
+ mov %r14, -8(rp)
+L(cj2): sbb R32(%rbx), R32(%rbx)   C final carry as 0 or -1
+ shl $63, %r8
+ shr %r12
+ or %r8, %r15
+ shl $63, %rbx   C final carry -> top bit
+ add %rbx, %r12   C insert it into the most significant result limb
+ mov %r15, (rp)
+ mov %r12, 8(rp)
+L(cj1): pop %r15   C restore in reverse order of the pushes at entry
+ pop %r14
+ pop %r13
+ pop %r12
+ pop %rbp
+ pop %rbx
+ FUNC_EXIT()
+ ret
+EPILOGUE()
--- /dev/null
+dnl AMD64 mpn_rshift -- mpn right shift, optimised for Atom.
+
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
+dnl Copyright 2011, 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C AMD K8,K9 ?
+C AMD K10 ?
+C Intel P4 ?
+C Intel core2 ?
+C Intel NHM ?
+C Intel SBR ?
+C Intel atom 4.5
+C VIA nano ?
+
+C TODO
+C * Consider using 4-way unrolling. We reach 4 c/l, but the code is 2.5 times
+C larger.
+
+C INPUT PARAMETERS
+define(`rp', `%rdi')
+define(`up', `%rsi')
+define(`n', `%rdx')
+define(`cnt', `%rcx')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+ TEXT
+ ALIGN(16)
+C mpn_rshift: write {up,n} shifted right by cnt bits to {rp,n}.
+C
+C Register roles, as read from the code below:
+C   rax       up[0] shifted left by 64-cnt; not written again before ret
+C   r8, r9    a source limb shifted left by 64-cnt (bits arriving from above)
+C   r10, r11  unshifted copies of those limbs, later shifted right by cnt
+C The low byte of cnt is negated back and forth, so each shl sees -cnt and
+C each shr sees cnt.
+C NOTE(review): this relies on the CPU reducing a 64-bit shift count modulo
+C 64 and on cnt being in 1..63 -- confirm against the mpn_rshift contract.
+PROLOGUE(mpn_rshift)
+ FUNC_ENTRY(4)
+C Halve n, since the loop produces two limbs per pass; CF receives the parity.
+ shr R32(n)
+ mov (up), %rax
+ jnc L(evn)
+
+C Odd n: r11 becomes up[0] >> cnt, rax the bits shifted out of up[0].
+ mov %rax, %r11
+ shr R8(cnt), %r11
+ neg R8(cnt)
+ shl R8(cnt), %rax
+ test n, n
+ jnz L(gt1)
+C n was 1: the only result limb is up[0] >> cnt.
+ mov %r11, (rp)
+ FUNC_EXIT()
+ ret
+
+C Odd n > 1: preload up[1] and enter the loop at its midpoint.
+L(gt1): mov 8(up), %r8
+ mov %r8, %r10
+ shl R8(cnt), %r8
+ jmp L(lo1)
+
+C Even n: same setup using up[0] and up[1].  rp is biased by -8 and up by +8
+C so that the fixed offsets used in the loop and the wind-down line up.
+L(evn): mov %rax, %r10
+ neg R8(cnt)
+ shl R8(cnt), %rax
+ mov 8(up), %r9
+ mov %r9, %r11
+ shl R8(cnt), %r9
+ neg R8(cnt)
+ dec n
+C lea does not modify flags, so the jz below still tests the result of dec.
+ lea -8(rp), rp
+ lea 8(up), up
+ jz L(end)
+
+C Main loop: two result limbs per pass, each formed by or-ing the right-shifted
+C lower limb with the left-shifted upper limb.
+ ALIGN(8)
+L(top): shr R8(cnt), %r10
+ or %r10, %r9
+ shr R8(cnt), %r11
+ neg R8(cnt)
+ mov 8(up), %r8
+ mov %r8, %r10
+ mov %r9, 8(rp)
+ shl R8(cnt), %r8
+ lea 16(rp), rp
+L(lo1): mov 16(up), %r9
+ or %r11, %r8
+ mov %r9, %r11
+ shl R8(cnt), %r9
+ lea 16(up), up
+ neg R8(cnt)
+ mov %r8, (rp)
+ dec n
+ jg L(top)
+
+C Wind-down: store the last combined limb, then the top limb, which is just
+C the highest source limb shifted right by cnt.
+L(end): shr R8(cnt), %r10
+ or %r10, %r9
+ shr R8(cnt), %r11
+ mov %r9, 8(rp)
+ mov %r11, 16(rp)
+ FUNC_EXIT()
+ ret
+EPILOGUE()
--- /dev/null
+dnl AMD64 mpn_sublsh1_n -- rp[] = up[] - (vp[] << 1) optimised for Intel Atom.
+
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
+dnl Copyright 2011, 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C TODO
+C * This code is slightly large at 501 bytes.
+C * aorrlsh1_n.asm and this file use the same basic pattern.
+
+C cycles/limb
+C AMD K8,K9 ?
+C AMD K10 ?
+C Intel P4 ?
+C Intel core2 ?
+C Intel NHM ?
+C Intel SBR ?
+C Intel atom 5 (4.875 is probably possible)
+C VIA nano ?
+
+C INPUT PARAMETERS
+define(`rp', `%rdi')
+define(`up', `%rsi')
+define(`vp', `%rdx')
+define(`n', `%rcx')
+define(`cy', `%r8')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+ TEXT
+ ALIGN(16)
+C mpn_sublsh1_n: rp[] = up[] - (vp[] << 1) over n limbs; the return value in
+C rax is computed at L(rtn) from the two saved carries.
+C
+C Two carry chains are interleaved and parked in registers as 0 or -1:
+C   rax  "scy", the bit shifted out of the top of the doubled vp limbs
+C   rbp  "acy", the borrow of the subtraction
+C A chain is parked with sbb reg,reg and reloaded into CF with add reg,reg.
+C
+C Structure: n mod 4 limbs are handled first (L(b1), L(b2), L(b3)), then one
+C block of 4 limbs if bit 2 of n is set (L(b0)), then 8 limbs per pass in
+C L(top).  n itself is not decremented by the feed-in blocks.
+PROLOGUE(mpn_sublsh1_n)
+ FUNC_ENTRY(4)
+ push %rbp
+ push %r15
+C No incoming borrow for the plain entry point.
+ xor R32(%rbp), R32(%rbp)
+C L(ent) is also the jump target of mpn_sublsh1_nc, which arrives with rbp
+C already holding the incoming borrow.  rax = n mod 4; when that is zero it
+C doubles as a cleared scy for the code at L(b0) and L(top).
+L(ent): mov R32(n), R32(%rax)
+ and $3, R32(%rax)
+ jz L(b0)
+ cmp $2, R32(%rax)
+ jz L(b2)
+ jg L(b3)
+
+C One leading limb.
+L(b1): mov (vp), %r8
+ add %r8, %r8
+ lea 8(vp), vp
+ sbb R32(%rax), R32(%rax) C save scy
+ add R32(%rbp), R32(%rbp) C restore acy
+ mov (up), %r15
+ sbb %r8, %r15
+ mov %r15, (rp)
+ sbb R32(%rbp), R32(%rbp) C save acy
+ lea 8(up), up
+ lea 8(rp), rp
+ jmp L(b0)
+
+C Two leading limbs.
+L(b2): mov (vp), %r8
+ add %r8, %r8
+ mov 8(vp), %r9
+ adc %r9, %r9
+ lea 16(vp), vp
+ sbb R32(%rax), R32(%rax) C save scy
+ add R32(%rbp), R32(%rbp) C restore acy
+ mov (up), %r15
+ sbb %r8, %r15
+ mov %r15, (rp)
+ mov 8(up), %r15
+ sbb %r9, %r15
+ mov %r15, 8(rp)
+ sbb R32(%rbp), R32(%rbp) C save acy
+ lea 16(up), up
+ lea 16(rp), rp
+ jmp L(b0)
+
+C Three leading limbs; falls through into L(b0).
+L(b3): mov (vp), %r8
+ add %r8, %r8
+ mov 8(vp), %r9
+ adc %r9, %r9
+ mov 16(vp), %r10
+ adc %r10, %r10
+ lea 24(vp), vp
+ sbb R32(%rax), R32(%rax) C save scy
+ add R32(%rbp), R32(%rbp) C restore acy
+ mov (up), %r15
+ sbb %r8, %r15
+ mov %r15, (rp)
+ mov 8(up), %r15
+ sbb %r9, %r15
+ mov %r15, 8(rp)
+ mov 16(up), %r15
+ sbb %r10, %r15
+ mov %r15, 16(rp)
+ sbb R32(%rbp), R32(%rbp) C save acy
+ lea 24(up), up
+ lea 24(rp), rp
+
+C If bit 2 of n is set, process one block of four limbs here.
+L(b0): test $4, R8(n)
+ jz L(skp)
+ add R32(%rax), R32(%rax) C restore scy
+ mov (vp), %r8
+ adc %r8, %r8
+ mov 8(vp), %r9
+ adc %r9, %r9
+ mov 16(vp), %r10
+ adc %r10, %r10
+ mov 24(vp), %r11
+ adc %r11, %r11
+ lea 32(vp), vp
+ sbb R32(%rax), R32(%rax) C save scy
+ add R32(%rbp), R32(%rbp) C restore acy
+ mov (up), %r15
+ sbb %r8, %r15
+ mov %r15, (rp)
+ mov 8(up), %r15
+ sbb %r9, %r15
+ mov %r15, 8(rp)
+ mov 16(up), %r15
+ sbb %r10, %r15
+ mov %r15, 16(rp)
+ mov 24(up), %r15
+ sbb %r11, %r15
+ mov %r15, 24(rp)
+ lea 32(up), up
+ lea 32(rp), rp
+ sbb R32(%rbp), R32(%rbp) C save acy
+
+C Fewer than 8 limbs in total means nothing is left for the main loop.
+L(skp): cmp $8, n
+ jl L(rtn)
+
+C The loop needs four more registers; they are saved only on this path and
+C restored at L(end).  rp is biased by -64 because the loop advances it by 64
+C before its first store.
+ push %r12
+ push %r13
+ push %r14
+ push %rbx
+ lea -64(rp), rp
+ jmp L(x)
+
+C Main loop, 8 limbs per pass: first double eight vp limbs into r8..r15 on the
+C scy chain, then subtract them from eight up limbs on the acy chain.
+C rbp is reused as a data temporary once acy has been moved into CF, and is
+C reloaded with the new acy by the final sbb.
+ ALIGN(16)
+L(top): mov (vp), %r8
+ add R32(%rax), R32(%rax)
+ lea 64(vp), vp
+ adc %r8, %r8
+ mov -56(vp), %r9
+ adc %r9, %r9
+ mov -48(vp), %r10
+ adc %r10, %r10
+ mov -40(vp), %r11
+ adc %r11, %r11
+ mov -32(vp), %r12
+ adc %r12, %r12
+ mov -24(vp), %r13
+ adc %r13, %r13
+ mov -16(vp), %r14
+ adc %r14, %r14
+ mov -8(vp), %r15
+ adc %r15, %r15
+ sbb R32(%rax), R32(%rax)
+ add R32(%rbp), R32(%rbp)
+ mov (up), %rbp
+ lea 64(rp), rp
+ mov 8(up), %rbx
+ sbb %r8, %rbp
+ mov 32(up), %r8
+ mov %rbp, (rp)
+ sbb %r9, %rbx
+ mov 16(up), %rbp
+ mov %rbx, 8(rp)
+ sbb %r10, %rbp
+ mov 24(up), %rbx
+ mov %rbp, 16(rp)
+ sbb %r11, %rbx
+ mov %rbx, 24(rp)
+ sbb %r12, %r8
+ mov 40(up), %r9
+ mov %r8, 32(rp)
+ sbb %r13, %r9
+ mov 48(up), %rbp
+ mov %r9, 40(rp)
+ sbb %r14, %rbp
+ mov 56(up), %rbx
+ mov %rbp, 48(rp)
+ sbb %r15, %rbx
+ lea 64(up), up
+ mov %rbx, 56(rp)
+ sbb R32(%rbp), R32(%rbp)
+C Loop control: one pass for every full 8 limbs still counted in n.
+L(x): sub $8, n
+ jge L(top)
+
+L(end): pop %rbx
+ pop %r14
+ pop %r13
+ pop %r12
+C rax and rbp each hold 0 or -1 here, so the negated sum returned in rax is
+C in the range 0..2.
+L(rtn):
+ add R32(%rbp), R32(%rax)
+ neg R32(%rax)
+
+ pop %r15
+ pop %rbp
+ FUNC_EXIT()
+ ret
+EPILOGUE()
+C mpn_sublsh1_nc: variant of mpn_sublsh1_n with a carry-in argument in r8.
+C It performs the same register saves as mpn_sublsh1_n, converts the carry-in
+C into the 0 or -1 form kept in rbp, and joins the shared code at L(ent).
+C On DOS64 the carry-in is the fifth argument and is fetched from the stack.
+C NOTE(review): neg sets CF for any nonzero r8, so a carry-in larger than 1
+C is treated as a single borrow -- confirm callers only pass 0 or 1.
+PROLOGUE(mpn_sublsh1_nc)
+ FUNC_ENTRY(4)
+IFDOS(` mov 56(%rsp), %r8 ')
+ push %rbp
+ push %r15
+ neg %r8 C set CF
+ sbb R32(%rbp), R32(%rbp) C save acy
+ jmp L(ent)
+EPILOGUE()
--- /dev/null
+dnl AMD64 mpn_addmul_1 and mpn_submul_1 optimised for AMD Bulldozer.
+
+dnl Copyright 2003, 2004, 2005, 2007, 2008, 2011, 2012 Free Software
+dnl Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C AMD K8,K9
+C AMD K10
+C AMD bd1 4.5-4.7
+C AMD bobcat
+C Intel P4
+C Intel core2
+C Intel NHM
+C Intel SBR
+C Intel atom
+C VIA nano
+
+C The loop of this code is the result of running a code generation and
+C optimisation tool suite written by David Harvey and Torbjorn Granlund.
+
+C TODO
+C * Try to make loop run closer to 4 c/l.
+
+define(`rp', `%rdi') C rcx
+define(`up', `%rsi') C rdx
+define(`n_param', `%rdx') C r8
+define(`v0', `%rcx') C r9
+
+define(`n', `%r11')
+
+ifdef(`OPERATION_addmul_1',`
+ define(`ADDSUB', `add')
+ define(`func', `mpn_addmul_1')
+')
+ifdef(`OPERATION_submul_1',`
+ define(`ADDSUB', `sub')
+ define(`func', `mpn_submul_1')
+')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1)
+
+IFDOS(` define(`up', ``%rsi'') ') dnl
+IFDOS(` define(`rp', ``%rcx'') ') dnl
+IFDOS(` define(`v0', ``%r9'') ') dnl
+IFDOS(` define(`r9', ``rdi'') ') dnl
+IFDOS(` define(`n', ``%r8'') ') dnl
+IFDOS(` define(`r8', ``r11'') ') dnl
+
+ASM_START()
+ TEXT
+ ALIGN(16)
+C func expands to mpn_addmul_1 or mpn_submul_1, and ADDSUB to add or sub,
+C depending on which OPERATION_ symbol is defined above.
+C
+C Each limb of up is multiplied by v0 and the product limbs are applied to
+C memory at rp with ADDSUB.  The value returned in rax is the final high limb
+C after the last carry has been folded in.
+C
+C The first product is started before dispatch.  n mod 4 then selects one of
+C four feed-in blocks, each of which seeds r8, r9, r10 and rbx, turns n into
+C a negative index that counts up towards zero, and enters the 4-way unrolled
+C loop at the matching point.
+PROLOGUE(func)
+C DOS64 only: save the callee-saved rsi and rdi and move up into rsi.
+IFDOS(``push %rsi '')
+IFDOS(``push %rdi '')
+IFDOS(``mov %rdx, %rsi '')
+
+ mov (up), %rax C read first u limb early
+ push %rbx
+IFSTD(` mov n_param, %rbx ') C move away n from rdx, mul uses it
+IFDOS(` mov n, %rbx ')
+ mul v0
+
+IFSTD(` mov %rbx, n ')
+
+ and $3, R32(%rbx)
+C Point rp 16 bytes below the end of the destination, so that (rp) and 8(rp)
+C in the wind-down address the last two limbs.  lea leaves the flags from the
+C and above intact for the jz.
+ lea -16(rp,n,8), rp
+ jz L(b0)
+ cmp $2, R32(%rbx)
+ jb L(b1)
+ jz L(b2)
+
+C n mod 4 == 3.  The index becomes -n-1.
+L(b3): mov $0, R32(%r8)
+ mov %rax, %rbx
+ mov $0, R32(%r9)
+ mov 8(up), %rax
+ mov %rdx, %r10
+ lea (up,n,8), up
+ not n
+ jmp L(L3)
+
+C n mod 4 == 0.  The index becomes -n.
+L(b0): mov $0, R32(%r10)
+ mov %rax, %r8
+ mov %rdx, %rbx
+ mov 8(up), %rax
+ lea (up,n,8), up
+ neg n
+ jmp L(L0)
+
+C n mod 4 == 1.  A single limb goes straight to the tail at L(n1); otherwise
+C the index becomes -n+1.
+L(b1): cmp $1, n
+ jz L(n1)
+ mov %rax, %r9
+ mov 8(up), %rax
+ mov %rdx, %r8
+ mov $0, R32(%rbx)
+ lea (up,n,8), up
+ neg n
+ inc n
+ jmp L(L1)
+
+C n mod 4 == 2.  The index becomes -n+2; when that is already non-negative
+C (n == 2) the loop is skipped entirely.
+L(b2): mov $0, R32(%rbx)
+ mov %rax, %r10
+ mov %rdx, %r9
+ mov 8(up), %rax
+ mov $0, R32(%r8)
+ lea (up,n,8), up
+ neg n
+ add $2, n
+ jns L(end)
+
+C Main loop: four limbs per pass.  The mov $0 instructions clear accumulator
+C registers without touching the carry flag, which is live across them.
+ ALIGN(32)
+L(top): mul v0
+ ADDSUB %r10, (rp,n,8)
+ adc %rax, %r9
+ mov (up,n,8), %rax
+ adc %rdx, %r8
+L(L1): mul v0
+ mov $0, R32(%r10)
+ ADDSUB %r9, 8(rp,n,8)
+ adc %rax, %r8
+ adc %rdx, %rbx
+ mov 8(up,n,8), %rax
+L(L0): mul v0
+ ADDSUB %r8, 16(rp,n,8)
+ mov $0, R32(%r8)
+ adc %rax, %rbx
+ mov $0, R32(%r9)
+ mov 16(up,n,8), %rax
+ adc %rdx, %r10
+L(L3): mul v0
+ ADDSUB %rbx, 24(rp,n,8)
+ mov $0, R32(%rbx)
+ adc %rax, %r10
+ adc %rdx, %r9
+ mov 24(up,n,8), %rax
+ add $4, n
+ js L(top)
+
+C Wind-down: last product, last two destination limbs, then the carry-out
+C limb is moved to rax as the return value.
+L(end): mul v0
+ ADDSUB %r10, (rp)
+ adc %r9, %rax
+ adc %r8, %rdx
+L(n1): ADDSUB %rax, 8(rp)
+ adc $0, %rdx
+ mov %rdx, %rax
+
+ pop %rbx
+IFDOS(``pop %rdi '')
+IFDOS(``pop %rsi '')
+ ret
+EPILOGUE()
+ASM_END()
--- /dev/null
+dnl AMD64 mpn_gcd_1.
+
+dnl Copyright 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+C No code of its own: this file declares mpn_gcd_1 and takes its
+C implementation from the x86_64/core2 source included below.
+MULFUNC_PROLOGUE(mpn_gcd_1)
+include_mpn(`x86_64/core2/gcd_1.asm')
#define MOD_1_NORM_THRESHOLD 0 /* always */
#define MOD_1_UNNORM_THRESHOLD 0 /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD 7
-#define MOD_1U_TO_MOD_1_1_THRESHOLD 5
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD 0 /* never mpn_mod_1_1p */
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD 12
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 14
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 5
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 3
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 24
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD 34
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 11
#define USE_PREINV_DIVREM_1 1 /* native */
+#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
#define DIVEXACT_1_THRESHOLD 0 /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD 24
+#define BMOD_1_TO_MOD_1_THRESHOLD 20
-#define MUL_TOOM22_THRESHOLD 18
-#define MUL_TOOM33_THRESHOLD 53
+#define MUL_TOOM22_THRESHOLD 16
+#define MUL_TOOM33_THRESHOLD 57
#define MUL_TOOM44_THRESHOLD 154
-#define MUL_TOOM6H_THRESHOLD 274
-#define MUL_TOOM8H_THRESHOLD 466
+#define MUL_TOOM6H_THRESHOLD 250
+#define MUL_TOOM8H_THRESHOLD 309
#define MUL_TOOM32_TO_TOOM43_THRESHOLD 97
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD 140
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 108
#define MUL_TOOM42_TO_TOOM53_THRESHOLD 105
#define MUL_TOOM42_TO_TOOM63_THRESHOLD 109
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD 143
#define SQR_BASECASE_THRESHOLD 0 /* always (native) */
#define SQR_TOOM2_THRESHOLD 24
-#define SQR_TOOM3_THRESHOLD 85
-#define SQR_TOOM4_THRESHOLD 119
+#define SQR_TOOM3_THRESHOLD 139
+#define SQR_TOOM4_THRESHOLD 218
#define SQR_TOOM6_THRESHOLD 318
-#define SQR_TOOM8_THRESHOLD 502
+#define SQR_TOOM8_THRESHOLD 434
+
+#define MULMID_TOOM42_THRESHOLD 22
#define MULMOD_BNM1_THRESHOLD 11
-#define SQRMOD_BNM1_THRESHOLD 16
+#define SQRMOD_BNM1_THRESHOLD 13
-#define MUL_FFT_MODF_THRESHOLD 412 /* k = 5 */
+#define MUL_FFT_MODF_THRESHOLD 396 /* k = 5 */
#define MUL_FFT_TABLE3 \
- { { 412, 5}, { 19, 6}, { 10, 5}, { 21, 6}, \
- { 11, 5}, { 23, 6}, { 21, 7}, { 11, 6}, \
- { 23, 7}, { 21, 8}, { 11, 7}, { 25, 8}, \
- { 13, 7}, { 28, 8}, { 15, 7}, { 31, 8}, \
- { 21, 9}, { 11, 8}, { 27, 9}, { 15, 8}, \
- { 33, 9}, { 19, 8}, { 41, 9}, { 23, 8}, \
- { 47, 9}, { 27,10}, { 15, 9}, { 31, 8}, \
- { 63, 9}, { 39,10}, { 23, 9}, { 51,11}, \
- { 15,10}, { 31, 9}, { 67,10}, { 39, 9}, \
- { 79,10}, { 47, 9}, { 95,10}, { 55,11}, \
- { 31,10}, { 79,11}, { 47,10}, { 103,12}, \
- { 31,11}, { 63,10}, { 127,11}, { 79,10}, \
- { 175,11}, { 95,10}, { 191,12}, { 63,11}, \
- { 127,10}, { 255,11}, { 143,10}, { 287,11}, \
- { 159,12}, { 95,13}, { 63,12}, { 127,11}, \
- { 271, 9}, { 1087,11}, { 287,10}, { 575,11}, \
- { 303,12}, { 159,11}, { 319,10}, { 671,11}, \
- { 351,12}, { 191,11}, { 383,10}, { 767,11}, \
- { 415,12}, { 223,11}, { 447,13}, { 127,12}, \
- { 255,11}, { 543,12}, { 287,11}, { 575,10}, \
- { 1215,12}, { 319,11}, { 639,12}, { 351,13}, \
- { 191,12}, { 383,11}, { 767,12}, { 415,11}, \
- { 831,10}, { 1663,12}, { 447,14}, { 127,13}, \
- { 255,12}, { 543,11}, { 1087,10}, { 2175,12}, \
- { 575,11}, { 1151,12}, { 607,11}, { 1215,13}, \
- { 319,12}, { 639,11}, { 1279,12}, { 671,11}, \
- { 1343,10}, { 2687,12}, { 703,13}, { 383,12}, \
- { 767,11}, { 1535,12}, { 831,13}, { 447,12}, \
- { 959,14}, { 255,13}, { 511,12}, { 1087,11}, \
- { 2175,13}, { 575,12}, { 1215,11}, { 2431,10}, \
- { 4863,13}, { 639,12}, { 1343,11}, { 2687,13}, \
- { 703,12}, { 1407,14}, { 383,13}, { 767,12}, \
- { 1535,13}, { 831,12}, { 1663,13}, { 959,15}, \
- { 255,14}, { 511,13}, { 1087,12}, { 2175,13}, \
- { 1215,12}, { 2431,11}, { 4863,14}, { 639,13}, \
- { 1343,12}, { 2687,13}, { 1471,12}, { 2943,11}, \
- { 5887,14}, { 767,13}, { 1599,12}, { 3199,13}, \
- { 1727,14}, { 895,13}, { 1919,12}, { 3839,15}, \
- { 511,14}, { 1023,13}, { 2175,14}, { 1151,13}, \
- { 2431,12}, { 4863,14}, { 16384,15}, { 32768,16}, \
- { 65536,17}, { 131072,18}, { 262144,19}, { 524288,20}, \
- {1048576,21}, {2097152,22}, {4194304,23}, {8388608,24} }
-#define MUL_FFT_TABLE3_SIZE 168
+ { { 380, 5}, { 21, 6}, { 11, 5}, { 23, 6}, \
+ { 23, 7}, { 12, 6}, { 25, 7}, { 13, 6}, \
+ { 27, 7}, { 15, 6}, { 31, 7}, { 25, 8}, \
+ { 13, 7}, { 27, 8}, { 15, 7}, { 32, 8}, \
+ { 17, 7}, { 35, 8}, { 21, 9}, { 11, 8}, \
+ { 27, 9}, { 15, 8}, { 35, 9}, { 19, 8}, \
+ { 41, 9}, { 23, 8}, { 47, 9}, { 27,10}, \
+ { 15, 9}, { 31, 8}, { 63, 9}, { 35, 8}, \
+ { 71, 9}, { 39,10}, { 23, 9}, { 51,11}, \
+ { 15,10}, { 31, 9}, { 71,10}, { 39, 9}, \
+ { 87,10}, { 47, 9}, { 99,10}, { 55,11}, \
+ { 31,10}, { 87,11}, { 47,10}, { 103,12}, \
+ { 31,11}, { 63,10}, { 135,11}, { 79,10}, \
+ { 167,11}, { 95,12}, { 63,11}, { 127,10}, \
+ { 255,11}, { 143,10}, { 287, 9}, { 575,10}, \
+ { 303,11}, { 159,12}, { 95,11}, { 191,13}, \
+ { 8192,14}, { 16384,15}, { 32768,16}, { 65536,17}, \
+ { 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \
+ {2097152,22}, {4194304,23}, {8388608,24} }
+#define MUL_FFT_TABLE3_SIZE 75
#define MUL_FFT_THRESHOLD 4736
-#define SQR_FFT_MODF_THRESHOLD 368 /* k = 5 */
+#define SQR_FFT_MODF_THRESHOLD 340 /* k = 5 */
#define SQR_FFT_TABLE3 \
- { { 368, 5}, { 19, 6}, { 10, 5}, { 21, 6}, \
- { 21, 7}, { 11, 6}, { 23, 7}, { 21, 8}, \
- { 11, 7}, { 25, 8}, { 13, 7}, { 28, 8}, \
- { 15, 7}, { 31, 8}, { 17, 7}, { 35, 8}, \
- { 19, 7}, { 39, 8}, { 27, 9}, { 15, 8}, \
- { 35, 9}, { 19, 8}, { 41, 9}, { 23, 8}, \
+ { { 332, 5}, { 19, 6}, { 10, 5}, { 21, 6}, \
+ { 11, 5}, { 23, 6}, { 25, 7}, { 25, 8}, \
+ { 13, 7}, { 27, 8}, { 15, 7}, { 31, 8}, \
+ { 21, 9}, { 11, 8}, { 27, 9}, { 15, 8}, \
+ { 33, 9}, { 19, 8}, { 41, 9}, { 23, 8}, \
{ 47, 9}, { 27,10}, { 15, 9}, { 39,10}, \
- { 23, 9}, { 51,11}, { 15,10}, { 31, 9}, \
+ { 23, 9}, { 47,11}, { 15,10}, { 31, 9}, \
{ 67,10}, { 39, 9}, { 79,10}, { 47, 9}, \
{ 95,10}, { 55,11}, { 31,10}, { 79,11}, \
{ 47,10}, { 95,12}, { 31,11}, { 63,10}, \
- { 135,11}, { 79,10}, { 159,11}, { 95,10}, \
- { 191,11}, { 111,12}, { 63,11}, { 127,10}, \
- { 255, 9}, { 543,11}, { 143, 9}, { 575,12}, \
- { 95,11}, { 191,13}, { 63,12}, { 127,11}, \
- { 255,10}, { 511,11}, { 271,10}, { 543,11}, \
- { 287,10}, { 575,11}, { 303,12}, { 159,11}, \
- { 335,12}, { 191,11}, { 415,12}, { 223,11}, \
- { 447,10}, { 895,13}, { 127,12}, { 255,11}, \
- { 319,11}, { 639,10}, { 1279,12}, { 351,13}, \
- { 191,12}, { 383,11}, { 767,12}, { 415,11}, \
- { 831,10}, { 1663,12}, { 447,11}, { 895,14}, \
- { 127,13}, { 255,12}, { 511,11}, { 1023,12}, \
- { 543,11}, { 1087,10}, { 2175,12}, { 575,11}, \
- { 1151,12}, { 607,13}, { 319,12}, { 639,11}, \
- { 1279,12}, { 671,11}, { 1343,10}, { 2687,12}, \
- { 703,13}, { 383,12}, { 767,11}, { 1599,12}, \
- { 831,13}, { 447,12}, { 959,14}, { 255,13}, \
- { 511,12}, { 1087,11}, { 2175,13}, { 575,12}, \
- { 1151,11}, { 2303,12}, { 1215,11}, { 2431,10}, \
- { 4863,13}, { 639,12}, { 1343,11}, { 2687,13}, \
- { 703,12}, { 1407,14}, { 383,13}, { 767,12}, \
- { 1599,13}, { 831,12}, { 1727,13}, { 895,15}, \
- { 255,14}, { 511,13}, { 1087,12}, { 2175,13}, \
- { 1215,12}, { 2431,11}, { 4863,14}, { 639,13}, \
- { 1343,12}, { 2687,13}, { 1471,12}, { 2943,11}, \
- { 5887,14}, { 767,13}, { 1599,12}, { 3199,13}, \
- { 1727,14}, { 895,13}, { 1919,12}, { 3839,15}, \
- { 511,14}, { 1023,13}, { 2175,14}, { 1151,13}, \
- { 2431,12}, { 4863,14}, { 16384,15}, { 32768,16}, \
- { 65536,17}, { 131072,18}, { 262144,19}, { 524288,20}, \
- {1048576,21}, {2097152,22}, {4194304,23}, {8388608,24} }
-#define SQR_FFT_TABLE3_SIZE 172
+ { 127, 9}, { 255,10}, { 135,11}, { 79,10}, \
+ { 159,11}, { 95,10}, { 191,12}, { 63,11}, \
+ { 127,10}, { 255, 9}, { 511,10}, { 271,11}, \
+ { 143,10}, { 303,11}, { 159,10}, { 319,12}, \
+ { 95,11}, { 191,10}, { 383,11}, { 207,13}, \
+ { 8192,14}, { 16384,15}, { 32768,16}, { 65536,17}, \
+ { 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \
+ {2097152,22}, {4194304,23}, {8388608,24} }
+#define SQR_FFT_TABLE3_SIZE 71
#define SQR_FFT_THRESHOLD 3264
-#define MULLO_BASECASE_THRESHOLD 4
-#define MULLO_DC_THRESHOLD 30
+#define MULLO_BASECASE_THRESHOLD 0 /* always */
+#define MULLO_DC_THRESHOLD 37
#define MULLO_MUL_N_THRESHOLD 8648
-#define DC_DIV_QR_THRESHOLD 38
-#define DC_DIVAPPR_Q_THRESHOLD 187
+#define DC_DIV_QR_THRESHOLD 57
+#define DC_DIVAPPR_Q_THRESHOLD 204
#define DC_BDIV_QR_THRESHOLD 48
-#define DC_BDIV_Q_THRESHOLD 92
-
-#define INV_MULMOD_BNM1_THRESHOLD 49
-#define INV_NEWTON_THRESHOLD 202
-#define INV_APPR_THRESHOLD 197
-
-#define BINV_NEWTON_THRESHOLD 246
-#define REDC_1_TO_REDC_2_THRESHOLD 55
-#define REDC_2_TO_REDC_N_THRESHOLD 0 /* anomaly: never REDC_2 */
-
-#define MU_DIV_QR_THRESHOLD 1470
-#define MU_DIVAPPR_Q_THRESHOLD 1470
-#define MUPI_DIV_QR_THRESHOLD 90
-#define MU_BDIV_QR_THRESHOLD 1187
-#define MU_BDIV_Q_THRESHOLD 1470
-
-#define MATRIX22_STRASSEN_THRESHOLD 15
-#define HGCD_THRESHOLD 96
-#define GCD_DC_THRESHOLD 400
-#define GCDEXT_DC_THRESHOLD 288
-#define JACOBI_BASE_METHOD 1
-
-#define GET_STR_DC_THRESHOLD 12
-#define GET_STR_PRECOMPUTE_THRESHOLD 27
-#define SET_STR_DC_THRESHOLD 172
-#define SET_STR_PRECOMPUTE_THRESHOLD 1341
+#define DC_BDIV_Q_THRESHOLD 107
+
+#define INV_MULMOD_BNM1_THRESHOLD 30
+#define INV_NEWTON_THRESHOLD 228
+#define INV_APPR_THRESHOLD 214
+
+#define BINV_NEWTON_THRESHOLD 248
+#define REDC_1_TO_REDC_2_THRESHOLD 51
+#define REDC_2_TO_REDC_N_THRESHOLD 0 /* always */
+
+#define MU_DIV_QR_THRESHOLD 1334
+#define MU_DIVAPPR_Q_THRESHOLD 1387
+#define MUPI_DIV_QR_THRESHOLD 108
+#define MU_BDIV_QR_THRESHOLD 1142
+#define MU_BDIV_Q_THRESHOLD 1308
+
+#define POWM_SEC_TABLE 2,44,411,580,2246
+
+#define MATRIX22_STRASSEN_THRESHOLD 17
+#define HGCD_THRESHOLD 117
+#define HGCD_APPR_THRESHOLD 50
+#define HGCD_REDUCE_THRESHOLD 2681
+#define GCD_DC_THRESHOLD 487
+#define GCDEXT_DC_THRESHOLD 318
+#define JACOBI_BASE_METHOD 4
+
+#define GET_STR_DC_THRESHOLD 13
+#define GET_STR_PRECOMPUTE_THRESHOLD 20
+#define SET_STR_DC_THRESHOLD 418
+#define SET_STR_PRECOMPUTE_THRESHOLD 1340
+
+#define FAC_DSC_THRESHOLD 462
+#define FAC_ODD_THRESHOLD 0 /* always */
--- /dev/null
+dnl AMD64 mpn_hamdist -- hamming distance.
+
+dnl Copyright 2008, 2010, 2011, 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+C No code of its own: this file declares mpn_hamdist and takes its
+C implementation from the x86_64/k10 source included below.
+MULFUNC_PROLOGUE(mpn_hamdist)
+include_mpn(`x86_64/k10/hamdist.asm')
--- /dev/null
+dnl AMD64 mpn_mul_1 optimised for AMD Bulldozer.
+
+dnl Copyright 2003, 2004, 2005, 2007, 2008, 2011, 2012 Free Software
+dnl Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C AMD K8,K9
+C AMD K10
+C AMD bd1 4
+C AMD bobcat
+C Intel P4
+C Intel core2
+C Intel NHM
+C Intel SBR
+C Intel atom
+C VIA nano
+
+C The loop of this code is the result of running a code generation and
+C optimisation tool suite written by David Harvey and Torbjorn Granlund.
+
+C TODO
+C * Move loop code into feed-in blocks, to save insn for zeroing regs.
+
+define(`rp', `%rdi') C rcx
+define(`up', `%rsi') C rdx
+define(`n_param', `%rdx') C r8
+define(`v0', `%rcx') C r9
+
+define(`n', `%rbx')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+IFDOS(` define(`up', ``%rsi'') ') dnl
+IFDOS(` define(`rp', ``%rcx'') ') dnl
+IFDOS(` define(`v0', ``%r9'') ') dnl
+IFDOS(` define(`r9', ``rdi'') ') dnl
+IFDOS(` define(`n', ``%r8'') ') dnl
+IFDOS(` define(`r8', ``rbx'') ') dnl
+
+ASM_START()
+ TEXT
+ ALIGN(16)
+C mpn_mul_1c: entry point that takes an extra carry-in limb.  It repeats the
+C prologue of mpn_mul_1, adds the carry-in to the first product (low limb in
+C rax, carry propagated into rdx), and then continues in mpn_mul_1 at
+C L(common).
+PROLOGUE(mpn_mul_1c)
+C DOS64 only: save the callee-saved rsi and rdi and move up into rsi.
+IFDOS(``push %rsi '')
+IFDOS(``push %rdi '')
+IFDOS(``mov %rdx, %rsi '')
+
+ mov (up), %rax C read first u limb early
+ push %rbx
+IFSTD(` mov n_param, %r11 ') C move away n from rdx, mul uses it
+IFDOS(` mov n, %r11 ')
+ mul v0
+
+C The carry-in is in r8 for the standard ABI and on the stack for DOS64.
+IFSTD(` add %r8, %rax ')
+IFDOS(` add 64(%rsp), %rax ') C 40 + 3*8 (3 push insns)
+ adc $0, %rdx
+ jmp L(common)
+
+EPILOGUE()
+
+ ALIGN(16)
+C mpn_mul_1: multiply each limb of up by v0, store the low result limbs at
+C rp, and return the final high limb in rax.
+C
+C The first product is started before dispatch.  n mod 4 then selects one of
+C four feed-in blocks, each of which turns n into a negative index counting
+C up towards zero and enters the 4-way unrolled loop at the matching point.
+C mpn_mul_1c joins at L(common) with its carry-in already added to rdx:rax.
+PROLOGUE(mpn_mul_1)
+C DOS64 only: save the callee-saved rsi and rdi and move up into rsi.
+IFDOS(``push %rsi '')
+IFDOS(``push %rdi '')
+IFDOS(``mov %rdx, %rsi '')
+
+ mov (up), %rax C read first u limb early
+ push %rbx
+IFSTD(` mov n_param, %r11 ') C move away n from rdx, mul uses it
+IFDOS(` mov n, %r11 ')
+ mul v0
+
+L(common):
+IFSTD(` mov %r11, n ')
+
+ and $3, R32(%r11)
+C Point rp 16 bytes below the end of the destination, so that (rp) and 8(rp)
+C in the wind-down address the last two limbs.  lea leaves the flags from the
+C and above intact for the jz.
+ lea -16(rp,n,8), rp
+ jz L(b0)
+ cmp $2, R32(%r11)
+ jb L(b1)
+ jz L(b2)
+
+C n mod 4 == 3.  The second product is issued here; the index becomes -n-1.
+L(b3): mov %rax, %r10
+ mov %rdx, %r11
+ mov 8(up), %rax
+ mul v0
+ lea (up,n,8), up
+ not n
+ jmp L(L3)
+
+C n mod 4 == 0.  The index becomes -n.
+L(b0): mov %rax, %r9
+ mov %rdx, %r10
+ mov 8(up), %rax
+ lea (up,n,8), up
+ neg n
+ jmp L(L0)
+
+C n mod 4 == 1.  A single limb goes straight to the tail at L(n1); otherwise
+C the first low limb is stored here and the index becomes -n+1.
+L(b1): mov %rax, %r8
+ cmp $1, n
+ jz L(n1)
+ mov %rdx, %r9
+ lea (up,n,8), up
+ neg n
+ mov %r8, 16(rp,n,8)
+ inc n
+ jmp L(L1)
+
+C n mod 4 == 2.  The index becomes -n+2; when that is already non-negative
+C (n == 2) the loop is skipped entirely.
+L(b2): mov %rax, %r11
+ mov %rdx, %r8
+ mov 8(up), %rax
+ lea (up,n,8), up
+ neg n
+ add $2, n
+ jns L(end)
+
+C Main loop: four limbs per pass.  Each product low limb is added to the
+C previous high limb and the carry is absorbed into the new high limb.
+ ALIGN(16)
+L(top): mul v0
+ mov %rdx, %r9
+ add %rax, %r8
+ adc $0, %r9
+ mov %r8, 8(rp,n,8)
+ mov %r11, (rp,n,8)
+L(L1): mov (up,n,8), %rax
+ mul v0
+ add %rax, %r9
+ mov %rdx, %r10
+ mov 8(up,n,8), %rax
+ adc $0, %r10
+L(L0): mul v0
+ add %rax, %r10
+ mov %rdx, %r11
+ mov 16(up,n,8), %rax
+ adc $0, %r11
+ mul v0
+ mov %r9, 16(rp,n,8)
+L(L3): add %rax, %r11
+ mov %r10, 24(rp,n,8)
+ mov %rdx, %r8
+ adc $0, %r8
+ add $4, n
+C mov does not modify flags, so the js below still tests the result of add.
+ mov -8(up,n,8), %rax
+ js L(top)
+
+C Wind-down: last product, last two result limbs, then the high limb is
+C moved to rax as the return value.
+L(end): mul v0
+ add %rax, %r8
+ adc $0, %rdx
+ mov %r11, (rp)
+L(n1): mov %r8, 8(rp)
+ mov %rdx, %rax
+
+ pop %rbx
+IFDOS(``pop %rdi '')
+IFDOS(``pop %rsi '')
+ ret
+EPILOGUE()
+ASM_END()
--- /dev/null
+dnl AMD64 mpn_popcount -- population count.
+
+dnl Copyright 2008, 2010, 2011, 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+C No code of its own: this file declares mpn_popcount and takes its
+C implementation from the x86_64/k10 source included below.
+MULFUNC_PROLOGUE(mpn_popcount)
+include_mpn(`x86_64/k10/popcount.asm')
dnl x86_64 mpn_bdiv_dbm1.
-dnl Copyright 2008 Free Software Foundation, Inc.
+dnl Copyright 2008, 2011, 2012 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
include(`../config.m4')
C cycles/limb
-C K8,K9: 2.25
-C K10: ?
-C P4: 12.5
-C P6 core2: 4.0
-C P6 corei7: 3.8
-C P6 atom: 20
+C AMD K8,K9 2.25
+C AMD K10 2.25
+C Intel P4 12.5
+C Intel core2 4
+C Intel NHM 3.75
+C Intel SBR 3.6
+C Intel atom 20
+C VIA nano 4
C TODO
-C * Do proper 4-way feed-in instead of the current epilogue
+C * Optimise feed-in code.
-C INPUT PARAMETERS shared
-define(`qp', `%rdi')
-define(`up', `%rsi')
-define(`n', `%rdx')
-define(`bd', `%rcx')
-define(`cy', `%r8')
+C INPUT PARAMETERS
+define(`qp', `%rdi')
+define(`up', `%rsi')
+define(`n_param', `%rdx')
+define(`bd', `%rcx')
+define(`cy', `%r8')
+define(`n', `%r9')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
ASM_START()
TEXT
ALIGN(16)
PROLOGUE(mpn_bdiv_dbm1c)
- mov (%rsi), %rax
- mov %rdx, %r9 C n
-
- mul %rcx
- sub %rax, %r8
- mov %r8, (%rdi)
- sbb %rdx, %r8
+C r8 carries a running value across limbs.  For each limb, the low half of
+C up[i]*bd is subtracted from r8, r8 is stored to qp[i], and then the high
+C half plus the borrow is subtracted.  The final r8 is returned in rax.
+C On DOS64 the initial r8 is the fifth argument and is read from the stack.
+ FUNC_ENTRY(4)
+IFDOS(` mov 56(%rsp), %r8 ')
+ mov (up), %rax
+ mov n_param, n
+ mov R32(n_param), R32(%r11)
+ mul bd
+C up and qp are moved to the end of their operands and n becomes a negative
+C index that counts up to zero.
+ lea (up,n,8), up
+ lea (qp,n,8), qp
+ neg n
+C n mod 4 picks the loop entry point.  For a nonzero remainder the index is
+C biased by remainder-4 so that the fixed offsets at L(lo1), L(lo2) and
+C L(lo3) address the first limb.
+ and $3, R32(%r11)
+ jz L(lo0)
+ lea -4(n,%r11), n
+ cmp $2, R32(%r11)
+ jc L(lo1)
+ jz L(lo2)
+ jmp L(lo3)
- lea (%rsi,%r9,8), %rsi
- lea (%rdi,%r9,8), %rdi
- neg %r9
- add $4, %r9
- jns L(end)
ALIGN(16)
-L(top):
- mov -24(%rsi,%r9,8), %rax
- mul %rcx
- sub %rax, %r8
- mov %r8, -24(%rdi,%r9,8)
+L(top): mov (up,n,8), %rax
+ mul bd
+L(lo0): sub %rax, %r8
+ mov %r8, (qp,n,8)
sbb %rdx, %r8
-L(3):
- mov -16(%rsi,%r9,8), %rax
- mul %rcx
- sub %rax, %r8
- mov %r8, -16(%rdi,%r9,8)
+ mov 8(up,n,8), %rax
+ mul bd
+L(lo3): sub %rax, %r8
+ mov %r8, 8(qp,n,8)
sbb %rdx, %r8
-L(2):
- mov -8(%rsi,%r9,8), %rax
- mul %rcx
- sub %rax, %r8
- mov %r8, -8(%rdi,%r9,8)
+ mov 16(up,n,8), %rax
+ mul bd
+L(lo2): sub %rax, %r8
+ mov %r8, 16(qp,n,8)
sbb %rdx, %r8
-L(1):
- mov (%rsi,%r9,8), %rax
- mul %rcx
- sub %rax, %r8
- mov %r8, (%rdi,%r9,8)
+ mov 24(up,n,8), %rax
+ mul bd
+L(lo1): sub %rax, %r8
+ mov %r8, 24(qp,n,8)
sbb %rdx, %r8
+ add $4, n
+ jnz L(top)
- add $4, %r9
- js L(top)
-L(end):
- je L(3x)
- cmp $2, %r9
- jg L(ret)
- mov $-1, %r9
- je L(1)
- jmp L(2)
-L(3x):
- dec %r9
- jmp L(3)
-
-L(ret): mov %r8, %rax
+ mov %r8, %rax
+ FUNC_EXIT()
ret
EPILOGUE()
dnl AMD64 mpn_bdiv_q_1, mpn_pi1_bdiv_q_1 -- schoolbook Hensel division by
dnl 1-limb divisor, returning quotient only.
-dnl Copyright 2001, 2002, 2004, 2005, 2006, 2009 Free Software Foundation,
-dnl Inc.
+dnl Copyright 2001, 2002, 2004, 2005, 2006, 2009, 2011, 2012 Free Software
+dnl Foundation, Inc.
dnl This file is part of the GNU MP Library.
dnl by the Free Software Foundation; either version 3 of the License, or (at
dnl your option) any later version.
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
dnl License for more details.
C cycles/limb
-C K8,K9: 10
-C K10: 10
-C P4: 33
-C P6 core2: 13.25
-C P6 corei7: 14
-C P6 atom: 42
+C AMD K8,K9 10
+C AMD K10 10
+C Intel P4 33
+C Intel core2 13.25
+C Intel corei 14
+C Intel atom 42
+C VIA nano ?
C INPUT PARAMETERS
-C rp rdi
-C up rsi
-C n rdx
-C d rcx
-C di r8 just mpn_pi1_bdiv_q_1
-C shift r9 just mpn_pi1_bdiv_q_1
+define(`rp', `%rdi')
+define(`up', `%rsi')
+define(`n', `%rdx')
+define(`d', `%rcx')
+define(`di', `%r8') C just mpn_pi1_bdiv_q_1
+define(`ncnt', `%r9') C just mpn_pi1_bdiv_q_1
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
ASM_START()
TEXT
ALIGN(16)
PROLOGUE(mpn_bdiv_q_1)
+ FUNC_ENTRY(4)
push %rbx
mov %rcx, %rax
- xor R32(%rcx), R32(%rcx) C shift count
+ xor R32(%rcx), R32(%rcx) C ncnt count
mov %rdx, %r10
bt $0, R32(%rax)
shr R32(%rax)
and $127, R32(%rax) C d/2, 7 bits
-ifdef(`PIC',`
- mov binvert_limb_table@GOTPCREL(%rip), %rdx
-',`
- movabs $binvert_limb_table, %rdx
-')
+ LEA( binvert_limb_table, %rdx)
movzbl (%rdx,%rax), R32(%rax) C inv 8 bits
EPILOGUE()
PROLOGUE(mpn_pi1_bdiv_q_1)
+ FUNC_ENTRY(4)
+IFDOS(` mov 56(%rsp), %r8 ')
+IFDOS(` mov 64(%rsp), %r9 ')
push %rbx
mov %rcx, %r11 C d
mov %rdx, %r10 C n
- mov %r9, %rcx C shift
-L(com):
- mov (%rsi), %rax C up[0]
+ mov %r9, %rcx C ncnt
+
+L(com): mov (up), %rax C up[0]
dec %r10
jz L(one)
- mov 8(%rsi), %rdx C up[1]
- lea (%rsi,%r10,8), %rsi C up end
- lea (%rdi,%r10,8), %rdi C rp end
+ mov 8(up), %rdx C up[1]
+ lea (up,%r10,8), up C up end
+ lea (rp,%r10,8), rp C rp end
neg %r10 C -n
shrd R8(%rcx), %rdx, %rax
L(top):
C rax q
C rbx carry bit, 0 or 1
- C rcx shift
+ C rcx ncnt
C rdx
- C rsi up end
- C rdi rp end
C r10 counter, limbs, negative
mul %r11 C carry limb in rdx
- mov (%rsi,%r10,8), %rax
- mov 8(%rsi,%r10,8), %r9
+ mov (up,%r10,8), %rax
+ mov 8(up,%r10,8), %r9
shrd R8(%rcx), %r9, %rax
nop
sub %rbx, %rax C apply carry bit
sub %rdx, %rax C apply carry limb
adc $0, %rbx
L(ent): imul %r8, %rax
- mov %rax, (%rdi,%r10,8)
+ mov %rax, (rp,%r10,8)
inc %r10
jnz L(top)
mul %r11 C carry limb in rdx
- mov (%rsi), %rax C up high limb
+ mov (up), %rax C up high limb
shr R8(%rcx), %rax
sub %rbx, %rax C apply carry bit
sub %rdx, %rax C apply carry limb
imul %r8, %rax
- mov %rax, (%rdi)
+ mov %rax, (rp)
pop %rbx
+ FUNC_EXIT()
ret
L(one): shr R8(%rcx), %rax
imul %r8, %rax
- mov %rax, (%rdi)
+ mov %rax, (rp)
pop %rbx
+ FUNC_EXIT()
ret
EPILOGUE()
--- /dev/null
+dnl AMD64 mpn_addmul_1 and mpn_submul_1 optimised for AMD bobcat.
+
+dnl Copyright 2003, 2004, 2005, 2007, 2008, 2011, 2012 Free Software
+dnl Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C AMD K8,K9 4.5
+C AMD K10 4.5
+C AMD bd1 4.75
+C AMD bobcat 5
+C Intel P4 17.7
+C Intel core2 5.5
+C Intel NHM 5.43
+C Intel SBR 3.92
+C Intel atom 23
+C VIA nano 5.63
+
+C The loop of this code is the result of running a code generation and
+C optimisation tool suite written by David Harvey and Torbjorn Granlund.
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ifdef(`OPERATION_addmul_1',`
+ define(`ADDSUB', `add')
+ define(`func', `mpn_addmul_1')
+')
+ifdef(`OPERATION_submul_1',`
+ define(`ADDSUB', `sub')
+ define(`func', `mpn_submul_1')
+')
+
+MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1)
+
+C Standard parameters
+define(`rp', `%rdi')
+define(`up', `%rsi')
+define(`n_param', `%rdx')
+define(`v0', `%rcx')
+C Standard allocations
+define(`n', `%rbx')
+define(`w0', `%r8')
+define(`w1', `%r9')
+define(`w2', `%r10')
+define(`w3', `%r11')
+
+C DOS64 parameters
+IFDOS(` define(`rp', `%rcx') ') dnl
+IFDOS(` define(`up', `%rsi') ') dnl
+IFDOS(` define(`n_param', `%r8') ') dnl
+IFDOS(` define(`v0', `%r9') ') dnl
+C DOS64 allocations
+IFDOS(` define(`n', `%rbx') ') dnl
+IFDOS(` define(`w0', `%r8') ') dnl
+IFDOS(` define(`w1', `%rdi') ') dnl
+IFDOS(` define(`w2', `%r10') ') dnl
+IFDOS(` define(`w3', `%r11') ') dnl
+
+ASM_START()
+ TEXT
+ ALIGN(16)
+C func is mpn_addmul_1 or mpn_submul_1, depending on which OPERATION_ symbol
+C is defined.  Each limb read from up is multiplied by v0 and the product is
+C added to (or, per ADDSUB, subtracted from) the limb vector at rp in place.
+C The final carry limb is returned in rax.
+PROLOGUE(func)
+C DOS64 only: rsi and rdi are saved because they serve as up and w1 here, and
+C the second argument is copied from rdx into rsi.
+IFDOS(` push %rsi ')
+IFDOS(` push %rdi ')
+IFDOS(` mov %rdx, %rsi ')
+
+ push %rbx
+ mov (up), %rax
+
+ C Point rp and up two limbs before the end of their vectors.  The loop
+ C indexes them with a negative n that counts up towards zero.
+ lea -16(rp,n_param,8), rp
+ lea -16(up,n_param,8), up
+
+ C Dispatch on n mod 4 to select the entry point into the 4-way unrolled
+ C loop: 0 goes to b0, 1 falls through to b1, 2 goes to b2, 3 goes to b3.
+ mov n_param, n
+ and $3, R32(n_param)
+ jz L(b0)
+ cmp $2, R32(n_param)
+ ja L(b3)
+ jz L(b2)
+
+L(b1): mul v0
+ cmp $1, n
+ jz L(n1)
+ mov %rax, w2
+ mov %rdx, w3
+ neg n
+ add $3, n
+ jmp L(L1)
+ C n = 1: a single product.  With rp biased as above, 8(rp) is the first
+ C and only result limb.
+L(n1): ADDSUB %rax, 8(rp)
+ adc $0, %rdx
+ mov %rdx, %rax
+ pop %rbx
+IFDOS(` pop %rdi ')
+IFDOS(` pop %rsi ')
+ ret
+
+L(b3): mul v0
+ mov %rax, w2
+ mov %rdx, w3
+ neg n
+ inc n
+ jmp L(L3)
+
+L(b0): mul v0
+ mov %rax, w0
+ mov %rdx, w1
+ neg n
+ add $2, n
+ jmp L(L0)
+
+L(b2): mul v0
+ mov %rax, w0
+ mov %rdx, w1
+ neg n
+ jmp L(L2)
+
+ C Main loop, four limbs per iteration.  The register pairs w1:w0 and
+ C w3:w2 alternate as holders of the most recent product; the low half is
+ C applied to rp and the carry is folded into the next product.
+ ALIGN(16)
+L(top): ADDSUB w0, -16(rp,n,8)
+ adc w1, w2
+ adc $0, w3
+L(L1): mov 0(up,n,8), %rax
+ mul v0
+ mov %rax, w0
+ mov %rdx, w1
+ ADDSUB w2, -8(rp,n,8)
+ adc w3, w0
+ adc $0, w1
+L(L0): mov 8(up,n,8), %rax
+ mul v0
+ mov %rax, w2
+ mov %rdx, w3
+ ADDSUB w0, 0(rp,n,8)
+ adc w1, w2
+ adc $0, w3
+L(L3): mov 16(up,n,8), %rax
+ mul v0
+ mov %rax, w0
+ mov %rdx, w1
+ ADDSUB w2, 8(rp,n,8)
+ adc w3, w0
+ adc $0, w1
+L(L2): mov 24(up,n,8), %rax
+ mul v0
+ mov %rax, w2
+ mov %rdx, w3
+ add $4, n
+ js L(top)
+
+ C Wind-down: apply the two products still held in registers to the last
+ C two limbs at rp and return the remaining carry limb.
+L(end): ADDSUB w0, (rp)
+ adc w1, w2
+ adc $0, w3
+ ADDSUB w2, 8(rp)
+ adc $0, w3
+ mov w3, %rax
+
+ pop %rbx
+IFDOS(` pop %rdi ')
+IFDOS(` pop %rsi ')
+ ret
+EPILOGUE()
--- /dev/null
+dnl AMD64 mpn_copyd optimised for AMD bobcat.
+
+dnl Copyright 2003, 2005, 2007, 2011, 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C AMD K8,K9 1
+C AMD K10 1-2 (alignment fluctuations)
+C AMD bd1 ?
+C AMD bobcat 1.5
+C Intel P4 2.8
+C Intel core2 1
+C Intel NHM 1-1.25
+C Intel SBR 1
+C Intel atom 2.87
+C VIA nano 2
+
+C INPUT PARAMETERS
+C rp rdi
+C up rsi
+C n rdx
+
+define(`rp',`%rdi')
+define(`up',`%rsi')
+define(`n',`%rdx')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+ TEXT
+ ALIGN(16)
+C mpn_copyd: copy n limbs from up to rp, proceeding from the highest index
+C down to the lowest.
+PROLOGUE(mpn_copyd)
+ FUNC_ENTRY(3)
+ C Bias n by -4 so that n is the index of the lowest limb of the current
+ C group of four.  With fewer than four limbs the main loop is skipped.
+ sub $4, n
+ jl L(end)
+ ALIGN(16)
+L(top): mov 24(up,n,8), %r8
+ mov %r8, 24(rp,n,8)
+ mov 16(up,n,8), %r8
+ mov %r8, 16(rp,n,8)
+ mov 8(up,n,8), %r8
+ mov %r8, 8(rp,n,8)
+ mov (up,n,8), %r8
+ mov %r8, (rp,n,8)
+L(ent): sub $4, n
+ jge L(top)
+
+ C Tail: n is now in the range -4 to -1, leaving n+4 limbs (0 to 3) at
+ C indices 0 to 2.  Copy them from the top, leaving as soon as all are done.
+L(end): cmp $-4, R32(n)
+ jz L(ret)
+ mov 24(up,n,8), %r8
+ mov %r8, 24(rp,n,8)
+ cmp $-3, R32(n)
+ jz L(ret)
+ mov 16(up,n,8), %r8
+ mov %r8, 16(rp,n,8)
+ cmp $-2, R32(n)
+ jz L(ret)
+ mov 8(up,n,8), %r8
+ mov %r8, 8(rp,n,8)
+
+L(ret): FUNC_EXIT()
+ ret
+EPILOGUE()
--- /dev/null
+dnl AMD64 mpn_copyi optimised for AMD bobcat.
+
+dnl Copyright 2003, 2005, 2007, 2011, 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C AMD K8,K9 1
+C AMD K10 1-2 (alignment fluctuations)
+C AMD bd1 ?
+C AMD bobcat 1.5
+C Intel P4 2.8
+C Intel core2 1
+C Intel NHM 1-1.25
+C Intel SBR 1
+C Intel atom 2.87
+C VIA nano 2
+
+C INPUT PARAMETERS
+C rp rdi
+C up rsi
+C n rdx
+
+define(`rp',`%rdi')
+define(`up',`%rsi')
+define(`n',`%rdx')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+ TEXT
+ ALIGN(16)
+C mpn_copyi: copy n limbs from up to rp, proceeding from the lowest index up
+C to the highest.
+PROLOGUE(mpn_copyi)
+ FUNC_ENTRY(3)
+ C Point up and rp four limbs before the end of their vectors and turn n
+ C into 4 minus the limb count, so that (up,n,8) initially addresses the
+ C first limb and n counts up as the copy advances.
+ lea -32(up,n,8), up
+ lea -32(rp,n,8), rp
+ neg n
+ add $4, n
+ C A positive n here means fewer than four limbs: skip the main loop.
+ jg L(end)
+ ALIGN(16)
+L(top): mov (up,n,8), %r8
+ mov %r8, (rp,n,8)
+ mov 8(up,n,8), %r8
+ mov %r8, 8(rp,n,8)
+ mov 16(up,n,8), %r8
+ mov %r8, 16(rp,n,8)
+ mov 24(up,n,8), %r8
+ mov %r8, 24(rp,n,8)
+L(ent): add $4, n
+ jle L(top)
+
+ C Tail: n is now in the range 1 to 4, leaving 4-n limbs (0 to 3).  Copy
+ C them in ascending order, leaving as soon as all are done.
+L(end): cmp $4, R32(n)
+ jz L(ret)
+ mov (up,n,8), %r8
+ mov %r8, (rp,n,8)
+ cmp $3, R32(n)
+ jz L(ret)
+ mov 8(up,n,8), %r8
+ mov %r8, 8(rp,n,8)
+ cmp $2, R32(n)
+ jz L(ret)
+ mov 16(up,n,8), %r8
+ mov %r8, 16(rp,n,8)
+
+L(ret): FUNC_EXIT()
+ ret
+EPILOGUE()
-/* AMD K8 gmp-mparam.h -- Compiler/machine parameter header file.
+/* AMD Bobcat gmp-mparam.h -- Compiler/machine parameter header file.
Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007,
-2008, 2009, 2010, 2012 Free Software Foundation, Inc.
+2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
#define GMP_LIMB_BITS 64
#define BYTES_PER_MP_LIMB 8
+/* 1600 MHz AMD Bobcat E-350 */
#define MOD_1_NORM_THRESHOLD 0 /* always */
#define MOD_1_UNNORM_THRESHOLD 0 /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD 10
-#define MOD_1U_TO_MOD_1_1_THRESHOLD 4
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD 6
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD 34
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 18
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 3
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 2
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 39
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD 0 /* never mpn_mod_1s_2p */
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 7
#define USE_PREINV_DIVREM_1 1 /* native */
+#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
#define DIVEXACT_1_THRESHOLD 0 /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD 24
+#define BMOD_1_TO_MOD_1_THRESHOLD 17
-#define MUL_TOOM22_THRESHOLD 16
-#define MUL_TOOM33_THRESHOLD 45
-#define MUL_TOOM44_THRESHOLD 336
-#define MUL_TOOM6H_THRESHOLD 426
-#define MUL_TOOM8H_THRESHOLD 446
+#define MUL_TOOM22_THRESHOLD 27
+#define MUL_TOOM33_THRESHOLD 32
+#define MUL_TOOM44_THRESHOLD 272
+#define MUL_TOOM6H_THRESHOLD 357
+#define MUL_TOOM8H_THRESHOLD 0 /* always */
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD 89
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD 98
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD 89
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD 103
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD 115
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 172
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 119
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD 128
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD 160
#define SQR_BASECASE_THRESHOLD 0 /* always (native) */
-#define SQR_TOOM2_THRESHOLD 24
-#define SQR_TOOM3_THRESHOLD 77
-#define SQR_TOOM4_THRESHOLD 354
-#define SQR_TOOM6_THRESHOLD 366
+#define SQR_TOOM2_THRESHOLD 28
+#define SQR_TOOM3_THRESHOLD 93
+#define SQR_TOOM4_THRESHOLD 372
+#define SQR_TOOM6_THRESHOLD 0 /* always */
#define SQR_TOOM8_THRESHOLD 430
+#define MULMID_TOOM42_THRESHOLD 24
+
#define MULMOD_BNM1_THRESHOLD 11
#define SQRMOD_BNM1_THRESHOLD 13
-#define MUL_FFT_MODF_THRESHOLD 400 /* k = 5 */
+#define MUL_FFT_MODF_THRESHOLD 460 /* k = 5 */
#define MUL_FFT_TABLE3 \
- { { 400, 5}, { 19, 6}, { 10, 5}, { 21, 6}, \
- { 11, 5}, { 23, 6}, { 12, 5}, { 25, 6}, \
- { 21, 7}, { 11, 6}, { 23, 7}, { 12, 6}, \
- { 25, 7}, { 15, 6}, { 31, 7}, { 21, 8}, \
+ { { 372, 5}, { 17, 6}, { 9, 5}, { 19, 6}, \
+ { 10, 5}, { 21, 6}, { 11, 5}, { 23, 6}, \
+ { 12, 5}, { 25, 6}, { 25, 7}, { 13, 6}, \
+ { 27, 7}, { 15, 6}, { 31, 7}, { 21, 8}, \
{ 11, 7}, { 25, 8}, { 13, 7}, { 28, 8}, \
{ 15, 7}, { 31, 8}, { 17, 7}, { 35, 8}, \
- { 21, 9}, { 11, 8}, { 27, 9}, { 15, 8}, \
- { 35, 9}, { 19, 8}, { 41, 9}, { 23, 8}, \
- { 49, 9}, { 27,10}, { 15, 9}, { 31, 8}, \
- { 63, 9}, { 39,10}, { 23, 9}, { 55,11}, \
- { 15,10}, { 31, 9}, { 71,10}, { 39, 9}, \
+ { 29, 9}, { 15, 8}, { 35, 9}, { 19, 8}, \
+ { 41, 9}, { 23, 8}, { 47, 9}, { 27,10}, \
+ { 15, 9}, { 39,10}, { 23, 9}, { 55,11}, \
+ { 15,10}, { 31, 9}, { 67,10}, { 39, 9}, \
{ 83,10}, { 47, 9}, { 95,10}, { 55,11}, \
- { 31,10}, { 87,11}, { 47,10}, { 103,12}, \
- { 31,11}, { 63,10}, { 135,11}, { 79,10}, \
- { 159, 9}, { 319,10}, { 167,11}, { 95,10}, \
- { 191,11}, { 111,12}, { 63,11}, { 127,10}, \
- { 271,11}, { 143,10}, { 303, 9}, { 607,11}, \
- { 159,10}, { 319,12}, { 95,11}, { 191,10}, \
- { 383,11}, { 207,13}, { 63,12}, { 127,11}, \
- { 271,10}, { 543,11}, { 287,10}, { 575,11}, \
- { 303,10}, { 607,12}, { 159,11}, { 319,10}, \
- { 639,11}, { 351,10}, { 703,11}, { 367,12}, \
- { 191,11}, { 415,12}, { 223,11}, { 447,13}, \
- { 127,12}, { 255,11}, { 543,12}, { 287,11}, \
- { 607,12}, { 319,11}, { 639,12}, { 351,11}, \
- { 703,13}, { 191,12}, { 383,11}, { 767,12}, \
- { 415,11}, { 831,12}, { 447,14}, { 127,13}, \
- { 255,12}, { 607,13}, { 319,12}, { 703,13}, \
- { 383,12}, { 831,13}, { 447,12}, { 895,14}, \
- { 255,13}, { 511,12}, { 1023,13}, { 575,12}, \
- { 1151,13}, { 703,14}, { 383,13}, { 831,12}, \
- { 1663,13}, { 895,15}, { 255,14}, { 511,13}, \
- { 1087,12}, { 2175,13}, { 1151,14}, { 639,13}, \
- { 1343,12}, { 2687,13}, { 1407,14}, { 767,13}, \
- { 1663,14}, { 895,15}, { 511,14}, { 1023,13}, \
- { 2175,14}, { 1151,13}, { 2431,12}, { 4863,14}, \
- { 1279,13}, { 2687,14}, { 1407,15}, { 767,14}, \
- { 1535,13}, { 3199,14}, { 1663,13}, { 3455,16}, \
- { 65536,17}, { 131072,18}, { 262144,19}, { 524288,20}, \
- {1048576,21}, {2097152,22}, {4194304,23}, {8388608,24} }
-#define MUL_FFT_TABLE3_SIZE 160
-#define MUL_FFT_THRESHOLD 4736
-
-#define SQR_FFT_MODF_THRESHOLD 340 /* k = 5 */
+ { 31,10}, { 79,11}, { 47,10}, { 95, 8}, \
+ { 383,10}, { 111,12}, { 31,11}, { 63,10}, \
+ { 143,11}, { 79, 9}, { 319,10}, { 167,11}, \
+ { 95,10}, { 191, 9}, { 383,10}, { 207,12}, \
+ { 63,11}, { 127,10}, { 255, 9}, { 511,10}, \
+ { 271,11}, { 143,10}, { 287, 9}, { 575,10}, \
+ { 303,11}, { 159,10}, { 319,12}, { 95,11}, \
+ { 191,10}, { 383,11}, { 207,13}, { 8192,14}, \
+ { 16384,15}, { 32768,16}, { 65536,17}, { 131072,18}, \
+ { 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \
+ {4194304,23}, {8388608,24} }
+#define MUL_FFT_TABLE3_SIZE 86
+#define MUL_FFT_THRESHOLD 5760
+
+#define SQR_FFT_MODF_THRESHOLD 376 /* k = 5 */
#define SQR_FFT_TABLE3 \
{ { 340, 5}, { 19, 6}, { 10, 5}, { 21, 6}, \
- { 21, 7}, { 11, 6}, { 23, 7}, { 21, 8}, \
- { 11, 7}, { 25, 8}, { 13, 7}, { 27, 8}, \
- { 15, 7}, { 31, 8}, { 17, 7}, { 35, 8}, \
- { 19, 7}, { 39, 8}, { 21, 9}, { 11, 8}, \
- { 27, 9}, { 15, 8}, { 35, 9}, { 19, 8}, \
- { 41, 9}, { 23, 8}, { 47, 9}, { 27,10}, \
- { 15, 9}, { 39,10}, { 23, 9}, { 51,11}, \
- { 15,10}, { 31, 9}, { 67,10}, { 39, 9}, \
- { 79,10}, { 47, 9}, { 95,10}, { 55,11}, \
- { 31,10}, { 79,11}, { 47,10}, { 95,12}, \
- { 31,11}, { 63,10}, { 127, 9}, { 255,10}, \
- { 135, 9}, { 271,11}, { 79,10}, { 159, 9}, \
- { 319,11}, { 95,10}, { 191, 9}, { 383,12}, \
+ { 21, 7}, { 11, 6}, { 23, 7}, { 13, 6}, \
+ { 27, 7}, { 21, 8}, { 11, 7}, { 25, 8}, \
+ { 13, 7}, { 27, 8}, { 15, 7}, { 31, 8}, \
+ { 17, 7}, { 35, 8}, { 19, 7}, { 39, 8}, \
+ { 21, 9}, { 11, 8}, { 27, 9}, { 15, 8}, \
+ { 35, 9}, { 19, 8}, { 41, 9}, { 23, 8}, \
+ { 47, 9}, { 27,10}, { 15, 9}, { 39,10}, \
+ { 23, 9}, { 51,11}, { 15,10}, { 31, 9}, \
+ { 67,10}, { 39, 9}, { 83,10}, { 47, 9}, \
+ { 95,10}, { 55,11}, { 31,10}, { 79,11}, \
+ { 47,10}, { 95,12}, { 31,11}, { 63,10}, \
+ { 127,11}, { 95,10}, { 191, 9}, { 383,12}, \
{ 63,11}, { 127,10}, { 255, 9}, { 511,10}, \
- { 271, 9}, { 543,11}, { 143,10}, { 287, 9}, \
- { 575,10}, { 303, 9}, { 607,11}, { 159,10}, \
- { 319, 9}, { 639,12}, { 95,11}, { 191,10}, \
- { 383,11}, { 207,10}, { 415,13}, { 63,12}, \
- { 127,11}, { 255,10}, { 511,11}, { 271,10}, \
- { 543,11}, { 287,10}, { 575,11}, { 303,12}, \
- { 159,11}, { 319,10}, { 639,11}, { 351,10}, \
- { 703,12}, { 191,11}, { 383,10}, { 767,11}, \
- { 415,12}, { 223,11}, { 479,13}, { 127,12}, \
- { 255,11}, { 543,12}, { 287,11}, { 607,12}, \
- { 319,11}, { 639,12}, { 351,11}, { 703,13}, \
- { 191,12}, { 383,11}, { 767,12}, { 415,11}, \
- { 831,12}, { 479,14}, { 127,13}, { 255,12}, \
- { 607,13}, { 319,12}, { 703,13}, { 383,12}, \
- { 831,13}, { 447,12}, { 895,14}, { 255,13}, \
- { 511,12}, { 1023,13}, { 575,12}, { 1151,13}, \
- { 703,14}, { 383,13}, { 895,15}, { 255,14}, \
- { 511,13}, { 1087,12}, { 2175,13}, { 1151,14}, \
- { 639,13}, { 1343,12}, { 2687,14}, { 767,13}, \
- { 1599,12}, { 3199,13}, { 1663,14}, { 895,15}, \
- { 511,14}, { 1023,13}, { 2175,14}, { 1151,13}, \
- { 2431,12}, { 4863,14}, { 1279,13}, { 2687,15}, \
- { 767,14}, { 1535,13}, { 3199,14}, { 1663,16}, \
- { 65536,17}, { 131072,18}, { 262144,19}, { 524288,20}, \
- {1048576,21}, {2097152,22}, {4194304,23}, {8388608,24} }
-#define SQR_FFT_TABLE3_SIZE 160
-#define SQR_FFT_THRESHOLD 3264
-
-#define MULLO_BASECASE_THRESHOLD 3
-#define MULLO_DC_THRESHOLD 43
-#define MULLO_MUL_N_THRESHOLD 9174
-
-#define DC_DIV_QR_THRESHOLD 43
-#define DC_DIVAPPR_Q_THRESHOLD 142
-#define DC_BDIV_QR_THRESHOLD 44
-#define DC_BDIV_Q_THRESHOLD 80
-
-#define INV_MULMOD_BNM1_THRESHOLD 42
-#define INV_NEWTON_THRESHOLD 181
-#define INV_APPR_THRESHOLD 157
-
-#define BINV_NEWTON_THRESHOLD 230
-#define REDC_1_TO_REDC_2_THRESHOLD 54
-#define REDC_2_TO_REDC_N_THRESHOLD 0 /* anomaly: never REDC_2 */
-
-#define MU_DIV_QR_THRESHOLD 1442
-#define MU_DIVAPPR_Q_THRESHOLD 1442
-#define MUPI_DIV_QR_THRESHOLD 91
-#define MU_BDIV_QR_THRESHOLD 1142
-#define MU_BDIV_Q_THRESHOLD 1334
-
-#define MATRIX22_STRASSEN_THRESHOLD 14
-#define HGCD_THRESHOLD 95
-#define GCD_DC_THRESHOLD 298
-#define GCDEXT_DC_THRESHOLD 283
-#define JACOBI_BASE_METHOD 1
-
-#define GET_STR_DC_THRESHOLD 15
-#define GET_STR_PRECOMPUTE_THRESHOLD 30
-#define SET_STR_DC_THRESHOLD 306
-#define SET_STR_PRECOMPUTE_THRESHOLD 1628
+ { 271, 9}, { 543,10}, { 287, 9}, { 575,10}, \
+ { 319,12}, { 95,11}, { 191,10}, { 383,11}, \
+ { 207,10}, { 415,13}, { 8192,14}, { 16384,15}, \
+ { 32768,16}, { 65536,17}, { 131072,18}, { 262144,19}, \
+ { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \
+ {8388608,24} }
+#define SQR_FFT_TABLE3_SIZE 77
+#define SQR_FFT_THRESHOLD 3648
+
+#define MULLO_BASECASE_THRESHOLD 0 /* always */
+#define MULLO_DC_THRESHOLD 52
+#define MULLO_MUL_N_THRESHOLD 11278
+
+#define DC_DIV_QR_THRESHOLD 71
+#define DC_DIVAPPR_Q_THRESHOLD 202
+#define DC_BDIV_QR_THRESHOLD 76
+#define DC_BDIV_Q_THRESHOLD 151
+
+#define INV_MULMOD_BNM1_THRESHOLD 51
+#define INV_NEWTON_THRESHOLD 248
+#define INV_APPR_THRESHOLD 204
+
+#define BINV_NEWTON_THRESHOLD 252
+#define REDC_1_TO_REDC_2_THRESHOLD 46
+#define REDC_2_TO_REDC_N_THRESHOLD 0 /* always */
+
+#define MU_DIV_QR_THRESHOLD 1470
+#define MU_DIVAPPR_Q_THRESHOLD 1589
+#define MUPI_DIV_QR_THRESHOLD 122
+#define MU_BDIV_QR_THRESHOLD 1334
+#define MU_BDIV_Q_THRESHOLD 1442
+
+#define POWM_SEC_TABLE 1,41,322,840,1421
+
+#define MATRIX22_STRASSEN_THRESHOLD 15
+#define HGCD_THRESHOLD 86
+#define HGCD_APPR_THRESHOLD 50
+#define HGCD_REDUCE_THRESHOLD 3014
+#define GCD_DC_THRESHOLD 483
+#define GCDEXT_DC_THRESHOLD 303
+#define JACOBI_BASE_METHOD 4
+
+#define GET_STR_DC_THRESHOLD 16
+#define GET_STR_PRECOMPUTE_THRESHOLD 31
+#define SET_STR_DC_THRESHOLD 266
+#define SET_STR_PRECOMPUTE_THRESHOLD 1424
+
+#define FAC_DSC_THRESHOLD 906
+#define FAC_ODD_THRESHOLD 46
--- /dev/null
+dnl AMD64 mpn_mul_1 optimised for AMD bobcat.
+
+dnl Copyright 2003, 2004, 2005, 2007, 2008, 2011, 2012 Free Software
+dnl Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C AMD K8,K9 4.5
+C AMD K10 4.5
+C AMD bd1 4.62
+C AMD bobcat 5
+C Intel P4 14
+C Intel core2 4.5
+C Intel NHM 4.23
+C Intel SBR 3.0
+C Intel atom 21
+C VIA nano 4.94
+
+C The loop of this code is the result of running a code generation and
+C optimisation tool suite written by David Harvey and Torbjorn Granlund.
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+C Standard parameters
+define(`rp', `%rdi')
+define(`up', `%rsi')
+define(`n_param', `%rdx')
+define(`v0', `%rcx')
+define(`cy', `%r8')
+C Standard allocations
+define(`n', `%rbx')
+define(`w0', `%r8')
+define(`w1', `%r9')
+define(`w2', `%r10')
+define(`w3', `%r11')
+
+C DOS64 parameters
+IFDOS(` define(`rp', `%rcx') ') dnl
+IFDOS(` define(`up', `%rsi') ') dnl
+IFDOS(` define(`n_param', `%r8') ') dnl
+IFDOS(` define(`v0', `%r9') ') dnl
+C Under DOS64 the carry-in is the fifth argument and is passed on the stack.
+C mpn_mul_1c reads it after its two 8-byte pushes, so it lives at
+C 8 (return address) + 32 (register home area) + 16 (pushes) = 56 bytes
+C above rsp.  An offset of 64 would address the following stack slot.
+IFDOS(` define(`cy', `56(%rsp)')') dnl
+C DOS64 allocations
+IFDOS(` define(`n', `%rbx') ') dnl
+IFDOS(` define(`w0', `%r8') ') dnl
+IFDOS(` define(`w1', `%rdi') ') dnl
+IFDOS(` define(`w2', `%r10') ') dnl
+IFDOS(` define(`w3', `%r11') ') dnl
+
+ASM_START()
+ TEXT
+ ALIGN(16)
+C mpn_mul_1c: same as mpn_mul_1 below, except that the carry-in cy seeds the
+C running carry w2 before joining the shared code at L(com).
+PROLOGUE(mpn_mul_1c)
+IFDOS(` push %rsi ')
+IFDOS(` push %rdi ')
+IFDOS(` mov %rdx, %rsi ')
+ mov cy, w2
+ jmp L(com)
+EPILOGUE()
+
+C mpn_mul_1: multiply each limb read from up by v0 and store the low product
+C limbs at rp, propagating the high halves as carry.  The final carry limb is
+C returned in rax.
+PROLOGUE(mpn_mul_1)
+C DOS64 only: rsi and rdi are saved because they serve as up and w1 here, and
+C the second argument is copied from rdx into rsi.
+IFDOS(` push %rsi ')
+IFDOS(` push %rdi ')
+IFDOS(` mov %rdx, %rsi ')
+ xor w2, w2
+L(com): push %rbx
+ mov (up), %rax
+
+ C Point rp and up two limbs before the end of their vectors.  The loop
+ C indexes them with a negative n that counts up towards zero.
+ lea -16(rp,n_param,8), rp
+ lea -16(up,n_param,8), up
+
+ C Dispatch on n mod 4 to select the entry point into the 4-way unrolled
+ C loop: 0 goes to b0, 1 falls through to b1, 2 goes to b2, 3 goes to b3.
+ mov n_param, n
+ and $3, R32(n_param)
+ jz L(b0)
+ cmp $2, R32(n_param)
+ ja L(b3)
+ jz L(b2)
+
+L(b1): mul v0
+ cmp $1, n
+ jz L(n1)
+ neg n
+ add $3, n
+ add %rax, w2
+ mov %rdx, w3
+ jmp L(L1)
+ C n = 1: a single product.  With rp biased as above, 8(rp) is the first
+ C and only result limb.
+L(n1): add %rax, w2
+ mov %rdx, %rax
+ mov w2, 8(rp)
+ adc $0, %rax
+ pop %rbx
+IFDOS(` pop %rdi ')
+IFDOS(` pop %rsi ')
+ ret
+
+L(b3): mul v0
+ neg n
+ inc n
+ add %rax, w2
+ mov %rdx, w3
+ jmp L(L3)
+
+L(b0): mul v0
+ mov %rax, w0
+ mov %rdx, w1
+ neg n
+ add $2, n
+ add w2, w0
+ jmp L(L0)
+
+L(b2): mul v0
+ mov %rax, w0
+ mov %rdx, w1
+ neg n
+ add w2, w0
+ jmp L(L2)
+
+ C Main loop, four limbs per iteration.  Each entry label sits on the adc
+ C that consumes the carry flag set by the add executed just before the
+ C jump (or by the preceding add inside the loop), so the feed-in code
+ C above must end with that add and nothing that alters the flag after it.
+ ALIGN(16)
+L(top): mov w0, -16(rp,n,8)
+ add w1, w2
+L(L1): adc $0, w3
+ mov 0(up,n,8), %rax
+ mul v0
+ mov %rax, w0
+ mov %rdx, w1
+ mov w2, -8(rp,n,8)
+ add w3, w0
+L(L0): adc $0, w1
+ mov 8(up,n,8), %rax
+ mul v0
+ mov %rax, w2
+ mov %rdx, w3
+ mov w0, 0(rp,n,8)
+ add w1, w2
+L(L3): adc $0, w3
+ mov 16(up,n,8), %rax
+ mul v0
+ mov %rax, w0
+ mov %rdx, w1
+ mov w2, 8(rp,n,8)
+ add w3, w0
+L(L2): adc $0, w1
+ mov 24(up,n,8), %rax
+ mul v0
+ mov %rax, w2
+ mov %rdx, w3
+ add $4, n
+ js L(top)
+
+ C Wind-down: store the last two result limbs and return the carry limb.
+L(end): mov w0, (rp)
+ add w1, w2
+ adc $0, w3
+ mov w2, 8(rp)
+ mov w3, %rax
+
+ pop %rbx
+IFDOS(` pop %rdi ')
+IFDOS(` pop %rsi ')
+ ret
+EPILOGUE()
--- /dev/null
+dnl AMD64 mpn_mul_basecase optimised for AMD bobcat.
+
+dnl Copyright 2003, 2004, 2005, 2007, 2008, 2011, 2012 Free Software
+dnl Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C AMD K8,K9 4.5
+C AMD K10 4.5
+C AMD bd1 4.75
+C AMD bobcat 5
+C Intel P4 17.7
+C Intel core2 5.5
+C Intel NHM 5.43
+C Intel SBR 3.92
+C Intel atom 23
+C VIA nano 5.63
+
+C This mul_basecase is based on mul_1 and addmul_1, since these both run at the
+C multiply insn bandwidth, without any apparent loop branch exit pipeline
+C replays experienced on K8. The structure is unusual: it falls into mul_1 in
+C the same way for all n, then it splits into 4 different wind-down blocks and
+C 4 separate addmul_1 loops.
+C
+C We have not tried using the same addmul_1 loops with a switch into feed-in
+C code, as we do in other basecase implementations. Doing that could save
+C substantial code volume, but would also probably add some overhead.
+
+C TODO
+C * Tune un < 3 code.
+C * Fix slowdown for un=vn=3 (67->71) compared to default code.
+C * This is 1263 bytes, compared to 1099 bytes for default code. Consider
+C combining addmul loops like that code. Tolerable slowdown?
+C * Lots of space could be saved by replacing the "switch" code by gradual
+C jumps out from mul_1 winddown code, perhaps with no added overhead.
+C * Are the ALIGN(16) really necessary? They add about 25 bytes of padding.
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+C Standard parameters
+define(`rp', `%rdi')
+define(`up', `%rsi')
+define(`un_param', `%rdx')
+define(`vp', `%rcx')
+define(`vn', `%r8')
+C Standard allocations
+define(`un', `%rbx')
+define(`w0', `%r10')
+define(`w1', `%r11')
+define(`w2', `%r12')
+define(`w3', `%r13')
+define(`n', `%rbp')
+define(`v0', `%r9')
+
+C Temp macro for allowing control over indexing.
+C Define to return $1 for more conservative ptr handling.
+define(`X',`$2')
+
+
+ASM_START()
+ TEXT
+ ALIGN(16)
+C mpn_mul_basecase: multiply the un_param-limb operand at up by the vn-limb
+C operand at vp and store the product at rp.
+C NOTE(review): the un = 1 path multiplies by the first limb at vp only and the
+C un = 2 path handles just vn = 1 or 2, so this code appears to rely on callers
+C passing un >= vn >= 1 -- confirm against the C prototype.
+PROLOGUE(mpn_mul_basecase)
+ FUNC_ENTRY(4)
+C DOS64 only: vn is the fifth argument and is fetched from the stack.
+IFDOS(` mov 56(%rsp), %r8d ')
+
+ mov (up), %rax
+ mov (vp), v0
+
+ cmp $2, un_param
+ ja L(ge3)
+ jz L(u2)
+
+ C un = 1: a single product.
+ mul v0 C u0 x v0
+ mov %rax, (rp)
+ mov %rdx, 8(rp)
+ FUNC_EXIT()
+ ret
+
+ C un = 2: first row (both limbs at up times the first limb at vp), then
+ C either finish (vn = 1) or add the second row at L(u2v2).
+L(u2): mul v0 C u0 x v0
+ mov %rax, (rp)
+ mov 8(up), %rax
+ mov %rdx, w0
+ mul v0
+ add %rax, w0
+ mov %rdx, w1
+ adc $0, w1
+ cmp $1, R32(vn)
+ jnz L(u2v2)
+ mov w0, 8(rp)
+ mov w1, 16(rp)
+ FUNC_EXIT()
+ ret
+
+L(u2v2):mov 8(vp), v0
+ mov (up), %rax
+ mul v0
+ add %rax, w0
+ mov w0, 8(rp)
+ mov %rdx, %r8 C CAUTION: r8 realloc
+ adc $0, %r8
+ mov 8(up), %rax
+ mul v0
+ add w1, %r8
+ adc $0, %rdx
+ add %r8, %rax
+ adc $0, %rdx
+ mov %rax, 16(rp)
+ mov %rdx, 24(rp)
+ FUNC_EXIT()
+ ret
+
+
+ C un >= 3.  The registers backing un, n, w2 and w3 are saved here and
+ C restored at L(ret).
+L(ge3): push %rbx
+ push %rbp
+ push %r12
+ push %r13
+
+ C The first limb at vp is already in v0; step vp past it.
+ lea 8(vp), vp
+
+ C Point rp and up three limbs before the end of the first row, then set
+ C un = -un_param and n = 2 - un_param as negative running indices.
+ lea -24(rp,un_param,8), rp
+ lea -24(up,un_param,8), up
+ xor R32(un), R32(un)
+ mov $2, R32(n)
+ sub un_param, un
+ sub un_param, n
+
+ mul v0
+ mov %rax, w2
+ mov %rdx, w3
+ jmp L(L3)
+
+ C First row, mul_1 style: products are stored to rp, not accumulated.
+ ALIGN(16)
+L(top): mov w0, -16(rp,n,8)
+ add w1, w2
+ adc $0, w3
+ mov (up,n,8), %rax
+ mul v0
+ mov %rax, w0
+ mov %rdx, w1
+ mov w2, -8(rp,n,8)
+ add w3, w0
+ adc $0, w1
+ mov 8(up,n,8), %rax
+ mul v0
+ mov %rax, w2
+ mov %rdx, w3
+ mov w0, (rp,n,8)
+ add w1, w2
+ adc $0, w3
+L(L3): mov 16(up,n,8), %rax
+ mul v0
+ mov %rax, w0
+ mov %rdx, w1
+ mov w2, 8(rp,n,8)
+ add w3, w0
+ adc $0, w1
+ mov 24(up,n,8), %rax
+ mul v0
+ mov %rax, w2
+ mov %rdx, w3
+ add $4, n
+ js L(top)
+
+ mov w0, -16(rp,n,8)
+ add w1, w2
+ adc $0, w3
+
+C Switch on n into right addmul_1 loop
+C n is 0, 1, 2 or 3 here, selecting r2, r1, r0 or r3 respectively.  Each of
+C those blocks finishes the first row and then runs its own outer loop.
+ test n, n
+ jz L(r2)
+ cmp $2, R32(n)
+ ja L(r3)
+ jz L(r0)
+ jmp L(r1)
+
+
+L(r3): mov w2, X(-8(rp,n,8),16(rp))
+ mov w3, X((rp,n,8),24(rp))
+ add $2, un
+
+C outer loop(3)
+C Per remaining limb at vp: load it into v0, advance vp and rp by one limb, and
+C accumulate up times v0 into rp with the addmul-style inner loop.
+L(to3): dec vn
+ jz L(ret)
+ mov (vp), v0
+ mov 8(up,un,8), %rax
+ lea 8(vp), vp
+ lea 8(rp), rp
+ mov un, n
+ mul v0
+ mov %rax, w2
+ mov %rdx, w3
+ jmp L(al3)
+
+ ALIGN(16)
+L(ta3): add w0, -16(rp,n,8)
+ adc w1, w2
+ adc $0, w3
+ mov (up,n,8), %rax
+ mul v0
+ mov %rax, w0
+ mov %rdx, w1
+ add w2, -8(rp,n,8)
+ adc w3, w0
+ adc $0, w1
+ mov 8(up,n,8), %rax
+ mul v0
+ mov %rax, w2
+ mov %rdx, w3
+ add w0, (rp,n,8)
+ adc w1, w2
+ adc $0, w3
+L(al3): mov 16(up,n,8), %rax
+ mul v0
+ mov %rax, w0
+ mov %rdx, w1
+ add w2, 8(rp,n,8)
+ adc w3, w0
+ adc $0, w1
+ mov 24(up,n,8), %rax
+ mul v0
+ mov %rax, w2
+ mov %rdx, w3
+ add $4, n
+ js L(ta3)
+
+ add w0, X(-16(rp,n,8),8(rp))
+ adc w1, w2
+ adc $0, w3
+ add w2, X(-8(rp,n,8),16(rp))
+ adc $0, w3
+ mov w3, X((rp,n,8),24(rp))
+ jmp L(to3)
+
+
+L(r2): mov X(0(up,n,8),(up)), %rax
+ mul v0
+ mov %rax, w0
+ mov %rdx, w1
+ mov w2, X(-8(rp,n,8),-8(rp))
+ add w3, w0
+ adc $0, w1
+ mov X(8(up,n,8),8(up)), %rax
+ mul v0
+ mov %rax, w2
+ mov %rdx, w3
+ mov w0, X((rp,n,8),(rp))
+ add w1, w2
+ adc $0, w3
+ mov X(16(up,n,8),16(up)), %rax
+ mul v0
+ mov %rax, w0
+ mov %rdx, w1
+ mov w2, X(8(rp,n,8),8(rp))
+ add w3, w0
+ adc $0, w1
+ mov w0, X(16(rp,n,8),16(rp))
+ adc $0, w3
+ mov w1, X(24(rp,n,8),24(rp))
+ inc un
+
+C outer loop(2)
+L(to2): dec vn
+ jz L(ret)
+ mov (vp), v0
+ mov 16(up,un,8), %rax
+ lea 8(vp), vp
+ lea 8(rp), rp
+ mov un, n
+ mul v0
+ mov %rax, w0
+ mov %rdx, w1
+ jmp L(al2)
+
+ ALIGN(16)
+L(ta2): add w0, -16(rp,n,8)
+ adc w1, w2
+ adc $0, w3
+ mov (up,n,8), %rax
+ mul v0
+ mov %rax, w0
+ mov %rdx, w1
+ add w2, -8(rp,n,8)
+ adc w3, w0
+ adc $0, w1
+ mov 8(up,n,8), %rax
+ mul v0
+ mov %rax, w2
+ mov %rdx, w3
+ add w0, (rp,n,8)
+ adc w1, w2
+ adc $0, w3
+ mov 16(up,n,8), %rax
+ mul v0
+ mov %rax, w0
+ mov %rdx, w1
+ add w2, 8(rp,n,8)
+ adc w3, w0
+ adc $0, w1
+L(al2): mov 24(up,n,8), %rax
+ mul v0
+ mov %rax, w2
+ mov %rdx, w3
+ add $4, n
+ js L(ta2)
+
+ add w0, X(-16(rp,n,8),8(rp))
+ adc w1, w2
+ adc $0, w3
+ add w2, X(-8(rp,n,8),16(rp))
+ adc $0, w3
+ mov w3, X((rp,n,8),24(rp))
+ jmp L(to2)
+
+
+L(r1): mov X(0(up,n,8),8(up)), %rax
+ mul v0
+ mov %rax, w0
+ mov %rdx, w1
+ mov w2, X(-8(rp,n,8),(rp))
+ add w3, w0
+ adc $0, w1
+ mov X(8(up,n,8),16(up)), %rax
+ mul v0
+ mov %rax, w2
+ mov %rdx, w3
+ mov w0, X((rp,n,8),8(rp))
+ add w1, w2
+ adc $0, w3
+ mov w2, X(8(rp,n,8),16(rp))
+ mov w3, X(16(rp,n,8),24(rp))
+ add $4, un
+
+C outer loop(1)
+L(to1): dec vn
+ jz L(ret)
+ mov (vp), v0
+ mov -8(up,un,8), %rax
+ lea 8(vp), vp
+ lea 8(rp), rp
+ mov un, n
+ mul v0
+ mov %rax, w2
+ mov %rdx, w3
+ jmp L(al1)
+
+ ALIGN(16)
+L(ta1): add w0, -16(rp,n,8)
+ adc w1, w2
+ adc $0, w3
+L(al1): mov (up,n,8), %rax
+ mul v0
+ mov %rax, w0
+ mov %rdx, w1
+ add w2, -8(rp,n,8)
+ adc w3, w0
+ adc $0, w1
+ mov 8(up,n,8), %rax
+ mul v0
+ mov %rax, w2
+ mov %rdx, w3
+ add w0, (rp,n,8)
+ adc w1, w2
+ adc $0, w3
+ mov 16(up,n,8), %rax
+ mul v0
+ mov %rax, w0
+ mov %rdx, w1
+ add w2, 8(rp,n,8)
+ adc w3, w0
+ adc $0, w1
+ mov 24(up,n,8), %rax
+ mul v0
+ mov %rax, w2
+ mov %rdx, w3
+ add $4, n
+ js L(ta1)
+
+ add w0, X(-16(rp,n,8),8(rp))
+ adc w1, w2
+ adc $0, w3
+ add w2, X(-8(rp,n,8),16(rp))
+ adc $0, w3
+ mov w3, X((rp,n,8),24(rp))
+ jmp L(to1)
+
+
+L(r0): mov X((up,n,8),16(up)), %rax
+ mul v0
+ mov %rax, w0
+ mov %rdx, w1
+ mov w2, X(-8(rp,n,8),8(rp))
+ add w3, w0
+ adc $0, w1
+ mov w0, X((rp,n,8),16(rp))
+ mov w1, X(8(rp,n,8),24(rp))
+ add $3, un
+
+C outer loop(0)
+L(to0): dec vn
+ jz L(ret)
+ mov (vp), v0
+ mov (up,un,8), %rax
+ lea 8(vp), vp
+ lea 8(rp), rp
+ mov un, n
+ mul v0
+ mov %rax, w0
+ mov %rdx, w1
+ jmp L(al0)
+
+ ALIGN(16)
+L(ta0): add w0, -16(rp,n,8)
+ adc w1, w2
+ adc $0, w3
+ mov (up,n,8), %rax
+ mul v0
+ mov %rax, w0
+ mov %rdx, w1
+ add w2, -8(rp,n,8)
+ adc w3, w0
+ adc $0, w1
+L(al0): mov 8(up,n,8), %rax
+ mul v0
+ mov %rax, w2
+ mov %rdx, w3
+ add w0, (rp,n,8)
+ adc w1, w2
+ adc $0, w3
+ mov 16(up,n,8), %rax
+ mul v0
+ mov %rax, w0
+ mov %rdx, w1
+ add w2, 8(rp,n,8)
+ adc w3, w0
+ adc $0, w1
+ mov 24(up,n,8), %rax
+ mul v0
+ mov %rax, w2
+ mov %rdx, w3
+ add $4, n
+ js L(ta0)
+
+ add w0, X(-16(rp,n,8),8(rp))
+ adc w1, w2
+ adc $0, w3
+ add w2, X(-8(rp,n,8),16(rp))
+ adc $0, w3
+ mov w3, X((rp,n,8),24(rp))
+ jmp L(to0)
+
+
+ C Common exit for all four outer loops: restore the registers saved at
+ C L(ge3) in reverse order.
+L(ret): pop %r13
+ pop %r12
+ pop %rbp
+ pop %rbx
+ FUNC_EXIT()
+ ret
+EPILOGUE()
--- /dev/null
+dnl AMD64 mpn_sqr_basecase optimised for AMD bobcat.
+
+dnl Copyright 2003, 2004, 2005, 2007, 2008, 2011, 2012 Free Software
+dnl Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C AMD K8,K9 4.5
+C AMD K10 4.5
+C AMD bd1 4.75
+C AMD bobcat 5
+C Intel P4 17.7
+C Intel core2 5.5
+C Intel NHM 5.43
+C Intel SBR 3.92
+C Intel atom 23
+C VIA nano 5.63
+
+C This sqr_basecase is based on mul_1 and addmul_1, since these both run at the
+C multiply insn bandwidth, without any apparent loop branch exit pipeline
+C replays experienced on K8. The structure is unusual: it falls into mul_1 in
+C the same way for all n, then it splits into 4 different wind-down blocks and
+C 4 separate addmul_1 loops.
+C
+C We have not tried using the same addmul_1 loops with a switch into feed-in
+C code, as we do in other basecase implementations. Doing that could save
+C substantial code volume, but would also probably add some overhead.
+
+C TODO
+C * Tune un < 4 code.
+C * Perhaps implement a larger final corner (it is now 2 x 1).
+C * Lots of space could be saved by replacing the "switch" code by gradual
+C jumps out from mul_1 winddown code, perhaps with no added overhead.
+C * Are the ALIGN(16) really necessary? They add about 25 bytes of padding.
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+C Standard parameters
+define(`rp', `%rdi')
+define(`up', `%rsi')
+define(`un_param', `%rdx')
+C Standard allocations
+define(`un', `%rbx')
+define(`w0', `%r8')
+define(`w1', `%r9')
+define(`w2', `%r10')
+define(`w3', `%r11')
+define(`n', `%rbp')
+define(`v0', `%rcx')
+
+C Temp macro for allowing control over indexing.
+C Define to return $1 for more conservative ptr handling.
+define(`X',`$2')
+dnl define(`X',`$1')
+
+
+ASM_START()
+ TEXT
+ ALIGN(64)
+C mpn_sqr_basecase entry. Per the defines in this file: rp = %rdi (result),
+C up = %rsi (operand), un_param = %rdx (limb count).
+PROLOGUE(mpn_sqr_basecase)
+ FUNC_ENTRY(3)
+
+C rax = up[0], needed by every size case.
+ mov (up), %rax
+
+C Only the low 32 bits of the size are compared. The flags set here are also
+C consumed by the jnz at L(ge2).
+ cmp $2, R32(un_param)
+ jae L(ge2)
+
+C un < 2: rdx:rax = up[0]^2, stored to rp[0] and rp[1].
+ mul %rax
+ mov %rax, (rp)
+ mov %rdx, 8(rp)
+ FUNC_EXIT()
+ ret
+
+C un >= 2. v0 = up[0] is kept for the larger cases too. The mov leaves the
+C flags of the earlier cmp $2 intact, so jnz is taken when un is not 2.
+L(ge2): mov (up), v0
+ jnz L(g2)
+
+C un = 2. rax still holds up[0]. The cross product up[0]*up[1] is computed
+C twice and added twice instead of being doubled.
+C rp[0] = low limb of up[0]^2, w0 = high limb.
+ mul %rax
+ mov %rax, (rp)
+ mov 8(up), %rax
+ mov %rdx, w0
+C First up[1]*up[0]: low limb added into w0, high limb plus carry kept in w1.
+ mul v0
+ add %rax, w0
+ mov %rdx, w1
+ adc $0, w1
+C Second up[0]*up[1], now with v0 = up[1]: completes rp[1].
+ mov 8(up), v0
+ mov (up), %rax
+ mul v0
+ add %rax, w0
+ mov w0, 8(rp)
+ mov %rdx, w0 C CAUTION: r8 realloc
+ adc $0, w0
+C up[1]^2 plus the two pending high limbs gives rp[2] and rp[3].
+ mov 8(up), %rax
+ mul v0
+ add w1, w0
+ adc $0, %rdx
+ add w0, %rax
+ adc $0, %rdx
+ mov %rax, 16(rp)
+ mov %rdx, 24(rp)
+ FUNC_EXIT()
+ ret
+
+C Reached with un > 2. Sizes above 3 go to L(g3), so the code below handles
+C un = 3. rax still holds up[0] from the function entry.
+L(g2): cmp $3, R32(un_param)
+ ja L(g3)
+C Diagonal: up[0]^2, up[1]^2 and up[2]^2 are stored to rp[0..5].
+ mul %rax
+ mov %rax, (rp)
+ mov %rdx, 8(rp)
+ mov 8(up), %rax
+ mul %rax
+ mov %rax, 16(rp)
+ mov %rdx, 24(rp)
+ mov 16(up), %rax
+ mul %rax
+ mov %rax, 32(rp)
+ mov %rdx, 40(rp)
+
+C Cross products with up[0]: w0 = low(up[0]*up[1]), then up[0]*up[2] is
+C accumulated into w1 and w2.
+ mov (up), v0
+ mov 8(up), %rax
+ mul v0
+ mov %rax, w0
+ mov %rdx, w1
+ mov 16(up), %rax
+ mul v0
+ xor R32(w2), R32(w2)
+ add %rax, w1
+ adc %rdx, w2
+
+C Cross product up[1]*up[2] is accumulated into w2 and w3.
+ mov 8(up), v0
+ mov 16(up), %rax
+ mul v0
+ xor R32(w3), R32(w3)
+ add %rax, w2
+ adc %rdx, w3
+C Double the cross product sum w3:w2:w1:w0. The bit shifted out of w3 is
+C collected in v0; mov is used to zero v0 because it leaves the carry intact.
+ add w0, w0
+ adc w1, w1
+ adc w2, w2
+ adc w3, w3
+ mov $0, R32(v0)
+ adc v0, v0
+C Add the doubled cross products onto the diagonal at rp[1..5].
+ add w0, 8(rp)
+ adc w1, 16(rp)
+ adc w2, 24(rp)
+ adc w3, 32(rp)
+ adc v0, 40(rp)
+ FUNC_EXIT()
+ ret
+
+C General case, un > 3. rbx and rbp are saved because un and n are defined
+C as those registers.
+L(g3): push %rbx
+ push %rbp
+
+C rp and up are advanced by un-3 limbs and the size is negated, so the loops
+C index with a negative counter that runs up towards zero.
+ mov 8(up), %rax
+ lea -24(rp,un_param,8), rp
+ lea -24(up,un_param,8), up
+ neg un_param
+ push un_param C for sqr_diag_addlsh1
+ lea (un_param), un
+ lea 3(un_param), n
+
+C First product of the first row: up[1] (in rax) times v0, which still holds
+C up[0] from L(ge2).
+ mul v0
+ mov %rax, w2
+ mov %rdx, w3
+ jmp L(L3)
+
+C First-row loop in mul_1 style, four limbs per iteration. Each low product
+C limb plus the previous high limb is stored to rp (no read of rp here).
+ ALIGN(16)
+L(top): mov w0, -16(rp,n,8)
+ add w1, w2
+ adc $0, w3
+ mov (up,n,8), %rax
+ mul v0
+ mov %rax, w0
+ mov %rdx, w1
+ mov w2, -8(rp,n,8)
+ add w3, w0
+ adc $0, w1
+ mov 8(up,n,8), %rax
+ mul v0
+ mov %rax, w2
+ mov %rdx, w3
+ mov w0, (rp,n,8)
+ add w1, w2
+ adc $0, w3
+L(L3): mov 16(up,n,8), %rax
+ mul v0
+ mov %rax, w0
+ mov %rdx, w1
+ mov w2, 8(rp,n,8)
+ add w3, w0
+ adc $0, w1
+ mov 24(up,n,8), %rax
+ mul v0
+ mov %rax, w2
+ mov %rdx, w3
+ add $4, n
+ js L(top)
+
+ mov w0, -16(rp,n,8)
+ add w1, w2
+ adc $0, w3
+
+C n is now non-negative. Select the wind-down block:
+C n = 0 -> L(r2), n = 1 -> L(r1), n = 2 -> L(r0), n > 2 -> L(r3).
+ test n, n
+ jz L(r2)
+ cmp $2, R32(n)
+ ja L(r3)
+ jz L(r0)
+
+
+C Wind-down blocks for the first row. X is defined as its second argument in
+C this file, so the fixed-displacement operand forms are the ones assembled.
+C Each block finishes the remaining products with v0, stores the tail of the
+C row, adjusts un and enters the matching addmul_1 stage.
+C
+C L(r1): two more products, then enter at L(to0).
+L(r1): mov X((up,n,8),8(up)), %rax
+ mul v0
+ mov %rax, w0
+ mov %rdx, w1
+ mov w2, X(-8(rp,n,8),(rp))
+ add w3, w0
+ adc $0, w1
+ mov X(8(up,n,8),16(up)), %rax
+ mul v0
+ mov %rax, w2
+ mov %rdx, w3
+ mov w0, X((rp,n,8),8(rp))
+ add w1, w2
+ adc $0, w3
+ mov w2, X(8(rp,n,8),16(rp))
+ mov w3, X(16(rp,n,8),24(rp))
+ add $5, un
+ jmp L(to0)
+
+C L(r2): three more products, then enter at L(to1).
+L(r2): mov X((up,n,8),(up)), %rax
+ mul v0
+ mov %rax, w0
+ mov %rdx, w1
+ mov w2, X(-8(rp,n,8),-8(rp))
+ add w3, w0
+ adc $0, w1
+ mov X(8(up,n,8),8(up)), %rax
+ mul v0
+ mov %rax, w2
+ mov %rdx, w3
+ mov w0, X((rp,n,8),(rp))
+ add w1, w2
+ adc $0, w3
+ mov X(16(up,n,8),16(up)), %rax
+ mul v0
+ mov %rax, w0
+ mov %rdx, w1
+ mov w2, X(8(rp,n,8),8(rp))
+ add w3, w0
+ adc $0, w1
+ mov w0, X(16(rp,n,8),16(rp))
+C NOTE(review): w3 is not read again before L(to1) overwrites it, so the
+C next adc looks redundant -- confirm before removing.
+ adc $0, w3
+ mov w1, X(24(rp,n,8),24(rp))
+ add $6, un
+ jmp L(to1)
+
+C L(r3): no products remain, only the two pending limbs are stored, then
+C enter at L(to2).
+L(r3): mov w2, X(-8(rp,n,8),16(rp))
+ mov w3, X((rp,n,8),24(rp))
+ add $3, un
+ jmp L(to2)
+
+C L(r0): one more product, then fall through into L(outer).
+L(r0): mov X((up,n,8),16(up)), %rax
+ mul v0
+ mov %rax, w0
+ mov %rdx, w1
+ mov w2, X(-8(rp,n,8),8(rp))
+ add w3, w0
+ adc $0, w1
+ mov w0, X((rp,n,8),16(rp))
+ mov w1, X(8(rp,n,8),24(rp))
+ add $4, un
+C jmp L(to3)
+C fall through into main loop
+
+
+C First of four chained addmul_1 stages (outer, to2, to1, to0). Each stage
+C reloads n from un, takes the next limb of up as multiplier v0, advances rp
+C by one limb and adds the row of products into rp. This stage enters its
+C unrolled loop at L(al3).
+L(outer):
+ mov un, n
+ mov (up,un,8), v0
+ mov 8(up,un,8), %rax
+ lea 8(rp), rp
+ mul v0
+ mov %rax, w2
+ mov %rdx, w3
+ jmp L(al3)
+
+C Four products per iteration. Low limbs are added into rp and the carries
+C ripple through the w0..w3 register pairs.
+ ALIGN(16)
+L(ta3): add w0, -16(rp,n,8)
+ adc w1, w2
+ adc $0, w3
+ mov (up,n,8), %rax
+ mul v0
+ mov %rax, w0
+ mov %rdx, w1
+ add w2, -8(rp,n,8)
+ adc w3, w0
+ adc $0, w1
+ mov 8(up,n,8), %rax
+ mul v0
+ mov %rax, w2
+ mov %rdx, w3
+ add w0, (rp,n,8)
+ adc w1, w2
+ adc $0, w3
+L(al3): mov 16(up,n,8), %rax
+ mul v0
+ mov %rax, w0
+ mov %rdx, w1
+ add w2, 8(rp,n,8)
+ adc w3, w0
+ adc $0, w1
+ mov 24(up,n,8), %rax
+ mul v0
+ mov %rax, w2
+ mov %rdx, w3
+ add $4, n
+ js L(ta3)
+
+C Row tail: add the two pending limbs into rp and store the final high limb.
+ add w0, X(-16(rp,n,8),8(rp))
+ adc w1, w2
+ adc $0, w3
+ add w2, X(-8(rp,n,8),16(rp))
+ adc $0, w3
+ mov w3, X((rp,n,8),24(rp))
+
+
+C Second addmul_1 stage and the only exit test of the outer loop: the unsigned
+C compare leaves for L(end) once the low 32 bits of un are -4 or above.
+C This is also the only stage that advances un (by 4). n keeps the old un.
+L(to2): mov un, n
+ cmp $-4, R32(un)
+ jnc L(end)
+ add $4, un
+ mov 8(up,n,8), v0
+ mov 16(up,n,8), %rax
+ lea 8(rp), rp
+ mul v0
+ mov %rax, w0
+ mov %rdx, w1
+ jmp L(al2)
+
+C Same unrolled addmul_1 body as the other stages, entered at L(al2).
+ ALIGN(16)
+L(ta2): add w0, -16(rp,n,8)
+ adc w1, w2
+ adc $0, w3
+ mov (up,n,8), %rax
+ mul v0
+ mov %rax, w0
+ mov %rdx, w1
+ add w2, -8(rp,n,8)
+ adc w3, w0
+ adc $0, w1
+ mov 8(up,n,8), %rax
+ mul v0
+ mov %rax, w2
+ mov %rdx, w3
+ add w0, (rp,n,8)
+ adc w1, w2
+ adc $0, w3
+ mov 16(up,n,8), %rax
+ mul v0
+ mov %rax, w0
+ mov %rdx, w1
+ add w2, 8(rp,n,8)
+ adc w3, w0
+ adc $0, w1
+L(al2): mov 24(up,n,8), %rax
+ mul v0
+ mov %rax, w2
+ mov %rdx, w3
+ add $4, n
+ js L(ta2)
+
+C Row tail: add the two pending limbs into rp and store the final high limb.
+ add w0, X(-16(rp,n,8),8(rp))
+ adc w1, w2
+ adc $0, w3
+ add w2, X(-8(rp,n,8),16(rp))
+ adc $0, w3
+ mov w3, X((rp,n,8),24(rp))
+
+
+C Third addmul_1 stage. The multiplier is the limb at -16(up,un,8) and the
+C first multiplicand limb is at -8(up,un,8). Enters the loop at L(al1).
+L(to1): mov un, n
+ mov -16(up,un,8), v0
+ mov -8(up,un,8), %rax
+ lea 8(rp), rp
+ mul v0
+ mov %rax, w2
+ mov %rdx, w3
+ jmp L(al1)
+
+C Same unrolled addmul_1 body as the other stages.
+ ALIGN(16)
+L(ta1): add w0, -16(rp,n,8)
+ adc w1, w2
+ adc $0, w3
+L(al1): mov (up,n,8), %rax
+ mul v0
+ mov %rax, w0
+ mov %rdx, w1
+ add w2, -8(rp,n,8)
+ adc w3, w0
+ adc $0, w1
+ mov 8(up,n,8), %rax
+ mul v0
+ mov %rax, w2
+ mov %rdx, w3
+ add w0, (rp,n,8)
+ adc w1, w2
+ adc $0, w3
+ mov 16(up,n,8), %rax
+ mul v0
+ mov %rax, w0
+ mov %rdx, w1
+ add w2, 8(rp,n,8)
+ adc w3, w0
+ adc $0, w1
+ mov 24(up,n,8), %rax
+ mul v0
+ mov %rax, w2
+ mov %rdx, w3
+ add $4, n
+ js L(ta1)
+
+C Row tail: add the two pending limbs into rp and store the final high limb.
+ add w0, X(-16(rp,n,8),8(rp))
+ adc w1, w2
+ adc $0, w3
+ add w2, X(-8(rp,n,8),16(rp))
+ adc $0, w3
+ mov w3, X((rp,n,8),24(rp))
+
+
+C Fourth addmul_1 stage. The multiplier is the limb at -8(up,un,8) and the
+C first multiplicand limb is at (up,un,8). Enters the loop at L(al0) and
+C jumps back to L(outer) when the row is done.
+L(to0): mov un, n
+ mov -8(up,un,8), v0
+ mov (up,un,8), %rax
+ lea 8(rp), rp
+ mul v0
+ mov %rax, w0
+ mov %rdx, w1
+ jmp L(al0)
+
+C Same unrolled addmul_1 body as the other stages.
+ ALIGN(16)
+L(ta0): add w0, -16(rp,n,8)
+ adc w1, w2
+ adc $0, w3
+ mov (up,n,8), %rax
+ mul v0
+ mov %rax, w0
+ mov %rdx, w1
+ add w2, -8(rp,n,8)
+ adc w3, w0
+ adc $0, w1
+L(al0): mov 8(up,n,8), %rax
+ mul v0
+ mov %rax, w2
+ mov %rdx, w3
+ add w0, (rp,n,8)
+ adc w1, w2
+ adc $0, w3
+ mov 16(up,n,8), %rax
+ mul v0
+ mov %rax, w0
+ mov %rdx, w1
+ add w2, 8(rp,n,8)
+ adc w3, w0
+ adc $0, w1
+ mov 24(up,n,8), %rax
+ mul v0
+ mov %rax, w2
+ mov %rdx, w3
+ add $4, n
+ js L(ta0)
+
+C Row tail: add the two pending limbs into rp and store the final high limb.
+ add w0, X(-16(rp,n,8),8(rp))
+ adc w1, w2
+ adc $0, w3
+ add w2, X(-8(rp,n,8),16(rp))
+ adc $0, w3
+ mov w3, X((rp,n,8),24(rp))
+ jmp L(outer)
+
+
+C Final corner of the cross products, using the limbs at (up), 8(up) and
+C 16(up). The two products with the limb at (up) are added into 16(rp) and
+C 24(rp). The product of the limbs at 8(up) and 16(up) then completes 32(rp)
+C and 40(rp).
+L(end): mov X(8(up,un,8),(up)), v0
+ mov X(16(up,un,8),8(up)), %rax
+ mul v0
+ mov %rax, w0
+ mov %rdx, w1
+ mov X(24(up,un,8),16(up)), %rax
+ mul v0
+ mov %rax, w2
+ mov %rdx, w3
+ add w0, X(24(rp,un,8),16(rp))
+ adc w1, w2
+ adc $0, w3
+ add w2, X(32(rp,un,8),24(rp))
+ adc $0, w3
+ mov X(16(up,un,8),8(up)), v0
+ mov X(24(up,un,8),16(up)), %rax
+ mul v0
+ add %rax, w3
+ mov w3, X(40(rp,un,8),32(rp))
+ adc $0, %rdx
+ mov %rdx, X(48(rp,un,8),40(rp))
+
+
+C sqr_diag_addlsh1
+C Final pass: the cross products accumulated in rp are doubled (adc of a
+C register with itself) and the squares of the limbs of up are added in.
+
+C Re-bias the pointers and recover the negated size pushed at L(g3).
+C n = 2 + 2*(-un) indexes rp with scale 8 and up with scale 4.
+ lea 16(up), up
+ lea 40(rp), rp
+ pop n
+ lea 2(n,n), n
+
+C First square: its low limb is stored directly as the lowest result limb.
+ mov (up,n,4), %rax
+ mul %rax
+ xor R32(w2), R32(w2)
+
+ mov 8(rp,n,8), w0
+ mov %rax, (rp,n,8)
+ jmp L(lm)
+
+C Two result limbs per iteration. rbx is free to use as scratch here and
+C holds the high limb of the previous square plus the carry saved in w2.
+ ALIGN(8)
+L(tsd): add %rbx, w0
+ adc %rax, w1
+ mov w0, -8(rp,n,8)
+ mov 8(rp,n,8), w0
+ mov w1, (rp,n,8)
+L(lm): mov 16(rp,n,8), w1
+ adc w0, w0
+ adc w1, w1
+ lea (%rdx,w2), %rbx
+ mov 8(up,n,4), %rax
+C Save the carry out of the doubling before mul changes the flags.
+ setc R8(w2)
+ mul %rax
+ add $2, n
+ js L(tsd)
+
+C Last two doubled limbs plus the top limb of the final square.
+L(esd): add %rbx, w0
+ adc %rax, w1
+ mov w0, X(-8(rp,n,8),-8(rp))
+ mov w1, X((rp,n,8),(rp))
+ adc w2, %rdx
+ mov %rdx, X(8(rp,n,8),8(rp))
+
+ pop %rbp
+ pop %rbx
+ FUNC_EXIT()
+ ret
+EPILOGUE()
dnl AMD64 mpn_com.
-dnl Copyright 2004, 2005, 2006 Free Software Foundation, Inc.
+dnl Copyright 2004, 2005, 2006, 2011, 2012 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
C cycles/limb
-C K8,K9: 1.25
-C K10: 1.25
-C P4: 2.78
-C P6-15: 1.1
+C AMD K8,K9 1.25
+C AMD K10 1.25
+C Intel P4 2.78
+C Intel core2 1.1
+C Intel corei 1.5
+C Intel atom ?
+C VIA nano 2
C INPUT PARAMETERS
define(`rp',`%rdi')
define(`up',`%rsi')
define(`n',`%rdx')
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
ASM_START()
TEXT
ALIGN(32)
PROLOGUE(mpn_com)
+ FUNC_ENTRY(3)
movq (up), %r8
- movl %edx, %eax
+ movl R32(%rdx), R32(%rax)
leaq (up,n,8), up
leaq (rp,n,8), rp
negq n
- andl $3, %eax
+ andl $3, R32(%rax)
je L(b00)
- cmpl $2, %eax
+ cmpl $2, R32(%rax)
jc L(b01)
je L(b10)
movq %r9, 24(rp,n,8)
addq $4, n
jnc L(oop)
-L(ret): ret
+L(ret): FUNC_EXIT()
+ ret
EPILOGUE()
dnl AMD64 mpn_copyd -- copy limb vector, decrementing.
-dnl Copyright 2003, 2005, 2007 Free Software Foundation, Inc.
+dnl Copyright 2003, 2005, 2007, 2011, 2012 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
include(`../config.m4')
+C cycles/limb
+C AMD K8,K9 1
+C AMD K10 1
+C AMD bd1 1.36
+C AMD bobcat 1.71
+C Intel P4 2-3
+C Intel core2 1
+C Intel NHM 1
+C Intel SBR 1
+C Intel atom 2
+C VIA nano 2
-C cycles/limb
-C K8,K9: 1
-C K10: 1
-C P4: 2.8
-C P6 core2: 1.2
-C P6 corei7: 1
+IFSTD(`define(`rp',`%rdi')')
+IFSTD(`define(`up',`%rsi')')
+IFSTD(`define(`n', `%rdx')')
-C INPUT PARAMETERS
-C rp rdi
-C up rsi
-C n rdx
+IFDOS(`define(`rp',`%rcx')')
+IFDOS(`define(`up',`%rdx')')
+IFDOS(`define(`n', `%r8')')
-define(`rp',`%rdi')
-define(`up',`%rsi')
-define(`n',`%rdx')
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
ASM_START()
TEXT
- ALIGN(16)
+ ALIGN(64)
+C mpn_copyd: copies n limbs from up to rp starting at the high end. The new
+C code takes rp, up and n from the IFSTD/IFDOS register defines of this file
+C and uses only rax, r9, r10 and r11 as temporaries.
PROLOGUE(mpn_copyd)
- leaq -8(up,n,8), up
- leaq (rp,n,8), rp
- subq $4, n
+C up points at the highest source limb, rp just past the highest destination
+C limb. A borrow from the subtraction means fewer than 4 limbs remain.
+ lea -8(up,n,8), up
+ lea (rp,n,8), rp
+ sub $4, n
jc L(end)
- ALIGN(16)
-L(oop): movq (up), %r8
- movq -8(up), %r9
- leaq -32(rp), rp
- movq -16(up), %r10
- movq -24(up), %r11
- leaq -32(up), up
- movq %r8, 24(rp)
- movq %r9, 16(rp)
- subq $4, n
- movq %r10, 8(rp)
- movq %r11, (rp)
- jnc L(oop)
+C NOTE(review): presumably padding that places L(top) at a chosen offset
+C now that the explicit ALIGN(16) is gone -- confirm.
+ nop
-L(end): shrl %edx C edx = lowpart(n)
+C Main loop: four limbs per iteration, both pointers move down by 32 bytes.
+L(top): mov (up), %rax
+ mov -8(up), %r9
+ lea -32(rp), rp
+ mov -16(up), %r10
+ mov -24(up), %r11
+ lea -32(up), up
+ mov %rax, 24(rp)
+ mov %r9, 16(rp)
+ sub $4, n
+ mov %r10, 8(rp)
+ mov %r11, (rp)
+ jnc L(top)
+
+C Tail: subtracting multiples of 4 left the low two bits of n unchanged.
+C Bit 0 selects a one-limb copy, bit 1 a two-limb copy.
+L(end): shr R32(n)
jnc 1f
- movq (up), %r8
- movq %r8, -8(rp)
- leaq -8(rp), rp
- leaq -8(up), up
-1: shrl %edx C edx = lowpart(n)
+ mov (up), %rax
+ mov %rax, -8(rp)
+ lea -8(rp), rp
+ lea -8(up), up
+1: shr R32(n)
jnc 1f
- movq (up), %r8
- movq -8(up), %r9
- movq %r8, -8(rp)
- movq %r9, -16(rp)
+ mov (up), %rax
+ mov -8(up), %r9
+ mov %rax, -8(rp)
+ mov %r9, -16(rp)
1: ret
EPILOGUE()
dnl AMD64 mpn_copyi -- copy limb vector, incrementing.
-dnl Copyright 2003, 2005, 2007 Free Software Foundation, Inc.
+dnl Copyright 2003, 2005, 2007, 2011, 2012 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
include(`../config.m4')
+C cycles/limb
+C AMD K8,K9 1
+C AMD K10 1
+C AMD bd1 1.36
+C AMD bobcat 1.71
+C Intel P4 2-3
+C Intel core2 1
+C Intel NHM 1
+C Intel SBR 1
+C Intel atom 2
+C VIA nano 2
-C cycles/limb
-C K8,K9: 1
-C K10: 1
-C P4: 2.8
-C P6-15: 1.2
+IFSTD(`define(`rp',`%rdi')')
+IFSTD(`define(`up',`%rsi')')
+IFSTD(`define(`n', `%rdx')')
-C INPUT PARAMETERS
-C rp rdi
-C up rsi
-C n rdx
+IFDOS(`define(`rp',`%rcx')')
+IFDOS(`define(`up',`%rdx')')
+IFDOS(`define(`n', `%r8')')
-define(`rp',`%rdi')
-define(`up',`%rsi')
-define(`n',`%rdx')
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
ASM_START()
TEXT
- ALIGN(16)
+ ALIGN(64)
+ .byte 0,0,0,0,0,0
+C mpn_copyi: copies n limbs from up to rp starting at the low end. The new
+C code takes rp, up and n from the IFSTD/IFDOS register defines of this file
+C and uses only rax, r9, r10 and r11 as temporaries.
PROLOGUE(mpn_copyi)
- leaq -8(rp), rp
- subq $4, n
+C rp is biased down by one limb, so stores use displacements starting at 8.
+C A borrow from the subtraction means fewer than 4 limbs remain.
+ lea -8(rp), rp
+ sub $4, n
jc L(end)
- ALIGN(16)
-L(oop): movq (up), %r8
- movq 8(up), %r9
- leaq 32(rp), rp
- movq 16(up), %r10
- movq 24(up), %r11
- leaq 32(up), up
- movq %r8, -24(rp)
- movq %r9, -16(rp)
- subq $4, n
- movq %r10, -8(rp)
- movq %r11, (rp)
- jnc L(oop)
-L(end): shrl %edx C edx = lowpart(n)
+C Main loop: four limbs per iteration, both pointers move up by 32 bytes.
+L(top): mov (up), %rax
+ mov 8(up), %r9
+ lea 32(rp), rp
+ mov 16(up), %r10
+ mov 24(up), %r11
+ lea 32(up), up
+ mov %rax, -24(rp)
+ mov %r9, -16(rp)
+ sub $4, n
+ mov %r10, -8(rp)
+ mov %r11, (rp)
+ jnc L(top)
+
+C Tail: subtracting multiples of 4 left the low two bits of n unchanged.
+C Bit 0 selects a one-limb copy, bit 1 a two-limb copy.
+L(end): shr R32(n)
jnc 1f
- movq (up), %r8
- movq %r8, 8(rp)
- leaq 8(rp), rp
- leaq 8(up), up
-1: shrl %edx C edx = lowpart(n)
+ mov (up), %rax
+ mov %rax, 8(rp)
+ lea 8(rp), rp
+ lea 8(up), up
+1: shr R32(n)
jnc 1f
- movq (up), %r8
- movq 8(up), %r9
- movq %r8, 8(rp)
- movq %r9, 16(rp)
+ mov (up), %rax
+ mov 8(up), %r9
+ mov %rax, 8(rp)
+ mov %r9, 16(rp)
1: ret
EPILOGUE()
EPILOGUE()
--- /dev/null
+dnl AMD64 mpn_addlsh1_n -- rp[] = up[] + (vp[] << 1)
+dnl AMD64 mpn_rsblsh1_n -- rp[] = (vp[] << 1) - up[]
+
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
+dnl Copyright 2008, 2010, 2011, 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C Parameters for the shared code included at the bottom of this file.
+C LSH is the left shift count and RSH is 64 - LSH.
+C NOTE(review): both are presumably consumed by aorrlshC_n.asm -- confirm.
+define(LSH, 1)
+define(RSH, 63)
+
+C The build defines one OPERATION_ symbol per object. It selects the
+C add/adc or sub/sbb instruction pair and the exported function name.
+ifdef(`OPERATION_addlsh1_n', `
+ define(ADDSUB, add)
+ define(ADCSBB, adc)
+ define(func, mpn_addlsh1_n)')
+ifdef(`OPERATION_rsblsh1_n', `
+ define(ADDSUB, sub)
+ define(ADCSBB, sbb)
+ define(func, mpn_rsblsh1_n)')
+
+MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_rsblsh1_n)
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+C The function body comes from the shared file below.
+include_mpn(`x86_64/aorrlshC_n.asm')
--- /dev/null
+dnl AMD64 mpn_addlsh2_n -- rp[] = up[] + (vp[] << 2)
+dnl AMD64 mpn_rsblsh2_n -- rp[] = (vp[] << 2) - up[]
+
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
+dnl Copyright 2008, 2010, 2011, 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C Parameters for the shared code included at the bottom of this file.
+C LSH is the left shift count and RSH is 64 - LSH.
+C NOTE(review): both are presumably consumed by aorrlshC_n.asm -- confirm.
+define(LSH, 2)
+define(RSH, 62)
+
+C The build defines one OPERATION_ symbol per object. It selects the
+C add/adc or sub/sbb instruction pair and the exported function name.
+ifdef(`OPERATION_addlsh2_n', `
+ define(ADDSUB, add)
+ define(ADCSBB, adc)
+ define(func, mpn_addlsh2_n)')
+ifdef(`OPERATION_rsblsh2_n', `
+ define(ADDSUB, sub)
+ define(ADCSBB, sbb)
+ define(func, mpn_rsblsh2_n)')
+
+MULFUNC_PROLOGUE(mpn_addlsh2_n mpn_rsblsh2_n)
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+C The function body comes from the shared file below.
+include_mpn(`x86_64/aorrlshC_n.asm')
--- /dev/null
+dnl AMD64 mpn_addlsh_n and mpn_rsblsh_n. R = V2^k +- U.
+
+dnl Copyright 2011, 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C Wrapper only: this file contains no instructions of its own. It lists the
+C three entry points and both ABIs, then includes the coreinhm implementation.
+MULFUNC_PROLOGUE(mpn_addlsh_n mpn_addlsh_nc mpn_rsblsh_n)
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+include_mpn(`x86_64/coreinhm/aorrlsh_n.asm')
--- /dev/null
+dnl Core 2 mpn_add_err1_n, mpn_sub_err1_n
+
+dnl Contributed by David Harvey.
+
+dnl Copyright 2011 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C AMD K8,K9 ?
+C AMD K10 ?
+C Intel P4 ?
+C Intel core2 4.14
+C Intel corei ?
+C Intel atom ?
+C VIA nano ?
+
+
+C INPUT PARAMETERS
+define(`rp', `%rdi')
+define(`up', `%rsi')
+define(`vp', `%rdx')
+define(`ep', `%rcx')
+define(`yp', `%r8')
+define(`n', `%r9')
+define(`cy_param', `8(%rsp)')
+
+define(`el', `%rbx')
+define(`eh', `%rbp')
+define(`t0', `%r10')
+define(`t1', `%r11')
+define(`t2', `%r12')
+define(`t3', `%r13')
+define(`w0', `%r14')
+define(`w1', `%r15')
+
+ifdef(`OPERATION_add_err1_n', `
+ define(ADCSBB, adc)
+ define(func, mpn_add_err1_n)')
+ifdef(`OPERATION_sub_err1_n', `
+ define(ADCSBB, sbb)
+ define(func, mpn_sub_err1_n)')
+
+MULFUNC_PROLOGUE(mpn_add_err1_n mpn_sub_err1_n)
+
+
+ASM_START()
+ TEXT
+ ALIGN(16)
+C Entry for mpn_add_err1_n / mpn_sub_err1_n (func and ADCSBB are selected by
+C the OPERATION_ defines in this file). Register arguments per the defines:
+C rp, up, vp, ep, yp, n. The carry-in is read from 8(%rsp), which must happen
+C before the pushes below move the stack pointer.
+PROLOGUE(func)
+ mov cy_param, %rax
+
+C Callee-saved registers used as el, eh, t2, t3, w0 and w1.
+ push %rbx
+ push %rbp
+ push %r12
+ push %r13
+ push %r14
+ push %r15
+
+C Point up, vp and rp just past their last limb. The feed-in blocks negate n
+C so that it indexes upwards towards zero.
+ lea (up,n,8), up
+ lea (vp,n,8), vp
+ lea (rp,n,8), rp
+
+C Dispatch on n mod 4 so the main loop always handles a multiple of 4 limbs.
+C A remainder of 3 falls through into L(3mod4).
+ mov R32(n), R32(%r10)
+ and $3, R32(%r10)
+ jz L(0mod4)
+ cmp $2, R32(%r10)
+ jc L(1mod4)
+ jz L(2mod4)
+C Feed-in blocks. Each clears the error accumulator eh:el and biases yp so the
+C main loop can read it at negative displacements. It then handles n mod 4
+C leading limbs: limb i of the result is paired with yp limb n-1-i, which is
+C added into eh:el only when that limb produced a carry or borrow.
+C The carry lives in bit 0 of al between steps: shr moves it into CF and setc
+C saves it back.
+C
+C n mod 4 = 3: three limbs, pairing with 16(yp), 8(yp) and (yp).
+L(3mod4):
+ xor R32(el), R32(el)
+ xor R32(eh), R32(eh)
+ xor R32(t0), R32(t0)
+ xor R32(t1), R32(t1)
+ lea -24(yp,n,8), yp
+ neg n
+
+ shr $1, %al C restore carry
+ mov (up,n,8), w0
+ mov 8(up,n,8), w1
+ ADCSBB (vp,n,8), w0
+ mov w0, (rp,n,8)
+ cmovc 16(yp), el
+ ADCSBB 8(vp,n,8), w1
+ mov w1, 8(rp,n,8)
+ cmovc 8(yp), t0
+ mov 16(up,n,8), w0
+ ADCSBB 16(vp,n,8), w0
+ mov w0, 16(rp,n,8)
+ cmovc (yp), t1
+ setc %al C save carry
+ add t0, el
+ adc $0, eh
+ add t1, el
+ adc $0, eh
+
+ add $3, n
+ jnz L(loop)
+ jmp L(end)
+
+C n mod 4 = 0: nothing to feed in, go straight to the main loop.
+ ALIGN(16)
+L(0mod4):
+ xor R32(el), R32(el)
+ xor R32(eh), R32(eh)
+ lea (yp,n,8), yp
+ neg n
+ jmp L(loop)
+
+C n mod 4 = 1: one limb, pairing with (yp).
+ ALIGN(16)
+L(1mod4):
+ xor R32(el), R32(el)
+ xor R32(eh), R32(eh)
+ lea -8(yp,n,8), yp
+ neg n
+
+ shr $1, %al C restore carry
+ mov (up,n,8), w0
+ ADCSBB (vp,n,8), w0
+ mov w0, (rp,n,8)
+ cmovc (yp), el
+ setc %al C save carry
+
+ add $1, n
+ jnz L(loop)
+ jmp L(end)
+
+C n mod 4 = 2: two limbs, pairing with 8(yp) and (yp).
+ ALIGN(16)
+L(2mod4):
+ xor R32(el), R32(el)
+ xor R32(eh), R32(eh)
+ xor R32(t0), R32(t0)
+ lea -16(yp,n,8), yp
+ neg n
+
+ shr $1, %al C restore carry
+ mov (up,n,8), w0
+ mov 8(up,n,8), w1
+ ADCSBB (vp,n,8), w0
+ mov w0, (rp,n,8)
+ cmovc 8(yp), el
+ ADCSBB 8(vp,n,8), w1
+ mov w1, 8(rp,n,8)
+ cmovc (yp), t0
+ setc %al C save carry
+ add t0, el
+ adc $0, eh
+
+ add $2, n
+ jnz L(loop)
+ jmp L(end)
+
+C Main loop, four limbs per iteration. n is negative and counts up to zero;
+C yp moves down by 32 bytes per iteration. For each limb the matching yp limb
+C is kept in t0..t3 when the add or subtract produced a carry, and replaced by
+C zero otherwise. The four values are then added into eh:el.
+ ALIGN(32)
+L(loop):
+ mov (up,n,8), w0
+ shr $1, %al C restore carry
+ mov -8(yp), t0
+C t3 is zeroed with mov rather than xor so that CF set by shr survives.
+ mov $0, R32(t3)
+ ADCSBB (vp,n,8), w0
+ cmovnc t3, t0
+ mov w0, (rp,n,8)
+ mov 8(up,n,8), w1
+ mov 16(up,n,8), w0
+ ADCSBB 8(vp,n,8), w1
+ mov -16(yp), t1
+ cmovnc t3, t1
+ mov -24(yp), t2
+ mov w1, 8(rp,n,8)
+ ADCSBB 16(vp,n,8), w0
+ cmovnc t3, t2
+ mov 24(up,n,8), w1
+ ADCSBB 24(vp,n,8), w1
+C t3 is still zero here, so it ends up as the yp limb or zero.
+ cmovc -32(yp), t3
+ setc %al C save carry
+ add t0, el
+ adc $0, eh
+ add t1, el
+ adc $0, eh
+ add t2, el
+ adc $0, eh
+ lea -32(yp), yp
+ mov w0, 16(rp,n,8)
+ add t3, el
+ adc $0, eh
+ add $4, n
+C The fourth limb is stored after n was advanced, hence the -8 displacement.
+C mov leaves the zero flag of the add intact for the jnz.
+ mov w1, -8(rp,n,8)
+ jnz L(loop)
+
+C Store the two-limb error sum to ep[0] and ep[1], restore the callee-saved
+C registers in reverse push order and return. The final carry was left in al
+C by the last setc.
+L(end):
+ mov el, (ep)
+ mov eh, 8(ep)
+
+ pop %r15
+ pop %r14
+ pop %r13
+ pop %r12
+ pop %rbp
+ pop %rbx
+ ret
+EPILOGUE()
dnl Intel P6-15 mpn_add_n/mpn_sub_n -- mpn add or subtract.
-dnl Copyright 2006, 2007 Free Software Foundation, Inc.
+dnl Copyright 2006, 2007, 2011, 2012 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
C cycles/limb
-C K8,K9: 2.25
-C K10: 2
-C P4: 10
-C P6 core2: 2.05
-C P6 corei7: 2.3
+C AMD K8,K9 2.25
+C AMD K10 2
+C Intel P4 10
+C Intel core2 2.05
+C Intel NHM 2.3
+C Intel SBR 1.9
+C Intel atom ?
+C VIA nano ?
C INPUT PARAMETERS
define(`rp', `%rdi')
MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
-ASM_START()
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+ASM_START()
TEXT
ALIGN(16)
-
+C Carry-in entry point. It joins func at L(start), skipping the xor that
+C clears r8 there. On DOS64 r8 is first loaded from 56(%rsp).
+C NOTE(review): presumably the stack-passed fifth argument -- confirm against
+C the FUNC_ENTRY definition.
PROLOGUE(func_nc)
+ FUNC_ENTRY(4)
+IFDOS(` mov 56(%rsp), %r8 ')
jmp L(start)
EPILOGUE()
PROLOGUE(func)
+ FUNC_ENTRY(4)
xor %r8, %r8
L(start):
mov (up), %r10
lea -8(up,n,8), up
lea -8(vp,n,8), vp
lea -16(rp,n,8), rp
- mov %ecx, %eax
+ mov R32(%rcx), R32(%rax)
neg n
- and $3, %eax
+ and $3, R32(%rax)
je L(b00)
- add %rax, n C clear low rcx bits for jrcxz
- cmp $2, %eax
+ add %rax, n C clear low rcx bits for jrcxz
+ cmp $2, R32(%rax)
jl L(b01)
je L(b10)
L(end): ADCSBB %r11, %r10
mov %r10, 8(rp)
- mov %ecx, %eax C clear eax, ecx contains 0
- adc %eax, %eax
+ mov R32(%rcx), R32(%rax) C clear eax, ecx contains 0
+ adc R32(%rax), R32(%rax)
+ FUNC_EXIT()
ret
ALIGN(16)
+++ /dev/null
-dnl x86-64 mpn_addlsh1_n and mpn_sublsh1_n, optimized for "Core" 2.
-
-dnl Copyright 2008 Free Software Foundation, Inc.
-
-dnl This file is part of the GNU MP Library.
-
-dnl The GNU MP Library is free software; you can redistribute it and/or modify
-dnl it under the terms of the GNU Lesser General Public License as published
-dnl by the Free Software Foundation; either version 3 of the License, or (at
-dnl your option) any later version.
-
-dnl The GNU MP Library is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-dnl License for more details.
-
-dnl You should have received a copy of the GNU Lesser General Public License
-dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
-
-include(`../config.m4')
-
-C cycles/limb
-C K8,K9: 4.25
-C K10: ?
-C P4: ?
-C P6-15: 3
-
-C INPUT PARAMETERS
-define(`rp',`%rdi')
-define(`up',`%rsi')
-define(`vp',`%rdx')
-define(`n', `%rcx')
-
-ifdef(`OPERATION_addlsh1_n', `
- define(ADDSUB, add)
- define(ADCSBB, adc)
- define(func, mpn_addlsh1_n)')
-ifdef(`OPERATION_sublsh1_n', `
- define(ADDSUB, sub)
- define(ADCSBB, sbb)
- define(func, mpn_sublsh1_n)')
-
-MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_sublsh1_n)
-
-ASM_START()
- TEXT
- ALIGN(8)
-PROLOGUE(func)
- push %rbx
- push %r12
-
- mov R32(%rcx), R32(%rax)
- lea 24(up,n,8), up
- lea 24(vp,n,8), vp
- lea 24(rp,n,8), rp
- neg n
-
- xor R32(%r11), R32(%r11)
-
- mov -24(vp,n,8), %r8 C do first limb early
- shrd $63, %r8, %r11
-
- and $3, R32(%rax)
- je L(b0)
- cmp $2, R32(%rax)
- jc L(b1)
- je L(b2)
-
-L(b3): mov -16(vp,n,8), %r9
- shrd $63, %r9, %r8
- mov -8(vp,n,8), %r10
- shrd $63, %r10, %r9
- mov -24(up,n,8), %r12
- ADDSUB %r11, %r12
- mov %r12, -24(rp,n,8)
- mov -16(up,n,8), %r12
- ADCSBB %r8, %r12
- mov %r12, -16(rp,n,8)
- mov -8(up,n,8), %r12
- ADCSBB %r9, %r12
- mov %r12, -8(rp,n,8)
- mov %r10, %r11
- sbb R32(%rax), R32(%rax) C save cy
- add $3, n
- js L(top)
- jmp L(end)
-
-L(b1): mov -24(up,n,8), %r12
- ADDSUB %r11, %r12
- mov %r12, -24(rp,n,8)
- mov %r8, %r11
- sbb R32(%rax), R32(%rax) C save cy
- inc n
- js L(top)
- jmp L(end)
-
-L(b2): mov -16(vp,n,8), %r9
- shrd $63, %r9, %r8
- mov -24(up,n,8), %r12
- ADDSUB %r11, %r12
- mov %r12, -24(rp,n,8)
- mov -16(up,n,8), %r12
- ADCSBB %r8, %r12
- mov %r12, -16(rp,n,8)
- mov %r9, %r11
- sbb R32(%rax), R32(%rax) C save cy
- add $2, n
- js L(top)
- jmp L(end)
-
- ALIGN(16)
-L(top): mov -24(vp,n,8), %r8
- shrd $63, %r8, %r11
-L(b0): mov -16(vp,n,8), %r9
- shrd $63, %r9, %r8
- mov -8(vp,n,8), %r10
- shrd $63, %r10, %r9
- mov (vp,n,8), %rbx
- shrd $63, %rbx, %r10
-
- add R32(%rax), R32(%rax) C restore cy
-
- mov -24(up,n,8), %r12
- ADCSBB %r11, %r12
- mov %r12, -24(rp,n,8)
-
- mov -16(up,n,8), %r12
- ADCSBB %r8, %r12
- mov %r12, -16(rp,n,8)
-
- mov -8(up,n,8), %r12
- ADCSBB %r9, %r12
- mov %r12, -8(rp,n,8)
-
- mov (up,n,8), %r12
- ADCSBB %r10, %r12
- mov %r12, (rp,n,8)
-
- mov %rbx, %r11
- sbb R32(%rax), R32(%rax) C save cy
-
- add $4, n
- js L(top)
-
-L(end): add %r11, %r11
- pop %r12
- pop %rbx
- sbb $0, R32(%rax)
- neg R32(%rax)
- ret
-EPILOGUE()
dnl x86-64 mpn_addmul_1 and mpn_submul_1, optimized for "Core 2".
-dnl Copyright 2003, 2004, 2005, 2007, 2008, 2009 Free Software Foundation, Inc.
+dnl Copyright 2003, 2004, 2005, 2007, 2008, 2009, 2011, 2012 Free Software
+dnl Foundation, Inc.
dnl This file is part of the GNU MP Library.
include(`../config.m4')
C cycles/limb
-C K8,K9: 4
-C K10: 4
-C P4: ?
-C P6 core2: 4.3-4.5 (fluctuating)
-C P6 corei7: 5
+C AMD K8,K9 4
+C AMD K10 4
+C AMD bd1 5.1
+C AMD bobcat
+C Intel P4 ?
+C Intel core2 4.3-4.5 (fluctuating)
+C Intel NHM 5.0
+C Intel SBR 4.1
+C Intel atom ?
+C VIA nano 5.25
C INPUT PARAMETERS
define(`rp', `%rdi')
ifdef(`OPERATION_addmul_1',`
define(`ADDSUB', `add')
- define(`func', `mpn_addmul_1')
+ define(`func', `mpn_addmul_1')
+ define(`func_1c', `mpn_addmul_1c')
')
ifdef(`OPERATION_submul_1',`
define(`ADDSUB', `sub')
- define(`func', `mpn_submul_1')
+ define(`func', `mpn_submul_1')
+ define(`func_1c', `mpn_submul_1c')
')
-MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1)
+MULFUNC_PROLOGUE(mpn_addmul_1 mpn_addmul_1c mpn_submul_1 mpn_submul_1c)
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ C For DOS, on the stack we have four saved registers, return address,
+ C space for four register arguments, and finally the carry input.
+
+IFDOS(` define(`carry_in', `72(%rsp)')') dnl
+IFSTD(` define(`carry_in', `%r8')') dnl
ASM_START()
TEXT
+ ALIGN(16)
+C Carry-in entry point (mpn_addmul_1c or mpn_submul_1c, per the func_1c
+C defines in this file). It forms the first product, adds carry_in to it and
+C then joins the plain entry point at L(start_nc).
+PROLOGUE(func_1c)
+ FUNC_ENTRY(4)
+ push %rbx
+ push %rbp
+C rbx = negated limb count (the count arrives in rdx).
+ lea (%rdx), %rbx
+ neg %rbx
+
+ mov (up), %rax
+ mov (rp), %r10
+
+C The pointers are biased by the count before mul overwrites rdx.
+ lea -16(rp,%rdx,8), rp
+ lea (up,%rdx,8), up
+ mul %rcx
+C carry_in is r8 for STD64 and the stack slot 72(%rsp) for DOS64, per the
+C IFSTD/IFDOS defines in this file.
+ add carry_in, %rax
+ adc $0, %rdx
+ jmp L(start_nc)
+EPILOGUE()
+
ALIGN(16)
PROLOGUE(func)
+ FUNC_ENTRY(4)
push %rbx
push %rbp
lea (%rdx), %rbx
lea (up,%rdx,8), up
mul %rcx
+L(start_nc):
bt $0, R32(%rbx)
jc L(odd)
adc %rdx, %rax
pop %rbp
pop %rbx
+ FUNC_EXIT()
ret
EPILOGUE()
--- /dev/null
+dnl X86-64 mpn_copyd optimised for Intel Sandy Bridge.
+
+dnl Copyright 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C Wrapper only: this file contains no instructions of its own and takes the
+C mpn_copyd implementation from the fastsse palignr file included below.
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+MULFUNC_PROLOGUE(mpn_copyd)
+include_mpn(`x86_64/fastsse/copyd-palignr.asm')
--- /dev/null
+dnl X86-64 mpn_copyi optimised for Intel Sandy Bridge.
+
+dnl Copyright 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C Wrapper only: this file contains no instructions of its own and takes the
+C mpn_copyi implementation from the fastsse palignr file included below.
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+MULFUNC_PROLOGUE(mpn_copyi)
+include_mpn(`x86_64/fastsse/copyi-palignr.asm')
--- /dev/null
+dnl x86-64 mpn_divrem_1 -- mpn by limb division.
+
+dnl Copyright 2004, 2005, 2007, 2008, 2009, 2010, 2012 Free Software
+dnl Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C norm unorm frac
+C AMD K8,K9 15 15 12
+C AMD K10 15 15 12
+C Intel P4 44 44 43
+C Intel core2 24 24 19.5
+C Intel corei 19 19 18
+C Intel atom 51 51 36
+C VIA nano 46 44 22.5
+
+C mp_limb_t
+C mpn_divrem_1 (mp_ptr qp, mp_size_t fn,
+C mp_srcptr np, mp_size_t nn, mp_limb_t d)
+
+C mp_limb_t
+C mpn_preinv_divrem_1 (mp_ptr qp, mp_size_t fn,
+C mp_srcptr np, mp_size_t nn, mp_limb_t d,
+C mp_limb_t dinv, int cnt)
+
+C INPUT PARAMETERS
+define(`qp', `%rdi')
+define(`fn_param', `%rsi')
+define(`up_param', `%rdx')
+define(`un_param', `%rcx')
+define(`d', `%r8')
+define(`dinv', `%r9') C only for mpn_preinv_divrem_1
+C shift passed on stack C only for mpn_preinv_divrem_1
+
+define(`cnt', `%rcx')
+define(`up', `%rsi')
+define(`fn', `%r12')
+define(`un', `%rbx')
+
+
+C rax rbx rcx rdx rsi rdi rbp r8 r9 r10 r11 r12 r13 r14 r15
+C cnt qp d dinv
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+IFSTD(`define(`CNTOFF', `40($1)')')
+IFDOS(`define(`CNTOFF', `104($1)')')
+
+ASM_START()
+ TEXT
+ ALIGN(16)
+C mpn_preinv_divrem_1: same division as mpn_divrem_1, but the caller supplies
+C the reciprocal dinv (r9) and the shift count (on the stack), so the bsr
+C and the mpn_invert_limb call are skipped. It joins mpn_divrem_1 at L(ent).
+C Unlike mpn_divrem_1 there is no zero-size check and no skip of a small top
+C limb on this path.
+PROLOGUE(mpn_preinv_divrem_1)
+ FUNC_ENTRY(4)
+IFDOS(` mov 56(%rsp), %r8 ')
+IFDOS(` mov 64(%rsp), %r9 ')
+C rax = 0 is the incoming remainder expected at L(ent).
+ xor R32(%rax), R32(%rax)
+ push %r13
+ push %r12
+ push %rbp
+ push %rbx
+
+ mov fn_param, fn
+ mov un_param, un
+ add fn_param, un_param
+ mov up_param, up
+
+C qp points at the most significant quotient limb, index un+fn-1.
+ lea -8(qp,un_param,8), qp
+
+C cnt shares rcx with un_param, which is no longer needed. Only the low byte
+C is loaded, and the shift instructions use only cl.
+ mov CNTOFF(%rsp), R8(cnt)
+ shl R8(cnt), d
+ jmp L(ent)
+EPILOGUE()
+
+ ALIGN(16)
+C mpn_divrem_1: divides the un-limb number at up by the single limb d and
+C writes un integer quotient limbs followed by fn fraction limbs to qp, most
+C significant limb first. The remainder is returned in rax.
+PROLOGUE(mpn_divrem_1)
+ FUNC_ENTRY(4)
+IFDOS(` mov 56(%rsp), %r8 ')
+ xor R32(%rax), R32(%rax)
+ push %r13
+ push %r12
+ push %rbp
+ push %rbx
+
+ mov fn_param, fn
+ mov un_param, un
+ add fn_param, un_param
+ mov up_param, up
+C The flags are still those of the add (mov leaves them alone): return 0 when
+C un + fn = 0.
+ je L(ret)
+
+C qp points at the most significant quotient limb. rbp is the running
+C remainder, initially zero.
+ lea -8(qp,un_param,8), qp
+ xor R32(%rbp), R32(%rbp)
+
+C If the top limb of up is below d, the top quotient limb is zero and that
+C limb becomes the initial remainder, so one division step is saved.
+L(unnormalized):
+ test un, un
+ je L(44)
+ mov -8(up,un,8), %rax
+ cmp d, %rax
+ jae L(44)
+ mov %rbp, (qp)
+ mov %rax, %rbp
+ lea -8(qp), qp
+C NOTE(review): the flags here still come from the cmp above, and the jae was
+C not taken, so ZF looks always clear and this branch appears unreachable --
+C confirm.
+ je L(ret)
+ dec un
+C Normalise: the low 6 bits of the complemented bsr result are 63 - bsr, the
+C number of leading zero bits of d. The shifts use only those bits of cl.
+L(44):
+ bsr d, %rcx
+ not R32(%rcx)
+ sal R8(%rcx), d
+ sal R8(%rcx), %rbp
+
+C Compute dinv = mpn_invert_limb of the normalised d. rcx and r8 (plus rdi
+C and rsi on STD64) are saved around the call, and d is passed in the first
+C argument register of the respective ABI.
+ push %rcx
+IFSTD(` push %rdi ')
+IFSTD(` push %rsi ')
+ push %r8
+IFSTD(` mov d, %rdi ')
+IFDOS(` mov d, %rcx ')
+ CALL( mpn_invert_limb)
+ pop %r8
+IFSTD(` pop %rsi ')
+IFSTD(` pop %rdi ')
+ pop %rcx
+
+C rax = shifted remainder. With no integer limbs left, go straight to the
+C fraction loop.
+ mov %rax, dinv
+ mov %rbp, %rax
+ test un, un
+ je L(frac)
+L(ent): mov -8(up,un,8), %rbp
+ shr R8(%rcx), %rax
+ shld R8(%rcx), %rbp, %rax
+ sub $2, un
+ js L(end)
+
+ ALIGN(16)
+L(top): lea 1(%rax), %r11
+ mul dinv
+ mov (up,un,8), %r10
+ shld R8(%rcx), %r10, %rbp
+ mov %rbp, %r13
+ add %rax, %r13
+ adc %r11, %rdx
+ mov %rdx, %r11
+ imul d, %rdx
+ sub %rdx, %rbp
+ lea (d,%rbp), %rax
+ sub $8, qp
+ cmp %r13, %rbp
+ cmovc %rbp, %rax
+ adc $-1, %r11
+ cmp d, %rax
+ jae L(ufx)
+L(uok): dec un
+ mov %r11, 8(qp)
+ mov %r10, %rbp
+ jns L(top)
+
+L(end): lea 1(%rax), %r11
+ sal R8(%rcx), %rbp
+ mul dinv
+ add %rbp, %rax
+ adc %r11, %rdx
+ mov %rax, %r11
+ mov %rdx, %r13
+ imul d, %rdx
+ sub %rdx, %rbp
+ mov d, %rax
+ add %rbp, %rax
+ cmp %r11, %rbp
+ cmovc %rbp, %rax
+ adc $-1, %r13
+ cmp d, %rax
+ jae L(efx)
+L(eok): mov %r13, (qp)
+ sub $8, qp
+ jmp L(frac)
+
+L(ufx): sub d, %rax
+ inc %r11
+ jmp L(uok)
+L(efx): sub d, %rax
+ inc %r13
+ jmp L(eok)
+
+L(frac):mov d, %rbp
+ neg %rbp
+ jmp L(fent)
+
+ ALIGN(16) C K8-K10 P6-CNR P6-NHM P4
+L(ftop):mul dinv C 0,12 0,17 0,17
+ add %r11, %rdx C 5 8 10
+ mov %rax, %r11 C 4 8 3
+ mov %rdx, %r13 C 6 9 11
+ imul %rbp, %rdx C 6 9 11
+ mov d, %rax C
+ add %rdx, %rax C 10 14 14
+ cmp %r11, %rdx C 10 14 14
+ cmovc %rdx, %rax C 11 15 15
+ adc $-1, %r13 C
+ mov %r13, (qp) C
+ sub $8, qp C
+L(fent):lea 1(%rax), %r11 C
+ dec fn C
+ jns L(ftop) C
+
+ shr R8(%rcx), %rax
+L(ret): pop %rbx
+ pop %rbp
+ pop %r12
+ pop %r13
+ FUNC_EXIT()
+ ret
+EPILOGUE()
--- /dev/null
+dnl AMD64 mpn_gcd_1 optimised for Intel C2, NHM, SBR and AMD K10, BD.
+
+dnl Based on the K7 gcd_1.asm, by Kevin Ryde. Rehacked for AMD64 by Torbjorn
+dnl Granlund.
+
+dnl Copyright 2000, 2001, 2002, 2005, 2009, 2011, 2012 Free Software
+dnl Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C cycles/bit (approx)
+C AMD K8,K9 8.50
+C AMD K10 4.30
+C AMD bd1 5.00
+C AMD bobcat 10.0
+C Intel P4 18.6
+C Intel core2 3.83
+C Intel NHM 5.17
+C Intel SBR 4.69
+C Intel atom 17.0
+C VIA nano 5.44
+C Numbers measured with: speed -CD -s16-64 -t48 mpn_gcd_1
+
+C TODO
+C * Optimise inner-loop for specific CPUs.
+C * Use DIV for 1-by-1 reductions, at least for some CPUs.
+
+C Threshold of when to call bmod when U is one limb. Should be about
+C (time_in_cycles(bmod_1,1) + call_overhead) / (cycles/bit).
+C The value is a base 2 logarithm: the single limb path of mpn_gcd_1
+C compares v0 with u0 shifted right by this many bits and skips the
+C reduction when v0 is the larger of the two.
+define(`BMOD_THRES_LOG2', 6)
+
+C INPUT PARAMETERS
+define(`up', `%rdi')
+define(`n', `%rsi')
+define(`v0', `%rdx')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+C Bytes subtracted from rsp around the calls made by mpn_gcd_1, after its
+C two pushes.  STD64: return address, two pushes and these 8 bytes total 32.
+C NOTE(review): the DOS64 value of 40 presumably also covers the 32 byte
+C area a callee may use plus whatever FUNC_ENTRY pushes - confirm against
+C the macro definition, which is outside this file.
+IFDOS(`define(`STACK_ALLOC', 40)')
+IFSTD(`define(`STACK_ALLOC', 8)')
+
+C Undo some configure cleverness.
+C The problem is that C only defines the '1c' variant, and that configure
+C therefore considers modexact_1c to be the base function. It then adds a
+C special fat rule for mpn_modexact_1_odd, messing up things when a cpudep
+C gcd_1 exists without a corresponding cpudep mode1o.
+ifdef(`WANT_FAT_BINARY', `
+ define(`mpn_modexact_1_odd', `MPN_PREFIX`modexact_1_odd_x86_64'')')
+
+
+ASM_START()
+ TEXT
+ ALIGN(16)
+C mp_limb_t mpn_gcd_1 (up, n, v0)
+C
+C Returns in rax the gcd of the n limb number at up and the single limb v0.
+C Steps: record the count of trailing zero bits common to u0 and v0, make
+C v0 odd, reduce U to a single limb (directly, via mpn_mod_1, or via
+C mpn_modexact_1_odd), run a subtract and shift loop on the two limbs, and
+C finally shift the result left by the recorded count.
+C NOTE(review): bsf on a zero operand leaves its destination undefined, so
+C v0 is presumably required to be nonzero - confirm with the callers.
+PROLOGUE(mpn_gcd_1)
+ FUNC_ENTRY(3)
+ mov (up), %rax C U low limb
+ or v0, %rax
+ bsf %rax, %rax C min(ctz(u0),ctz(v0))
+
+ bsf v0, %rcx C trailing zero count of v0
+ shr R8(%rcx), v0 C strip them, v0 is odd from here on
+
+ push %rax C preserve common twos over call
+ push v0 C preserve v0 argument over call
+ sub $STACK_ALLOC, %rsp C maintain ABI required rsp alignment
+
+ cmp $1, n
+ jnz L(reduce_nby1) C more than one limb in U
+
+C Both U and V are single limbs, reduce with bmod if u0 >> v0.
+ mov (up), %r8
+ mov %r8, %rax C rax = u0, used as is when no reduction is done
+ shr $BMOD_THRES_LOG2, %r8
+ cmp %r8, v0
+ ja L(reduced) C v0 above the shifted u0: skip the call
+ jmp L(bmod)
+
+L(reduce_nby1):
+ cmp $BMOD_1_TO_MOD_1_THRESHOLD, n
+ jl L(bmod) C below the threshold use mpn_modexact_1_odd
+IFDOS(` mov %rdx, %r8 ') C DOS64 only: move the three arguments to r8, rdx, rcx
+IFDOS(` mov %rsi, %rdx ')
+IFDOS(` mov %rdi, %rcx ')
+ CALL( mpn_mod_1)
+ jmp L(reduced)
+L(bmod):
+IFDOS(` mov %rdx, %r8 ') C DOS64 only: move the three arguments to r8, rdx, rcx
+IFDOS(` mov %rsi, %rdx ')
+IFDOS(` mov %rdi, %rcx ')
+ CALL( mpn_modexact_1_odd)
+L(reduced):
+
+ add $STACK_ALLOC, %rsp
+ pop %rdx C rdx = the odd v0 saved before the call
+
+ bsf %rax, %rcx C trailing zero count of the reduced U, ZF set when it is zero
+C test %rax, %rax C FIXME: does this lower latency?
+ jnz L(mid)
+ jmp L(end) C reduced U is zero: the result is v0 times the common twos
+
+C Loop invariant at L(mid): rdx is odd, rax is nonzero and rcx holds its
+C trailing zero count.  Each pass replaces the pair by the smaller value
+C and the absolute difference, and exits when the two are equal.
+ ALIGN(16) C K10 BD C2 NHM SBR
+L(top): cmovc %r10, %rax C if x-y < 0 0,3 0,3 0,6 0,5 0,5
+ cmovc %r9, %rdx C use x,y-x 0,3 0,3 2,8 1,7 1,7
+L(mid): shr R8(%rcx), %rax C 1,7 1,6 2,8 2,8 2,8
+ mov %rdx, %r10 C 1 1 4 3 3
+ sub %rax, %r10 C 2 2 5 4 4
+ bsf %r10, %rcx C 3 3 6 5 5
+ mov %rax, %r9 C 2 2 3 3 4
+ sub %rdx, %rax C 2 2 4 3 4
+ jnz L(top) C
+
+L(end): pop %rcx C common twos saved at entry
+ mov %rdx, %rax
+ shl R8(%rcx), %rax C put the common factor of two back
+ FUNC_EXIT()
+ ret
+EPILOGUE()
/* Core 2 gmp-mparam.h -- Compiler/machine parameter header file.
Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007,
-2008, 2009, 2010 Free Software Foundation, Inc.
+2008, 2009, 2010, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
#define MOD_1_NORM_THRESHOLD 0 /* always */
#define MOD_1_UNNORM_THRESHOLD 0 /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD 5
-#define MOD_1U_TO_MOD_1_1_THRESHOLD 4
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD 5
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD 8
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 10
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 3
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 2
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 11
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD 15
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 6
#define USE_PREINV_DIVREM_1 1 /* native */
+#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
#define DIVEXACT_1_THRESHOLD 0 /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD 26
+#define BMOD_1_TO_MOD_1_THRESHOLD 23
#define MUL_TOOM22_THRESHOLD 23
#define MUL_TOOM33_THRESHOLD 65
-#define MUL_TOOM44_THRESHOLD 183
-#define MUL_TOOM6H_THRESHOLD 254
-#define MUL_TOOM8H_THRESHOLD 381
+#define MUL_TOOM44_THRESHOLD 106
+#define MUL_TOOM6H_THRESHOLD 224
+#define MUL_TOOM8H_THRESHOLD 0 /* always */
#define MUL_TOOM32_TO_TOOM43_THRESHOLD 69
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD 122
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 76
#define MUL_TOOM42_TO_TOOM53_THRESHOLD 73
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD 74
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD 72
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD 100
#define SQR_BASECASE_THRESHOLD 0 /* always (native) */
#define SQR_TOOM2_THRESHOLD 28
-#define SQR_TOOM3_THRESHOLD 97
+#define SQR_TOOM3_THRESHOLD 101
#define SQR_TOOM4_THRESHOLD 148
-#define SQR_TOOM6_THRESHOLD 254
+#define SQR_TOOM6_THRESHOLD 206
#define SQR_TOOM8_THRESHOLD 296
-#define MULMOD_BNM1_THRESHOLD 12
-#define SQRMOD_BNM1_THRESHOLD 14
+#define MULMID_TOOM42_THRESHOLD 24
+
+#define MULMOD_BNM1_THRESHOLD 18
+#define SQRMOD_BNM1_THRESHOLD 17
#define MUL_FFT_MODF_THRESHOLD 380 /* k = 5 */
#define MUL_FFT_TABLE3 \
{ { 380, 5}, { 15, 6}, { 8, 5}, { 17, 6}, \
- { 9, 5}, { 19, 6}, { 11, 5}, { 23, 6}, \
- { 19, 7}, { 10, 6}, { 21, 7}, { 11, 6}, \
- { 23, 7}, { 13, 6}, { 27, 7}, { 24, 8}, \
+ { 9, 5}, { 19, 6}, { 19, 7}, { 10, 6}, \
+ { 21, 7}, { 11, 6}, { 23, 7}, { 13, 6}, \
+ { 27, 7}, { 21, 8}, { 11, 7}, { 23, 8}, \
{ 13, 7}, { 27, 8}, { 15, 7}, { 31, 8}, \
{ 17, 7}, { 35, 8}, { 19, 7}, { 39, 8}, \
{ 21, 9}, { 11, 8}, { 27, 9}, { 15, 8}, \
{ 35, 9}, { 19, 8}, { 41, 9}, { 23, 8}, \
- { 47, 9}, { 27,10}, { 15, 9}, { 39,10}, \
- { 23, 9}, { 51,11}, { 15,10}, { 31, 9}, \
- { 63,10}, { 39, 9}, { 79,10}, { 47, 9}, \
- { 95,10}, { 55,11}, { 31,10}, { 87,11}, \
- { 47,12}, { 31,11}, { 63,10}, { 127, 9}, \
- { 255,10}, { 143,11}, { 79, 9}, { 319,11}, \
- { 95,10}, { 207,11}, { 111,12}, { 63,11}, \
- { 143,10}, { 287,11}, { 159,10}, { 319,11}, \
- { 175,12}, { 95,11}, { 191,10}, { 383,11}, \
+ { 47, 9}, { 27,10}, { 15, 9}, { 43,10}, \
+ { 23, 9}, { 55,11}, { 15,10}, { 31, 9}, \
+ { 67,10}, { 39, 9}, { 79,10}, { 55,11}, \
+ { 31, 9}, { 127,10}, { 71, 8}, { 287,10}, \
+ { 79,11}, { 47,12}, { 31,11}, { 63, 9}, \
+ { 255,10}, { 135, 9}, { 271,11}, { 79, 9}, \
+ { 319,10}, { 175,11}, { 95,10}, { 191, 9}, \
+ { 383,11}, { 111,12}, { 63,11}, { 127,10}, \
+ { 271, 9}, { 543,11}, { 143,10}, { 287,11}, \
+ { 159,10}, { 319, 9}, { 639,11}, { 175,10}, \
+ { 351,12}, { 95,11}, { 191,10}, { 383,11}, \
{ 207,10}, { 415,13}, { 63,12}, { 127,11}, \
- { 287,10}, { 575,12}, { 159,11}, { 319,10}, \
- { 639,11}, { 351,10}, { 703,11}, { 367,12}, \
- { 191,11}, { 415,10}, { 831,12}, { 223,11}, \
- { 447,10}, { 895,11}, { 479,13}, { 127,12}, \
- { 255,11}, { 543,12}, { 287,11}, { 607,12}, \
- { 319,11}, { 639,12}, { 351,11}, { 703,13}, \
- { 191,12}, { 415,11}, { 831,12}, { 447,11}, \
- { 895,12}, { 479,14}, { 127,13}, { 255,12}, \
- { 543,11}, { 1087,12}, { 607,13}, { 319,12}, \
- { 735,13}, { 383,12}, { 831,13}, { 447,12}, \
- { 959,14}, { 255,13}, { 511,12}, { 1087,13}, \
- { 575,12}, { 1215,13}, { 639,12}, { 1279,13}, \
- { 703,14}, { 383,13}, { 831,12}, { 1663,13}, \
- { 959,15}, { 255,14}, { 511,13}, { 1087,12}, \
- { 2175,13}, { 1215,14}, { 639,13}, { 1343,12}, \
- { 2687,13}, { 1407,12}, { 2815,13}, { 1471,14}, \
- { 767,13}, { 1535,12}, { 3071,13}, { 1663,14}, \
- { 895,13}, { 1791,12}, { 3583,13}, { 1919,15}, \
+ { 271,10}, { 543,11}, { 287,12}, { 159,11}, \
+ { 319,10}, { 671,11}, { 351,12}, { 191,11}, \
+ { 415,12}, { 223,11}, { 447,10}, { 895,11}, \
+ { 479,13}, { 127,12}, { 287,11}, { 607,12}, \
+ { 319,11}, { 671,12}, { 351,13}, { 191,12}, \
+ { 415,11}, { 831,10}, { 1663,12}, { 479,14}, \
+ { 127,13}, { 255,12}, { 543,11}, { 1087,12}, \
+ { 607,13}, { 319,12}, { 703,13}, { 383,12}, \
+ { 767,10}, { 3071,12}, { 831,13}, { 447,12}, \
+ { 959,14}, { 255,13}, { 511,12}, { 1023,13}, \
+ { 575,12}, { 1151,11}, { 2303,13}, { 639,12}, \
+ { 1343,13}, { 703,14}, { 383,13}, { 831,12}, \
+ { 1727,13}, { 959,15}, { 255,14}, { 511,13}, \
+ { 1087,12}, { 2175,13}, { 1215,14}, { 639,13}, \
+ { 1343,12}, { 2687,13}, { 1407,12}, { 2815,14}, \
+ { 767,13}, { 1663,14}, { 895,13}, { 1919,15}, \
{ 511,14}, { 1023,13}, { 2175,14}, { 1151,13}, \
- { 2303,12}, { 4607,13}, { 2431,12}, { 4863,14}, \
- { 1279,13}, { 2559,14}, { 1407,15}, { 767,14}, \
- { 1535,13}, { 3199,14}, { 1663,13}, { 3455,12}, \
- { 6911,14}, { 1791,13}, { 3583,14}, { 16384,15}, \
- { 32768,16}, { 65536,17}, { 131072,18}, { 262144,19}, \
- { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \
- {8388608,24} }
-#define MUL_FFT_TABLE3_SIZE 173
+ { 2431,12}, { 4863,13}, { 2495,14}, { 1279,13}, \
+ { 2687,14}, { 1407,15}, { 767,14}, { 1663,13}, \
+ { 3327,12}, { 6655,13}, { 3455,12}, { 6911,14}, \
+ { 16384,15}, { 32768,16}, { 65536,17}, { 131072,18}, \
+ { 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \
+ {4194304,23}, {8388608,24} }
+#define MUL_FFT_TABLE3_SIZE 170
#define MUL_FFT_THRESHOLD 4736
-#define SQR_FFT_MODF_THRESHOLD 256 /* k = 5 */
+#define SQR_FFT_MODF_THRESHOLD 308 /* k = 5 */
#define SQR_FFT_TABLE3 \
- { { 256, 5}, { 8, 4}, { 17, 5}, { 9, 4}, \
- { 19, 5}, { 17, 6}, { 9, 5}, { 19, 6}, \
+ { { 308, 5}, { 17, 6}, { 9, 5}, { 19, 6}, \
{ 21, 7}, { 11, 6}, { 23, 7}, { 12, 6}, \
- { 25, 7}, { 21, 8}, { 11, 7}, { 25, 8}, \
+ { 25, 7}, { 21, 8}, { 11, 7}, { 24, 8}, \
{ 13, 7}, { 27, 8}, { 15, 7}, { 31, 8}, \
{ 21, 9}, { 11, 8}, { 27, 9}, { 15, 8}, \
{ 35, 9}, { 19, 8}, { 41, 9}, { 23, 8}, \
{ 47, 9}, { 27,10}, { 15, 9}, { 39,10}, \
- { 23, 9}, { 47,11}, { 15,10}, { 31, 9}, \
- { 63,10}, { 39, 9}, { 79,10}, { 55,11}, \
- { 31,10}, { 79,11}, { 47,10}, { 95,12}, \
- { 31,11}, { 63, 8}, { 511,10}, { 135, 9}, \
- { 271,10}, { 143,11}, { 79,10}, { 159, 9}, \
- { 319,10}, { 175,11}, { 95,10}, { 191, 9}, \
- { 383,10}, { 207,11}, { 111,12}, { 63,11}, \
- { 127,10}, { 271,11}, { 143,10}, { 287, 9}, \
- { 575,10}, { 303,11}, { 159,10}, { 319, 9}, \
- { 639,12}, { 95,11}, { 191,10}, { 383,11}, \
- { 207,13}, { 63,12}, { 127,11}, { 271,10}, \
- { 543,11}, { 287,10}, { 575,12}, { 159,11}, \
- { 351,12}, { 191,11}, { 415,12}, { 223,11}, \
- { 447,10}, { 895,11}, { 479,13}, { 127,12}, \
- { 255,11}, { 543,12}, { 287,11}, { 607,12}, \
- { 319,11}, { 639,12}, { 351,13}, { 191,12}, \
- { 415,11}, { 831,12}, { 479,11}, { 959,14}, \
- { 127,13}, { 255,12}, { 607,13}, { 319,12}, \
- { 703,13}, { 383,12}, { 831,13}, { 447,12}, \
- { 895,14}, { 255,13}, { 511,12}, { 1023,13}, \
- { 575,12}, { 1215,13}, { 639,12}, { 1279,13}, \
- { 703,14}, { 383,13}, { 767,12}, { 1535,13}, \
- { 831,12}, { 1663,13}, { 959,15}, { 255,14}, \
- { 511,13}, { 1087,12}, { 2175,13}, { 1215,14}, \
- { 639,13}, { 1343,12}, { 2687,13}, { 1407,12}, \
- { 2815,14}, { 767,13}, { 1663,14}, { 895,13}, \
+ { 23, 9}, { 59,11}, { 15,10}, { 31, 8}, \
+ { 125, 9}, { 67,10}, { 39, 9}, { 79,10}, \
+ { 47, 9}, { 103,10}, { 79, 9}, { 159,10}, \
+ { 87, 9}, { 175, 8}, { 351,11}, { 47,10}, \
+ { 95,11}, { 63,10}, { 127, 8}, { 511, 9}, \
+ { 271, 8}, { 543,11}, { 79,10}, { 175,11}, \
+ { 95,10}, { 191, 9}, { 415,12}, { 63,11}, \
+ { 127,10}, { 255,11}, { 143,10}, { 287, 9}, \
+ { 575,11}, { 159,10}, { 319,11}, { 175,10}, \
+ { 351,12}, { 95,11}, { 191,10}, { 383,11}, \
+ { 207,10}, { 415,13}, { 63,12}, { 127,11}, \
+ { 255,10}, { 511, 9}, { 1023,10}, { 543,11}, \
+ { 287,10}, { 575,11}, { 303,12}, { 159,11}, \
+ { 319,10}, { 639,11}, { 351,12}, { 191,11}, \
+ { 383,10}, { 767,11}, { 415,10}, { 831,12}, \
+ { 223,11}, { 479,13}, { 127,12}, { 255,11}, \
+ { 543,12}, { 287,11}, { 575,12}, { 319,11}, \
+ { 639,12}, { 351,13}, { 191,12}, { 383,11}, \
+ { 767,12}, { 415,11}, { 831,12}, { 447,11}, \
+ { 895,12}, { 479,14}, { 127,13}, { 255,12}, \
+ { 543,11}, { 1087,12}, { 607,13}, { 319,12}, \
+ { 639,11}, { 1279,12}, { 703,13}, { 383,12}, \
+ { 831,13}, { 447,12}, { 959,14}, { 255,13}, \
+ { 511,12}, { 1087,13}, { 575,12}, { 1215,13}, \
+ { 639,12}, { 1279,13}, { 703,14}, { 383,13}, \
+ { 767,12}, { 1535,13}, { 831,12}, { 1663,13}, \
+ { 959,15}, { 255,14}, { 511,13}, { 1087,12}, \
+ { 2175,13}, { 1215,14}, { 639,13}, { 1343,12}, \
+ { 2687,13}, { 1407,12}, { 2815,14}, { 767,13}, \
+ { 1535,12}, { 3071,13}, { 1663,14}, { 895,13}, \
{ 1791,15}, { 511,14}, { 1023,13}, { 2175,14}, \
{ 1151,13}, { 2303,12}, { 4607,13}, { 2431,12}, \
{ 4863,14}, { 1279,13}, { 2687,14}, { 1407,13}, \
{ 6911,14}, { 1791,16}, { 65536,17}, { 131072,18}, \
{ 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \
{4194304,23}, {8388608,24} }
-#define SQR_FFT_TABLE3_SIZE 166
-#define SQR_FFT_THRESHOLD 3200
+#define SQR_FFT_TABLE3_SIZE 178
+#define SQR_FFT_THRESHOLD 3520
-#define MULLO_BASECASE_THRESHOLD 3
-#define MULLO_DC_THRESHOLD 20
-#define MULLO_MUL_N_THRESHOLD 8648
+#define MULLO_BASECASE_THRESHOLD 0 /* always */
+#define MULLO_DC_THRESHOLD 55
+#define MULLO_MUL_N_THRESHOLD 9174
-#define DC_DIV_QR_THRESHOLD 46
-#define DC_DIVAPPR_Q_THRESHOLD 190
-#define DC_BDIV_QR_THRESHOLD 57
-#define DC_BDIV_Q_THRESHOLD 156
+#define DC_DIV_QR_THRESHOLD 54
+#define DC_DIVAPPR_Q_THRESHOLD 179
+#define DC_BDIV_QR_THRESHOLD 53
+#define DC_BDIV_Q_THRESHOLD 125
-#define INV_MULMOD_BNM1_THRESHOLD 50
-#define INV_NEWTON_THRESHOLD 172
+#define INV_MULMOD_BNM1_THRESHOLD 62
+#define INV_NEWTON_THRESHOLD 173
#define INV_APPR_THRESHOLD 172
-#define BINV_NEWTON_THRESHOLD 240
+#define BINV_NEWTON_THRESHOLD 230
#define REDC_1_TO_REDC_2_THRESHOLD 10
#define REDC_2_TO_REDC_N_THRESHOLD 63
#define MU_DIV_QR_THRESHOLD 1334
-#define MU_DIVAPPR_Q_THRESHOLD 1334
-#define MUPI_DIV_QR_THRESHOLD 81
-#define MU_BDIV_QR_THRESHOLD 1037
-#define MU_BDIV_Q_THRESHOLD 1334
-
-#define MATRIX22_STRASSEN_THRESHOLD 18
-#define HGCD_THRESHOLD 138
-#define GCD_DC_THRESHOLD 465
-#define GCDEXT_DC_THRESHOLD 365
-#define JACOBI_BASE_METHOD 1
-
-#define GET_STR_DC_THRESHOLD 9
-#define GET_STR_PRECOMPUTE_THRESHOLD 20
+#define MU_DIVAPPR_Q_THRESHOLD 1210
+#define MUPI_DIV_QR_THRESHOLD 79
+#define MU_BDIV_QR_THRESHOLD 1057
+#define MU_BDIV_Q_THRESHOLD 1187
+
+#define POWM_SEC_TABLE 2,65,322,780
+
+
+#define MATRIX22_STRASSEN_THRESHOLD 17
+#define HGCD_THRESHOLD 135
+#define HGCD_APPR_THRESHOLD 178
+#define HGCD_REDUCE_THRESHOLD 2121
+#define GCD_DC_THRESHOLD 416
+#define GCDEXT_DC_THRESHOLD 361
+#define JACOBI_BASE_METHOD 4
+
+#define GET_STR_DC_THRESHOLD 12
+#define GET_STR_PRECOMPUTE_THRESHOLD 21
#define SET_STR_DC_THRESHOLD 552
-#define SET_STR_PRECOMPUTE_THRESHOLD 1790
+#define SET_STR_PRECOMPUTE_THRESHOLD 1815
+
+#define FAC_DSC_THRESHOLD 608
+#define FAC_ODD_THRESHOLD 28
dnl x86-64 mpn_lshift optimized for "Core 2".
-dnl Copyright 2007, 2009 Free Software Foundation, Inc.
-dnl
+dnl Copyright 2007, 2009, 2011, 2012 Free Software Foundation, Inc.
+
dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or
-dnl modify it under the terms of the GNU Lesser General Public License as
-dnl published by the Free Software Foundation; either version 3 of the
-dnl License, or (at your option) any later version.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful,
-dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
-dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-dnl Lesser General Public License for more details.
-dnl
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
dnl You should have received a copy of the GNU Lesser General Public License
dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
C cycles/limb
-C K8,K9: 4.25
-C K10: 4.25
-C P4: 14.7
-C P6 core2: 1.27
-C P6 corei7: 1.5
+C AMD K8,K9 4.25
+C AMD K10 4.25
+C Intel P4 14.7
+C Intel core2 1.27
+C Intel NHM 1.375 (up to about n = 260, then 1.5)
+C Intel SBR 1.87
+C Intel atom ?
+C VIA nano ?
C INPUT PARAMETERS
define(`rp', `%rdi')
define(`up', `%rsi')
define(`n', `%rdx')
-define(`cnt', `%cl')
+define(`cnt', `%rcx')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
ASM_START()
TEXT
ALIGN(16)
PROLOGUE(mpn_lshift)
+ FUNC_ENTRY(4)
lea -8(rp,n,8), rp
lea -8(up,n,8), up
- mov %edx, %eax
- and $3, %eax
+ mov R32(%rdx), R32(%rax)
+ and $3, R32(%rax)
jne L(nb00)
L(b00): C n = 4, 8, 12, ...
mov (up), %r10
mov -8(up), %r11
- xor %eax, %eax
- shld %cl, %r10, %rax
+ xor R32(%rax), R32(%rax)
+ shld R8(cnt), %r10, %rax
mov -16(up), %r8
lea 24(rp), rp
sub $4, n
jmp L(00)
L(nb00):C n = 1, 5, 9, ...
- cmp $2, %eax
+ cmp $2, R32(%rax)
jae L(nb01)
L(b01): mov (up), %r9
- xor %eax, %eax
- shld %cl, %r9, %rax
+ xor R32(%rax), R32(%rax)
+ shld R8(cnt), %r9, %rax
sub $2, n
jb L(le1)
mov -8(up), %r10
lea -8(up), up
lea 16(rp), rp
jmp L(01)
-L(le1): shl %cl, %r9
+L(le1): shl R8(cnt), %r9
mov %r9, (rp)
+ FUNC_EXIT()
ret
L(nb01):C n = 2, 6, 10, ...
jne L(b11)
L(b10): mov (up), %r8
mov -8(up), %r9
- xor %eax, %eax
- shld %cl, %r8, %rax
+ xor R32(%rax), R32(%rax)
+ shld R8(cnt), %r8, %rax
sub $3, n
jb L(le2)
mov -16(up), %r10
lea -16(up), up
lea 8(rp), rp
jmp L(10)
-L(le2): shld %cl, %r9, %r8
+L(le2): shld R8(cnt), %r9, %r8
mov %r8, (rp)
- shl %cl, %r9
+ shl R8(cnt), %r9
mov %r9, -8(rp)
+ FUNC_EXIT()
ret
ALIGN(16) C performance critical!
L(b11): C n = 3, 7, 11, ...
mov (up), %r11
mov -8(up), %r8
- xor %eax, %eax
- shld %cl, %r11, %rax
+ xor R32(%rax), R32(%rax)
+ shld R8(cnt), %r11, %rax
mov -16(up), %r9
lea -24(up), up
sub $4, n
jb L(end)
ALIGN(16)
-L(top): shld %cl, %r8, %r11
+L(top): shld R8(cnt), %r8, %r11
mov (up), %r10
mov %r11, (rp)
-L(10): shld %cl, %r9, %r8
+L(10): shld R8(cnt), %r9, %r8
mov -8(up), %r11
mov %r8, -8(rp)
-L(01): shld %cl, %r10, %r9
+L(01): shld R8(cnt), %r10, %r9
mov -16(up), %r8
mov %r9, -16(rp)
-L(00): shld %cl, %r11, %r10
+L(00): shld R8(cnt), %r11, %r10
mov -24(up), %r9
mov %r10, -24(rp)
add $-32, up
sub $4, n
jnc L(top)
-L(end): shld %cl, %r8, %r11
+L(end): shld R8(cnt), %r8, %r11
mov %r11, (rp)
- shld %cl, %r9, %r8
+ shld R8(cnt), %r9, %r8
mov %r8, -8(rp)
- shl %cl, %r9
+ shl R8(cnt), %r9
mov %r9, -16(rp)
+ FUNC_EXIT()
ret
EPILOGUE()
dnl x86-64 mpn_lshiftc optimized for "Core 2".
-dnl Copyright 2007, 2009 Free Software Foundation, Inc.
-dnl
+dnl Copyright 2007, 2009, 2011, 2012 Free Software Foundation, Inc.
+
dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or
-dnl modify it under the terms of the GNU Lesser General Public License as
-dnl published by the Free Software Foundation; either version 3 of the
-dnl License, or (at your option) any later version.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful,
-dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
-dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-dnl Lesser General Public License for more details.
-dnl
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
dnl You should have received a copy of the GNU Lesser General Public License
dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
C cycles/limb
-C K8,K9: ?
-C K10: ?
-C P4: ?
-C P6 core2: 1.5
-C P6 corei7: 1.75
+C AMD K8,K9 ?
+C AMD K10 ?
+C Intel P4 ?
+C Intel core2 1.5
+C Intel NHM 2.25 (up to about n = 260, then 1.875)
+C Intel SBR 2.25
+C Intel atom ?
+C VIA nano ?
C INPUT PARAMETERS
define(`rp', `%rdi')
define(`up', `%rsi')
define(`n', `%rdx')
-define(`cnt', `%cl')
+define(`cnt', `%rcx')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
ASM_START()
TEXT
ALIGN(16)
PROLOGUE(mpn_lshiftc)
+ FUNC_ENTRY(4)
lea -8(rp,n,8), rp
lea -8(up,n,8), up
- mov %edx, %eax
- and $3, %eax
+ mov R32(%rdx), R32(%rax)
+ and $3, R32(%rax)
jne L(nb00)
L(b00): C n = 4, 8, 12, ...
mov (up), %r10
mov -8(up), %r11
- xor %eax, %eax
- shld %cl, %r10, %rax
+ xor R32(%rax), R32(%rax)
+ shld R8(cnt), %r10, %rax
mov -16(up), %r8
lea 24(rp), rp
sub $4, n
jmp L(00)
L(nb00):C n = 1, 5, 9, ...
- cmp $2, %eax
+ cmp $2, R32(%rax)
jae L(nb01)
L(b01): mov (up), %r9
- xor %eax, %eax
- shld %cl, %r9, %rax
+ xor R32(%rax), R32(%rax)
+ shld R8(cnt), %r9, %rax
sub $2, n
jb L(le1)
mov -8(up), %r10
lea -8(up), up
lea 16(rp), rp
jmp L(01)
-L(le1): shl %cl, %r9
+L(le1): shl R8(cnt), %r9
not %r9
mov %r9, (rp)
+ FUNC_EXIT()
ret
L(nb01):C n = 2, 6, 10, ...
jne L(b11)
L(b10): mov (up), %r8
mov -8(up), %r9
- xor %eax, %eax
- shld %cl, %r8, %rax
+ xor R32(%rax), R32(%rax)
+ shld R8(cnt), %r8, %rax
sub $3, n
jb L(le2)
mov -16(up), %r10
lea -16(up), up
lea 8(rp), rp
jmp L(10)
-L(le2): shld %cl, %r9, %r8
+L(le2): shld R8(cnt), %r9, %r8
not %r8
mov %r8, (rp)
- shl %cl, %r9
+ shl R8(cnt), %r9
not %r9
mov %r9, -8(rp)
+ FUNC_EXIT()
ret
ALIGN(16) C performance critical!
L(b11): C n = 3, 7, 11, ...
mov (up), %r11
mov -8(up), %r8
- xor %eax, %eax
- shld %cl, %r11, %rax
+ xor R32(%rax), R32(%rax)
+ shld R8(cnt), %r11, %rax
mov -16(up), %r9
lea -24(up), up
sub $4, n
jb L(end)
ALIGN(16)
-L(top): shld %cl, %r8, %r11
+L(top): shld R8(cnt), %r8, %r11
mov (up), %r10
not %r11
mov %r11, (rp)
-L(10): shld %cl, %r9, %r8
+L(10): shld R8(cnt), %r9, %r8
mov -8(up), %r11
not %r8
mov %r8, -8(rp)
-L(01): shld %cl, %r10, %r9
+L(01): shld R8(cnt), %r10, %r9
mov -16(up), %r8
not %r9
mov %r9, -16(rp)
-L(00): shld %cl, %r11, %r10
+L(00): shld R8(cnt), %r11, %r10
mov -24(up), %r9
not %r10
mov %r10, -24(rp)
sub $4, n
jnc L(top)
-L(end): shld %cl, %r8, %r11
+L(end): shld R8(cnt), %r8, %r11
not %r11
mov %r11, (rp)
- shld %cl, %r9, %r8
+ shld R8(cnt), %r9, %r8
not %r8
mov %r8, -8(rp)
- shl %cl, %r9
+ shl R8(cnt), %r9
not %r9
mov %r9, -16(rp)
+ FUNC_EXIT()
ret
EPILOGUE()
--- /dev/null
+dnl X86-64 mpn_rsh1add_n, mpn_rsh1sub_n optimised for Intel Conroe/Penryn.
+
+dnl Copyright 2003, 2005, 2009, 2011, 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C AMD K8,K9 ?
+C AMD K10 ?
+C Intel P4 ?
+C Intel core2 3.05
+C Intel NHM 3.3
+C Intel SBR 2.5
+C Intel atom ?
+C VIA nano ?
+
+C TODO
+C * Loopmix to approach 2.5 c/l on NHM.
+
+C INPUT PARAMETERS
+define(`rp', `%rdi')
+define(`up', `%rsi')
+define(`vp', `%rdx')
+define(`n', `%rcx')
+C The func_nc entry point additionally reads a carry-in value from r8.
+
+C One source builds either flavour: whichever OPERATION_ symbol is defined
+C selects the instruction pair (ADDSUB for the first limb, ADCSBB for limbs
+C that consume a carry) and the names of the two entry points.
+ifdef(`OPERATION_rsh1add_n', `
+ define(ADDSUB, add)
+ define(ADCSBB, adc)
+ define(func_n, mpn_rsh1add_n)
+ define(func_nc, mpn_rsh1add_nc)')
+ifdef(`OPERATION_rsh1sub_n', `
+ define(ADDSUB, sub)
+ define(ADCSBB, sbb)
+ define(func_n, mpn_rsh1sub_n)
+ define(func_nc, mpn_rsh1sub_nc)')
+
+MULFUNC_PROLOGUE(mpn_rsh1add_n mpn_rsh1add_nc mpn_rsh1sub_n mpn_rsh1sub_nc)
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+ TEXT
+ ALIGN(16)
+C Carry-in variant of func_n.  The fifth argument is turned into the carry
+C flag, the lowest limb is formed with ADCSBB so that the flag is consumed,
+C and control then joins func_n at L(ent).
+PROLOGUE(func_nc)
+ FUNC_ENTRY(4)
+IFDOS(` mov 56(%rsp), %r8 ') C DOS64 only: fifth argument comes from the stack
+ push %rbx C rbx and rbp are used as scratch and restored at L(end)
+ push %rbp
+
+ neg %r8 C set C flag from parameter
+ mov (up), %r8 C lowest limb of up; mov leaves the carry flag intact
+ ADCSBB (vp), %r8 C combine with the lowest limb of vp and the carry-in
+ jmp L(ent)
+EPILOGUE()
+
+ ALIGN(16)
+C func_n (rp, up, vp, n)
+C
+C Forms up ADDSUB vp over n limbs, shifts the result right by one bit and
+C stores n limbs at rp.  The carry or borrow out of the top limb becomes
+C the top bit of the stored result, and bit 0 of the lowest sum limb, which
+C is shifted out, is returned in rax.
+C
+C The carry is parked in rbx between groups of limbs: sbb rbx,rbx turns CF
+C into 0 or -1, and add rbx,rbx turns that value back into CF.  After the
+C setup the three pointers address the end of their operands and n runs
+C from a negative value up towards zero.  The first group is sized by n
+C mod 4 so that the main loop always handles four limbs.
+PROLOGUE(func_n)
+ FUNC_ENTRY(4)
+ push %rbx C rbx and rbp are used as scratch and restored at L(end)
+ push %rbp
+
+ mov (up), %r8
+ ADDSUB (vp), %r8 C lowest limb, no carry-in
+L(ent): sbb R32(%rbx), R32(%rbx) C save cy
+ mov %r8, %rax
+ and $1, R32(%rax) C return value
+
+ lea (up,n,8), up C point all three operands just past their last limb
+ lea (vp,n,8), vp
+ lea (rp,n,8), rp
+ mov R32(n), R32(%rbp)
+ neg n C n becomes a negative index counting up
+ and $3, R32(%rbp) C rbp = n mod 4 selects the entry into the loop
+ jz L(b0)
+ cmp $2, R32(%rbp)
+ jae L(n1) C n mod 4 is 2 or 3
+
+L(b1): mov %r8, %rbp C n mod 4 == 1: limb 0 is the pending limb for the loop
+ inc n
+ js L(top) C more limbs follow
+ jmp L(end) C n was 1
+
+L(n1): jnz L(b3) C flags still from the cmp: not equal means n mod 4 == 3
+ add R32(%rbx), R32(%rbx) C restore cy
+ mov 8(up,n,8), %r11
+ ADCSBB 8(vp,n,8), %r11 C limb 1
+ sbb R32(%rbx), R32(%rbx) C save cy
+ mov %r8, %r10 C limb 0 goes to the register L(2) expects
+ add $-2, n
+ jmp L(2)
+
+L(b3): add R32(%rbx), R32(%rbx) C restore cy
+ mov 8(up,n,8), %r10
+ mov 16(up,n,8), %r11
+ ADCSBB 8(vp,n,8), %r10 C limb 1
+ ADCSBB 16(vp,n,8), %r11 C limb 2
+ sbb R32(%rbx), R32(%rbx) C save cy
+ mov %r8, %r9 C limb 0 goes to the register L(3) expects
+ dec n
+ jmp L(3)
+
+L(b0): add R32(%rbx), R32(%rbx) C restore cy
+ mov 8(up,n,8), %r9
+ mov 16(up,n,8), %r10
+ mov 24(up,n,8), %r11
+ ADCSBB 8(vp,n,8), %r9 C limb 1
+ ADCSBB 16(vp,n,8), %r10 C limb 2
+ ADCSBB 24(vp,n,8), %r11 C limb 3
+ sbb R32(%rbx), R32(%rbx) C save cy
+ jmp L(4)
+
+ ALIGN(16)
+
+L(top): add R32(%rbx), R32(%rbx) C restore cy
+ mov (up,n,8), %r8
+ mov 8(up,n,8), %r9
+ mov 16(up,n,8), %r10
+ mov 24(up,n,8), %r11
+ ADCSBB (vp,n,8), %r8
+ ADCSBB 8(vp,n,8), %r9
+ ADCSBB 16(vp,n,8), %r10
+ ADCSBB 24(vp,n,8), %r11
+ sbb R32(%rbx), R32(%rbx) C save cy
+ shrd $1, %r8, %rbp C pending limb from the previous group, top bit from r8
+ mov %rbp, -8(rp,n,8)
+L(4): shrd $1, %r9, %r8 C each shrd takes its new top bit from the next higher limb
+ mov %r8, (rp,n,8)
+L(3): shrd $1, %r10, %r9
+ mov %r9, 8(rp,n,8)
+L(2): shrd $1, %r11, %r10
+ mov %r10, 16(rp,n,8)
+L(1): add $4, n
+ mov %r11, %rbp C highest limb of the group stays pending; mov keeps the flags of add
+ js L(top)
+
+L(end): shrd $1, %rbx, %rbp C top bit of the last limb comes from the saved carry
+ mov %rbp, -8(rp) C rp points past the end, so this is the highest result limb
+ pop %rbp
+ pop %rbx
+ FUNC_EXIT()
+ ret
+EPILOGUE()
dnl x86-64 mpn_rshift optimized for "Core 2".
-dnl Copyright 2007, 2009 Free Software Foundation, Inc.
-dnl
+dnl Copyright 2007, 2009, 2011, 2012 Free Software Foundation, Inc.
+
dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or
-dnl modify it under the terms of the GNU Lesser General Public License as
-dnl published by the Free Software Foundation; either version 3 of the
-dnl License, or (at your option) any later version.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful,
-dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
-dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-dnl Lesser General Public License for more details.
-dnl
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
dnl You should have received a copy of the GNU Lesser General Public License
dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
C cycles/limb
-C K8,K9: 4.25
-C K10: 4.25
-C P4: 14.7
-C P6 core2: 1.27
-C P6 corei7: 1.5
+C AMD K8,K9 4.25
+C AMD K10 4.25
+C Intel P4 14.7
+C Intel core2 1.27
+C Intel NHM 1.375 (up to about n = 260, then 1.5)
+C Intel SBR 1.77
+C Intel atom ?
+C VIA nano ?
C INPUT PARAMETERS
define(`rp', `%rdi')
define(`up', `%rsi')
define(`n', `%rdx')
-define(`cnt', `%cl')
+define(`cnt', `%rcx')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+C mpn_rshift -- shift the limbs at up right by cnt bits, storing the shifted
+C limbs at rp.  Each entry path clears %rax and then uses shrd to move the bits
+C shifted out of the lowest limb into it; none of the instructions visible
+C below writes %rax again before the ret.
ASM_START()
TEXT
ALIGN(16)
PROLOGUE(mpn_rshift)
- mov %edx, %eax
- and $3, %eax
+ FUNC_ENTRY(4)
+C Branch on n mod 4 to select the entry point into the 4-way unrolled loop.
+ mov R32(%rdx), R32(%rax)
+ and $3, R32(%rax)
jne L(nb00)
L(b00): C n = 4, 8, 12, ...
mov (up), %r10
mov 8(up), %r11
- xor %eax, %eax
- shrd %cl, %r10, %rax
+ xor R32(%rax), R32(%rax)
+ shrd R8(cnt), %r10, %rax
mov 16(up), %r8
lea 8(up), up
lea -24(rp), rp
jmp L(00)
L(nb00):C n = 1, 5, 9, ...
- cmp $2, %eax
+ cmp $2, R32(%rax)
jae L(nb01)
L(b01): mov (up), %r9
- xor %eax, %eax
- shrd %cl, %r9, %rax
+ xor R32(%rax), R32(%rax)
+ shrd R8(cnt), %r9, %rax
+C If n is below 2 there is only the limb already in %r9: finish at L(le1).
sub $2, n
jb L(le1)
mov 8(up), %r10
lea 16(up), up
lea -16(rp), rp
jmp L(01)
-L(le1): shr %cl, %r9
+L(le1): shr R8(cnt), %r9
mov %r9, (rp)
+ FUNC_EXIT()
ret
L(nb01):C n = 2, 6, 10, ...
jne L(b11)
L(b10): mov (up), %r8
mov 8(up), %r9
- xor %eax, %eax
- shrd %cl, %r8, %rax
+ xor R32(%rax), R32(%rax)
+ shrd R8(cnt), %r8, %rax
+C If n is below 3 only the two limbs in %r8 and %r9 remain: finish at L(le2).
sub $3, n
jb L(le2)
mov 16(up), %r10
lea 24(up), up
lea -8(rp), rp
jmp L(10)
-L(le2): shrd %cl, %r9, %r8
+L(le2): shrd R8(cnt), %r9, %r8
mov %r8, (rp)
- shr %cl, %r9
+ shr R8(cnt), %r9
mov %r9, 8(rp)
+ FUNC_EXIT()
ret
ALIGN(16)
L(b11): C n = 3, 7, 11, ...
mov (up), %r11
mov 8(up), %r8
- xor %eax, %eax
- shrd %cl, %r11, %rax
+ xor R32(%rax), R32(%rax)
+ shrd R8(cnt), %r11, %rax
mov 16(up), %r9
lea 32(up), up
sub $4, n
jb L(end)
+C Main loop: four limbs per pass.  Each shrd shifts one limb right and fills
+C its vacated upper bits from the low bits of the next higher limb.
ALIGN(16)
-L(top): shrd %cl, %r8, %r11
+L(top): shrd R8(cnt), %r8, %r11
mov -8(up), %r10
mov %r11, (rp)
-L(10): shrd %cl, %r9, %r8
+L(10): shrd R8(cnt), %r9, %r8
mov (up), %r11
mov %r8, 8(rp)
-L(01): shrd %cl, %r10, %r9
+L(01): shrd R8(cnt), %r10, %r9
mov 8(up), %r8
mov %r9, 16(rp)
-L(00): shrd %cl, %r11, %r10
+L(00): shrd R8(cnt), %r11, %r10
mov 16(up), %r9
mov %r10, 24(rp)
add $32, up
sub $4, n
jnc L(top)
+C Wind down: three limbs are still held in %r11, %r8 and %r9.  The topmost one
+C gets a plain shr, so zeros are shifted into its upper bits.
-L(end): shrd %cl, %r8, %r11
+L(end): shrd R8(cnt), %r8, %r11
mov %r11, (rp)
- shrd %cl, %r9, %r8
+ shrd R8(cnt), %r9, %r8
mov %r8, 8(rp)
- shr %cl, %r9
+ shr R8(cnt), %r9
mov %r9, 16(rp)
+ FUNC_EXIT()
ret
EPILOGUE()
--- /dev/null
+dnl AMD64 mpn_sublsh1_n optimised for Core 2 and Core iN.
+
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
+dnl Copyright 2008, 2010, 2011, 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+define(LSH, 1)
+define(RSH, 63)
+
+define(ADDSUB, sub)
+define(ADCSBB, sbb)
+define(func, mpn_sublsh1_n)
+
+MULFUNC_PROLOGUE(mpn_sublsh1_n)
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+include_mpn(`x86_64/core2/sublshC_n.asm')
--- /dev/null
+dnl AMD64 mpn_sublsh2_n optimised for Core 2 and Core iN.
+
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
+dnl Copyright 2008, 2010, 2011, 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+define(LSH, 2)
+define(RSH, 62)
+
+define(ADDSUB, sub)
+define(ADCSBB, sbb)
+define(func, mpn_sublsh2_n)
+
+MULFUNC_PROLOGUE(mpn_sublsh2_n)
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+include_mpn(`x86_64/core2/sublshC_n.asm')
--- /dev/null
+dnl AMD64 mpn_sublshC_n -- rp[] = up[] - (vp[] << C), optimised for Core 2 and
+dnl Core iN.
+
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
+dnl Copyright 2008, 2010, 2011, 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+C cycles/limb
+C AMD K8,K9 4.25
+C AMD K10 ?
+C Intel P4 ?
+C Intel core2 3
+C Intel NHM 3.1
+C Intel SBR 2.47
+C Intel atom ?
+C VIA nano ?
+
+C INPUT PARAMETERS
+define(`rp',`%rdi')
+define(`up',`%rsi')
+define(`vp',`%rdx')
+define(`n', `%rcx')
+
+C Shared body for the sublshC_n wrappers.  The including file is expected to
+C define func, ADDSUB, ADCSBB and RSH before this code is pulled in.
+ASM_START()
+ TEXT
+ ALIGN(8)
+PROLOGUE(func)
+ FUNC_ENTRY(4)
+ push %rbx
+ push %r12
+
+ mov R32(%rcx), R32(%rax)
+C Point up, vp and rp 24 bytes past their ends and negate n, so that n counts
+C upwards and the js tests below stop looping once it is no longer negative.
+ lea 24(up,n,8), up
+ lea 24(vp,n,8), vp
+ lea 24(rp,n,8), rp
+ neg n
+
+C With r11 cleared first, the shrd leaves the first vp limb shifted left by
+C 64-RSH bits in r11, with zeros in the low bits.
+ xor R32(%r11), R32(%r11)
+
+ mov -24(vp,n,8), %r8 C do first limb early
+ shrd $RSH, %r8, %r11
+
+C Dispatch on n mod 4: b1, b2 and b3 handle 1, 2 or 3 leading limbs and then
+C enter the 4-way loop at L(top); a multiple of 4 enters the loop at L(b0).
+ and $3, R32(%rax)
+ je L(b0)
+ cmp $2, R32(%rax)
+ jc L(b1)
+ je L(b2)
+
+L(b3): mov -16(vp,n,8), %r9
+ shrd $RSH, %r9, %r8
+ mov -8(vp,n,8), %r10
+ shrd $RSH, %r10, %r9
+ mov -24(up,n,8), %r12
+ ADDSUB %r11, %r12
+ mov %r12, -24(rp,n,8)
+ mov -16(up,n,8), %r12
+ ADCSBB %r8, %r12
+ mov %r12, -16(rp,n,8)
+ mov -8(up,n,8), %r12
+ ADCSBB %r9, %r12
+ mov %r12, -8(rp,n,8)
+ mov %r10, %r11
+ sbb R32(%rax), R32(%rax) C save cy
+ add $3, n
+ js L(top)
+ jmp L(end)
+
+L(b1): mov -24(up,n,8), %r12
+ ADDSUB %r11, %r12
+ mov %r12, -24(rp,n,8)
+ mov %r8, %r11
+ sbb R32(%rax), R32(%rax) C save cy
+ inc n
+ js L(top)
+ jmp L(end)
+
+L(b2): mov -16(vp,n,8), %r9
+ shrd $RSH, %r9, %r8
+ mov -24(up,n,8), %r12
+ ADDSUB %r11, %r12
+ mov %r12, -24(rp,n,8)
+ mov -16(up,n,8), %r12
+ ADCSBB %r8, %r12
+ mov %r12, -16(rp,n,8)
+ mov %r9, %r11
+ sbb R32(%rax), R32(%rax) C save cy
+ add $2, n
+ js L(top)
+ jmp L(end)
+
+C Main loop, four limbs per pass.  On entry r11 holds the previous, unshifted
+C vp limb; the shrd chain turns r11, r8, r9, r10 into four shifted vp limbs.
+C eax holds the saved carry as 0 or -1, and add eax,eax moves it back into CF.
+ ALIGN(16)
+L(top): mov -24(vp,n,8), %r8
+ shrd $RSH, %r8, %r11
+L(b0): mov -16(vp,n,8), %r9
+ shrd $RSH, %r9, %r8
+ mov -8(vp,n,8), %r10
+ shrd $RSH, %r10, %r9
+ mov (vp,n,8), %rbx
+ shrd $RSH, %rbx, %r10
+
+ add R32(%rax), R32(%rax) C restore cy
+
+ mov -24(up,n,8), %r12
+ ADCSBB %r11, %r12
+ mov %r12, -24(rp,n,8)
+
+ mov -16(up,n,8), %r12
+ ADCSBB %r8, %r12
+ mov %r12, -16(rp,n,8)
+
+ mov -8(up,n,8), %r12
+ ADCSBB %r9, %r12
+ mov %r12, -8(rp,n,8)
+
+ mov (up,n,8), %r12
+ ADCSBB %r10, %r12
+ mov %r12, (rp,n,8)
+
+ mov %rbx, %r11
+ sbb R32(%rax), R32(%rax) C save cy
+
+ add $4, n
+ js L(top)
+
+C Result in eax: r11 >> RSH is the part of the top vp limb that was shifted
+C out.  The sub/neg pair computes that value minus eax; since eax is 0 or -1
+C from the last sbb, this adds the final carry flag to it.
+L(end): shr $RSH, %r11
+ pop %r12
+ pop %rbx
+ sub R32(%r11), R32(%rax)
+ neg R32(%rax)
+ FUNC_EXIT()
+ ret
+EPILOGUE()
--- /dev/null
+dnl AMD64 mpn_addlsh_n -- rp[] = up[] + (vp[] << k)
+dnl AMD64 mpn_rsblsh_n -- rp[] = (vp[] << k) - up[]
+dnl Optimised for Nehalem.
+
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
+dnl Copyright 2011, 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C AMD K8,K9 ?
+C AMD K10 4.75
+C Intel P4 ?
+C Intel core2 2.8-3
+C Intel NHM 2.8
+C Intel SBR 3.55
+C Intel atom ?
+C VIA nano ?
+
+C The inner-loop probably runs close to optimally on Nehalem (using 4-way
+C unrolling). The rest of the code is quite crude, and could perhaps be made
+C both smaller and faster.
+
+C INPUT PARAMETERS
+define(`rp', `%rdi')
+define(`up', `%rsi')
+define(`vp', `%rdx')
+define(`n', `%rcx')
+define(`cnt', `%r8')
+define(`cy', `%r9') C for _nc variant
+
+ifdef(`OPERATION_addlsh_n', `
+ define(ADDSUB, add)
+ define(ADCSBB, adc)
+ define(IFRSB, )
+ define(func_n, mpn_addlsh_n)
+ define(func_nc, mpn_addlsh_nc)')
+ifdef(`OPERATION_rsblsh_n', `
+ define(ADDSUB, sub)
+ define(ADCSBB, sbb)
+ define(IFRSB, `$1')
+ define(func_n, mpn_rsblsh_n)
+ define(func_nc, mpn_rsblsh_nc)')
+
+C mpn_rsblsh_nc removed below, its idea of carry-in is inconsistent with
+C refmpn_rsblsh_nc
+MULFUNC_PROLOGUE(mpn_addlsh_n mpn_addlsh_nc mpn_rsblsh_n)
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+C func_n and func_nc share one body.  func_nc converts its carry-in into the
+C CF save register ebx and jumps to L(ent); func_n clears ebx and falls
+C through to L(ent).
+ASM_START()
+ TEXT
+ ALIGN(32)
+PROLOGUE(func_n)
+ FUNC_ENTRY(4)
+IFDOS(` mov 56(%rsp), %r8d ') C cnt
+ push %rbx
+ xor R32(%rbx), R32(%rbx) C clear CF save register
+L(ent): push %rbp
+ mov R32(n), R32(%rbp)
+ mov n, %rax
+
+C ecx = -cnt.  The shrd/shr by %cl below use only the low bits of the count
+C (64-cnt for a count in 1..63), which is what turns the right double-shifts
+C into a left shift of vp by cnt.
+ mov R32(cnt), R32(%rcx)
+ neg R32(%rcx)
+
+C Bias the pointers by the limb count and negate rax, so that rax is a
+C negative index that counts up to zero.
+ lea -8(up,%rax,8), up
+ lea -8(vp,%rax,8), vp
+ lea -40(rp,%rax,8), rp
+ neg %rax
+
+C Dispatch on n mod 4 into the feed-in code for the 4-way unrolled loop.
+ and $3, R32(%rbp)
+ jz L(b0)
+ cmp $2, R32(%rbp)
+ jc L(b1)
+ jz L(b2)
+
+L(b3): xor R32(%r9), R32(%r9)
+ mov 8(vp,%rax,8), %r10
+ mov 16(vp,%rax,8), %r11
+ shrd %cl, %r10, %r9
+ shrd %cl, %r11, %r10
+ add R32(%rbx), R32(%rbx)
+ ADCSBB 8(up,%rax,8), %r9
+ mov 24(vp,%rax,8), %r8
+ ADCSBB 16(up,%rax,8), %r10
+ sbb R32(%rbx), R32(%rbx)
+ add $3, %rax
+ jmp L(lo3)
+
+L(b0): mov 8(vp,%rax,8), %r9
+ xor R32(%r8), R32(%r8)
+ shrd %cl, %r9, %r8
+ mov 16(vp,%rax,8), %r10
+ mov 24(vp,%rax,8), %r11
+ shrd %cl, %r10, %r9
+ shrd %cl, %r11, %r10
+ add R32(%rbx), R32(%rbx)
+ ADCSBB 8(up,%rax,8), %r8
+ mov %r8, 40(rp,%rax,8) C offset 40
+ ADCSBB 16(up,%rax,8), %r9
+ mov 32(vp,%rax,8), %r8
+ ADCSBB 24(up,%rax,8), %r10
+ sbb R32(%rbx), R32(%rbx)
+ add $4, %rax
+ jmp L(lo0)
+
+L(b1): mov 8(vp,%rax,8), %r8
+ add $1, %rax
+ jz L(1)
+ mov 8(vp,%rax,8), %r9
+ xor R32(%rbp), R32(%rbp)
+ jmp L(lo1)
+L(1): xor R32(%r11), R32(%r11)
+ jmp L(wd1)
+
+L(b2): xor %r10, %r10
+ mov 8(vp,%rax,8), %r11
+ shrd %cl, %r11, %r10
+ add R32(%rbx), R32(%rbx)
+ mov 16(vp,%rax,8), %r8
+ ADCSBB 8(up,%rax,8), %r10
+ sbb R32(%rbx), R32(%rbx)
+ add $2, %rax
+ jz L(end)
+
+C Main loop, four limbs per pass.  add ebx,ebx moves the saved carry back
+C into CF before the ADCSBB chain, and sbb ebx,ebx saves it again as 0 or -1.
+ ALIGN(16)
+L(top): mov 8(vp,%rax,8), %r9
+ mov %r11, %rbp
+L(lo2): mov %r10, 24(rp,%rax,8) C offset 24
+L(lo1): shrd %cl, %r8, %rbp
+ shrd %cl, %r9, %r8
+ mov 16(vp,%rax,8), %r10
+ mov 24(vp,%rax,8), %r11
+ shrd %cl, %r10, %r9
+ shrd %cl, %r11, %r10
+ add R32(%rbx), R32(%rbx)
+ ADCSBB (up,%rax,8), %rbp
+ ADCSBB 8(up,%rax,8), %r8
+ mov %r8, 40(rp,%rax,8) C offset 40
+ ADCSBB 16(up,%rax,8), %r9
+ mov 32(vp,%rax,8), %r8
+ ADCSBB 24(up,%rax,8), %r10
+ sbb R32(%rbx), R32(%rbx)
+ add $4, %rax
+ mov %rbp, (rp,%rax,8) C offset 32
+L(lo0):
+L(lo3): mov %r9, 16(rp,%rax,8) C offset 48
+ jnz L(top)
+
+C Wind down: handle the last limb, then build the return value in rax from
+C the final carry flag and the bits shifted out of the top vp limb
+C (r8 shifted right by %cl).  IFRSB negates the result in the rsblsh build.
+L(end): mov %r10, 24(rp,%rax,8)
+L(wd1): shrd %cl, %r8, %r11
+ add R32(%rbx), R32(%rbx)
+ ADCSBB (up,%rax,8), %r11
+ mov %r11, 32(rp,%rax,8) C offset 32
+ adc R32(%rax), R32(%rax) C rax is zero after loop
+ shr R8(%rcx), %r8
+ ADDSUB %r8, %rax
+IFRSB( neg %rax)
+ pop %rbp
+ pop %rbx
+ FUNC_EXIT()
+ ret
+EPILOGUE()
+C Carry-in entry point: neg sets CF when cy is nonzero, and sbb then records
+C that as 0 or -1 in ebx before joining the common code at L(ent).
+PROLOGUE(func_nc)
+ FUNC_ENTRY(4)
+IFDOS(` mov 56(%rsp), %r8d ') C cnt
+IFDOS(` mov 64(%rsp), %r9 ') C cy
+ push %rbx
+ neg cy
+ sbb R32(%rbx), R32(%rbx) C initialise CF save register
+ jmp L(ent)
+EPILOGUE()
/* Nehalem gmp-mparam.h -- Compiler/machine parameter header file.
Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007,
-2008, 2009, 2010, 2011 Free Software Foundation, Inc.
+2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
#define MOD_1_NORM_THRESHOLD 0 /* always */
#define MOD_1_UNNORM_THRESHOLD 0 /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD 6
-#define MOD_1U_TO_MOD_1_1_THRESHOLD 6
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD 0
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD 14
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 19
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 3
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 3
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 11
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD 16
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 9
#define USE_PREINV_DIVREM_1 1 /* native */
+#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
#define DIVEXACT_1_THRESHOLD 0 /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD 18
+#define BMOD_1_TO_MOD_1_THRESHOLD 16
#define MUL_TOOM22_THRESHOLD 18
-#define MUL_TOOM33_THRESHOLD 65
-#define MUL_TOOM44_THRESHOLD 166
-#define MUL_TOOM6H_THRESHOLD 254
-#define MUL_TOOM8H_THRESHOLD 333
+#define MUL_TOOM33_THRESHOLD 57
+#define MUL_TOOM44_THRESHOLD 169
+#define MUL_TOOM6H_THRESHOLD 222
+#define MUL_TOOM8H_THRESHOLD 288
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD 69
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD 91
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD 97
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD 96
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD 65
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 108
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 99
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD 105
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD 82
#define SQR_BASECASE_THRESHOLD 0 /* always (native) */
-#define SQR_TOOM2_THRESHOLD 28
-#define SQR_TOOM3_THRESHOLD 105
+#define SQR_TOOM2_THRESHOLD 30
+#define SQR_TOOM3_THRESHOLD 101
#define SQR_TOOM4_THRESHOLD 250
-#define SQR_TOOM6_THRESHOLD 366
-#define SQR_TOOM8_THRESHOLD 478
+#define SQR_TOOM6_THRESHOLD 306
+#define SQR_TOOM8_THRESHOLD 454
-#define MULMOD_BNM1_THRESHOLD 13
+#define MULMID_TOOM42_THRESHOLD 22
+
+#define MULMOD_BNM1_THRESHOLD 11
#define SQRMOD_BNM1_THRESHOLD 13
#define MUL_FFT_MODF_THRESHOLD 380 /* k = 5 */
#define MUL_FFT_TABLE3 \
{ { 380, 5}, { 17, 6}, { 9, 5}, { 19, 6}, \
- { 11, 5}, { 23, 6}, { 21, 7}, { 11, 6}, \
- { 23, 7}, { 24, 8}, { 13, 7}, { 27, 8}, \
+ { 10, 5}, { 21, 6}, { 11, 5}, { 23, 6}, \
+ { 21, 7}, { 11, 6}, { 23, 7}, { 21, 8}, \
+ { 11, 7}, { 24, 8}, { 13, 7}, { 27, 8}, \
{ 15, 7}, { 31, 8}, { 21, 9}, { 11, 8}, \
{ 27, 9}, { 15, 8}, { 33, 9}, { 19, 8}, \
{ 39, 9}, { 23, 8}, { 47, 9}, { 27,10}, \
- { 15, 9}, { 39,10}, { 23, 9}, { 51,11}, \
+ { 15, 9}, { 39,10}, { 23, 9}, { 47,11}, \
{ 15,10}, { 31, 9}, { 67,10}, { 39, 9}, \
{ 79,10}, { 47, 9}, { 95,10}, { 55,11}, \
{ 31,10}, { 79,11}, { 47,10}, { 95,12}, \
{ 31,11}, { 63,10}, { 135,11}, { 79,10}, \
- { 159, 9}, { 319, 8}, { 639,10}, { 167,11}, \
- { 95,10}, { 191,12}, { 63,11}, { 127,10}, \
- { 255, 9}, { 511,11}, { 143,10}, { 287, 9}, \
- { 575,11}, { 159,10}, { 319,12}, { 95,11}, \
- { 191,10}, { 383,11}, { 207,13}, { 8192,14}, \
- { 16384,15}, { 32768,16}, { 65536,17}, { 131072,18}, \
- { 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \
- {4194304,23}, {8388608,24} }
-#define MUL_FFT_TABLE3_SIZE 74
+ { 159,11}, { 95,10}, { 191, 9}, { 383,12}, \
+ { 63,11}, { 127,10}, { 255, 9}, { 511,11}, \
+ { 143,10}, { 287, 9}, { 575,10}, { 303,11}, \
+ { 159,10}, { 319,12}, { 95,11}, { 191,10}, \
+ { 383,11}, { 207,13}, { 63,12}, { 127,11}, \
+ { 255,10}, { 511,11}, { 271,10}, { 543,11}, \
+ { 287,10}, { 575,11}, { 303,12}, { 159,11}, \
+ { 319,10}, { 639,11}, { 351,10}, { 703,12}, \
+ { 191,11}, { 383,10}, { 767,11}, { 415,10}, \
+ { 831,12}, { 223,11}, { 447,10}, { 895,13}, \
+ { 127,12}, { 255,11}, { 511,10}, { 1023,11}, \
+ { 543,12}, { 287,11}, { 607,12}, { 319,11}, \
+ { 639,12}, { 351,11}, { 703,10}, { 1407,13}, \
+ { 191,12}, { 383,11}, { 767,12}, { 415,11}, \
+ { 831,10}, { 1663,12}, { 447,11}, { 895,12}, \
+ { 479,14}, { 127,13}, { 255,12}, { 511,11}, \
+ { 1023,12}, { 543,11}, { 1087,12}, { 575,11}, \
+ { 1151,12}, { 607,13}, { 319,12}, { 703,11}, \
+ { 1407,13}, { 383,12}, { 831,11}, { 1663,13}, \
+ { 447,12}, { 959,11}, { 1919,14}, { 16384,15}, \
+ { 32768,16}, { 65536,17}, { 131072,18}, { 262144,19}, \
+ { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \
+ {8388608,24} }
+#define MUL_FFT_TABLE3_SIZE 137
#define MUL_FFT_THRESHOLD 3712
-#define SQR_FFT_MODF_THRESHOLD 308 /* k = 5 */
+#define SQR_FFT_MODF_THRESHOLD 304 /* k = 5 */
#define SQR_FFT_TABLE3 \
- { { 308, 5}, { 17, 6}, { 9, 5}, { 19, 6}, \
+ { { 304, 5}, { 17, 6}, { 9, 5}, { 19, 6}, \
{ 21, 7}, { 11, 6}, { 23, 7}, { 21, 8}, \
{ 11, 7}, { 24, 8}, { 13, 7}, { 27, 8}, \
{ 15, 7}, { 31, 8}, { 21, 9}, { 11, 8}, \
{ 27, 9}, { 15, 8}, { 33, 9}, { 19, 8}, \
{ 41, 9}, { 23, 8}, { 47, 9}, { 27,10}, \
- { 15, 9}, { 43,10}, { 23, 9}, { 47,11}, \
+ { 15, 9}, { 39,10}, { 23, 9}, { 47,11}, \
{ 15,10}, { 31, 9}, { 67,10}, { 39, 9}, \
- { 79,10}, { 47, 9}, { 95,10}, { 55,11}, \
- { 31,10}, { 79,11}, { 47,10}, { 95,12}, \
- { 31,11}, { 63,10}, { 127, 9}, { 255, 8}, \
- { 511,10}, { 135,11}, { 79,10}, { 159, 9}, \
- { 319,11}, { 95,10}, { 191, 9}, { 383, 8}, \
- { 767,12}, { 63,10}, { 255,11}, { 143, 9}, \
- { 575, 8}, { 1151,11}, { 159,10}, { 319, 9}, \
- { 639,11}, { 175,12}, { 95,11}, { 191,10}, \
- { 383,13}, { 8192,14}, { 16384,15}, { 32768,16}, \
- { 65536,17}, { 131072,18}, { 262144,19}, { 524288,20}, \
- {1048576,21}, {2097152,22}, {4194304,23}, {8388608,24} }
-#define SQR_FFT_TABLE3_SIZE 76
+ { 79,10}, { 47,11}, { 31,10}, { 79,11}, \
+ { 47,12}, { 31,11}, { 63,10}, { 127, 9}, \
+ { 255,11}, { 79,10}, { 159, 9}, { 319,11}, \
+ { 95,10}, { 191, 9}, { 383,12}, { 63,11}, \
+ { 127,10}, { 255, 9}, { 511,10}, { 271, 9}, \
+ { 543,11}, { 143,10}, { 287, 9}, { 575,11}, \
+ { 159,10}, { 319,11}, { 175,12}, { 95,11}, \
+ { 191,10}, { 383,11}, { 207,13}, { 63,12}, \
+ { 127,11}, { 255,10}, { 511,11}, { 271,10}, \
+ { 543,11}, { 287,10}, { 575,12}, { 159,11}, \
+ { 319,10}, { 639,11}, { 351,10}, { 703,12}, \
+ { 191,11}, { 383,10}, { 767,11}, { 415,10}, \
+ { 831,12}, { 223,11}, { 447,10}, { 895,11}, \
+ { 479,13}, { 127,12}, { 255,11}, { 511,10}, \
+ { 1023,11}, { 543,12}, { 287,11}, { 575,10}, \
+ { 1151,12}, { 319,11}, { 639,12}, { 351,11}, \
+ { 703,13}, { 191,12}, { 383,11}, { 767,12}, \
+ { 415,11}, { 831,12}, { 447,11}, { 895,12}, \
+ { 479,11}, { 959,14}, { 127,13}, { 255,12}, \
+ { 511,11}, { 1023,12}, { 543,11}, { 1087,12}, \
+ { 575,11}, { 1151,12}, { 607,13}, { 319,12}, \
+ { 639,11}, { 1279,12}, { 703,11}, { 1407,13}, \
+ { 383,12}, { 767,11}, { 1535,12}, { 831,13}, \
+ { 447,12}, { 959,11}, { 1919,14}, { 16384,15}, \
+ { 32768,16}, { 65536,17}, { 131072,18}, { 262144,19}, \
+ { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \
+ {8388608,24} }
+#define SQR_FFT_TABLE3_SIZE 137
#define SQR_FFT_THRESHOLD 3200
#define MULLO_BASECASE_THRESHOLD 0 /* always */
-#define MULLO_DC_THRESHOLD 21
+#define MULLO_DC_THRESHOLD 45
#define MULLO_MUL_N_THRESHOLD 6633
#define DC_DIV_QR_THRESHOLD 38
-#define DC_DIVAPPR_Q_THRESHOLD 133
-#define DC_BDIV_QR_THRESHOLD 32
-#define DC_BDIV_Q_THRESHOLD 70
+#define DC_DIVAPPR_Q_THRESHOLD 123
+#define DC_BDIV_QR_THRESHOLD 36
+#define DC_BDIV_Q_THRESHOLD 26
-#define INV_MULMOD_BNM1_THRESHOLD 46
-#define INV_NEWTON_THRESHOLD 195
+#define INV_MULMOD_BNM1_THRESHOLD 35
+#define INV_NEWTON_THRESHOLD 163
#define INV_APPR_THRESHOLD 147
#define BINV_NEWTON_THRESHOLD 230
-#define REDC_1_TO_REDC_2_THRESHOLD 12
-#define REDC_2_TO_REDC_N_THRESHOLD 59
-
-#define MU_DIV_QR_THRESHOLD 1334
-#define MU_DIVAPPR_Q_THRESHOLD 1360
-#define MUPI_DIV_QR_THRESHOLD 74
-#define MU_BDIV_QR_THRESHOLD 1142
-#define MU_BDIV_Q_THRESHOLD 1308
-
-#define MATRIX22_STRASSEN_THRESHOLD 17
-#define HGCD_THRESHOLD 125
-#define GCD_DC_THRESHOLD 330
-#define GCDEXT_DC_THRESHOLD 382
-#define JACOBI_BASE_METHOD 1
-
-#define GET_STR_DC_THRESHOLD 13
-#define GET_STR_PRECOMPUTE_THRESHOLD 24
-#define SET_STR_DC_THRESHOLD 230
-#define SET_STR_PRECOMPUTE_THRESHOLD 1660
+#define REDC_1_TO_REDC_2_THRESHOLD 10
+#define REDC_2_TO_REDC_N_THRESHOLD 54
+
+#define MU_DIV_QR_THRESHOLD 1187
+#define MU_DIVAPPR_Q_THRESHOLD 1187
+#define MUPI_DIV_QR_THRESHOLD 75
+#define MU_BDIV_QR_THRESHOLD 1078
+#define MU_BDIV_Q_THRESHOLD 1142
+
+#define POWM_SEC_TABLE 2,65,322,1036,2699
+
+#define MATRIX22_STRASSEN_THRESHOLD 16
+#define HGCD_THRESHOLD 142
+#define HGCD_APPR_THRESHOLD 177
+#define HGCD_REDUCE_THRESHOLD 2121
+#define GCD_DC_THRESHOLD 345
+#define GCDEXT_DC_THRESHOLD 372
+#define JACOBI_BASE_METHOD 4
+
+#define GET_STR_DC_THRESHOLD 12
+#define GET_STR_PRECOMPUTE_THRESHOLD 20
+#define SET_STR_DC_THRESHOLD 378
+#define SET_STR_PRECOMPUTE_THRESHOLD 1585
+
+#define FAC_DSC_THRESHOLD 351
+#define FAC_ODD_THRESHOLD 43
--- /dev/null
+dnl AMD64 mpn_hamdist -- hamming distance.
+
+dnl Copyright 2008, 2010, 2011, 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+MULFUNC_PROLOGUE(mpn_hamdist)
+include_mpn(`x86_64/k10/hamdist.asm')
--- /dev/null
+dnl AMD64 mpn_popcount -- population count.
+
+dnl Copyright 2008, 2010, 2011, 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+MULFUNC_PROLOGUE(mpn_popcount)
+include_mpn(`x86_64/k10/popcount.asm')
--- /dev/null
+dnl X86-64 mpn_addmul_2 optimised for Intel Sandy Bridge.
+
+dnl Copyright 2008, 2011, 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C AMD K8,K9
+C AMD K10 4.07
+C AMD bd1
+C AMD bobcat 5.25
+C Intel P4 16.1
+C Intel core2
+C Intel NHM
+C Intel SBR 3.2
+C Intel atom
+C VIA nano 5.23
+
+C This code is the result of running a code generation and optimisation tool
+C suite written by David Harvey and Torbjorn Granlund.
+
+C TODO
+C * Tune feed-in and wind-down code.
+
+C INPUT PARAMETERS
+define(`rp', `%rdi')
+define(`up', `%rsi')
+define(`n_param',`%rdx')
+define(`vp', `%rcx')
+
+define(`v0', `%r12')
+define(`v1', `%r13')
+define(`n', `%r11')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+C mpn_addmul_2: the two multiplier limbs at vp are held in v0 and v1 for the
+C whole routine.  The main loop is unrolled four ways; n counts loop passes
+C and n_param mod 4 selects the feed-in block.
+ASM_START()
+ TEXT
+ ALIGN(16)
+PROLOGUE(mpn_addmul_2)
+ FUNC_ENTRY(4)
+C rbx and r12-r14 are used as working registers below and are restored
+C before returning.
+ push %rbx
+ push %r12
+ push %r13
+ push %r14
+
+ mov (up), %rax
+
+C n = n_param / 4 becomes the loop counter; the b2 and b3 feed-ins increment
+C it before entering the loop.
+ mov n_param, n
+ mov 0(vp), v0
+ mov 8(vp), v1
+ shr $2, n
+ and $3, R32(n_param)
+ jz L(b0)
+ cmp $2, R32(n_param)
+ jb L(b1)
+ jz L(b2)
+
+L(b3): mov (rp), %r10
+ mov $0, R32(%rcx)
+ mul v0
+ add %rax, %r10
+ mov %rdx, %r14
+ adc $0, %r14
+ lea -16(rp), rp
+ lea -16(up), up
+ mov $0, R32(%r9)
+ mov $0, R32(%rbx)
+ inc n
+ jmp L(L3)
+
+L(b0): mov (rp), %r8
+ mul v0
+ add %rax, %r8
+ mov %rdx, %r9
+ adc $0, %r9
+ mov $0, R32(%rbx)
+ lea -8(rp), rp
+ lea -8(up), up
+ jmp L(L0)
+
+L(b1): mov (rp), %r10
+ mov $0, R32(%rcx)
+ mul v0
+ add %rax, %r10
+ mov %rdx, %r14
+ adc $0, %r14
+ mov %r10, 0(rp)
+ jmp L(L1)
+
+L(b2): mov (rp), %r8
+ mul v0
+ add %rax, %r8
+ mov $0, R32(%rbx)
+ mov %rdx, %r9
+ adc $0, %r9
+ lea -24(rp), rp
+ lea -24(up), up
+ inc n
+ jmp L(L2)
+
+C Main loop.  Each of the four sections (L1, L0, L3, L2) multiplies one up
+C limb by v1 and the following up limb by v0, and adds both products to one
+C rp limb.  r8/r9/rbx and r10/r14/rcx alternate as three-word accumulators.
+ ALIGN(32)
+L(top): mov %r10, 32(rp)
+ adc %rbx, %r14 C 10
+ lea 32(rp), rp
+L(L1): mov 0(up), %rax
+ adc $0, R32(%rcx)
+ mul v1
+ mov $0, R32(%rbx)
+ mov 8(rp), %r8
+ add %rax, %r8
+ mov %rdx, %r9
+ mov 8(up), %rax
+ adc $0, %r9
+ mul v0
+ add %rax, %r8
+ adc %rdx, %r9
+ adc $0, R32(%rbx)
+ add %r14, %r8 C 0 12
+ adc %rcx, %r9 C 1
+L(L0): mov 8(up), %rax
+ adc $0, R32(%rbx)
+ mov 16(rp), %r10
+ mul v1
+ add %rax, %r10
+ mov %rdx, %r14
+ mov 16(up), %rax
+ mov $0, R32(%rcx)
+ adc $0, %r14
+ mul v0
+ add %rax, %r10
+ adc %rdx, %r14
+ adc $0, R32(%rcx)
+ mov %r8, 8(rp)
+L(L3): mov 24(rp), %r8
+ mov 16(up), %rax
+ mul v1
+ add %r9, %r10 C 3
+ adc %rbx, %r14 C 4
+ adc $0, R32(%rcx)
+ add %rax, %r8
+ mov %rdx, %r9
+ adc $0, %r9
+ mov 24(up), %rax
+ mul v0
+ add %rax, %r8
+ mov $0, R32(%rbx)
+ adc %rdx, %r9
+ adc $0, R32(%rbx)
+ add %r14, %r8 C 6
+ adc %rcx, %r9 C 7
+ mov %r10, 16(rp)
+L(L2): mov 24(up), %rax
+ adc $0, R32(%rbx)
+ mov 32(rp), %r10
+ mul v1
+ add %rax, %r10
+ mov 32(up), %rax
+ lea 32(up), up
+ mov %rdx, %r14
+ adc $0, %r14
+ mov %r8, 24(rp)
+ mov $0, R32(%rcx)
+ mul v0
+ add %rax, %r10
+ adc %rdx, %r14
+ adc $0, R32(%rcx)
+ add %r9, %r10 C 9
+ dec n
+ jnz L(top)
+
+C Wind down: one last product of the top up limb with v1.  Its low word,
+C plus the pending carries, is stored at 40(rp); the remaining high word is
+C left in rax.
+ mov %r10, 32(rp)
+ adc %rbx, %r14
+ mov 0(up), %rax
+ adc $0, R32(%rcx)
+ mul v1
+ mov %rax, %r8
+ mov %rdx, %rax
+ add %r14, %r8
+ adc %rcx, %rax
+ mov %r8, 40(rp)
+
+ pop %r14
+ pop %r13
+ pop %r12
+ pop %rbx
+ FUNC_EXIT()
+ ret
+EPILOGUE()
+ASM_END()
--- /dev/null
+dnl AMD64 mpn_addlsh1_n -- rp[] = up[] + (vp[] << 1)
+dnl AMD64 mpn_rsblsh1_n -- rp[] = (vp[] << 1) - up[]
+
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
+dnl Copyright 2008, 2010, 2011, 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+define(LSH, 1)
+define(RSH, 63)
+
+ifdef(`OPERATION_addlsh1_n', `
+ define(ADDSUB, add)
+ define(ADCSBB, adc)
+ define(func_n, mpn_addlsh1_n)
+ define(func_nc, mpn_addlsh1_nc)')
+ifdef(`OPERATION_rsblsh1_n', `
+ define(ADDSUB, sub)
+ define(ADCSBB, sbb)
+ define(func_n, mpn_rsblsh1_n)
+ define(func_nc, mpn_rsblsh1_nc)')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_addlsh1_nc mpn_rsblsh1_n mpn_rsblsh1_nc)
+include_mpn(`x86_64/coreisbr/aorrlshC_n.asm')
--- /dev/null
+dnl AMD64 mpn_addlsh2_n -- rp[] = up[] + (vp[] << 2)
+dnl AMD64 mpn_rsblsh2_n -- rp[] = (vp[] << 2) - up[]
+
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
+dnl Copyright 2008, 2010, 2011, 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+define(LSH, 2)
+define(RSH, 62)
+
+ifdef(`OPERATION_addlsh2_n', `
+ define(ADDSUB, add)
+ define(ADCSBB, adc)
+ define(func_n, mpn_addlsh2_n)
+ define(func_nc, mpn_addlsh2_nc)')
+ifdef(`OPERATION_rsblsh2_n', `
+ define(ADDSUB, sub)
+ define(ADCSBB, sbb)
+ define(func_n, mpn_rsblsh2_n)
+ define(func_nc, mpn_rsblsh2_nc)')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+C mpn_rsblsh2_nc removed below, its idea of carry-in is inconsistent with
+C refmpn_rsblsh2_nc
+MULFUNC_PROLOGUE(mpn_addlsh2_n mpn_addlsh2_nc mpn_rsblsh2_n)
+include_mpn(`x86_64/coreisbr/aorrlshC_n.asm')
--- /dev/null
+dnl AMD64 mpn_addlshC_n -- rp[] = up[] + (vp[] << C)
+dnl AMD64 mpn_rsblshC_n -- rp[] = (vp[] << C) - up[]
+
+dnl Copyright 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+
+C cycles/limb
+C AMD K8,K9 ?
+C AMD K10 ?
+C Intel P4 ?
+C Intel core2 3.25
+C Intel NHM 4
+C Intel SBR 2 C (or 1.95 when L(top)'s alignment = 16 (mod 32))
+C Intel atom ?
+C VIA nano ?
+
+C This code probably runs close to optimally on Sandy Bridge (using 4-way
+C unrolling). It also runs reasonably well on Core 2, but it runs poorly on
+C all other processors, including Nehalem.
+
+C INPUT PARAMETERS
+define(`rp', `%rdi')
+define(`up', `%rsi')
+define(`vp', `%rdx')
+define(`n', `%rcx')
+define(`cy', `%r8')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+C Shared body for the aorrlshC_n wrappers.  The including file is expected to
+C define func_n, func_nc, ADCSBB and RSH.  func_nc only sets up the carry-in
+C and the first shifted limb, then jumps to the labels inside func_n.
+ASM_START()
+ TEXT
+ ALIGN(16)
+PROLOGUE(func_nc)
+ FUNC_ENTRY(4)
+IFDOS(` mov 56(%rsp), %r8 ')
+ push %rbp
+C rax = -cy.  The add eax,eax at each entry label then produces CF exactly
+C when the top bit of eax is set.
+ mov cy, %rax
+ neg %rax C set msb on carry
+ xor R32(%rbp), R32(%rbp) C limb carry
+ mov (vp), %r8
+ shrd $RSH, %r8, %rbp
+C r9, not rax, holds n mod 4 here, because rax carries the incoming carry.
+ mov R32(n), R32(%r9)
+ and $3, R32(%r9)
+ je L(b00)
+ cmp $2, R32(%r9)
+ jc L(b01)
+ je L(b10)
+ jmp L(b11)
+EPILOGUE()
+
+ ALIGN(16)
+PROLOGUE(func_n)
+ FUNC_ENTRY(4)
+ push %rbp
+C With rbp cleared first, the shrd leaves the first vp limb shifted left by
+C 64-RSH bits in rbp, with zeros in the low bits.
+ xor R32(%rbp), R32(%rbp) C limb carry
+ mov (vp), %r8
+ shrd $RSH, %r8, %rbp
+C eax = n mod 4 selects the feed-in block.  Doubling a value of at most 3 in
+C the init-carry add below cannot set CF, so this entry starts without carry.
+ mov R32(n), R32(%rax)
+ and $3, R32(%rax)
+ je L(b00)
+ cmp $2, R32(%rax)
+ jc L(b01)
+ je L(b10)
+
+L(b11): mov 8(vp), %r9
+ shrd $RSH, %r9, %r8
+ mov 16(vp), %r10
+ shrd $RSH, %r10, %r9
+ add R32(%rax), R32(%rax) C init carry flag
+ ADCSBB (up), %rbp
+ ADCSBB 8(up), %r8
+ ADCSBB 16(up), %r9
+ mov %rbp, (rp)
+ mov %r8, 8(rp)
+ mov %r9, 16(rp)
+ mov %r10, %rbp
+ lea 24(up), up
+ lea 24(vp), vp
+ lea 24(rp), rp
+ sbb R32(%rax), R32(%rax) C save carry flag
+ sub $3, n
+ ja L(top)
+ jmp L(end)
+
+L(b01): add R32(%rax), R32(%rax) C init carry flag
+ ADCSBB (up), %rbp
+ mov %rbp, (rp)
+ mov %r8, %rbp
+ lea 8(up), up
+ lea 8(vp), vp
+ lea 8(rp), rp
+ sbb R32(%rax), R32(%rax) C save carry flag
+ sub $1, n
+ ja L(top)
+ jmp L(end)
+
+L(b10): mov 8(vp), %r9
+ shrd $RSH, %r9, %r8
+ add R32(%rax), R32(%rax) C init carry flag
+ ADCSBB (up), %rbp
+ ADCSBB 8(up), %r8
+ mov %rbp, (rp)
+ mov %r8, 8(rp)
+ mov %r9, %rbp
+ lea 16(up), up
+ lea 16(vp), vp
+ lea 16(rp), rp
+ sbb R32(%rax), R32(%rax) C save carry flag
+ sub $2, n
+ ja L(top)
+ jmp L(end)
+
+C Main loop, four limbs per pass.  On entry at L(top), rbp holds the previous
+C unshifted vp limb; the shrd chain turns rbp, r8, r9, r10 into four shifted
+C vp limbs, which ADCSBB then combines with the up limbs.
+ ALIGN(16)
+L(top): mov (vp), %r8
+ shrd $RSH, %r8, %rbp
+L(b00): mov 8(vp), %r9
+ shrd $RSH, %r9, %r8
+ mov 16(vp), %r10
+ shrd $RSH, %r10, %r9
+ mov 24(vp), %r11
+ shrd $RSH, %r11, %r10
+ lea 32(vp), vp
+ add R32(%rax), R32(%rax) C restore carry flag
+ ADCSBB (up), %rbp
+ ADCSBB 8(up), %r8
+ ADCSBB 16(up), %r9
+ ADCSBB 24(up), %r10
+ lea 32(up), up
+ mov %rbp, (rp)
+ mov %r8, 8(rp)
+ mov %r9, 16(rp)
+ mov %r10, 24(rp)
+ mov %r11, %rbp
+ lea 32(rp), rp
+ sbb R32(%rax), R32(%rax) C save carry flag
+ sub $4, n
+ jnz L(top)
+
+C Result in rax: the bits shifted out of the top vp limb (rbp >> RSH), with
+C the final carry flag applied by ADCSBB $0.
+L(end): shr $RSH, %rbp
+ add R32(%rax), R32(%rax) C restore carry flag
+ ADCSBB $0, %rbp
+ mov %rbp, %rax
+ pop %rbp
+ FUNC_EXIT()
+ ret
+EPILOGUE()
--- /dev/null
+dnl AMD64 mpn_addlsh_n -- rp[] = up[] + (vp[] << k)
+dnl AMD64 mpn_rsblsh_n -- rp[] = (vp[] << k) - up[]
+dnl Optimised for Sandy Bridge.
+
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
+dnl Copyright 2011, 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C AMD K8,K9 ?
+C AMD K10 5.25
+C Intel P4 ?
+C Intel core2 3.1
+C Intel NHM 3.95
+C Intel SBR 2.75
+C Intel atom ?
+C VIA nano ?
+
+C The inner-loop probably runs close to optimally on Sandy Bridge (using 4-way
+C unrolling). The rest of the code is quite crude, and could perhaps be made
+C both smaller and faster.
+
+C INPUT PARAMETERS
+define(`rp', `%rdi')
+define(`up', `%rsi')
+define(`vp', `%rdx')
+define(`n', `%rcx')
+define(`cnt', `%r8')
+define(`cy', `%r9') C for _nc variant
+
+C Select the operation from the OPERATION_* symbol.  The addlsh build uses
+C add/adc with an empty IFRSB; the rsblsh build uses sub/sbb with an IFRSB
+C that emits its argument.  Each build also names its two entry points.
+ifdef(`OPERATION_addlsh_n', `
+ define(ADDSUB, add)
+ define(ADCSBB, adc)
+ define(IFRSB, )
+ define(func_n, mpn_addlsh_n)
+ define(func_nc, mpn_addlsh_nc)')
+ifdef(`OPERATION_rsblsh_n', `
+ define(ADDSUB, sub)
+ define(ADCSBB, sbb)
+ define(IFRSB, `$1')
+ define(func_n, mpn_rsblsh_n)
+ define(func_nc, mpn_rsblsh_nc)')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+C mpn_rsblsh_nc removed below, its idea of carry-in is inconsistent with
+C refmpn_rsblsh_nc
+MULFUNC_PROLOGUE(mpn_addlsh_n mpn_addlsh_nc mpn_rsblsh_n)
+
+ASM_START()
+ TEXT
+ ALIGN(32)
+C func_n -- rp[] = up[] + (vp[] << cnt) for the addlsh build, or
+C (vp[] << cnt) - up[] for the rsblsh build, over n limbs.  Returns in rax the
+C bits shifted out of the top vp limb combined with the final carry or borrow.
+C
+C Register use:
+C   rcx      negated cnt; shrd and shr by cl then shift right by 64-cnt
+C            (NOTE(review): relies on cnt being in 1..63 -- confirm with callers)
+C   rax      remaining limb count
+C   rbx      saved carry flag, kept as 0 or -1
+C   rbp      n mod 4 during dispatch, afterwards a shifted limb in the loop
+C   r8-r11   vp limbs being shifted
+PROLOGUE(func_n)
+ FUNC_ENTRY(4)
+IFDOS(` mov 56(%rsp), %r8d ') C cnt
+ push %rbx
+ xor R32(%rbx), R32(%rbx) C clear CF save register
+C L(ent) is shared with func_nc, which arrives with rbx preset from cy.
+C n lives in rcx, so it is copied out before rcx is reused for the shift count.
+L(ent): push %rbp
+ mov R32(n), R32(%rbp)
+ mov n, %rax
+ mov R32(cnt), R32(%rcx)
+ neg R32(%rcx)
+ and $3, R32(%rbp)
+ jz L(b0)
+C Bias the pointers by 8*(n mod 4) - 32 so the leading partial group sits in
+C the top slots (offsets 8, 16, 24) of a 4-limb group, matching the offsets
+C used at the loop entry points lo1..lo3 and the wind-down stores wd1..wd3.
+ lea -32(vp,%rbp,8), vp
+ lea -32(up,%rbp,8), up
+ lea -32(rp,%rbp,8), rp
+ cmp $2, R32(%rbp)
+ jc L(b1)
+ jz L(b2)
+
+C n mod 4 = 3.  r8 starts at zero so zero bits are shifted in below the
+C lowest limb.  L(3) is the n = 3 case, which goes straight to the final stores.
+L(b3): xor %r8, %r8
+ mov 8(vp), %r9
+ mov 16(vp), %r10
+ shrd R8(%rcx), %r9, %r8
+ shrd R8(%rcx), %r10, %r9
+ mov 24(vp), %r11
+ shrd R8(%rcx), %r11, %r10
+ sub $3, %rax
+ jz L(3)
+ add R32(%rbx), R32(%rbx)
+ lea 32(vp), vp
+ ADCSBB 8(up), %r8
+ ADCSBB 16(up), %r9
+ ADCSBB 24(up), %r10
+ lea 32(up), up
+ jmp L(lo3)
+L(3): add R32(%rbx), R32(%rbx)
+ lea 32(vp), vp
+ ADCSBB 8(up), %r8
+ ADCSBB 16(up), %r9
+ ADCSBB 24(up), %r10
+ jmp L(wd3)
+
+C n mod 4 = 0.  rbp is cleared so the first shrd at L(lo0) shifts zero bits in
+C below the lowest limb.
+L(b0): mov (vp), %r8
+ mov 8(vp), %r9
+ xor R32(%rbp), R32(%rbp)
+ jmp L(lo0)
+
+C n mod 4 = 1.  L(1) is the n = 1 case.
+L(b1): xor %r10, %r10
+ mov 24(vp), %r11
+ shrd R8(%rcx), %r11, %r10
+ sub $1, %rax
+ jz L(1)
+ add R32(%rbx), R32(%rbx)
+ lea 32(vp), vp
+ ADCSBB 24(up), %r10
+ lea 32(up), up
+ mov (vp), %r8
+ jmp L(lo1)
+L(1): add R32(%rbx), R32(%rbx)
+ ADCSBB 24(up), %r10
+ jmp L(wd1)
+
+C n mod 4 = 2.  L(2) is the n = 2 case.
+L(b2): xor %r9, %r9
+ mov 16(vp), %r10
+ shrd R8(%rcx), %r10, %r9
+ mov 24(vp), %r11
+ shrd R8(%rcx), %r11, %r10
+ sub $2, %rax
+ jz L(2)
+ add R32(%rbx), R32(%rbx)
+ lea 32(vp), vp
+ ADCSBB 16(up), %r9
+ ADCSBB 24(up), %r10
+ lea 32(up), up
+ jmp L(lo2)
+L(2): add R32(%rbx), R32(%rbx)
+ ADCSBB 16(up), %r9
+ ADCSBB 24(up), %r10
+ jmp L(wd2)
+
+C Main loop, four limbs per iteration.  The add of rbx to itself turns the
+C saved 0 or -1 back into CF before the ADCSBB chain; the sbb after the stores
+C parks CF in rbx again because the sub on rax overwrites the flags.
+ ALIGN(32) C 16-byte alignment is not enough!
+L(top): shrd R8(%rcx), %r11, %r10
+ add R32(%rbx), R32(%rbx)
+ lea 32(vp), vp
+ ADCSBB (up), %rbp
+ ADCSBB 8(up), %r8
+ ADCSBB 16(up), %r9
+ ADCSBB 24(up), %r10
+ mov %rbp, (rp)
+ lea 32(up), up
+L(lo3): mov %r8, 8(rp)
+L(lo2): mov %r9, 16(rp)
+ mov (vp), %r8
+L(lo1): mov %r10, 24(rp)
+ mov 8(vp), %r9
+ mov %r11, %rbp
+ lea 32(rp), rp
+ sbb R32(%rbx), R32(%rbx)
+L(lo0): shrd R8(%rcx), %r8, %rbp
+ mov 16(vp), %r10
+ shrd R8(%rcx), %r9, %r8
+ shrd R8(%rcx), %r10, %r9
+ mov 24(vp), %r11
+ sub $4, %rax
+ jg L(top)
+
+C Wind down: finish the last group of four, or arrive at wd1..wd3 from the
+C short cases above with CF still live from their ADCSBB chain.
+ shrd R8(%rcx), %r11, %r10
+ add R32(%rbx), R32(%rbx)
+ ADCSBB (up), %rbp
+ ADCSBB 8(up), %r8
+ ADCSBB 16(up), %r9
+ ADCSBB 24(up), %r10
+ mov %rbp, (rp)
+L(wd3): mov %r8, 8(rp)
+L(wd2): mov %r9, 16(rp)
+L(wd1): mov %r10, 24(rp)
+C Return value: rax becomes the final CF, r11 becomes the bits shifted out of
+C the top vp limb, and ADDSUB merges them.  The rsblsh build negates the result
+C so that it equals the shifted-out bits minus the borrow.
+ adc R32(%rax), R32(%rax) C rax is zero after loop
+ shr R8(%rcx), %r11
+ ADDSUB %r11, %rax
+IFRSB( neg %rax)
+ pop %rbp
+ pop %rbx
+ FUNC_EXIT()
+ ret
+EPILOGUE()
+C func_nc -- variant of func_n taking a carry-in limb cy.  It converts cy into
+C the 0 or -1 form kept in the CF save register rbx and then joins func_n at
+C L(ent).  Under DOS64 the fifth and sixth arguments are fetched from the stack.
+PROLOGUE(func_nc)
+ FUNC_ENTRY(4)
+IFDOS(` mov 56(%rsp), %r8d ') C cnt
+IFDOS(` mov 64(%rsp), %r9 ') C cy
+ push %rbx
+C neg sets CF exactly when cy is nonzero; sbb then spreads CF across rbx.
+ neg cy
+ sbb R32(%rbx), R32(%rbx) C initialise CF save register
+ jmp L(ent)
+EPILOGUE()
--- /dev/null
+dnl X86-64 mpn_add_n, mpn_sub_n, optimized for Intel Sandy Bridge.
+
+dnl Copyright 2003, 2004, 2005, 2007, 2008, 2010, 2011, 2012 Free Software
+dnl Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C cycles/limb
+C AMD K8,K9 1.85
+C AMD K10 ?
+C Intel P4 ?
+C Intel core2 5
+C Intel NHM 5.5
+C Intel SBR 1.61
+C Intel atom 3
+C VIA nano 3
+
+C INPUT PARAMETERS
+define(`rp', `%rdi')
+define(`up', `%rsi')
+define(`vp', `%rdx')
+define(`n', `%rcx')
+define(`cy', `%r8') C (only for mpn_add_nc and mpn_sub_nc)
+
+C Select the operation from the OPERATION_* symbol: adc for the add_n build,
+C sbb for the sub_n build, together with the names of the two entry points.
+ifdef(`OPERATION_add_n', `
+ define(ADCSBB, adc)
+ define(func, mpn_add_n)
+ define(func_nc, mpn_add_nc)')
+ifdef(`OPERATION_sub_n', `
+ define(ADCSBB, sbb)
+ define(func, mpn_sub_n)
+ define(func_nc, mpn_sub_nc)')
+
+MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+ TEXT
+ ALIGN(16)
+C func -- rp[] = up[] + vp[] for the add_n build, or up[] - vp[] for the sub_n
+C build, over n limbs.  Returns the carry or borrow out in rax.
+C
+C The loop is unrolled four ways.  n mod 4 picks the entry path, and n itself
+C is reduced to n / 4 to count loop iterations.  r8 holds the carry-in: zero
+C here, or the caller value when entered through func_nc at L(ent).  The
+C carry lives in CF across the whole loop, which works because the lea, mov
+C and dec instructions used for bookkeeping leave CF untouched.
+PROLOGUE(func)
+ FUNC_ENTRY(4)
+ xor %r8, %r8
+L(ent): mov R32(n), R32(%rax)
+ shr $2, n
+ and $3, R32(%rax)
+ jz L(b0)
+ cmp $2, R32(%rax)
+ jz L(b2)
+ jg L(b3)
+
+C n mod 4 = 1.  With no full group left (n = 1) the single limb is handled
+C inline and the function returns at once.
+L(b1): mov (up), %r10
+ test n, n
+ jnz L(gt1)
+ neg R32(%r8) C set CF from argument
+ ADCSBB (vp), %r10
+ mov %r10, (rp)
+ mov R32(n), R32(%rax) C zero rax
+ adc R32(%rax), R32(%rax)
+ FUNC_EXIT()
+ ret
+C Otherwise bias the pointers so the loop offsets at L(m1) address the right
+C limbs.
+L(gt1): neg R32(%r8)
+ ADCSBB (vp), %r10
+ mov 8(up), %r11
+ lea 16(up), up
+ lea -16(vp), vp
+ lea -16(rp), rp
+ jmp L(m1)
+
+C n mod 4 = 3.  For n = 3 jump directly to the tail at L(e3), with rp biased
+C so that the stores at 32, 40 and 48(rp) hit rp[0..2].
+L(b3): mov (up), %rax
+ mov 8(up), %r9
+ mov 16(up), %r10
+ test n, n
+ jnz L(gt3)
+ neg R32(%r8)
+ lea -32(rp), rp
+ jmp L(e3)
+L(gt3): neg R32(%r8)
+ ADCSBB (vp), %rax
+ jmp L(m3)
+
+ nop C alignment
+ nop C alignment
+C n mod 4 = 0.
+L(b0): mov (up), %r11
+ neg R32(%r8)
+ lea -24(vp), vp
+ lea -24(rp), rp
+ lea 8(up), up
+ jmp L(m0)
+
+C n mod 4 = 2.  For n = 2 jump directly to the tail at L(e2).
+L(b2): mov (up), %r9
+ mov 8(up), %r10
+ lea -8(vp), vp
+ test n, n
+ jnz L(gt2)
+ neg R32(%r8)
+ lea -40(rp), rp
+ jmp L(e2)
+L(gt2): neg R32(%r8)
+ lea -8(up), up
+ lea -8(rp), rp
+ jmp L(m2)
+
+C Main loop.  Loads for the next iteration are interleaved with the ADCSBB
+C chain and the stores of the current one.
+ ALIGN(8)
+L(top): mov %r11, 24(rp)
+ ADCSBB (vp), %rax
+ lea 32(rp), rp
+L(m3): mov %rax, (rp)
+L(m2): ADCSBB 8(vp), %r9
+ mov 24(up), %r11
+ mov %r9, 8(rp)
+ ADCSBB 16(vp), %r10
+ lea 32(up), up
+L(m1): mov %r10, 16(rp)
+L(m0): ADCSBB 24(vp), %r11
+ mov (up), %rax
+ mov 8(up), %r9
+ lea 32(vp), vp
+ dec n
+ mov 16(up), %r10
+ jnz L(top)
+
+C Tail: the last three limbs, then the carry out.  n is zero at this point,
+C so copying it clears rax before adc captures CF.
+ mov %r11, 24(rp)
+L(e3): ADCSBB (vp), %rax
+ mov %rax, 32(rp)
+L(e2): ADCSBB 8(vp), %r9
+ mov %r9, 40(rp)
+L(e1): ADCSBB 16(vp), %r10
+ mov %r10, 48(rp)
+ mov R32(n), R32(%rax) C zero rax
+ adc R32(%rax), R32(%rax)
+ FUNC_EXIT()
+ ret
+EPILOGUE()
+C func_nc -- variant of func taking a carry-in in r8.  It joins func at L(ent),
+C skipping the xor that clears r8 there.  Under DOS64 the fifth argument is
+C fetched from the stack.
+PROLOGUE(func_nc)
+ FUNC_ENTRY(4)
+IFDOS(` mov 56(%rsp), %r8 ')
+ jmp L(ent)
+EPILOGUE()
--- /dev/null
+dnl X86-64 mpn_addmul_1 and mpn_submul_1 optimised for Intel Sandy Bridge.
+
+dnl Copyright 2003, 2004, 2005, 2007, 2008, 2011, 2012 Free Software
+dnl Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C AMD K8,K9 4.77
+C AMD K10 4.77
+C AMD bd1 ?
+C AMD bobcat 5.78
+C Intel P4 15-17
+C Intel core2 5.4
+C Intel NHM 5.23
+C Intel SBR 3.25
+C Intel atom ?
+C VIA nano 5.5
+
+C The loop of this code is the result of running a code generation and
+C optimisation tool suite written by David Harvey and Torbjorn Granlund.
+
+C TODO
+C * The loop is great, but the prologue code was quickly written. Tune it!
+
+define(`rp', `%rdi') C rcx
+define(`up', `%rsi') C rdx
+define(`n_param', `%rdx') C r8
+define(`v0', `%rcx') C r9
+
+define(`n', `%rbx')
+
+C Select the operation from the OPERATION_* symbol: add for the addmul_1 build,
+C sub for the submul_1 build, together with the entry point name.
+ifdef(`OPERATION_addmul_1',`
+ define(`ADDSUB', `add')
+ define(`func', `mpn_addmul_1')
+')
+ifdef(`OPERATION_submul_1',`
+ define(`ADDSUB', `sub')
+ define(`func', `mpn_submul_1')
+')
+
+dnl Disable until tested ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1)
+
+IFDOS(` define(`up', ``%rsi'') ') dnl
+IFDOS(` define(`rp', ``%rcx'') ') dnl
+IFDOS(` define(`v0', ``%r9'') ') dnl
+IFDOS(` define(`r9', ``rdi'') ') dnl
+IFDOS(` define(`n', ``%r8'') ') dnl
+IFDOS(` define(`r8', ``r11'') ') dnl
+
+ASM_START()
+ TEXT
+ ALIGN(16)
+C func -- multiply the n limbs at up by the single limb v0 and add the product
+C to (addmul_1) or subtract it from (submul_1) the n limbs at rp, in place.
+C Returns the most significant limb of the result, including carries, in rax.
+C
+C up and rp are advanced to their last limb and n is negated, so n serves as
+C a negative index that counts up towards zero; the carry out of add $4 on n
+C ends the loop.  n is rbx in the STD64 build, which is why rbx is saved.
+C The IFDOS lines belong to the DOS64 variant, which the file leaves disabled.
+C NOTE(review): only the low 32 bits of n_param are copied into n -- confirm
+C that larger sizes cannot reach this code.
+PROLOGUE(func)
+
+IFDOS(``push %rsi '')
+IFDOS(``push %rdi '')
+IFDOS(``mov %rdx, %rsi '')
+
+ mov (up), %rax
+ push %rbx
+IFSTD(` mov R32(n_param), R32(%rdx) ')
+IFDOS(` mov n, %rdx ')
+IFSTD(` mov R32(n_param), R32(n) ')
+
+C Dispatch on n mod 4 into the 4-way unrolled loop.
+ lea -8(up,n,8), up
+ and $3, R32(%rdx)
+ jz L(b0)
+ cmp $2, R32(%rdx)
+ jz L(b2)
+ jnc L(b3)
+
+C n mod 4 = 1.  The carry out of add $4 is set only when n = 1, in which case
+C the single limb is finished at L(end).
+L(b1): mov (rp), %r8
+ lea -8(rp,n,8), rp
+ neg n
+ mov $0, R32(%r11)
+ add $4, n
+ jc L(end)
+ jmp L(top)
+
+C n mod 4 = 2.
+L(b2): mov (rp), %r10
+ lea -8(rp,n,8), rp
+ neg n
+ add $1, n
+ mul v0
+ ADDSUB %rax, %r10
+ mov 8(up,n,8), %rax
+ mov %rdx, %r11
+ mov $0, R32(%r9)
+ jmp L(L2)
+
+C n mod 4 = 3.
+L(b3): mov (rp), %r8
+ lea -8(rp,n,8), rp
+ neg n
+ add $2, n
+ mul v0
+ mov %rdx, %r9
+ mov $0, R32(%r11)
+ jmp L(L3)
+
+C n mod 4 = 0.
+L(b0): mov (rp), %r10
+ lea -8(rp,n,8), rp
+ neg n
+ add $3, n
+ mul v0
+ ADDSUB %rax, %r10
+ mov %rdx, %r11
+ mov -8(up,n,8), %rax
+ adc $0, %r11
+ mov $0, R32(%r9)
+ jmp L(L0)
+
+C Main loop.  r8 and r10 alternate as the rp limb being updated, while r9 and
+C r11 alternate as the high product limb carried into the next position.  Each
+C ADDSUB is followed by adc $0 to fold its carry or borrow into that high limb.
+ ALIGN(16)
+L(top): mul v0
+ ADDSUB %rax, %r8
+ mov %rdx, %r9
+ adc $0, %r9
+ mov -16(up,n,8), %rax
+ ADDSUB %r11, %r8
+ mov -16(rp,n,8), %r10
+ adc $0, %r9
+ mul v0
+ ADDSUB %rax, %r10
+ mov %rdx, %r11
+ mov -8(up,n,8), %rax
+ adc $0, %r11
+ mov %r8, -24(rp,n,8)
+L(L0): mul v0
+ ADDSUB %r9, %r10
+ mov -8(rp,n,8), %r8
+ adc $0, %r11
+ mov %rdx, %r9
+ mov %r10, -16(rp,n,8)
+L(L3): ADDSUB %rax, %r8
+ adc $0, %r9
+ mov (up,n,8), %rax
+ ADDSUB %r11, %r8
+ adc $0, %r9
+ mov (rp,n,8), %r10
+ mul v0
+ ADDSUB %rax, %r10
+ mov 8(up,n,8), %rax
+ mov %rdx, %r11
+ mov %r8, -8(rp,n,8)
+L(L2): adc $0, %r11
+ mov 8(rp,n,8), %r8
+ ADDSUB %r9, %r10
+ adc $0, %r11
+ mov %r10, (rp,n,8)
+ add $4, n
+ jnc L(top)
+
+C Last limb: store it at rp, which points at the final limb, and return the
+C accumulated high limb in rax.
+L(end): mul v0
+ ADDSUB %rax, %r8
+ mov %rdx, %rax
+ adc $0, %rax
+ ADDSUB %r11, %r8
+ adc $0, %rax
+ mov %r8, (rp)
+
+ pop %rbx
+IFDOS(``pop %rdi '')
+IFDOS(``pop %rsi '')
+ ret
+EPILOGUE()
/* Sandy Bridge gmp-mparam.h -- Compiler/machine parameter header file.
Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007,
-2008, 2009, 2010, 2011 Free Software Foundation, Inc.
+2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
#define MOD_1_NORM_THRESHOLD 0 /* always */
#define MOD_1_UNNORM_THRESHOLD 0 /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD 6
-#define MOD_1U_TO_MOD_1_1_THRESHOLD 6
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD 0 /* never mpn_mod_1_1p */
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD 22
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 15
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 3
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 3
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 9
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD 20
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 10
#define USE_PREINV_DIVREM_1 1 /* native */
+#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
#define DIVEXACT_1_THRESHOLD 0 /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD 34
+#define BMOD_1_TO_MOD_1_THRESHOLD 30
-#define MUL_TOOM22_THRESHOLD 20
+#define MUL_TOOM22_THRESHOLD 18
#define MUL_TOOM33_THRESHOLD 57
-#define MUL_TOOM44_THRESHOLD 166
-#define MUL_TOOM6H_THRESHOLD 387
-#define MUL_TOOM8H_THRESHOLD 527
+#define MUL_TOOM44_THRESHOLD 154
+#define MUL_TOOM6H_THRESHOLD 226
+#define MUL_TOOM8H_THRESHOLD 333
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD 105
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD 114
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD 113
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD 97
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 108
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 105
#define MUL_TOOM42_TO_TOOM63_THRESHOLD 114
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD 138
#define SQR_BASECASE_THRESHOLD 0 /* always (native) */
-#define SQR_TOOM2_THRESHOLD 30
-#define SQR_TOOM3_THRESHOLD 93
-#define SQR_TOOM4_THRESHOLD 278
-#define SQR_TOOM6_THRESHOLD 369
-#define SQR_TOOM8_THRESHOLD 557
+#define SQR_TOOM2_THRESHOLD 26
+#define SQR_TOOM3_THRESHOLD 81
+#define SQR_TOOM4_THRESHOLD 250
+#define SQR_TOOM6_THRESHOLD 345
+#define SQR_TOOM8_THRESHOLD 381
-#define MULMOD_BNM1_THRESHOLD 13
-#define SQRMOD_BNM1_THRESHOLD 18
+#define MULMID_TOOM42_THRESHOLD 24
-#define MUL_FFT_MODF_THRESHOLD 376 /* k = 5 */
+#define MULMOD_BNM1_THRESHOLD 14
+#define SQRMOD_BNM1_THRESHOLD 14
+
+#define POWM_SEC_TABLE 4,35,516,1036,1222
+
+#define MUL_FFT_MODF_THRESHOLD 380 /* k = 5 */
#define MUL_FFT_TABLE3 \
- { { 376, 5}, { 17, 6}, { 9, 5}, { 21, 6}, \
+ { { 380, 5}, { 17, 6}, { 9, 5}, { 19, 6}, \
{ 11, 5}, { 23, 6}, { 21, 7}, { 11, 6}, \
- { 23, 7}, { 12, 6}, { 25, 7}, { 13, 6}, \
- { 27, 7}, { 21, 8}, { 11, 7}, { 25, 8}, \
+ { 23, 7}, { 21, 8}, { 11, 7}, { 24, 8}, \
{ 13, 7}, { 27, 8}, { 15, 7}, { 31, 8}, \
{ 17, 7}, { 35, 8}, { 19, 7}, { 39, 8}, \
{ 21, 9}, { 11, 8}, { 27, 9}, { 15, 8}, \
{ 35, 9}, { 19, 8}, { 41, 9}, { 23, 8}, \
- { 47, 9}, { 27,10}, { 15, 9}, { 39,10}, \
+ { 49, 9}, { 27,10}, { 15, 9}, { 39,10}, \
{ 23, 9}, { 51,11}, { 15,10}, { 31, 9}, \
- { 67,10}, { 39, 9}, { 79,10}, { 47, 9}, \
+ { 67,10}, { 39, 9}, { 83,10}, { 47, 9}, \
{ 95,10}, { 55,11}, { 31,10}, { 79,11}, \
{ 47,10}, { 95,12}, { 31,11}, { 63,10}, \
- { 135,11}, { 79,10}, { 159,11}, { 95,10}, \
- { 191, 8}, { 767,12}, { 63,11}, { 127,10}, \
- { 255, 9}, { 511,11}, { 143,10}, { 287,11}, \
- { 159, 9}, { 639,12}, { 95,11}, { 191,13}, \
- { 63,12}, { 127,10}, { 511,11}, { 271,10}, \
- { 543, 9}, { 1087,10}, { 607,12}, { 159,11}, \
- { 319,10}, { 639,11}, { 335,10}, { 671,11}, \
- { 351,10}, { 703, 9}, { 1407,10}, { 735,12}, \
- { 191,11}, { 415,10}, { 831,12}, { 223,11}, \
- { 447,13}, { 127,12}, { 255,11}, { 543,12}, \
- { 287,11}, { 607,12}, { 319,11}, { 639,12}, \
- { 351,11}, { 703,13}, { 191,12}, { 383,11}, \
- { 767,12}, { 415,11}, { 831,12}, { 447,11}, \
- { 895,12}, { 479,14}, { 127,13}, { 255,12}, \
- { 543,11}, { 1087,12}, { 607,13}, { 319,12}, \
- { 735,13}, { 383,12}, { 831,11}, { 1663,13}, \
- { 447,12}, { 959,11}, { 1919,13}, { 511,12}, \
- { 1087,11}, { 2175,13}, { 575,12}, { 1215,11}, \
+ { 135,11}, { 79,10}, { 159, 9}, { 319,10}, \
+ { 167,11}, { 95,10}, { 191, 9}, { 383,12}, \
+ { 63,11}, { 127,10}, { 255, 9}, { 511,10}, \
+ { 271,11}, { 143,10}, { 287, 9}, { 575,10}, \
+ { 303,11}, { 159,10}, { 319,12}, { 95,11}, \
+ { 191,10}, { 383,11}, { 207,10}, { 415,13}, \
+ { 63,12}, { 127,11}, { 255,10}, { 511,11}, \
+ { 271,10}, { 543,11}, { 287,10}, { 575,11}, \
+ { 303,10}, { 607,12}, { 159,11}, { 319,10}, \
+ { 639,11}, { 351,10}, { 703, 9}, { 1407,11}, \
+ { 367,12}, { 191,11}, { 383,10}, { 767,11}, \
+ { 415,10}, { 831,12}, { 223,11}, { 447,10}, \
+ { 895,13}, { 127,12}, { 255,11}, { 543,10}, \
+ { 1087,12}, { 287,11}, { 575,10}, { 1151,11}, \
+ { 607,12}, { 319,11}, { 639,12}, { 351,11}, \
+ { 703,10}, { 1407,11}, { 735,13}, { 191,12}, \
+ { 383,11}, { 767,12}, { 415,11}, { 831,10}, \
+ { 1663,12}, { 447,11}, { 895,14}, { 127,13}, \
+ { 255,12}, { 511,11}, { 1023,12}, { 543,11}, \
+ { 1087,12}, { 575,11}, { 1151,12}, { 607,11}, \
+ { 1215,13}, { 319,12}, { 639,11}, { 1279,12}, \
+ { 703,11}, { 1407,13}, { 383,12}, { 767,11}, \
+ { 1535,12}, { 831,11}, { 1663,13}, { 447,12}, \
+ { 959,11}, { 1919,14}, { 255,13}, { 511,12}, \
+ { 1087,13}, { 575,12}, { 1215,11}, { 2431,13}, \
+ { 639,12}, { 1279,13}, { 703,12}, { 1407,14}, \
+ { 383,13}, { 831,12}, { 1663,13}, { 959,12}, \
+ { 1919,14}, { 511,13}, { 1087,12}, { 2175,13}, \
+ { 1215,12}, { 2431,14}, { 639,13}, { 1343,12}, \
+ { 2687,13}, { 1407,12}, { 2815,13}, { 1471,14}, \
+ { 767,13}, { 1663,14}, { 895,13}, { 1919,15}, \
+ { 511,14}, { 1023,13}, { 2175,14}, { 1151,13}, \
+ { 2431,12}, { 4863,14}, { 1279,13}, { 2687,14}, \
+ { 1407,13}, { 2815,15}, { 767,14}, { 1663,13}, \
+ { 3455,14}, { 1919,13}, { 3839,16}, { 511,15}, \
+ { 1023,14}, { 2431,13}, { 4863,15}, { 1279,14}, \
+ { 2943,13}, { 5887,15}, { 32768,16}, { 65536,17}, \
+ { 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \
+ {2097152,22}, {4194304,23}, {8388608,24} }
+#define MUL_FFT_TABLE3_SIZE 203
+#define MUL_FFT_THRESHOLD 4736
+
+#define SQR_FFT_MODF_THRESHOLD 304 /* k = 5 */
+#define SQR_FFT_TABLE3 \
+ { { 304, 5}, { 17, 6}, { 9, 5}, { 19, 6}, \
+ { 21, 7}, { 11, 6}, { 23, 7}, { 21, 8}, \
+ { 11, 7}, { 24, 8}, { 13, 7}, { 27, 8}, \
+ { 15, 7}, { 31, 8}, { 21, 9}, { 11, 8}, \
+ { 27, 9}, { 15, 8}, { 33, 9}, { 19, 8}, \
+ { 41, 9}, { 23, 8}, { 47, 9}, { 27,10}, \
+ { 15, 9}, { 39,10}, { 23, 9}, { 47,11}, \
+ { 15,10}, { 31, 9}, { 63,10}, { 39, 9}, \
+ { 79,10}, { 47,11}, { 31,10}, { 79,11}, \
+ { 47,12}, { 31,11}, { 63,10}, { 127, 9}, \
+ { 255, 8}, { 511,10}, { 135,11}, { 79,10}, \
+ { 159, 9}, { 319,11}, { 95,10}, { 191, 9}, \
+ { 383,12}, { 63,11}, { 127,10}, { 255, 9}, \
+ { 511,10}, { 271, 9}, { 543,11}, { 143,10}, \
+ { 287, 9}, { 575,11}, { 159,10}, { 319, 9}, \
+ { 639,12}, { 95,11}, { 191,10}, { 383, 9}, \
+ { 767,11}, { 207,13}, { 63,12}, { 127,11}, \
+ { 255,10}, { 511,11}, { 271,10}, { 543,11}, \
+ { 287,10}, { 575,11}, { 303,12}, { 159,11}, \
+ { 319,10}, { 639,11}, { 351,10}, { 703,12}, \
+ { 191,11}, { 383,10}, { 767,11}, { 415,10}, \
+ { 831,12}, { 223,11}, { 447,10}, { 895,11}, \
+ { 479,10}, { 959,13}, { 127,12}, { 255,11}, \
+ { 511,10}, { 1023,11}, { 543,12}, { 287,11}, \
+ { 575,10}, { 1151,11}, { 607,12}, { 319,11}, \
+ { 639,10}, { 1279,12}, { 351,11}, { 703,13}, \
+ { 191,12}, { 383,11}, { 767,12}, { 415,11}, \
+ { 831,12}, { 447,11}, { 895,12}, { 479,11}, \
+ { 959,10}, { 1919,14}, { 127,13}, { 255,12}, \
+ { 511,11}, { 1023,12}, { 543,11}, { 1087,12}, \
+ { 575,11}, { 1151,12}, { 607,13}, { 319,12}, \
+ { 639,11}, { 1279,12}, { 703,11}, { 1407,13}, \
+ { 383,12}, { 767,11}, { 1535,12}, { 831,13}, \
+ { 447,12}, { 959,11}, { 1919,14}, { 255,13}, \
+ { 511,12}, { 1087,13}, { 575,12}, { 1215,11}, \
{ 2431,13}, { 639,12}, { 1279,13}, { 703,12}, \
{ 1407,14}, { 383,13}, { 767,12}, { 1535,13}, \
- { 831,12}, { 1727,13}, { 959,12}, { 1919,14}, \
- { 511,13}, { 1087,12}, { 2175,13}, { 1215,12}, \
- { 2431,14}, { 639,13}, { 1343,12}, { 2687,13}, \
- { 1471,12}, { 2943,14}, { 767,13}, { 1663,14}, \
- { 895,13}, { 1919,15}, { 511,14}, { 1023,13}, \
- { 2175,14}, { 1151,13}, { 2431,12}, { 4863,14}, \
- { 1279,13}, { 2687,14}, { 1407,13}, { 2943,15}, \
- { 767,14}, { 1535,13}, { 3199,14}, { 1663,13}, \
+ { 831,12}, { 1663,13}, { 959,12}, { 1919,15}, \
+ { 255,14}, { 511,13}, { 1087,12}, { 2175,13}, \
+ { 1215,12}, { 2431,14}, { 639,13}, { 1343,12}, \
+ { 2687,13}, { 1407,12}, { 2815,13}, { 1471,14}, \
+ { 767,13}, { 1663,14}, { 895,13}, { 1919,15}, \
+ { 511,14}, { 1023,13}, { 2175,14}, { 1151,13}, \
+ { 2431,12}, { 4863,14}, { 1279,13}, { 2687,14}, \
+ { 1407,13}, { 2815,15}, { 767,14}, { 1663,13}, \
{ 3455,14}, { 1919,16}, { 511,15}, { 1023,14}, \
{ 2431,13}, { 4863,15}, { 1279,14}, { 2943,13}, \
- { 5887,15}, { 1535,14}, { 16384,15}, { 32768,16}, \
- { 65536,17}, { 131072,18}, { 262144,19}, { 524288,20}, \
- {1048576,21}, {2097152,22}, {4194304,23}, {8388608,24} }
-#define MUL_FFT_TABLE3_SIZE 184
-#define MUL_FFT_THRESHOLD 3712
+ { 5887,15}, { 32768,16}, { 65536,17}, { 131072,18}, \
+ { 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \
+ {4194304,23}, {8388608,24} }
+#define SQR_FFT_TABLE3_SIZE 198
+#define SQR_FFT_THRESHOLD 2752
-#define SQR_FFT_MODF_THRESHOLD 336 /* k = 5 */
-#define SQR_FFT_TABLE3 \
- { { 336, 5}, { 19, 6}, { 10, 5}, { 21, 6}, \
- { 21, 7}, { 11, 6}, { 23, 7}, { 12, 6}, \
- { 25, 7}, { 25, 8}, { 13, 7}, { 27, 8}, \
- { 15, 7}, { 31, 8}, { 17, 7}, { 35, 8}, \
- { 21, 9}, { 11, 8}, { 27, 9}, { 15, 8}, \
- { 35, 9}, { 19, 8}, { 41, 9}, { 23, 8}, \
- { 47, 9}, { 27,10}, { 15, 9}, { 39,10}, \
- { 23, 9}, { 51,11}, { 15,10}, { 31, 9}, \
- { 63,10}, { 39, 9}, { 79,10}, { 47,11}, \
- { 31,10}, { 79,11}, { 47,10}, { 95,12}, \
- { 31,11}, { 63,10}, { 127, 9}, { 255,10}, \
- { 135,11}, { 79, 8}, { 639,11}, { 95,10}, \
- { 191, 9}, { 383,12}, { 63, 9}, { 511,10}, \
- { 271,11}, { 143,10}, { 287, 9}, { 575,11}, \
- { 159,10}, { 319,12}, { 95,11}, { 191,10}, \
- { 383,11}, { 207,10}, { 415,13}, { 63,12}, \
- { 127,11}, { 255,10}, { 575,11}, { 303,10}, \
- { 639,11}, { 351,10}, { 703,12}, { 191,11}, \
- { 383,10}, { 767,11}, { 415,10}, { 831,12}, \
- { 223,11}, { 447,10}, { 959,13}, { 127,11}, \
- { 511,10}, { 1023,11}, { 607,10}, { 1215,12}, \
- { 319,11}, { 671,12}, { 351,11}, { 703,13}, \
- { 191,12}, { 383,11}, { 767,12}, { 415,11}, \
- { 831,12}, { 447,11}, { 895,12}, { 479,11}, \
- { 959,14}, { 127,13}, { 255,12}, { 543,11}, \
- { 1087,12}, { 575,11}, { 1151,12}, { 607,13}, \
- { 319,12}, { 671,11}, { 1343,12}, { 703,13}, \
- { 383,12}, { 831,13}, { 447,12}, { 959,11}, \
- { 1919,13}, { 511,12}, { 1023,13}, { 575,12}, \
- { 1215,13}, { 639,12}, { 1343,13}, { 703,14}, \
- { 383,13}, { 767,12}, { 1535,13}, { 831,12}, \
- { 1663,13}, { 959,12}, { 1919,14}, { 511,13}, \
- { 1087,12}, { 2175,13}, { 1215,14}, { 639,13}, \
- { 1343,12}, { 2687,13}, { 1407,12}, { 2815,13}, \
- { 1471,14}, { 767,13}, { 1599,12}, { 3199,13}, \
- { 1663,14}, { 895,13}, { 1919,15}, { 511,14}, \
- { 1023,13}, { 2175,14}, { 1151,13}, { 2431,12}, \
- { 4863,14}, { 1279,13}, { 2687,14}, { 1407,13}, \
- { 2815,15}, { 767,14}, { 1535,13}, { 3199,14}, \
- { 1663,13}, { 3455,14}, { 1919,16}, { 511,15}, \
- { 1023,14}, { 2431,13}, { 4863,15}, { 1279,14}, \
- { 2943,13}, { 5887,15}, { 1535,14}, { 16384,15}, \
- { 32768,16}, { 65536,17}, { 131072,18}, { 262144,19}, \
- { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \
- {8388608,24} }
-#define SQR_FFT_TABLE3_SIZE 177
-#define SQR_FFT_THRESHOLD 3264
-
-#define MULLO_BASECASE_THRESHOLD 5
-#define MULLO_DC_THRESHOLD 33
-#define MULLO_MUL_N_THRESHOLD 6633
-
-#define DC_DIV_QR_THRESHOLD 39
-#define DC_DIVAPPR_Q_THRESHOLD 119
-#define DC_BDIV_QR_THRESHOLD 31
-#define DC_BDIV_Q_THRESHOLD 78
+#define MULLO_BASECASE_THRESHOLD 0 /* always */
+#define MULLO_DC_THRESHOLD 51
+#define MULLO_MUL_N_THRESHOLD 8648
+
+#define DC_DIV_QR_THRESHOLD 63
+#define DC_DIVAPPR_Q_THRESHOLD 196
+#define DC_BDIV_QR_THRESHOLD 59
+#define DC_BDIV_Q_THRESHOLD 134
#define INV_MULMOD_BNM1_THRESHOLD 46
-#define INV_NEWTON_THRESHOLD 139
-#define INV_APPR_THRESHOLD 131
+#define INV_NEWTON_THRESHOLD 202
+#define INV_APPR_THRESHOLD 190
-#define BINV_NEWTON_THRESHOLD 198
-#define REDC_1_TO_REDC_2_THRESHOLD 23
-#define REDC_2_TO_REDC_N_THRESHOLD 59
+#define BINV_NEWTON_THRESHOLD 224
+#define REDC_1_TO_REDC_2_THRESHOLD 16
+#define REDC_2_TO_REDC_N_THRESHOLD 55
-#define MU_DIV_QR_THRESHOLD 1334
-#define MU_DIVAPPR_Q_THRESHOLD 1442
-#define MUPI_DIV_QR_THRESHOLD 66
-#define MU_BDIV_QR_THRESHOLD 1017
-#define MU_BDIV_Q_THRESHOLD 1442
+#define MU_DIV_QR_THRESHOLD 1442
+#define MU_DIVAPPR_Q_THRESHOLD 1528
+#define MUPI_DIV_QR_THRESHOLD 85
+#define MU_BDIV_QR_THRESHOLD 1187
+#define MU_BDIV_Q_THRESHOLD 1387
#define MATRIX22_STRASSEN_THRESHOLD 15
-#define HGCD_THRESHOLD 125 /* hardwired, tuneup crashes */
-#define GCD_DC_THRESHOLD 396
-#define GCDEXT_DC_THRESHOLD 368
-#define JACOBI_BASE_METHOD 1
+#define HGCD_THRESHOLD 113
+#define HGCD_APPR_THRESHOLD 84
+#define HGCD_REDUCE_THRESHOLD 2681
+#define GCD_DC_THRESHOLD 555
+#define GCDEXT_DC_THRESHOLD 396
+#define JACOBI_BASE_METHOD 4
#define GET_STR_DC_THRESHOLD 12
-#define GET_STR_PRECOMPUTE_THRESHOLD 21
-#define SET_STR_DC_THRESHOLD 650
-#define SET_STR_PRECOMPUTE_THRESHOLD 1585
+#define GET_STR_PRECOMPUTE_THRESHOLD 20
+#define SET_STR_DC_THRESHOLD 1204
+#define SET_STR_PRECOMPUTE_THRESHOLD 2251
+
+#define FAC_DSC_THRESHOLD 800
+#define FAC_ODD_THRESHOLD 28
--- /dev/null
+dnl X86-64 mpn_lshift optimised for Intel Sandy Bridge.
+
+dnl Copyright 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+MULFUNC_PROLOGUE(mpn_lshift)
+include_mpn(`x86_64/fastsse/lshift-movdqu2.asm')
--- /dev/null
+dnl X86-64 mpn_lshiftc optimised for Intel Sandy Bridge.
+
+dnl Copyright 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+MULFUNC_PROLOGUE(mpn_lshiftc)
+include_mpn(`x86_64/fastsse/lshiftc-movdqu2.asm')
--- /dev/null
+dnl X86-64 mpn_mul_1 optimised for Intel Sandy Bridge.
+
+dnl Copyright 2003, 2004, 2005, 2007, 2008, 2011, 2012 Free Software
+dnl Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C AMD K8,K9
+C AMD K10
+C AMD bd1
+C AMD bobcat
+C Intel P4
+C Intel core2
+C Intel NHM
+C Intel SBR
+C Intel atom
+C VIA nano
+
+C The loop of this code is the result of running a code generation and
+C optimisation tool suite written by David Harvey and Torbjorn Granlund.
+
+C TODO
+C * The loop is great, but the prologue code was quickly written. Tune it!
+C * Add mul_1c entry point.
+
+define(`rp', `%rdi') C rcx
+define(`up', `%rsi') C rdx
+define(`n_param', `%rdx') C r8
+define(`v0', `%rcx') C r9
+
+define(`n', `%r11')
+
+dnl Disable until tested ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+IFDOS(` define(`up', ``%rsi'') ') dnl
+IFDOS(` define(`rp', ``%rcx'') ') dnl
+IFDOS(` define(`v0', ``%r9'') ') dnl
+IFDOS(` define(`r9', ``rdi'') ') dnl
+IFDOS(` define(`n', ``%r8'') ') dnl
+IFDOS(` define(`r8', ``r11'') ') dnl
+
+ASM_START()
+ TEXT
+ ALIGN(16)
+
+C mpn_mul_1 -- multiply the n limbs at up by the single limb v0 and store the
+C n least significant limbs of the product at rp.  Returns the most
+C significant limb in rax.
+C
+C up is advanced to one past its last limb, rp to its last limb, and n is
+C negated so it serves as a negative index counting up towards zero; the
+C carry out of add $4 on n ends the loop.  r8 and r9 alternate between the
+C limb about to be stored and the high product limb carried forward.
+C The IFDOS lines belong to the DOS64 variant, which the file leaves disabled.
+C NOTE(review): only the low 32 bits of n_param are copied into n -- confirm
+C that larger sizes cannot reach this code.
+PROLOGUE(mpn_mul_1)
+IFDOS(``push %rsi '')
+IFDOS(``push %rdi '')
+IFDOS(``mov %rdx, %rsi '')
+
+ mov (up), %rax
+IFSTD(` mov R32(n_param), R32(%r10) ')
+IFDOS(` mov n, %r10 ')
+IFSTD(` mov R32(n_param), R32(n) ')
+
+C The first product is started before the dispatch on n mod 4.
+ lea (up,n_param,8), up
+ lea -8(rp,n_param,8), rp
+ neg n
+ mul v0
+ and $3, R32(%r10)
+ jz L(b0)
+ cmp $2, R32(%r10)
+ jb L(b1)
+ jz L(b2)
+
+C n mod 4 = 3.
+L(b3): add $-1, n
+ mov %rax, %r9
+ mov %rdx, %r8
+ mov 16(up,n,8), %rax
+ jmp L(L3)
+
+C n mod 4 = 1.  The carry out of add $1 is set only when n = 1.
+L(b1): mov %rax, %r9
+ mov %rdx, %r8
+ add $1, n
+ jnc L(L1)
+C n = 1: store the single product limb and return the high limb.  The DOS64
+C entry sequence pushed rsi and rdi, so they are popped on this early exit
+C as well; otherwise ret would consume a saved register as return address.
+ mov %rax, (rp)
+ mov %rdx, %rax
+IFDOS(``pop %rdi '')
+IFDOS(``pop %rsi '')
+ ret
+
+C n mod 4 = 2.
+L(b2): add $-2, n
+ mov %rax, %r8
+ mov %rdx, %r9
+ mov 24(up,n,8), %rax
+ jmp L(L2)
+
+C n mod 4 = 0.
+L(b0): mov %rax, %r8
+ mov %rdx, %r9
+ mov 8(up,n,8), %rax
+ jmp L(L0)
+
+C Main loop, four limbs per iteration.
+ ALIGN(8)
+L(top): mov %rdx, %r8
+ add %rax, %r9
+L(L1): mov 0(up,n,8), %rax
+ adc $0, %r8
+ mul v0
+ add %rax, %r8
+ mov %r9, 0(rp,n,8)
+ mov 8(up,n,8), %rax
+ mov %rdx, %r9
+ adc $0, %r9
+L(L0): mul v0
+ mov %r8, 8(rp,n,8)
+ add %rax, %r9
+ mov %rdx, %r8
+ mov 16(up,n,8), %rax
+ adc $0, %r8
+L(L3): mul v0
+ mov %r9, 16(rp,n,8)
+ mov %rdx, %r9
+ add %rax, %r8
+ mov 24(up,n,8), %rax
+ adc $0, %r9
+L(L2): mul v0
+ mov %r8, 24(rp,n,8)
+ add $4, n
+ jnc L(top)
+
+C Last limb: fold in the final low product, store it at rp, which points at
+C the final limb, and return the high limb plus carry in rax.
+L(end): add %rax, %r9
+ mov %rdx, %rax
+ adc $0, %rax
+ mov %r9, (rp)
+
+IFDOS(``pop %rdi '')
+IFDOS(``pop %rsi '')
+ ret
+EPILOGUE()
--- /dev/null
+dnl X86-64 mpn_rsh1add_n, mpn_rsh1sub_n optimised for Intel Sandy Bridge.
+
+dnl Copyright 2003, 2005, 2009, 2010, 2011, 2012 Free Software Foundation,
+dnl Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C cycles/limb
+C AMD K8,K9 ?
+C AMD K10 4.25
+C Intel P4 21.5
+C Intel core2 3.2
+C Intel NHM 3.87
+C Intel SBR 2.05
+C Intel atom ?
+C VIA nano 44.9
+
+C INPUT PARAMETERS
+define(`rp', `%rdi')
+define(`up', `%rsi')
+define(`vp', `%rdx')
+define(`n', `%rcx')
+
+C Operation selection.  Whichever OPERATION_ symbol is defined decides what the
+C shared body below is assembled as: ADDSUB/ADCSBB expand to add/adc or to
+C sub/sbb, and func_n/func_nc expand to the matching mpn_rsh1add_* or
+C mpn_rsh1sub_* entry point names.
+ifdef(`OPERATION_rsh1add_n', `
+ define(ADDSUB, add)
+ define(ADCSBB, adc)
+ define(func_n, mpn_rsh1add_n)
+ define(func_nc, mpn_rsh1add_nc)')
+ifdef(`OPERATION_rsh1sub_n', `
+ define(ADDSUB, sub)
+ define(ADCSBB, sbb)
+ define(func_n, mpn_rsh1sub_n)
+ define(func_nc, mpn_rsh1sub_nc)')
+
+MULFUNC_PROLOGUE(mpn_rsh1add_n mpn_rsh1add_nc mpn_rsh1sub_n mpn_rsh1sub_nc)
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+ TEXT
+
+ ALIGN(16)
+C func_nc: carry-in entry point.  Takes rp, up, vp, n as func_n does, plus an
+C incoming carry/borrow in %r8 (loaded from the stack under the DOS64 ABI).
+C It only forms the lowest limb and then jumps to the shared body at L(ent)
+C inside func_n, so the two pushes here must stay identical to the ones in
+C func_n, whose epilogue pops them.
+PROLOGUE(func_nc)
+ FUNC_ENTRY(4)
+IFDOS(` mov 56(%rsp), %r8 ')
+ push %rbx
+ push %rbp
+
+ C neg leaves CF = 1 exactly when %r8 was nonzero
+ neg %r8 C set C flag from parameter
+ mov (up), %rbp
+ ADCSBB (vp), %rbp
+
+ jmp L(ent)
+EPILOGUE()
+
+ ALIGN(16)
+C func_n: store (up[] ADDSUB vp[]) >> 1 into rp[], n limbs, lowest limb first.
+C The bit shifted out at the bottom is returned in %rax; the carry/borrow out
+C of the top limb is shifted in as the most significant bit of the result.
+C
+C Register roles in the code below:
+C   %rbp      most recent sum/difference limb, still waiting for the low bit
+C             of its upper neighbour before it can be stored
+C   %rbx      saved carry as 0 or -1 (sbb r,r); add r,r turns it back into CF
+C   %r8-%r11  further sum/difference limbs of the current group (%r11 also
+C             holds n mod 4 during the dispatch)
+PROLOGUE(func_n)
+ FUNC_ENTRY(4)
+ push %rbx
+ push %rbp
+
+ mov (up), %rbp
+ ADDSUB (vp), %rbp
+C func_nc joins here with the lowest limb in %rbp and its carry in CF.
+L(ent):
+ sbb R32(%rbx), R32(%rbx) C save cy
+ mov R32(%rbp), R32(%rax)
+ and $1, R32(%rax) C return value
+
+ C Dispatch on n mod 4.  One limb is already held in %rbp; the cases below
+ C consume 0, 1, 2 or 3 more so that a multiple of 4 is left for L(top).
+ mov R32(n), R32(%r11)
+ and $3, R32(%r11)
+
+ cmp $1, R32(%r11)
+ je L(do) C jump if n = 1 5 9 ...
+
+L(n1): cmp $2, R32(%r11)
+ jne L(n2) C jump unless n = 2 6 10 ...
+ add R32(%rbx), R32(%rbx) C restore cy
+ mov 8(up), %r10
+ ADCSBB 8(vp), %r10
+ lea 8(up), up
+ lea 8(vp), vp
+ lea 8(rp), rp
+ sbb R32(%rbx), R32(%rbx) C save cy
+
+ shrd $1, %r10, %rbp
+ mov %rbp, -8(rp)
+ jmp L(cj1)
+
+L(n2): cmp $3, R32(%r11)
+ jne L(n3) C jump unless n = 3 7 11 ...
+ add R32(%rbx), R32(%rbx) C restore cy
+ mov 8(up), %r9
+ mov 16(up), %r10
+ ADCSBB 8(vp), %r9
+ ADCSBB 16(vp), %r10
+ lea 16(up), up
+ lea 16(vp), vp
+ lea 16(rp), rp
+ sbb R32(%rbx), R32(%rbx) C save cy
+
+ shrd $1, %r9, %rbp
+ mov %rbp, -16(rp)
+ jmp L(cj2)
+
+ C n = 0 mod 4: three more limbs are consumed here, so n is decremented
+ C first to make the n >> 2 iteration count at L(do) come out right.
+L(n3): dec n C come here for n = 4 8 12 ...
+ add R32(%rbx), R32(%rbx) C restore cy
+ mov 8(up), %r8
+ mov 16(up), %r9
+ ADCSBB 8(vp), %r8
+ ADCSBB 16(vp), %r9
+ mov 24(up), %r10
+ ADCSBB 24(vp), %r10
+ lea 24(up), up
+ lea 24(vp), vp
+ lea 24(rp), rp
+ sbb R32(%rbx), R32(%rbx) C save cy
+
+ shrd $1, %r8, %rbp
+ mov %rbp, -24(rp)
+ shrd $1, %r9, %r8
+ mov %r8, -16(rp)
+L(cj2): shrd $1, %r10, %r9
+ mov %r9, -8(rp)
+ C the highest limb formed so far becomes the pending limb
+L(cj1): mov %r10, %rbp
+
+L(do):
+ shr $2, n C 4
+ je L(end) C 2
+ ALIGN(16)
+ C Main loop: four limbs per iteration.  All four are added/subtracted
+ C first, the carry is parked in %rbx, and only then are the shifted
+ C results stored, because shrd itself modifies the flags.
+L(top): add R32(%rbx), R32(%rbx) C restore cy
+
+ mov 8(up), %r8
+ mov 16(up), %r9
+ ADCSBB 8(vp), %r8
+ ADCSBB 16(vp), %r9
+ mov 24(up), %r10
+ mov 32(up), %r11
+ ADCSBB 24(vp), %r10
+ ADCSBB 32(vp), %r11
+
+ lea 32(up), up
+ lea 32(vp), vp
+
+ sbb R32(%rbx), R32(%rbx) C save cy
+
+ shrd $1, %r8, %rbp
+ mov %rbp, (rp)
+ shrd $1, %r9, %r8
+ mov %r8, 8(rp)
+ shrd $1, %r10, %r9
+ mov %r9, 16(rp)
+ shrd $1, %r11, %r10
+ mov %r10, 24(rp)
+
+ dec n
+ mov %r11, %rbp
+ lea 32(rp), rp
+ jne L(top)
+
+ C Flush the pending limb; the saved carry mask supplies its top bit.
+L(end): shrd $1, %rbx, %rbp
+ mov %rbp, (rp)
+ pop %rbp
+ pop %rbx
+ FUNC_EXIT()
+ ret
+EPILOGUE()
--- /dev/null
+dnl X86-64 mpn_rshift optimised for Intel Sandy Bridge.
+
+dnl Copyright 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+dnl This file has no instructions of its own.  It announces mpn_rshift and
+dnl then includes the fastsse movdqu2 implementation named below.
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+MULFUNC_PROLOGUE(mpn_rshift)
+include_mpn(`x86_64/fastsse/rshift-movdqu2.asm')
divert(-1)
-dnl Copyright 2008 Free Software Foundation, Inc.
+dnl Copyright 2008, 2011, 2012 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or
-dnl modify it under the terms of the GNU Lesser General Public License as
-dnl published by the Free Software Foundation; either version 3 of the
-dnl License, or (at your option) any later version.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful,
-dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
-dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-dnl Lesser General Public License for more details.
-dnl
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
dnl You should have received a copy of the GNU Lesser General Public License
dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
define(`DARWIN')
-define(`LEA',`
- lea $1(%rip), $2
+define(`LEA',`dnl
+ifdef(`PIC',
+ `lea $1(%rip), $2'
+,
+ `movabs `$'$1, $2')
')
dnl Usage: CALL(funcname)
define(`CALL',`call GSYM_PREFIX`'$1')
-define(`JUMPTABSECT', `DATA')
+dnl Usage: JUMPTABSECT
+dnl
+dnl CAUTION: Do not put anything sensible here, like RODATA. That works with
+dnl some Darwin tool chains, but silently breaks with others. (Note that
+dnl putting jump tables in the text segment is a really poor idea for many PC
+dnl processors, since they cannot cache the same thing in both L1D and L2I.)
+
+define(`JUMPTABSECT', `.text')
+
+
+dnl Usage: JMPENT(targlabel,tablabel)
+
+dnl Emits one jump table entry for targlabel in the table starting at
+dnl tablabel.  With PIC defined the entry is a 32-bit value holding
+dnl targlabel minus tablabel, computed through a .set temporary; otherwise
+dnl it is the 64-bit address of targlabel and tablabel is not used.
+define(`JMPENT',`dnl
+ifdef(`PIC',
+ `.set $1_tmp, $1-$2
+ .long $1_tmp'
+,
+ `.quad $1'
+)')
+
+dnl Target ABI macros. For Darwin we override IFELF (and leave default for
+dnl IFDOS and IFSTD).
+
+define(`IFELF', `')
+
+
+dnl Usage: PROTECT(symbol)
+dnl
+dnl Used for private GMP symbols that should never be overridden by users.
+dnl This can save reloc entries and improve shlib sharing as well as
+dnl application startup times
+
+define(`PROTECT', `.private_extern $1')
+
divert`'dnl
--- /dev/null
+dnl x86-64 mpn_div_qr_2n_pi1
+dnl -- Divide an mpn number by a normalized 2-limb number,
+dnl using a single-limb inverse.
+
+dnl Copyright 2007, 2008, 2010, 2011, 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C c/l
+C INPUT PARAMETERS
+C Incoming arguments: six in registers, the inverse di on the stack.
+define(`qp', `%rdi')
+define(`rp', `%rsi')
+define(`up_param', `%rdx')
+define(`un', `%rcx')
+define(`d1', `%r8')
+define(`d0', `%r9')
+define(`di_param', `8(%rsp)')
+
+C WORKING REGISTERS
+C up_param and di_param are copied into up and di by the function prologue.
+define(`di', `%r10')
+define(`up', `%r11')
+define(`u2', `%rbx')
+define(`u1', `%r12')
+define(`t1', `%r13')
+define(`t0', `%r14')
+define(`md1', `%r15')
+
+C TODO
+C * Store qh in the same stack slot as di_param, instead of pushing
+C it. (we could put it in register %rbp, but then we would need to
+C save and restore that instead, which doesn't seem like a win).
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+ TEXT
+ ALIGN(16)
+C mpn_div_qr_2n_pi1 (qp, rp, up, un, d1, d0, di)
+C
+C Divides the un-limb number at up by the two-limb divisor d1:d0 with the help
+C of the precomputed inverse di.  Quotient limbs are written to qp[un-3] down
+C to qp[0], the two remainder limbs to rp[1] and rp[0], and the top quotient
+C limb qh (0 or 1) is returned in %rax.
+PROLOGUE(mpn_div_qr_2n_pi1)
+ FUNC_ENTRY(4)
+IFDOS(` mov 56(%rsp), %r8 ')
+IFDOS(` mov 64(%rsp), %r9 ')
+IFDOS(`define(`di_param', `72(%rsp)')')
+ C di_param is %rsp-relative, so it is read before the pushes below
+ mov di_param, di
+ C up arrives in %rdx, which every mul below overwrites
+ mov up_param, up
+ push %r15
+ push %r14
+ push %r13
+ push %r12
+ push %rbx
+
+ C u2:u1 = two most significant dividend limbs
+ mov -16(up, un, 8), u1
+ mov -8(up, un, 8), u2
+
+ C Trial subtraction of d1:d0; the difference is kept only when it does
+ C not borrow, i.e. when u2:u1 >= d1:d0.
+ mov u1, t0
+ mov u2, t1
+ sub d0, t0
+ sbb d1, t1
+ cmovnc t0, u1
+ cmovnc t1, u2
+ C push qh which is !carry
+ sbb %rax, %rax
+ inc %rax
+ push %rax
+ lea -2(un), un
+ C md1 = -d1, used as the imul factor in the loop
+ mov d1, md1
+ neg md1
+
+ jmp L(next)
+
+ ALIGN(16)
+ C One quotient limb per iteration: reads up[un], writes qp[un], with un
+ C counting down.  u2:u1 carries the running remainder between iterations.
+L(loop):
+ C udiv_qr_3by2 (q,u2,u1,u2,u1,n0, d1,d0,di)
+ C Based on the optimized divrem_2.asm code.
+
+ mov di, %rax
+ mul u2
+ mov u1, t0
+ add %rax, t0 C q0 in t0
+ adc u2, %rdx
+ mov %rdx, t1 C q in t1
+ imul md1, %rdx
+ mov d0, %rax
+ lea (%rdx, u1), u2
+ mul t1
+ mov (up, un, 8), u1
+ sub d0, u1
+ sbb d1, u2
+ sub %rax, u1
+ sbb %rdx, u2
+ xor R32(%rax), R32(%rax)
+ xor R32(%rdx), R32(%rdx)
+ cmp t0, u2
+ C When the compare does not borrow, d1:d0 is added back below; when it
+ C does borrow, the adc increments q instead.
+ cmovnc d0, %rax
+ cmovnc d1, %rdx
+ adc $0, t1
+ nop
+ add %rax, u1
+ adc %rdx, u2
+ cmp d1, u2
+ jae L(fix)
+L(bck):
+ mov t1, (qp, un, 8)
+L(next):
+ C sub sets carry once un steps below zero, which ends the loop
+ sub $1, un
+ jnc L(loop)
+L(end):
+ mov u2, 8(rp)
+ mov u1, (rp)
+
+ C qh on stack
+ pop %rax
+
+ pop %rbx
+ pop %r12
+ pop %r13
+ pop %r14
+ pop %r15
+ FUNC_EXIT()
+ ret
+
+ C Entered with the flags of cmp d1, u2 still live.  The quotient limb is
+ C bumped and d1:d0 subtracted once more when u2 > d1, or when u2 = d1 and
+ C u1 >= d0; otherwise control returns to L(bck) with nothing changed.
+L(fix): C Unlikely update. u2 >= d1
+ seta %dl
+ cmp d0, u1
+ setae %al
+ orb %dl, %al C "orb" form to placate Sun tools
+ je L(bck)
+ inc t1
+ sub d0, u1
+ sbb d1, u2
+ jmp L(bck)
+EPILOGUE()
--- /dev/null
+dnl x86-64 mpn_div_qr_2u_pi1
+dnl -- Divide an mpn number by an unnormalized 2-limb number,
+dnl using a single-limb inverse and shifting the dividend on the fly.
+
+dnl Copyright 2007, 2008, 2010, 2011 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C c/l
+C INPUT PARAMETERS
+C Incoming arguments: six in registers, shift and di on the stack.  The two
+C stack slots are addressed relative to FRAME, which the function sets to the
+C number of bytes it has pushed so far.
+define(`qp', `%rdi')
+define(`rp', `%rsi')
+define(`up_param', `%rdx')
+define(`un_param', `%rcx') dnl %rcx needed for shift count
+define(`d1', `%r8')
+define(`d0', `%r9')
+define(`shift_param', `FRAME+8(%rsp)')
+define(`di_param', `FRAME+16(%rsp)')
+
+C WORKING REGISTERS
+define(`di', `%r10')
+define(`up', `%r11')
+define(`un', `%rbp')
+define(`u2', `%rbx')
+define(`u1', `%r12')
+define(`u0', `%rsi') dnl Same as rp, which is saved and restored.
+define(`t1', `%r13')
+define(`t0', `%r14')
+define(`md1', `%r15')
+
+ASM_START()
+ TEXT
+ ALIGN(16)
+deflit(`FRAME', 0)
+C mpn_div_qr_2u_pi1 (qp, rp, up, un, d1, d0, shift, di)
+C
+C Same 3-by-2 division step as the loop below describes, but the dividend is
+C shifted left by shift bits while it is read, and the two remainder limbs are
+C shifted right by the same count before being stored at rp[1], rp[0].
+C Quotient limbs go to qp[un-3] down to qp[0]; the top quotient limb qh is
+C returned in %rax.
+C
+C NOTE(review): there is no FUNC_ENTRY/IFDOS handling in this function, so the
+C argument registers are the ones named in the defines above as they stand.
+C Presumably DOS64 is intentionally unsupported here - TODO confirm.
+PROLOGUE(mpn_div_qr_2u_pi1)
+ C FRAME is still 0 here, so di_param addresses the caller's slot directly
+ mov di_param, di
+ C up arrives in %rdx, which every mul below overwrites
+ mov up_param, up
+ push %r15
+ push %r14
+ push %r13
+ push %r12
+ push %rbx
+ push %rbp
+ C rp shares %rsi with u0, so it is parked on the stack until L(end)
+ push rp
+deflit(`FRAME', 56)
+ lea -2(un_param), un
+ mov d1, md1
+ neg md1
+
+ C int parameter, 32 bits only
+ movl shift_param, R32(%rcx)
+
+ C FIXME: Different code for SHLD_SLOW
+
+ C Peeled first step.  It starts from u2 = the bits shifted out of the top
+ C dividend limb and produces qh, which is pushed at L(bck_qh).
+ xor R32(u2), R32(u2)
+ mov 8(up, un, 8), u1
+ shld %cl, u1, u2
+ C Remains to read (up, un, 8) and shift u1, u0
+ C udiv_qr_3by2 (qh,u2,u1,u2,u1,n0, d1,d0,di)
+ mov di, %rax
+ mul u2
+ mov (up, un, 8), u0
+ shld %cl, u0, u1
+ mov u1, t0
+ add %rax, t0 C q0 in t0
+ adc u2, %rdx
+ mov %rdx, t1 C q in t1
+ imul md1, %rdx
+ mov d0, %rax
+ lea (%rdx, u1), u2
+ mul t1
+ mov u0, u1
+ shl %cl, u1
+ sub d0, u1
+ sbb d1, u2
+ sub %rax, u1
+ sbb %rdx, u2
+ xor R32(%rax), R32(%rax)
+ xor R32(%rdx), R32(%rdx)
+ cmp t0, u2
+ cmovnc d0, %rax
+ cmovnc d1, %rdx
+ adc $0, t1
+ nop
+ add %rax, u1
+ adc %rdx, u2
+ cmp d1, u2
+ jae L(fix_qh)
+L(bck_qh):
+ push t1 C push qh on stack
+
+ jmp L(next)
+
+ ALIGN(16)
+ C One quotient limb per iteration: reads up[un], writes qp[un], with un
+ C counting down.  u2:u1 carries the running remainder between iterations.
+L(loop):
+ C udiv_qr_3by2 (q,u2,u1,u2,u1,n0, d1,d0,di)
+ C Based on the optimized divrem_2.asm code.
+
+ mov di, %rax
+ mul u2
+ mov (up, un, 8), u0
+ C move the top cl bits of the new limb u0 into the low bits of u1
+ xor R32(t1), R32(t1)
+ shld %cl, u0, t1
+ or t1, u1
+ mov u1, t0
+ add %rax, t0 C q0 in t0
+ adc u2, %rdx
+ mov %rdx, t1 C q in t1
+ imul md1, %rdx
+ mov d0, %rax
+ lea (%rdx, u1), u2
+ mul t1
+ C the remaining bits of u0, shifted up, form the new low limb
+ mov u0, u1
+ shl %cl, u1
+ sub d0, u1
+ sbb d1, u2
+ sub %rax, u1
+ sbb %rdx, u2
+ xor R32(%rax), R32(%rax)
+ xor R32(%rdx), R32(%rdx)
+ cmp t0, u2
+ cmovnc d0, %rax
+ cmovnc d1, %rdx
+ adc $0, t1
+ nop
+ add %rax, u1
+ adc %rdx, u2
+ cmp d1, u2
+ jae L(fix)
+L(bck):
+ mov t1, (qp, un, 8)
+L(next):
+ C sub sets carry once un steps below zero, which ends the loop
+ sub $1, un
+ jnc L(loop)
+L(end):
+ C qh on stack
+ pop %rax
+ pop rp
+ C undo the left shift on the remainder before storing it
+ shrd %cl, u2, u1
+ shr %cl, u2
+ mov u2, 8(rp)
+ mov u1, (rp)
+
+ pop %rbp
+ pop %rbx
+ pop %r12
+ pop %r13
+ pop %r14
+ pop %r15
+ ret
+
+ C Entered with the flags of cmp d1, u2 still live.  The quotient limb is
+ C bumped and d1:d0 subtracted once more when u2 > d1, or when u2 = d1 and
+ C u1 >= d0; otherwise control returns to L(bck) with nothing changed.
+L(fix): C Unlikely update. u2 >= d1
+ seta %dl
+ cmp d0, u1
+ setae %al
+ orb %dl, %al C "orb" form to placate Sun tools
+ je L(bck)
+ inc t1
+ sub d0, u1
+ sbb d1, u2
+ jmp L(bck)
+
+C Duplicated, just jumping back to a different address.
+L(fix_qh): C Unlikely update. u2 >= d1
+ seta %dl
+ cmp d0, u1
+ setae %al
+ orb %dl, %al C "orb" form to placate Sun tools
+ je L(bck_qh)
+ inc t1
+ sub d0, u1
+ sbb d1, u2
+ jmp L(bck_qh)
+EPILOGUE()
dnl AMD64 mpn_divexact_1 -- mpn by limb exact division.
-dnl Copyright 2001, 2002, 2004, 2005, 2006 Free Software Foundation, Inc.
+dnl Copyright 2001, 2002, 2004, 2005, 2006, 2011, 2012 Free Software
+dnl Foundation, Inc.
dnl This file is part of the GNU MP Library.
C cycles/limb
-C K8,K9: 10
-C K10: 10
-C P4: 33
-C P6 core2: 13.25
-C P6 corei7: 14
-C P6 atom: 42
+C AMD K8,K9 10
+C AMD K10 10
+C Intel P4 33
+C Intel core2 13.25
+C Intel corei 14
+C Intel atom 42
+C VIA nano 43
C A quick adoption of the 32-bit K7 code.
C n rdx
C divisor rcx
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
ASM_START()
TEXT
ALIGN(16)
PROLOGUE(mpn_divexact_1)
+ FUNC_ENTRY(4)
push %rbx
mov %rcx, %rax
shr R32(%rax)
and $127, R32(%rax) C d/2, 7 bits
-ifdef(`PIC',`
- mov binvert_limb_table@GOTPCREL(%rip), %rdx
-',`
- movabs $binvert_limb_table, %rdx
-')
+ LEA( binvert_limb_table, %rdx)
movzbl (%rdx,%rax), R32(%rax) C inv 8 bits
imul %r10, %rax
mov %rax, (%rdi)
pop %rbx
+ FUNC_EXIT()
ret
L(one): shr R8(%rcx), %rax
imul %r10, %rax
mov %rax, (%rdi)
pop %rbx
+ FUNC_EXIT()
ret
EPILOGUE()
dnl x86-64 mpn_divrem_1 -- mpn by limb division.
-dnl Copyright 2004, 2005, 2007, 2008, 2009 Free Software Foundation, Inc.
+dnl Copyright 2004, 2005, 2007, 2008, 2009, 2010, 2011, 2012 Free Software
+dnl Foundation, Inc.
dnl This file is part of the GNU MP Library.
C norm unorm frac
-C K8 13 13 12
-C P4 44.2 44.2 42.3
-C P6 core2 25 24.5 19.3
-C P6 corei7 21.5 20.7 18
-C P6 atom 42 52 37
-
-C TODO
-C * Compute the inverse without relying on the div instruction.
-C Newton's method and mulq, or perhaps the faster fdiv.
-C * Tune prologue.
-C * Optimize for Core 2.
-
-C The code for unnormalized divisors works also for normalized divisors, but
-C for some reason it runs really slowly (on K8) for that case. Use special
-C code until we can address this. The Intel Atom is also affected, but
-C understandably (shld slowness).
-define(`SPECIAL_CODE_FOR_NORMALIZED_DIVISOR',1)
+C AMD K8,K9 13 13 12
+C AMD K10 13 13 12
+C Intel P4 43 44 43
+C Intel core2 24.5 24.5 19.5
+C Intel corei 20.5 19.5 18
+C Intel atom 43 46 36
+C VIA nano 25.5 25.5 24
C mp_limb_t
C mpn_divrem_1 (mp_ptr qp, mp_size_t fn,
C rax rbx rcx rdx rsi rdi rbp r8 r9 r10 r11 r12 r13 r14 r15
C cnt qp d dinv
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+IFSTD(`define(`CNTOFF', `40($1)')')
+IFDOS(`define(`CNTOFF', `104($1)')')
+
ASM_START()
TEXT
ALIGN(16)
PROLOGUE(mpn_preinv_divrem_1)
- xor %eax, %eax
+ FUNC_ENTRY(4)
+IFDOS(` mov 56(%rsp), %r8 ')
+IFDOS(` mov 64(%rsp), %r9 ')
+ xor R32(%rax), R32(%rax)
push %r13
push %r12
push %rbp
test d, d
js L(nent)
- mov 40(%rsp), R8(cnt)
+
+ mov CNTOFF(%rsp), R8(cnt)
shl R8(cnt), d
jmp L(uent)
EPILOGUE()
ALIGN(16)
PROLOGUE(mpn_divrem_1)
- xor %eax, %eax
+ FUNC_ENTRY(4)
+IFDOS(` mov 56(%rsp), %r8 ')
+ xor R32(%rax), R32(%rax)
push %r13
push %r12
push %rbp
lea -8(qp,un_param,8), qp
xor R32(%rbp), R32(%rbp)
-
-ifdef(`SPECIAL_CODE_FOR_NORMALIZED_DIVISOR',`
test d, d
jns L(unnormalized)
dec un
mov %rbp, %rax
sub d, %rbp
- cmovb %rax, %rbp
- sbb %eax, %eax
- inc %eax
+ cmovc %rax, %rbp
+ sbb R32(%rax), R32(%rax)
+ inc R32(%rax)
mov %rax, (qp)
lea -8(qp), qp
L(8):
- mov d, %rdx
- mov $-1, %rax
- not %rdx
- div d C FREE rax rdx rcx r9 r10 r11
+IFSTD(` push %rdi ')
+IFSTD(` push %rsi ')
+ push %r8
+IFSTD(` mov d, %rdi ')
+IFDOS(` mov d, %rcx ')
+ CALL( mpn_invert_limb)
+ pop %r8
+IFSTD(` pop %rsi ')
+IFSTD(` pop %rdi ')
+
mov %rax, dinv
mov %rbp, %rax
jmp L(nent)
ALIGN(16)
-L(nloop): C cycK8 cycP6 cycP4
+L(ntop): C K8-K10 P6-CNR P6-NHM P4
mov (up,un,8), %r10 C
- lea 1(%rax), %rbp C
- mul dinv C 0,13 0,19 0,45
- add %r10, %rax C 4 8 12
- adc %rbp, %rdx C 5 9 13
- mov %rax, %rbp C 5 9 13
- mov %rdx, %r13 C 6 11 23
- imul d, %rdx C 6 11 23
- sub %rdx, %r10 C 10 16 33
+ mul dinv C 0,13 0,20 0,18 0,45
+ add %r10, %rax C 4 8 3 12
+ adc %rbp, %rdx C 5 9 10 13
+ mov %rax, %rbp C 5 9 4 13
+ mov %rdx, %r13 C 6 11 12 23
+ imul d, %rdx C 6 11 11 23
+ sub %rdx, %r10 C 10 16 14 33
mov d, %rax C
- add %r10, %rax C 11 17 34
- cmp %rbp, %r10 C 11 17 34
- cmovb %r10, %rax C 12 18 35
+ add %r10, %rax C 11 17 15 34
+ cmp %rbp, %r10 C 11 17 15 34
+ cmovc %r10, %rax C 12 18 16 35
adc $-1, %r13 C
cmp d, %rax C
jae L(nfx) C
L(nok): mov %r13, (qp) C
sub $8, qp C
-L(nent):dec un C
- jns L(nloop) C
+L(nent):lea 1(%rax), %rbp C
+ dec un C
+ jns L(ntop) C
- xor %ecx, %ecx
+ xor R32(%rcx), R32(%rcx)
jmp L(87)
L(nfx): sub d, %rax
inc %r13
jmp L(nok)
-')
L(unnormalized):
test un, un
dec un
L(44):
bsr d, %rcx
- not %ecx
- sal %cl, d
- sal %cl, %rbp
- mov d, %rdx
- mov $-1, %rax
- not %rdx
- div d C FREE rax rdx r9 r10 r11
- test un, un
+ not R32(%rcx)
+ shl R8(%rcx), d
+ shl R8(%rcx), %rbp
+
+ push %rcx
+IFSTD(` push %rdi ')
+IFSTD(` push %rsi ')
+ push %r8
+IFSTD(` mov d, %rdi ')
+IFDOS(` mov d, %rcx ')
+ CALL( mpn_invert_limb)
+ pop %r8
+IFSTD(` pop %rsi ')
+IFSTD(` pop %rdi ')
+ pop %rcx
+
mov %rax, dinv
mov %rbp, %rax
+ test un, un
je L(87)
-L(uent):
- mov -8(up,un,8), %rbp
- shr %cl, %rax
- shld %cl, %rbp, %rax
- sub $2, un
- js L(ulast)
+
+L(uent):dec un
+ mov (up,un,8), %rbp
+ neg R32(%rcx)
+ shr R8(%rcx), %rbp
+ neg R32(%rcx)
+ or %rbp, %rax
+ jmp L(ent)
ALIGN(16)
-L(uloop):
- nop
- mov (up,un,8), %r10
- lea 1(%rax), %r11
- shld %cl, %r10, %rbp
+L(utop):mov (up,un,8), %r10
+ shl R8(%rcx), %rbp
+ neg R32(%rcx)
+ shr R8(%rcx), %r10
+ neg R32(%rcx)
+ or %r10, %rbp
mul dinv
add %rbp, %rax
adc %r11, %rdx
mov d, %rax
add %rbp, %rax
cmp %r11, %rbp
- cmovb %rbp, %rax
+ cmovc %rbp, %rax
adc $-1, %r13
cmp d, %rax
jae L(ufx)
L(uok): mov %r13, (qp)
sub $8, qp
+L(ent): mov (up,un,8), %rbp
dec un
- mov %r10, %rbp
- jns L(uloop)
-L(ulast):
lea 1(%rax), %r11
- sal %cl, %rbp
+ jns L(utop)
+
+L(uend):shl R8(%rcx), %rbp
mul dinv
add %rbp, %rax
adc %r11, %rdx
mov d, %rax
add %rbp, %rax
cmp %r11, %rbp
- cmovb %rbp, %rax
+ cmovc %rbp, %rax
adc $-1, %r13
cmp d, %rax
- jae L(93)
-L(69): mov %r13, (qp)
+ jae L(efx)
+L(eok): mov %r13, (qp)
sub $8, qp
jmp L(87)
L(ufx): sub d, %rax
inc %r13
jmp L(uok)
-
-L(93): sub d, %rax
+L(efx): sub d, %rax
inc %r13
- jmp L(69)
+ jmp L(eok)
L(87): mov d, %rbp
neg %rbp
- jmp L(87b)
-
- ALIGN(16)
-L(floop): C cycK8 cycP6 cycP4
- lea 1(%rax), %r11 C
- mul dinv C 0,12
- add %r11, %rdx C 5
- mov %rax, %r11 C 4
- mov %rdx, %r13 C 6
- imul %rbp, %rdx C 6
+ jmp L(fent)
+
+ ALIGN(16) C K8-K10 P6-CNR P6-NHM P4
+L(ftop):mul dinv C 0,12 0,17 0,17
+ add %r11, %rdx C 5 8 10
+ mov %rax, %r11 C 4 8 3
+ mov %rdx, %r13 C 6 9 11
+ imul %rbp, %rdx C 6 9 11
mov d, %rax C
- add %rdx, %rax C 10
- cmp %r11, %rdx C 10
- cmovb %rdx, %rax C 11
+ add %rdx, %rax C 10 14 14
+ cmp %r11, %rdx C 10 14 14
+ cmovc %rdx, %rax C 11 15 15
adc $-1, %r13 C
mov %r13, (qp) C
sub $8, qp C
-L(87b): dec fn C
- jns L(floop) C
+L(fent):lea 1(%rax), %r11 C
+ dec fn C
+ jns L(ftop) C
- shr %cl, %rax
+ shr R8(%rcx), %rax
L(ret): pop %rbx
pop %rbp
pop %r12
pop %r13
+ FUNC_EXIT()
ret
EPILOGUE()
include(`../config.m4')
-C norm frac
-C K8 20 20
-C P4 73 73
-C P6 core2 37 37
-C P6 corei7 33 33
-
-C TODO
-C * Perhaps compute the inverse without relying on divq? Could either use
-C Newton's method and mulq, or perhaps the faster fdiv.
-C * The loop has not been carefully tuned, nor analysed for critical path
-C length. It seems that 20 c/l is a bit long, compared to the 13 c/l for
-C mpn_divrem_1.
-C * Clean up. This code is really crude.
+C c/l
+C AMD K8,K9 18
+C AMD K10 18
+C Intel P4 68
+C Intel core2 34
+C Intel corei 30.5
+C Intel atom 73
+C VIA nano 33
C INPUT PARAMETERS
define(`un_param', `%rcx')
define(`dp', `%r8')
-define(`dinv', `%r9')
-
-
-C rax rbx rcx rdx rsi rdi rbp r8 r9 r10 r11 r12 r13 r14 r15
-C cnt qp d dinv
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
ASM_START()
TEXT
ALIGN(16)
PROLOGUE(mpn_divrem_2)
-
+ FUNC_ENTRY(4)
+IFDOS(` mov 56(%rsp), %r8 ')
push %r15
- lea (%rdx,%rcx,8), %rax
push %r14
push %r13
- mov %rsi, %r13
push %r12
- lea -24(%rax), %r12
+ lea -24(%rdx,%rcx,8), %r12 C r12 = &up[un-1]
+ mov %rsi, %r13
push %rbp
mov %rdi, %rbp
push %rbx
- mov 8(%r8), %r11
- mov -8(%rax), %r9
- mov (%r8), %r8
- mov -16(%rax), %r10
+ mov 8(%r8), %r11 C d1
+ mov 16(%r12), %rbx
+ mov (%r8), %r8 C d0
+ mov 8(%r12), %r10
+
xor R32(%r15), R32(%r15)
- cmp %r9, %r11
+ cmp %rbx, %r11
ja L(2)
setb %dl
cmp %r10, %r8
setbe %al
- orb %al, %dl
- jne L(23)
+ orb %al, %dl C "orb" form to placate Sun tools
+ je L(2)
+ inc R32(%r15)
+ sub %r8, %r10
+ sbb %r11, %rbx
L(2):
- lea -3(%rcx,%r13), %rbx C un + fn - 3
- test %rbx, %rbx
- js L(6)
- mov %r11, %rdx
- mov $-1, %rax
- not %rdx
- div %r11
+ lea -3(%rcx,%r13), %r14 C un + fn - 3
+ test %r14, %r14
+ js L(end)
+
+ push %r8
+ push %r10
+ push %r11
+IFSTD(` mov %r11, %rdi ')
+IFDOS(` mov %r11, %rcx ')
+ CALL( mpn_invert_limb)
+ pop %r11
+ pop %r10
+ pop %r8
+
mov %r11, %rdx
mov %rax, %rdi
imul %rax, %rdx
- mov %rdx, %r14
+ mov %rdx, %r9
mul %r8
- mov %rdx, %rcx
- mov $-1, %rdx
- add %r8, %r14
- adc $0, %rdx
- add %rcx, %r14
- adc $0, %rdx
- js L(8)
-L(18):
- dec %rdi
- sub %r11, %r14
- sbb $0, %rdx
- jns L(18)
-L(8):
-
-C rax rbx rcx rdx rsi rdi rbp r8 r9 r10 r11 r12 r13 r14 r15
-C n2 un n1 dinv qp d0 d1 up fn msl
-C n2 un -d1 n1 dinv XX XX
-
-ifdef(`NEW',`
- lea (%rbp,%rbx,8), %rbp
- mov %rbx, %rcx C un
- mov %r9, %rbx
- mov %rdi, %r9 C di
- mov %r10, %r14
+ xor R32(%rcx), R32(%rcx)
+ add %r8, %r9
+ adc $-1, %rcx
+ add %rdx, %r9
+ adc $0, %rcx
+ js 2f
+1: dec %rdi
+ sub %r11, %r9
+ sbb $0, %rcx
+ jns 1b
+2:
+
+ lea (%rbp,%r14,8), %rbp
mov %r11, %rsi
neg %rsi C -d1
+
+C rax rbx rcx rdx rsi rdi rbp r8 r9 r10 r11 r12 r13 r14 r15
+C n2 un -d1 dinv qp d0 q0 d1 up fn msl
+
ALIGN(16)
-L(loop):
- mov %r9, %rax C di ncp
- mul %rbx C 0, 18
- add %r14, %rax C 4
- mov %rax, %r10 C q0 5
+L(top): mov %rdi, %rax C di ncp
+ mul %rbx C 0, 17
+ mov %r10, %rcx C
+ add %rax, %rcx C 4
adc %rbx, %rdx C 5
- mov %rdx, %rdi C q 6
+ mov %rdx, %r9 C q 6
imul %rsi, %rdx C 6
mov %r8, %rax C ncp
- lea (%rdx, %r14), %rbx C n1 -= ... 7
- mul %rdi C 7
- xor R32(%r14), R32(%r14) C
- cmp %rcx, %r13 C
+ lea (%rdx, %r10), %rbx C n1 -= ... 10
+ xor R32(%r10), R32(%r10) C
+ mul %r9 C 7
+ cmp %r14, %r13 C
jg L(19) C
- mov (%r12), %r14 C
+ mov (%r12), %r10 C
sub $8, %r12 C
-L(19): sub %r8, %r14 C ncp
- sbb %r11, %rbx C 9
- sub %rax, %r14 C 11
+L(19): sub %r8, %r10 C ncp
+ sbb %r11, %rbx C 11
+ sub %rax, %r10 C 11
sbb %rdx, %rbx C 12
- inc %rdi C 7
+ xor R32(%rax), R32(%rax) C
xor R32(%rdx), R32(%rdx) C
- cmp %r10, %rbx C 13
- mov %r8, %rax C d0 ncp
- adc $-1, %rdx C mask 14
- add %rdx, %rdi C q-- 15
- and %rdx, %rax C d0 or 0 15
- and %r11, %rdx C d1 or 0 15
- add %rax, %r14 C 16
+ cmp %rcx, %rbx C 13
+ cmovnc %r8, %rax C 14
+ cmovnc %r11, %rdx C 14
+ adc $0, %r9 C adjust q 14
+ nop
+ add %rax, %r10 C 15
adc %rdx, %rbx C 16
- cmp %r11, %rbx C 17
+ cmp %r11, %rbx C
jae L(fix) C
-L(bck): mov %rdi, (%rbp) C
- sub $8, %rbp C
- dec %rcx
- jns L(loop)
-
- mov %r14, %r10
- mov %rbx, %r9
-',`
- lea (%rbp,%rbx,8), %rbp
- mov %rbx, %rcx
- mov %r9, %rax
- mov %r10, %rsi
- ALIGN(16)
-L(loop):
- mov %rax, %r14 C 0, 19
- mul %rdi C 0
- mov %r11, %r9 C 1
- add %rsi, %rax C 4
- mov %rax, %rbx C q0 5
- adc %r14, %rdx C q 5
- lea 1(%rdx), %r10 C 6
- mov %rdx, %rax C 6
- imul %rdx, %r9 C 6
- sub %r9, %rsi C 10
- xor R32(%r9), R32(%r9) C
- mul %r8 C 7
- cmp %rcx, %r13 C
- jg L(13) C
- mov (%r12), %r9 C
- sub $8, %r12 C
-L(13): sub %r8, %r9 C ncp
- sbb %r11, %rsi C 11
- sub %rax, %r9 C 11
- sbb %rdx, %rsi C 12
- cmp %rbx, %rsi C 13
- sbb %rax, %rax C 14
- not %rax C 15
- add %rax, %r10 C 16
- mov %r8, %rbx C ncp
- and %rax, %rbx C 16
- and %r11, %rax C 16
- add %rbx, %r9 C 17
- adc %rsi, %rax C 18
- cmp %rax, %r11 C 19
- jbe L(fix) C
-L(bck): mov %r10, (%rbp) C
+L(bck): mov %r9, (%rbp) C
sub $8, %rbp C
- mov %r9, %rsi C 18
- dec %rcx
- jns L(loop)
-
- mov %rsi, %r10
- mov %rax, %r9
-')
-L(6):
- mov %r10, 8(%r12)
- mov %r9, 16(%r12)
+ dec %r14
+ jns L(top)
+
+L(end): mov %r10, 8(%r12)
+ mov %rbx, 16(%r12)
pop %rbx
pop %rbp
pop %r12
pop %r14
mov %r15, %rax
pop %r15
+ FUNC_EXIT()
ret
-L(23): inc R32(%r15)
- sub %r8, %r10
- sbb %r11, %r9
- jmp L(2)
-
-ifdef(`NEW',`
L(fix): seta %dl
- cmp %r8, %r14
+ cmp %r8, %r10
setae %al
- orb %dl, %al
+ orb %dl, %al C "orb" form to placate Sun tools
je L(bck)
- inc %rdi
- sub %r8, %r14
+ inc %r9
+ sub %r8, %r10
sbb %r11, %rbx
jmp L(bck)
-',`
-L(fix): jb L(88)
- cmp %r8, %r9
- jb L(bck)
-L(88): inc %r10
- sub %r8, %r9
- sbb %r11, %rax
- jmp L(bck)
-')
EPILOGUE()
--- /dev/null
+divert(-1)
+dnl Copyright 2011, 2012 Free Software Foundation, Inc.
+dnl
+dnl This file is part of the GNU MP Library.
+dnl
+dnl The GNU MP Library is free software; you can redistribute it and/or
+dnl modify it under the terms of the GNU Lesser General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
+dnl
+dnl The GNU MP Library is distributed in the hope that it will be useful,
+dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+dnl Lesser General Public License for more details.
+dnl
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+define(`HOST_DOS64')
+
+
+dnl On DOS64 we always generate position-independent code
+dnl
+
+define(`PIC')
+
+
+define(`LEA',`
+ lea $1(%rip), $2
+')
+
+
+dnl Usage: JUMPTABSECT
+
+define(`JUMPTABSECT', `RODATA')
+
+
+dnl Usage: JMPENT(targlabel,tablabel)
+
+define(`JMPENT', `.long $1-$2')
+
+
+dnl Usage: FUNC_ENTRY(nregparams)
+dnl Usage: FUNC_EXIT()
+
+dnl FUNC_ENTRY and FUNC_EXIT provide an easy path for adoption of standard
+dnl ABI assembly to the DOS64 ABI.
+
+dnl FUNC_ENTRY(k) saves %rdi and %rsi and then copies the first k argument
+dnl registers (k = 1 to 4) from %rcx, %rdx, %r8, %r9 into %rdi, %rsi, %rdx,
+dnl %rcx.  The copies are emitted in that order because %rdx and %rcx are
+dnl both a source and a destination.  The two pushes move %rsp by 16 bytes,
+dnl which callers of this macro must allow for when addressing stack
+dnl arguments.
+define(`FUNC_ENTRY',
+ `push %rdi
+ push %rsi
+ mov %rcx, %rdi
+ifelse(eval($1>=2),1,`dnl
+ mov %rdx, %rsi
+ifelse(eval($1>=3),1,`dnl
+ mov %r8, %rdx
+ifelse(eval($1>=4),1,`dnl
+ mov %r9, %rcx
+')')')')
+
+dnl FUNC_EXIT() restores %rsi and %rdi in the reverse order of the pushes in
+dnl FUNC_ENTRY.  It does not emit the ret.
+define(`FUNC_EXIT',
+ `pop %rsi
+ pop %rdi')
+
+
+dnl Target ABI macros. For DOS64 we override the defaults.
+
+define(`IFDOS', `$1')
+define(`IFSTD', `')
+define(`IFELF', `')
+
+
+dnl Usage: PROTECT(symbol)
+dnl
+dnl Used for private GMP symbols that should never be overridden by users.
+dnl This can save reloc entries and improve shlib sharing as well as
+dnl application startup times
+
+define(`PROTECT', `')
+
+
+divert`'dnl
--- /dev/null
+This directory contains code for x86-64 processors with fast
+implementations of SSE operations, hence the name "fastsse".
+
+Current processors that might benefit from this code are:
+
+ AMD K10
+ AMD Bulldozer
+ Intel Nocona
+ Intel Nehalem/Westmere
+ Intel Sandybridge/Ivybridge
+ VIA Nano
+
+Current processors that do not benefit from this code are:
+
+ AMD K8
+ AMD Bobcat
+ Intel Atom
+
+Intel Conroe/Penryn is a border case; its handling of non-aligned
+128-bit memory operands is poor.
--- /dev/null
+dnl AMD64 mpn_com optimised for CPUs with fast SSE.
+
+dnl Copyright 2003, 2005, 2007, 2011, 2012 Free Software Foundation, Inc.
+
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb cycles/limb cycles/limb good
+C aligned unaligned best seen for cpu?
+C AMD K8,K9 2.0 2.0 N
+C AMD K10 0.85 1.3 Y/N
+C AMD bd1 1.40 1.40 Y
+C AMD bobcat 3.1 3.1 N
+C Intel P4 2.28 illop Y
+C Intel core2 1.02 1.02 N
+C Intel NHM 0.53 0.68 Y
+C Intel SBR 0.51 0.75 Y
+C Intel atom 3.68 3.68 N
+C VIA nano 1.17 5.09 Y/N
+
+C We try to do as many 16-byte operations as possible. The top-most and
+C bottom-most writes might need 8-byte operations. We can always write using
+C aligned 16-byte operations; we read with both aligned and unaligned 16-byte
+C operations.
+
+C Instead of having separate loops for reading aligned and unaligned, we read
+C using MOVDQU. This seems to work great except for core2; there performance
+C doubles when reading using MOVDQA (for aligned source). It is unclear how to
+C best handle the unaligned case there.
+
+C INPUT PARAMETERS
+define(`rp', `%rdi')
+define(`up', `%rsi')
+define(`n', `%rdx')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+ TEXT
+ ALIGN(16)
+C mpn_com (rp, up, n): store the bitwise complement of the n limbs at up into
+C rp.  Nothing is read or written when n is zero.
+C
+C %xmm7 is set to all ones, so pxor with it complements 16 bytes at a time.
+C All 16-byte loads use movdqu and all 16-byte stores use movdqa.
+PROLOGUE(mpn_com)
+ FUNC_ENTRY(3)
+
+ test n, n
+ jz L(don)
+
+ pcmpeqb %xmm7, %xmm7 C set to 111...111
+
+ C Bit 3 of rp decides the alignment, assuming rp is limb (8-byte) aligned,
+ C which is not checked here.  When it is set, one limb is done with
+ C 64-bit operations so that rp is 16-byte aligned for the movdqa stores.
+ test $8, R8(rp) C is rp 16-byte aligned?
+ jz L(ali) C jump if rp aligned
+ mov (up), %rax
+ lea 8(up), up
+ not %rax
+ mov %rax, (rp)
+ lea 8(rp), rp
+ dec n
+
+ sub $14, n
+ jc L(sma)
+
+ ALIGN(16)
+ C Main loop: 14 limbs (112 bytes) per iteration.
+L(top): movdqu (up), %xmm0
+ movdqu 16(up), %xmm1
+ movdqu 32(up), %xmm2
+ movdqu 48(up), %xmm3
+ movdqu 64(up), %xmm4
+ movdqu 80(up), %xmm5
+ movdqu 96(up), %xmm6
+ lea 112(up), up
+ pxor %xmm7, %xmm0
+ pxor %xmm7, %xmm1
+ pxor %xmm7, %xmm2
+ pxor %xmm7, %xmm3
+ pxor %xmm7, %xmm4
+ pxor %xmm7, %xmm5
+ pxor %xmm7, %xmm6
+ movdqa %xmm0, (rp)
+ movdqa %xmm1, 16(rp)
+ movdqa %xmm2, 32(rp)
+ movdqa %xmm3, 48(rp)
+ movdqa %xmm4, 64(rp)
+ movdqa %xmm5, 80(rp)
+ movdqa %xmm6, 96(rp)
+ lea 112(rp), rp
+L(ali): sub $14, n
+ jnc L(top)
+
+ C Tail: after the add, n is the number of limbs left (0 to 13).  Its bits
+ C 8, 4, 2 and 1 each select one fixed-size piece below.
+L(sma): add $14, n
+ test $8, R8(n)
+ jz 1f
+ movdqu (up), %xmm0
+ movdqu 16(up), %xmm1
+ movdqu 32(up), %xmm2
+ movdqu 48(up), %xmm3
+ lea 64(up), up
+ pxor %xmm7, %xmm0
+ pxor %xmm7, %xmm1
+ pxor %xmm7, %xmm2
+ pxor %xmm7, %xmm3
+ movdqa %xmm0, (rp)
+ movdqa %xmm1, 16(rp)
+ movdqa %xmm2, 32(rp)
+ movdqa %xmm3, 48(rp)
+ lea 64(rp), rp
+1:
+ test $4, R8(n)
+ jz 1f
+ movdqu (up), %xmm0
+ movdqu 16(up), %xmm1
+ lea 32(up), up
+ pxor %xmm7, %xmm0
+ pxor %xmm7, %xmm1
+ movdqa %xmm0, (rp)
+ movdqa %xmm1, 16(rp)
+ lea 32(rp), rp
+1:
+ test $2, R8(n)
+ jz 1f
+ movdqu (up), %xmm0
+ lea 16(up), up
+ pxor %xmm7, %xmm0
+ movdqa %xmm0, (rp)
+ lea 16(rp), rp
+1:
+ C a final odd limb is done with 64-bit operations
+ test $1, R8(n)
+ jz 1f
+ mov (up), %rax
+ not %rax
+ mov %rax, (rp)
+1:
+L(don): FUNC_EXIT()
+ ret
+EPILOGUE()
--- /dev/null
+dnl AMD64 mpn_copyd optimised for CPUs with fast SSE copying and SSSE3.
+
+dnl Copyright 2012 Free Software Foundation, Inc.
+
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb cycles/limb cycles/limb good
+C aligned unaligned best seen for cpu?
+C AMD K8,K9 2.0 illop 1.0/1.0 N
+C AMD K10 0.85 illop Y/N
+C AMD bd1 1.39 1.40 Y
+C AMD bobcat 1.97 8.35 1.5/1.5 N
+C Intel P4 2.26 illop Y/N
+C Intel core2 0.52 0.68-0.80 opt/0.68 Y
+C Intel NHM 0.52 0.64 opt/opt Y
+C Intel SBR 0.51 0.54 opt/0.51 Y
+C Intel atom 1.16 1.66 opt/opt Y
+C VIA nano 1.09 1.07 opt/opt Y
+
+C We use only 16-byte operations, except for unaligned top-most and bottom-most
+C limbs. We use the SSSE3 palignr instruction when rp - up = 8 (mod 16).
+C
+C For operands of <= COPYD_SSE_THRESHOLD limbs, we use a plain 64-bit loop,
+C taken from the x86_64 default code.
+
+C INPUT PARAMETERS
+define(`rp', `%rdi')
+define(`up', `%rsi')
+define(`n', `%rdx')
+
+C There are three instructions for loading an aligned 128-bit quantity. We use
+C movaps, since it has the shortest coding.
+define(`movdqa', ``movaps'')
+
+ifdef(`COPYD_SSE_THRESHOLD',`',`define(`COPYD_SSE_THRESHOLD', 7)')
+
+ASM_START()
+ TEXT
+ ALIGN(64)
+PROLOGUE(mpn_copyd)
+ FUNC_ENTRY(3)
+
+ lea -8(up,n,8), up C mpn_copyd copies from the top limb downwards; up -> up[n-1]
+ lea -8(rp,n,8), rp C rp -> rp[n-1]
+
+ cmp $COPYD_SSE_THRESHOLD, n
+ jbe L(bc) C n <= threshold: use the plain 64-bit loop
+
+ bt $3, R32(rp) C is rp 16-byte aligned?
+ jc L(rp_aligned) C jump if rp aligned
+
+ mov (up), %rax C copy one limb
+ mov %rax, (rp)
+ lea -8(up), up
+ lea -8(rp), rp
+ dec n
+
+L(rp_aligned):
+ bt $3, R32(up) C CF = bit 3 of up (bit 3 of rp is set here)
+ jnc L(uent) C clear: up and rp differ by 8 (mod 16), use the palignr code
+
+ifelse(eval(COPYD_SSE_THRESHOLD >= 8),1,
+` sub $8, n',
+` jmp L(am)')
+
+ ALIGN(16)
+L(atop):movdqa -8(up), %xmm0 C same-alignment loop: 4 x 16 bytes = 8 limbs per pass
+ movdqa -24(up), %xmm1
+ movdqa -40(up), %xmm2
+ movdqa -56(up), %xmm3
+ lea -64(up), up
+ movdqa %xmm0, -8(rp)
+ movdqa %xmm1, -24(rp)
+ movdqa %xmm2, -40(rp)
+ movdqa %xmm3, -56(rp)
+ lea -64(rp), rp
+L(am): sub $8, n
+ jnc L(atop) C loop while 8 or more limbs remained
+
+ bt $2, R32(n) C low 3 bits of n = limbs still to do; 4 limbs?
+ jnc 1f
+ movdqa -8(up), %xmm0
+ movdqa -24(up), %xmm1
+ lea -32(up), up
+ movdqa %xmm0, -8(rp)
+ movdqa %xmm1, -24(rp)
+ lea -32(rp), rp
+
+1: bt $1, R32(n) C 2 limbs to do?
+ jnc 1f
+ movdqa -8(up), %xmm0
+ lea -16(up), up
+ movdqa %xmm0, -8(rp)
+ lea -16(rp), rp
+
+1: bt $0, n C one final limb to do?
+ jnc 1f
+ mov (up), %r8
+ mov %r8, (rp)
+
+1: FUNC_EXIT()
+ ret
+
+L(uent):sub $16, n C palignr loop handles 16 limbs per pass
+ movdqa (up), %xmm0 C preload the aligned 16 bytes at up; the load is between sub and jc
+ jc L(uend) C fewer than 16 limbs, skip the loop
+
+ ALIGN(16)
+L(utop):sub $16, n C result is tested by the jnc at the loop bottom
+ movdqa -16(up), %xmm1
+ palignr($8, %xmm1, %xmm0) C xmm0 = high limb of xmm1 (low half), low limb of xmm0 (high half)
+ movdqa %xmm0, -8(rp)
+ movdqa -32(up), %xmm2
+ palignr($8, %xmm2, %xmm1)
+ movdqa %xmm1, -24(rp)
+ movdqa -48(up), %xmm3
+ palignr($8, %xmm3, %xmm2)
+ movdqa %xmm2, -40(rp)
+ movdqa -64(up), %xmm0
+ palignr($8, %xmm0, %xmm3)
+ movdqa %xmm3, -56(rp)
+ movdqa -80(up), %xmm1
+ palignr($8, %xmm1, %xmm0)
+ movdqa %xmm0, -72(rp)
+ movdqa -96(up), %xmm2
+ palignr($8, %xmm2, %xmm1)
+ movdqa %xmm1, -88(rp)
+ movdqa -112(up), %xmm3
+ palignr($8, %xmm3, %xmm2)
+ movdqa %xmm2, -104(rp)
+ movdqa -128(up), %xmm0 C xmm0 carries over into the next pass or the wind-down
+ palignr($8, %xmm0, %xmm3)
+ movdqa %xmm3, -120(rp)
+ lea -128(up), up
+ lea -128(rp), rp
+ jnc L(utop) C loop while 16 or more limbs remained
+
+L(uend):bt $3, R32(n) C low 4 bits of n = limbs still to do; 8 limbs?
+ jnc 1f
+ movdqa -16(up), %xmm1
+ palignr($8, %xmm1, %xmm0)
+ movdqa %xmm0, -8(rp)
+ movdqa -32(up), %xmm0
+ palignr($8, %xmm0, %xmm1)
+ movdqa %xmm1, -24(rp)
+ movdqa -48(up), %xmm1
+ palignr($8, %xmm1, %xmm0)
+ movdqa %xmm0, -40(rp)
+ movdqa -64(up), %xmm0
+ palignr($8, %xmm0, %xmm1)
+ movdqa %xmm1, -56(rp)
+ lea -64(up), up
+ lea -64(rp), rp
+
+1: bt $2, R32(n) C 4 limbs to do?
+ jnc 1f
+ movdqa -16(up), %xmm1
+ palignr($8, %xmm1, %xmm0)
+ movdqa %xmm0, -8(rp)
+ movdqa -32(up), %xmm0
+ palignr($8, %xmm0, %xmm1)
+ movdqa %xmm1, -24(rp)
+ lea -32(up), up
+ lea -32(rp), rp
+
+1: bt $1, R32(n) C 2 limbs to do?
+ jnc 1f
+ movdqa -16(up), %xmm1
+ palignr($8, %xmm1, %xmm0)
+ movdqa %xmm0, -8(rp)
+ lea -16(up), up
+ lea -16(rp), rp
+
+1: bt $0, n C one final limb to do?
+ jnc 1f
+ mov (up), %r8
+ mov %r8, (rp)
+
+1: FUNC_EXIT()
+ ret
+
+C Basecase code. Needed for good small operands speed, not for
+C correctness as the above code is currently written.
+
+L(bc): sub $4, R32(n) C at least 4 limbs?
+ jc L(end)
+
+ ALIGN(16)
+L(top): mov (up), %r8 C copy 4 limbs, highest first
+ mov -8(up), %r9
+ lea -32(rp), rp
+ mov -16(up), %r10
+ mov -24(up), %r11
+ lea -32(up), up
+ mov %r8, 32(rp) C rp was already lowered by 32, hence the positive offsets
+ mov %r9, 24(rp)
+ifelse(eval(COPYD_SSE_THRESHOLD >= 8),1,
+` sub $4, R32(n)')
+ mov %r10, 16(rp)
+ mov %r11, 8(rp)
+ifelse(eval(COPYD_SSE_THRESHOLD >= 8),1,
+` jnc L(top)')
+
+L(end): bt $0, R32(n) C low 2 bits of n = limbs still to do; odd limb?
+ jnc 1f
+ mov (up), %r8
+ mov %r8, (rp)
+ lea -8(rp), rp
+ lea -8(up), up
+1: bt $1, R32(n) C 2 limbs to do?
+ jnc 1f
+ mov (up), %r8
+ mov -8(up), %r9
+ mov %r8, (rp)
+ mov %r9, -8(rp)
+1: FUNC_EXIT()
+ ret
+EPILOGUE()
--- /dev/null
+dnl AMD64 mpn_copyd optimised for CPUs with fast SSE.
+
+dnl Copyright 2003, 2005, 2007, 2011, 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C cycles/limb good for cpu?
+C AMD K8,K9
+C AMD K10 0.85 Y
+C AMD bd1 0.8 Y
+C AMD bobcat
+C Intel P4 2.28 Y
+C Intel core2 1
+C Intel NHM 0.5 Y
+C Intel SBR 0.5 Y
+C Intel atom
+C VIA nano 1.1 Y
+
+C We try to do as many 16-byte operations as possible. The top-most and
+C bottom-most writes might need 8-byte operations. We can always write using
+C aligned 16-byte operations, we read with both aligned and unaligned 16-byte
+C operations.
+
+C Instead of having separate loops for reading aligned and unaligned, we read
+C using MOVDQU. This seems to work great except for core2; there performance
+C doubles when reading using MOVDQA (for aligned source). It is unclear how to
+C best handle the unaligned case there.
+
+C INPUT PARAMETERS
+define(`rp', `%rdi')
+define(`up', `%rsi')
+define(`n', `%rdx')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+ TEXT
+ ALIGN(16)
+PROLOGUE(mpn_copyd)
+ FUNC_ENTRY(3)
+
+ test n, n C mpn_copyd: copy n limbs from up to rp, highest limb first; done if n = 0
+ jz L(don)
+
+ lea -16(rp,n,8), rp C rp -> the two top limbs rp[n-2..n-1]
+ lea -16(up,n,8), up C up -> the two top limbs up[n-2..n-1]
+
+ test $8, R8(rp) C is rp 16-byte aligned?
+ jz L(ali) C jump if rp aligned
+ mov 8(up), %rax C copy the top limb with 64-bit ops to make rp aligned
+ lea -8(up), up
+ mov %rax, 8(rp)
+ lea -8(rp), rp
+ dec n
+
+ sub $16, n C at least 16 limbs left for a full loop pass?
+ jc L(sma) C no, go to the wind-down code
+
+ ALIGN(16)
+L(top): movdqu (up), %xmm0 C 8 unaligned 16-byte loads = 16 limbs per pass
+ movdqu -16(up), %xmm1
+ movdqu -32(up), %xmm2
+ movdqu -48(up), %xmm3
+ movdqu -64(up), %xmm4
+ movdqu -80(up), %xmm5
+ movdqu -96(up), %xmm6
+ movdqu -112(up), %xmm7
+ lea -128(up), up
+ movdqa %xmm0, (rp) C stores are aligned, rp was aligned above
+ movdqa %xmm1, -16(rp)
+ movdqa %xmm2, -32(rp)
+ movdqa %xmm3, -48(rp)
+ movdqa %xmm4, -64(rp)
+ movdqa %xmm5, -80(rp)
+ movdqa %xmm6, -96(rp)
+ movdqa %xmm7, -112(rp)
+ lea -128(rp), rp
+L(ali): sub $16, n C loop entry when rp was already aligned
+ jnc L(top) C loop while 16 or more limbs remained
+
+L(sma): test $8, R8(n) C low 4 bits of n = limbs still to do; 8 limbs?
+ jz 1f
+ movdqu (up), %xmm0
+ movdqu -16(up), %xmm1
+ movdqu -32(up), %xmm2
+ movdqu -48(up), %xmm3
+ lea -64(up), up
+ movdqa %xmm0, (rp)
+ movdqa %xmm1, -16(rp)
+ movdqa %xmm2, -32(rp)
+ movdqa %xmm3, -48(rp)
+ lea -64(rp), rp
+1:
+ test $4, R8(n) C 4 limbs to do?
+ jz 1f
+ movdqu (up), %xmm0
+ movdqu -16(up), %xmm1
+ lea -32(up), up
+ movdqa %xmm0, (rp)
+ movdqa %xmm1, -16(rp)
+ lea -32(rp), rp
+1:
+ test $2, R8(n) C 2 limbs to do?
+ jz 1f
+ movdqu (up), %xmm0
+ lea -16(up), up
+ movdqa %xmm0, (rp)
+ lea -16(rp), rp
+1:
+ test $1, R8(n) C one final limb to do?
+ jz 1f
+ mov 8(up), %r8 C pointers are 16 bytes below the next limb pair, so the limb is at offset 8
+ mov %r8, 8(rp)
+1:
+L(don): FUNC_EXIT()
+ ret
+EPILOGUE()
--- /dev/null
+dnl AMD64 mpn_copyi optimised for CPUs with fast SSE copying and SSSE3.
+
+dnl Copyright 2012 Free Software Foundation, Inc.
+
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb cycles/limb cycles/limb good
+C aligned unaligned best seen for cpu?
+C AMD K8,K9 2.0 illop 1.0/1.0 N
+C AMD K10 0.85 illop Y/N
+C AMD bd1 1.39 ? 1.45 Y/N
+C AMD bobcat 1.97 ? 8.17 1.5/1.5 N
+C Intel P4 2.26 illop Y/N
+C Intel core2 0.52 0.82 opt/0.74 Y
+C Intel NHM 0.52 0.65 opt/opt Y
+C Intel SBR 0.51 0.55 opt/0.51 Y
+C Intel atom 1.16 1.70 opt/opt Y
+C VIA nano 1.09 1.10 opt/opt Y
+
+C We use only 16-byte operations, except for unaligned top-most and bottom-most
+C limbs. We use the SSSE3 palignr instruction when rp - up = 8 (mod 16). That
+C instruction is better adapted to mpn_copyd's needs, we need to contort the
+C code to use it here.
+C
+C For operands of <= COPYI_SSE_THRESHOLD limbs, we use a plain 64-bit loop,
+C taken from the x86_64 default code.
+
+C INPUT PARAMETERS
+define(`rp', `%rdi')
+define(`up', `%rsi')
+define(`n', `%rdx')
+
+C There are three instructions for loading an aligned 128-bit quantity. We use
+C movaps, since it has the shortest coding.
+define(`movdqa', ``movaps'')
+
+ifdef(`COPYI_SSE_THRESHOLD',`',`define(`COPYI_SSE_THRESHOLD', 7)')
+
+ASM_START()
+ TEXT
+ ALIGN(64)
+PROLOGUE(mpn_copyi)
+ FUNC_ENTRY(3)
+
+ cmp $COPYI_SSE_THRESHOLD, n C mpn_copyi: copy n limbs from up to rp, lowest limb first
+ jbe L(bc) C n <= threshold: use the plain 64-bit loop
+
+ bt $3, R32(rp) C is rp 16-byte aligned?
+ jnc L(rp_aligned) C jump if rp aligned
+
+ movsq C copy one limb
+ dec n
+
+L(rp_aligned):
+ bt $3, R32(up) C CF = bit 3 of up (bit 3 of rp is clear here)
+ jc L(uent) C set: up and rp differ by 8 (mod 16), use the palignr code
+
+ifelse(eval(COPYI_SSE_THRESHOLD >= 8),1,
+` sub $8, n',
+` jmp L(am)')
+
+ ALIGN(16)
+L(atop):movdqa 0(up), %xmm0 C same-alignment loop: 4 x 16 bytes = 8 limbs per pass
+ movdqa 16(up), %xmm1
+ movdqa 32(up), %xmm2
+ movdqa 48(up), %xmm3
+ lea 64(up), up
+ movdqa %xmm0, (rp)
+ movdqa %xmm1, 16(rp)
+ movdqa %xmm2, 32(rp)
+ movdqa %xmm3, 48(rp)
+ lea 64(rp), rp
+L(am): sub $8, n
+ jnc L(atop) C loop while 8 or more limbs remained
+
+ bt $2, R32(n) C low 3 bits of n = limbs still to do; 4 limbs?
+ jnc 1f
+ movdqa (up), %xmm0
+ movdqa 16(up), %xmm1
+ lea 32(up), up
+ movdqa %xmm0, (rp)
+ movdqa %xmm1, 16(rp)
+ lea 32(rp), rp
+
+1: bt $1, R32(n) C 2 limbs to do?
+ jnc 1f
+ movdqa (up), %xmm0
+ lea 16(up), up
+ movdqa %xmm0, (rp)
+ lea 16(rp), rp
+
+1: bt $0, n C one final limb to do?
+ jnc 1f
+ mov (up), %r8
+ mov %r8, (rp)
+
+1: FUNC_EXIT()
+ ret
+
+L(uent):
+C Code handling up - rp = 8 (mod 16)
+
+C FIXME: The code below only handles overlap if it is close to complete, or
+C quite separate: up-rp < 5 or up-rp > 15 limbs
+ lea -40(up), %rax C 40 = 5 * GMP_LIMB_BYTES
+ sub rp, %rax C rax = up - rp - 40 (bytes)
+ cmp $80, %rax C 80 = (15-5) * GMP_LIMB_BYTES
+ jbe L(bc) C deflect to plain loop
+
+ sub $16, n C palignr loop handles 16 limbs per pass
+ jc L(uend) C fewer than 16 limbs, skip the loop
+
+ movdqa 120(up), %xmm3 C first load of the pass, done here since we enter at L(um)
+
+ sub $16, n C result is tested by the jnc at the loop bottom
+ jmp L(um)
+
+ ALIGN(16)
+L(utop):movdqa 120(up), %xmm3
+ movdqa %xmm0, -128(rp) C deferred lowest store of the previous pass
+ sub $16, n
+L(um): movdqa 104(up), %xmm2 C each pass works from its highest 16 bytes down to its lowest
+ palignr($8, %xmm2, %xmm3) C xmm3 = high limb of xmm2 (low half), low limb of xmm3 (high half)
+ movdqa 88(up), %xmm1
+ movdqa %xmm3, 112(rp)
+ palignr($8, %xmm1, %xmm2)
+ movdqa 72(up), %xmm0
+ movdqa %xmm2, 96(rp)
+ palignr($8, %xmm0, %xmm1)
+ movdqa 56(up), %xmm3
+ movdqa %xmm1, 80(rp)
+ palignr($8, %xmm3, %xmm0)
+ movdqa 40(up), %xmm2
+ movdqa %xmm0, 64(rp)
+ palignr($8, %xmm2, %xmm3)
+ movdqa 24(up), %xmm1
+ movdqa %xmm3, 48(rp)
+ palignr($8, %xmm1, %xmm2)
+ movdqa 8(up), %xmm0
+ movdqa %xmm2, 32(rp)
+ palignr($8, %xmm0, %xmm1)
+ movdqa -8(up), %xmm3 C aligned load that starts 8 bytes below up
+ movdqa %xmm1, 16(rp)
+ palignr($8, %xmm3, %xmm0) C xmm0 = lowest 16 bytes of this pass, stored after the pointer update
+ lea 128(up), up
+ lea 128(rp), rp
+ jnc L(utop) C loop while 16 or more limbs remained
+
+ movdqa %xmm0, -128(rp) C deferred lowest store of the final pass
+
+L(uend):bt $3, R32(n) C low 4 bits of n = limbs still to do; 8 limbs?
+ jnc 1f
+ movdqa 56(up), %xmm3
+ movdqa 40(up), %xmm2
+ palignr($8, %xmm2, %xmm3)
+ movdqa 24(up), %xmm1
+ movdqa %xmm3, 48(rp)
+ palignr($8, %xmm1, %xmm2)
+ movdqa 8(up), %xmm0
+ movdqa %xmm2, 32(rp)
+ palignr($8, %xmm0, %xmm1)
+ movdqa -8(up), %xmm3
+ movdqa %xmm1, 16(rp)
+ palignr($8, %xmm3, %xmm0)
+ lea 64(up), up
+ movdqa %xmm0, (rp)
+ lea 64(rp), rp
+
+1: bt $2, R32(n) C 4 limbs to do?
+ jnc 1f
+ movdqa 24(up), %xmm1
+ movdqa 8(up), %xmm0
+ palignr($8, %xmm0, %xmm1)
+ movdqa -8(up), %xmm3
+ movdqa %xmm1, 16(rp)
+ palignr($8, %xmm3, %xmm0)
+ lea 32(up), up
+ movdqa %xmm0, (rp)
+ lea 32(rp), rp
+
+1: bt $1, R32(n) C 2 limbs to do?
+ jnc 1f
+ movdqa 8(up), %xmm0
+ movdqa -8(up), %xmm3
+ palignr($8, %xmm3, %xmm0)
+ lea 16(up), up
+ movdqa %xmm0, (rp)
+ lea 16(rp), rp
+
+1: bt $0, n C one final limb to do?
+ jnc 1f
+ mov (up), %r8
+ mov %r8, (rp)
+
+1: FUNC_EXIT()
+ ret
+
+C Basecase code. Needed for good small operands speed, not for
+C correctness as the above code is currently written.
+
+L(bc): lea -8(rp), rp C bias rp by one limb; the stores below use offsets that compensate
+ sub $4, R32(n) C at least 4 limbs?
+ jc L(end)
+
+ ALIGN(16)
+L(top): mov (up), %r8 C copy 4 limbs, lowest first
+ mov 8(up), %r9
+ lea 32(rp), rp
+ mov 16(up), %r10
+ mov 24(up), %r11
+ lea 32(up), up
+ mov %r8, -24(rp)
+ mov %r9, -16(rp)
+ifelse(eval(1 || COPYI_SSE_THRESHOLD >= 8),1,
+` sub $4, R32(n)')
+ mov %r10, -8(rp)
+ mov %r11, (rp)
+ifelse(eval(1 || COPYI_SSE_THRESHOLD >= 8),1,
+` jnc L(top)')
+
+L(end): bt $0, R32(n) C low 2 bits of n = limbs still to do; odd limb?
+ jnc 1f
+ mov (up), %r8
+ mov %r8, 8(rp)
+ lea 8(rp), rp
+ lea 8(up), up
+1: bt $1, R32(n) C 2 limbs to do?
+ jnc 1f
+ mov (up), %r8
+ mov 8(up), %r9
+ mov %r8, 8(rp)
+ mov %r9, 16(rp)
+1: FUNC_EXIT()
+ ret
+EPILOGUE()
--- /dev/null
+dnl AMD64 mpn_copyi optimised for CPUs with fast SSE.
+
+dnl Copyright 2003, 2005, 2007, 2011, 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C cycles/limb good for cpu?
+C AMD K8,K9
+C AMD K10 0.85 1.64 Y/N
+C AMD bd1 1.4 1.4 Y
+C AMD bobcat
+C Intel P4 2.3 2.3 Y
+C Intel core2 1.0 1.0
+C Intel NHM 0.5 0.67 Y
+C Intel SBR 0.5 0.75 Y
+C Intel atom
+C VIA nano 1.16 5.16 Y/N
+
+C We try to do as many 16-byte operations as possible. The top-most and
+C bottom-most writes might need 8-byte operations. We can always write using
+C aligned 16-byte operations, we read with both aligned and unaligned 16-byte
+C operations.
+
+C Instead of having separate loops for reading aligned and unaligned, we read
+C using MOVDQU. This seems to work great except for core2; there performance
+C doubles when reading using MOVDQA (for aligned source). It is unclear how to
+C best handle the unaligned case there.
+
+C INPUT PARAMETERS
+define(`rp', `%rdi')
+define(`up', `%rsi')
+define(`n', `%rdx')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+dnl define(`movdqu', lddqu)
+
+ASM_START()
+ TEXT
+ ALIGN(64)
+PROLOGUE(mpn_copyi)
+ FUNC_ENTRY(3)
+
+ cmp $3, n C mpn_copyi: copy n limbs from up to rp, lowest limb first
+ jc L(bc) C n < 3: use the basecase code
+
+ test $8, R8(rp) C is rp 16-byte aligned?
+ jz L(ali) C jump if rp aligned
+ movsq C copy single limb
+ dec n
+
+ sub $16, n C at least 16 limbs left for a full loop pass?
+ jc L(sma) C no, go to the wind-down code
+
+ ALIGN(16)
+L(top): movdqu (up), %xmm0 C 8 unaligned 16-byte loads = 16 limbs per pass
+ movdqu 16(up), %xmm1
+ movdqu 32(up), %xmm2
+ movdqu 48(up), %xmm3
+ movdqu 64(up), %xmm4
+ movdqu 80(up), %xmm5
+ movdqu 96(up), %xmm6
+ movdqu 112(up), %xmm7
+ lea 128(up), up
+ movdqa %xmm0, (rp) C stores are aligned, rp was aligned above
+ movdqa %xmm1, 16(rp)
+ movdqa %xmm2, 32(rp)
+ movdqa %xmm3, 48(rp)
+ movdqa %xmm4, 64(rp)
+ movdqa %xmm5, 80(rp)
+ movdqa %xmm6, 96(rp)
+ movdqa %xmm7, 112(rp)
+ lea 128(rp), rp
+L(ali): sub $16, n C loop entry when rp was already aligned
+ jnc L(top) C loop while 16 or more limbs remained
+
+L(sma): test $8, R8(n) C low 4 bits of n = limbs still to do; 8 limbs?
+ jz 1f
+ movdqu (up), %xmm0
+ movdqu 16(up), %xmm1
+ movdqu 32(up), %xmm2
+ movdqu 48(up), %xmm3
+ lea 64(up), up
+ movdqa %xmm0, (rp)
+ movdqa %xmm1, 16(rp)
+ movdqa %xmm2, 32(rp)
+ movdqa %xmm3, 48(rp)
+ lea 64(rp), rp
+1:
+ test $4, R8(n) C 4 limbs to do?
+ jz 1f
+ movdqu (up), %xmm0
+ movdqu 16(up), %xmm1
+ lea 32(up), up
+ movdqa %xmm0, (rp)
+ movdqa %xmm1, 16(rp)
+ lea 32(rp), rp
+1:
+ test $2, R8(n) C 2 limbs to do?
+ jz 1f
+ movdqu (up), %xmm0
+ lea 16(up), up
+ movdqa %xmm0, (rp)
+ lea 16(rp), rp
+ ALIGN(16)
+1:
+L(end): bt $0, n C one final limb to do? (also reached from L(bc) when n < 2)
+ jnc 1f
+ mov (up), %r8
+ mov %r8, (rp)
+1:
+ FUNC_EXIT()
+ ret
+
+C Basecase code. Needed for good small operands speed, not for
+C correctness as the above code is currently written.
+
+L(bc): sub $2, n C at least 2 limbs?
+ jc L(end) C no, at most one limb to copy
+ ALIGN(16)
+1: mov (up), %rax C copy 2 limbs per pass
+ mov 8(up), %rcx
+ lea 16(up), up
+ mov %rax, (rp)
+ mov %rcx, 8(rp)
+ lea 16(rp), rp
+ sub $2, n
+ jnc 1b
+
+ bt $0, n C one final limb to do?
+ jnc L(ret)
+ mov (up), %rax
+ mov %rax, (rp)
+L(ret): FUNC_EXIT()
+ ret
+EPILOGUE()
--- /dev/null
+dnl AMD64 mpn_lshift optimised for CPUs with fast SSE including fast movdqu.
+
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
+dnl Copyright 2010, 2011, 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C cycles/limb cycles/limb cycles/limb good
+C aligned unaligned best seen for cpu?
+C AMD K8,K9 3 3 2.35 no, use shl/shr
+C AMD K10 1.5-1.8 1.5-1.8 1.33 yes
+C AMD bd1 1.7-1.9 1.7-1.9 1.33 yes
+C AMD bobcat 3.17 3.17 yes, bad for n < 20
+C Intel P4 4.67 4.67 2.7 no, slow movdqu
+C Intel core2 2.15 2.15 1.25 no, use shld/shrd
+C Intel NHM 1.66 1.66 1.25 no, use shld/shrd
+C Intel SBR 1.3 1.3 1.25 yes, bad for n = 4-6
+C Intel atom 11.7 11.7 4.5 no
+C VIA nano 5.7 5.95 2.0 no, slow movdqu
+
+C We try to do as many aligned 16-byte operations as possible. The top-most
+C and bottom-most writes might need 8-byte operations.
+C
+C This variant relies on fast load movdqu, and uses it even for aligned operands,
+C in order to avoid the need for two separate loops.
+C
+C TODO
+C * Could 2-limb wind-down code be simplified?
+C * Improve basecase code, using shld/shrd for SBR, discrete integer shifts
+C for other affected CPUs.
+
+C INPUT PARAMETERS
+define(`rp', `%rdi')
+define(`ap', `%rsi')
+define(`n', `%rdx')
+define(`cnt', `%rcx')
+
+ASM_START()
+ TEXT
+ ALIGN(64)
+PROLOGUE(mpn_lshift)
+ FUNC_ENTRY(4)
+ movd R32(%rcx), %xmm4 C xmm4 = cnt, the left shift count
+ mov $64, R32(%rax)
+ sub R32(%rcx), R32(%rax)
+ movd R32(%rax), %xmm5 C xmm5 = 64 - cnt, the complementary right shift count
+
+ neg R32(%rcx)
+ mov -8(ap,n,8), %rax C rax = most significant source limb
+ shr R8(%rcx), %rax C rax = bits shifted out at the top; rax is not written again
+
+ cmp $3, n
+ jle L(bc) C n <= 3: use the basecase code
+
+ lea (rp,n,8), R32(%rcx) C address just past the destination
+ bt $3, R32(%rcx) C is that address 16-byte aligned?
+ jnc L(rp_aligned)
+
+C Do one initial limb in order to make rp aligned
+ movq -8(ap,n,8), %xmm0 C top limb
+ movq -16(ap,n,8), %xmm1 C limb below it
+ psllq %xmm4, %xmm0
+ psrlq %xmm5, %xmm1
+ por %xmm1, %xmm0 C combine the shifted top limb with bits from the limb below
+ movq %xmm0, -8(rp,n,8)
+ dec n
+
+L(rp_aligned):
+ lea 1(n), %r8d
+
+ and $6, R32(%r8) C dispatch on (n+1) & 6 to an entry point of the 4-way unrolled loop
+ jz L(ba0)
+ cmp $4, R32(%r8)
+ jz L(ba4)
+ jc L(ba2)
+L(ba6): add $-4, n
+ jmp L(i56)
+L(ba0): add $-6, n
+ jmp L(i70)
+L(ba4): add $-2, n
+ jmp L(i34)
+L(ba2): add $-8, n
+ jle L(end)
+
+ ALIGN(16)
+L(top): movdqu 40(ap,n,8), %xmm1 C same limb pair as xmm0 but one limb lower
+ movdqu 48(ap,n,8), %xmm0
+ psllq %xmm4, %xmm0
+ psrlq %xmm5, %xmm1
+ por %xmm1, %xmm0
+ movdqa %xmm0, 48(rp,n,8) C aligned 16-byte store of two result limbs
+L(i70):
+ movdqu 24(ap,n,8), %xmm1
+ movdqu 32(ap,n,8), %xmm0
+ psllq %xmm4, %xmm0
+ psrlq %xmm5, %xmm1
+ por %xmm1, %xmm0
+ movdqa %xmm0, 32(rp,n,8)
+L(i56):
+ movdqu 8(ap,n,8), %xmm1
+ movdqu 16(ap,n,8), %xmm0
+ psllq %xmm4, %xmm0
+ psrlq %xmm5, %xmm1
+ por %xmm1, %xmm0
+ movdqa %xmm0, 16(rp,n,8)
+L(i34):
+ movdqu -8(ap,n,8), %xmm1
+ movdqu (ap,n,8), %xmm0
+ psllq %xmm4, %xmm0
+ psrlq %xmm5, %xmm1
+ por %xmm1, %xmm0
+ movdqa %xmm0, (rp,n,8)
+ sub $8, n C 8 limbs per full pass
+ jg L(top)
+
+L(end): bt $0, R32(n) C n odd: only the lowest limb is left
+ jc L(end8)
+
+ movdqu (ap), %xmm1 C two lowest limbs are left
+ pxor %xmm0, %xmm0
+ punpcklqdq %xmm1, %xmm0 C xmm0 = zero (low half), ap[0] (high half)
+ psllq %xmm4, %xmm1
+ psrlq %xmm5, %xmm0
+ por %xmm1, %xmm0
+ movdqa %xmm0, (rp)
+ FUNC_EXIT()
+ ret
+
+C Basecase
+ ALIGN(16)
+L(bc): dec R32(n)
+ jz L(end8) C n was 1: only the lowest limb
+
+ movq (ap,n,8), %xmm1 C top limb
+ movq -8(ap,n,8), %xmm0 C limb below it
+ psllq %xmm4, %xmm1
+ psrlq %xmm5, %xmm0
+ por %xmm1, %xmm0
+ movq %xmm0, (rp,n,8)
+ sub $2, R32(n)
+ jl L(end8) C n was 2: only the lowest limb is left
+ movq 8(ap), %xmm1 C n was 3: do the middle limb
+ movq (ap), %xmm0
+ psllq %xmm4, %xmm1
+ psrlq %xmm5, %xmm0
+ por %xmm1, %xmm0
+ movq %xmm0, 8(rp)
+
+L(end8):movq (ap), %xmm0 C lowest limb: nothing below it to shift in
+ psllq %xmm4, %xmm0
+ movq %xmm0, (rp)
+ FUNC_EXIT()
+ ret
+EPILOGUE()
--- /dev/null
+dnl AMD64 mpn_lshift optimised for CPUs with fast SSE.
+
+dnl Contributed to the GNU project by David Harvey and Torbjorn Granlund.
+
+dnl Copyright 2010, 2011, 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C cycles/limb cycles/limb good
+C 16-byte aligned 16-byte unaligned for cpu?
+C AMD K8,K9 ? ?
+C AMD K10 1.68 (1.45) 1.75 (1.49) Y
+C AMD bd1 1.82 (1.75) 1.82 (1.75) Y
+C AMD bobcat 4 4
+C Intel P4 3 (2.7) 3 (2.7) Y
+C Intel core2 2.05 (1.67) 2.55 (1.75)
+C Intel NHM 2.05 (1.75) 2.09 (2)
+C Intel SBR 1.5 (1.3125) 1.5 (1.4375) Y
+C Intel atom ? ?
+C VIA nano 2.25 (2) 2.5 (2) Y
+
+C We try to do as many 16-byte operations as possible. The top-most and
+C bottom-most writes might need 8-byte operations.
+
+C There are two inner-loops, one for when rp = ap (mod 16) and one when this is
+C not true. The aligned case reads 16+8 bytes, the unaligned case reads
+C 16+8+X bytes, where X is 8 or 16 depending on how punpcklqdq is implemented.
+
+C This is not yet great code:
+C (1) The unaligned case makes many reads.
+C (2) We should do some unrolling, at least 2-way.
+C With 2-way unrolling but no scheduling we reach 1.5 c/l on K10 and 2 c/l on
+C Nano.
+
+C INPUT PARAMETERS
+define(`rp', `%rdi')
+define(`ap', `%rsi')
+define(`n', `%rdx')
+define(`cnt', `%rcx')
+
+ASM_START()
+ TEXT
+ ALIGN(64)
+PROLOGUE(mpn_lshift)
+ movd R32(%rcx), %xmm4 C xmm4 = cnt, the left shift count
+ mov $64, R32(%rax)
+ sub R32(%rcx), R32(%rax)
+ movd R32(%rax), %xmm5 C xmm5 = 64 - cnt, the complementary right shift count
+
+ neg R32(%rcx)
+ mov -8(ap,n,8), %rax C rax = most significant source limb
+ shr R8(%rcx), %rax C rax = bits shifted out at the top; rax is not written again
+
+ cmp $2, n
+ jle L(le2) C n <= 2: use the small-operand code
+
+ lea (rp,n,8), R32(%rcx) C address just past the destination
+ test $8, R8(%rcx) C is that address 16-byte aligned?
+ je L(rp_aligned)
+
+C Do one initial limb in order to make rp aligned
+ movq -8(ap,n,8), %xmm0 C top limb
+ movq -16(ap,n,8), %xmm1 C limb below it
+ psllq %xmm4, %xmm0
+ psrlq %xmm5, %xmm1
+ por %xmm1, %xmm0 C combine the shifted top limb with bits from the limb below
+ movq %xmm0, -8(rp,n,8)
+ dec n
+
+L(rp_aligned):
+ lea (ap,n,8), R32(%rcx) C address just past the remaining source limbs
+ test $8, R8(%rcx) C does ap have the same alignment (mod 16) as rp?
+ je L(aent) C yes, use the aligned loop
+ jmp L(uent)
+C *****************************************************************************
+
+C Handle the case when ap != rp (mod 16).
+
+ ALIGN(16)
+L(utop):movdqa -8(ap,n,8), %xmm0 C aligned load one limb below the pair being produced
+ movq (ap,n,8), %xmm1
+ punpcklqdq 8(ap,n,8), %xmm1 C xmm1 = the limb pair at (ap,n,8), built from two loads
+ psllq %xmm4, %xmm1
+ psrlq %xmm5, %xmm0
+ por %xmm1, %xmm0
+ movdqa %xmm0, (rp,n,8)
+L(uent):sub $2, n C two limbs per pass
+ ja L(utop) C loop while n stays above zero
+
+ jne L(end8) C the sub borrowed: only the lowest limb is left
+
+ movq (ap), %xmm1 C n = 0: two lowest limbs are left
+ pxor %xmm0, %xmm0
+ punpcklqdq %xmm1, %xmm0 C xmm0 = zero (low half), ap[0] (high half)
+ punpcklqdq 8(ap), %xmm1 C xmm1 = ap[0] (low half), ap[1] (high half)
+ psllq %xmm4, %xmm1
+ psrlq %xmm5, %xmm0
+ por %xmm1, %xmm0
+ movdqa %xmm0, (rp)
+ ret
+C *****************************************************************************
+
+C Handle the case when ap = rp (mod 16).
+
+ ALIGN(16)
+L(atop):movdqa (ap,n,8), %xmm0 C xmm0 = B*ap[n-1] + ap[n-2]
+ movq -8(ap,n,8), %xmm1 C xmm1 = ap[n-3]
+ punpcklqdq %xmm0, %xmm1 C xmm1 = B*ap[n-2] + ap[n-3]
+ psllq %xmm4, %xmm0
+ psrlq %xmm5, %xmm1
+ por %xmm1, %xmm0
+ movdqa %xmm0, (rp,n,8)
+L(aent):
+ sub $2, n C two limbs per pass
+ ja L(atop) C loop while n stays above zero
+ jne L(end8) C the sub borrowed: only the lowest limb is left
+
+ movdqa (ap), %xmm1 C n = 0: two lowest limbs are left
+ pxor %xmm0, %xmm0
+ punpcklqdq %xmm1, %xmm0 C xmm0 = zero (low half), ap[0] (high half)
+ psllq %xmm4, %xmm1
+ psrlq %xmm5, %xmm0
+ por %xmm1, %xmm0
+ movdqa %xmm0, (rp)
+ ret
+C *****************************************************************************
+
+ ALIGN(16)
+L(le2): jne L(end8) C flags are still from cmp $2, n; n != 2 goes to the one-limb code
+
+ movq 8(ap), %xmm0 C n = 2: do the upper limb here
+ movq (ap), %xmm1
+ psllq %xmm4, %xmm0
+ psrlq %xmm5, %xmm1
+ por %xmm1, %xmm0
+ movq %xmm0, 8(rp)
+
+L(end8):movq (ap), %xmm0 C lowest limb: nothing below it to shift in
+ psllq %xmm4, %xmm0
+ movq %xmm0, (rp)
+ ret
+EPILOGUE()
--- /dev/null
+dnl AMD64 mpn_lshiftc optimised for CPUs with fast SSE including fast movdqu.
+
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
+dnl Copyright 2010, 2011, 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C cycles/limb cycles/limb cycles/limb good
+C aligned unaligned best seen for cpu?
+C AMD K8,K9 3 3 ? no, use shl/shr
+C AMD K10 1.8-2.0 1.8-2.0 ? yes
+C AMD bd1 1.9 1.9 ? yes
+C AMD bobcat 3.67 3.67 yes, bad for n < 20
+C Intel P4 4.75 4.75 ? no, slow movdqu
+C Intel core2 2.27 2.27 ? no, use shld/shrd
+C Intel NHM 2.15 2.15 ? no, use shld/shrd
+C Intel SBR 1.45 1.45 ? yes, bad for n = 4-6
+C Intel atom 12.9 12.9 ? no
+C VIA nano 6.18 6.44 ? no, slow movdqu
+
+C We try to do as many aligned 16-byte operations as possible. The top-most
+C and bottom-most writes might need 8-byte operations.
+C
+C This variant relies on fast load movdqu, and uses it even for aligned operands,
+C in order to avoid the need for two separate loops.
+C
+C TODO
+C * Could 2-limb wind-down code be simplified?
+C * Improve basecase code, using shld/shrd for SBR, discrete integer shifts
+C for other affected CPUs.
+
+C INPUT PARAMETERS
+define(`rp', `%rdi')
+define(`ap', `%rsi')
+define(`n', `%rdx')
+define(`cnt', `%rcx')
+
+ASM_START()
+ TEXT
+ ALIGN(64)
+PROLOGUE(mpn_lshiftc)
+ FUNC_ENTRY(4)
+ movd R32(%rcx), %xmm4 C xmm4 = cnt, the left shift count
+ mov $64, R32(%rax)
+ sub R32(%rcx), R32(%rax)
+ movd R32(%rax), %xmm5 C xmm5 = 64 - cnt, the complementary right shift count
+
+ neg R32(%rcx)
+ mov -8(ap,n,8), %rax C rax = most significant source limb
+ shr R8(%rcx), %rax C rax = bits shifted out at the top (not complemented); not written again
+
+ pcmpeqb %xmm3, %xmm3 C set to 111...111
+
+ cmp $3, n
+ jle L(bc) C n <= 3: use the basecase code
+
+ lea (rp,n,8), R32(%rcx) C address just past the destination
+ bt $3, R32(%rcx) C is that address 16-byte aligned?
+ jnc L(rp_aligned)
+
+C Do one initial limb in order to make rp aligned
+ movq -8(ap,n,8), %xmm0 C top limb
+ movq -16(ap,n,8), %xmm1 C limb below it
+ psllq %xmm4, %xmm0
+ psrlq %xmm5, %xmm1
+ por %xmm1, %xmm0
+ pxor %xmm3, %xmm0 C complement the shifted result
+ movq %xmm0, -8(rp,n,8)
+ dec n
+
+L(rp_aligned):
+ lea 1(n), %r8d
+
+ and $6, R32(%r8) C dispatch on (n+1) & 6 to an entry point of the 4-way unrolled loop
+ jz L(ba0)
+ cmp $4, R32(%r8)
+ jz L(ba4)
+ jc L(ba2)
+L(ba6): add $-4, n
+ jmp L(i56)
+L(ba0): add $-6, n
+ jmp L(i70)
+L(ba4): add $-2, n
+ jmp L(i34)
+L(ba2): add $-8, n
+ jle L(end)
+
+ ALIGN(16)
+L(top): movdqu 40(ap,n,8), %xmm1 C same limb pair as xmm0 but one limb lower
+ movdqu 48(ap,n,8), %xmm0
+ psllq %xmm4, %xmm0
+ psrlq %xmm5, %xmm1
+ por %xmm1, %xmm0
+ pxor %xmm3, %xmm0 C complement the shifted limb pair
+ movdqa %xmm0, 48(rp,n,8) C aligned 16-byte store of two result limbs
+L(i70):
+ movdqu 24(ap,n,8), %xmm1
+ movdqu 32(ap,n,8), %xmm0
+ psllq %xmm4, %xmm0
+ psrlq %xmm5, %xmm1
+ por %xmm1, %xmm0
+ pxor %xmm3, %xmm0
+ movdqa %xmm0, 32(rp,n,8)
+L(i56):
+ movdqu 8(ap,n,8), %xmm1
+ movdqu 16(ap,n,8), %xmm0
+ psllq %xmm4, %xmm0
+ psrlq %xmm5, %xmm1
+ por %xmm1, %xmm0
+ pxor %xmm3, %xmm0
+ movdqa %xmm0, 16(rp,n,8)
+L(i34):
+ movdqu -8(ap,n,8), %xmm1
+ movdqu (ap,n,8), %xmm0
+ psllq %xmm4, %xmm0
+ psrlq %xmm5, %xmm1
+ por %xmm1, %xmm0
+ pxor %xmm3, %xmm0
+ movdqa %xmm0, (rp,n,8)
+ sub $8, n C 8 limbs per full pass
+ jg L(top)
+
+L(end): bt $0, R32(n) C n odd: only the lowest limb is left
+ jc L(end8)
+
+ movdqu (ap), %xmm1 C two lowest limbs are left
+ pxor %xmm0, %xmm0
+ punpcklqdq %xmm1, %xmm0 C xmm0 = zero (low half), ap[0] (high half)
+ psllq %xmm4, %xmm1
+ psrlq %xmm5, %xmm0
+ por %xmm1, %xmm0
+ pxor %xmm3, %xmm0
+ movdqa %xmm0, (rp)
+ FUNC_EXIT()
+ ret
+
+C Basecase
+ ALIGN(16)
+L(bc): dec R32(n)
+ jz L(end8) C n was 1: only the lowest limb
+
+ movq (ap,n,8), %xmm1 C top limb
+ movq -8(ap,n,8), %xmm0 C limb below it
+ psllq %xmm4, %xmm1
+ psrlq %xmm5, %xmm0
+ por %xmm1, %xmm0
+ pxor %xmm3, %xmm0
+ movq %xmm0, (rp,n,8)
+ sub $2, R32(n)
+ jl L(end8) C n was 2: only the lowest limb is left
+ movq 8(ap), %xmm1 C n was 3: do the middle limb
+ movq (ap), %xmm0
+ psllq %xmm4, %xmm1
+ psrlq %xmm5, %xmm0
+ por %xmm1, %xmm0
+ pxor %xmm3, %xmm0
+ movq %xmm0, 8(rp)
+
+L(end8):movq (ap), %xmm0 C lowest limb: nothing below it to shift in
+ psllq %xmm4, %xmm0
+ pxor %xmm3, %xmm0 C the low cnt bits of the result become ones
+ movq %xmm0, (rp)
+ FUNC_EXIT()
+ ret
+EPILOGUE()
--- /dev/null
+dnl AMD64 mpn_lshiftc optimised for CPUs with fast SSE.
+
+dnl Contributed to the GNU project by David Harvey and Torbjorn Granlund.
+
+dnl Copyright 2010, 2011, 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C cycles/limb cycles/limb good
+C 16-byte aligned 16-byte unaligned for cpu?
+C AMD K8,K9 ? ?
+C AMD K10 1.85 (1.635) 1.9 (1.67) Y
+C AMD bd1 1.82 (1.75) 1.82 (1.75) Y
+C AMD bobcat 4.5 4.5
+C Intel P4 3.6 (3.125) 3.6 (3.125) Y
+C Intel core2 2.05 (1.67) 2.55 (1.75)
+C Intel NHM 2.05 (1.875) 2.6 (2.25)
+C Intel SBR 1.55 (1.44) 2 (1.57) Y
+C Intel atom ? ?
+C VIA nano 2.5 (2.5) 2.5 (2.5) Y
+
+C We try to do as many 16-byte operations as possible. The top-most and
+C bottom-most writes might need 8-byte operations. We always write using
+C 16-byte operations, we read with both 8-byte and 16-byte operations.
+
+C There are two inner-loops, one for when rp = ap (mod 16) and one when this is
+C not true. The aligned case reads 16+8 bytes, the unaligned case reads
+C 16+8+X bytes, where X is 8 or 16 depending on how punpcklqdq is implemented.
+
+C This is not yet great code:
+C (1) The unaligned case makes too many reads.
+C (2) We should do some unrolling, at least 2-way.
+C With 2-way unrolling but no scheduling we reach 1.5 c/l on K10 and 2 c/l on
+C Nano.
+
+C INPUT PARAMETERS
+define(`rp', `%rdi')
+define(`ap', `%rsi')
+define(`n', `%rdx')
+define(`cnt', `%rcx')
+
+ASM_START()
+ TEXT
+ ALIGN(16)
+PROLOGUE(mpn_lshiftc)
+C Shift {ap,n} left by cnt bits, complement every result limb and store the
+C result at {rp,n}, working from the most significant limb downwards.
+C Register roles set up below: xmm4 = cnt, xmm5 = 64-cnt, xmm7 = all ones
+C (the complement mask). %rcx is overwritten and reused as scratch.
+C In the notes added below, vectors are written {low limb, high limb}.
+C NOTE(review): the 64-cnt count and the shr by -cnt presumably rely on
+C 1 <= cnt <= 63 -- confirm against the documented mpn_lshiftc contract.
+ movd R32(%rcx), %xmm4
+ mov $64, R32(%rax)
+ sub R32(%rcx), R32(%rax)
+ movd R32(%rax), %xmm5
+
+C %rax = ap[n-1] >> (64-cnt), i.e. the bits shifted out at the top (not
+C complemented). shr uses only the low 6 bits of -cnt. %rax is not written
+C again before any ret below.
+ neg R32(%rcx)
+ mov -8(ap,n,8), %rax
+ shr R8(%rcx), %rax
+
+ pcmpeqb %xmm7, %xmm7 C set to 111...111
+
+C Operands of at most two limbs are handled at L(le2) with 8-byte accesses.
+ cmp $2, n
+ jle L(le2)
+
+C Test bit 3 of rp+8n (a 32-bit lea suffices since only that bit is used).
+C NOTE(review): using bit 3 as the 16-byte alignment indicator assumes rp and
+C ap are 8-byte aligned -- confirm.
+ lea (rp,n,8), R32(%rcx)
+ test $8, R8(%rcx)
+ je L(rp_aligned)
+
+C Do one initial limb in order to make rp aligned
+C rp[n-1] = ~((ap[n-1] << cnt) | (ap[n-2] >> (64-cnt))), then drop that limb.
+ movq -8(ap,n,8), %xmm0
+ movq -16(ap,n,8), %xmm1
+ psllq %xmm4, %xmm0
+ psrlq %xmm5, %xmm1
+ por %xmm1, %xmm0
+ pxor %xmm7, %xmm0
+ movq %xmm0, -8(rp,n,8)
+ dec n
+
+L(rp_aligned):
+C Choose the loop by bit 3 of ap+8n: clear enters at L(aent) (loads straight
+C from (ap,n,8) with movdqa), set enters at L(uent).
+ lea (ap,n,8), R32(%rcx)
+ test $8, R8(%rcx)
+ je L(aent)
+ jmp L(uent)
+C *****************************************************************************
+
+C Handle the case when ap != rp (mod 16).
+
+C With n as updated by the sub at L(uent), one iteration computes
+C rp[n+1] = ~((ap[n+1] << cnt) | (ap[n] >> (64-cnt))) and
+C rp[n] = ~((ap[n] << cnt) | (ap[n-1] >> (64-cnt))).
+C xmm1 = {ap[n], ap[n+1]} is assembled from an 8-byte load plus the memory
+C operand of punpcklqdq; xmm0 = {ap[n-1], ap[n]} is one movdqa load.
+ ALIGN(16)
+L(utop):movq (ap,n,8), %xmm1
+ punpcklqdq 8(ap,n,8), %xmm1
+ movdqa -8(ap,n,8), %xmm0
+ psllq %xmm4, %xmm1
+ psrlq %xmm5, %xmm0
+ por %xmm1, %xmm0
+ pxor %xmm7, %xmm0
+ movdqa %xmm0, (rp,n,8)
+L(uent):sub $2, n
+ ja L(utop)
+
+C ja falls through when the sub gave zero or borrowed. Non-zero here means
+C a borrow (n was 1): only limb 0 remains. Zero: limbs 1 and 0 remain.
+ jne L(end8)
+
+C Final two limbs: xmm1 = {ap[0], ap[1]} and xmm0 = {0, ap[0]}, so zeros are
+C shifted into the bottom of rp[0] before the complement.
+ movq (ap), %xmm1
+ pxor %xmm0, %xmm0
+ punpcklqdq %xmm1, %xmm0
+ punpcklqdq 8(ap), %xmm1
+ psllq %xmm4, %xmm1
+ psrlq %xmm5, %xmm0
+ por %xmm1, %xmm0
+ pxor %xmm7, %xmm0
+ movdqa %xmm0, (rp)
+ ret
+C *****************************************************************************
+
+C Handle the case when ap = rp (mod 16).
+
+C Same computation as the loop above, but both source limbs of xmm0 come from
+C a single movdqa load. In the per-line comments of this loop, n denotes the
+C count before the sub at L(aent), and B the limb base.
+ ALIGN(16)
+L(atop):movdqa (ap,n,8), %xmm0 C xmm0 = B*ap[n-1] + ap[n-2]
+ movq -8(ap,n,8), %xmm1 C xmm1 = ap[n-3]
+ punpcklqdq %xmm0, %xmm1 C xmm1 = B*ap[n-2] + ap[n-3]
+ psllq %xmm4, %xmm0
+ psrlq %xmm5, %xmm1
+ por %xmm1, %xmm0
+ pxor %xmm7, %xmm0
+ movdqa %xmm0, (rp,n,8)
+L(aent):sub $2, n
+ ja L(atop)
+
+C As after the other loop: non-zero means one limb is left, zero means two.
+ jne L(end8)
+
+C Final two limbs: xmm0 = {ap[0], ap[1]}, xmm1 = {0, ap[0]}.
+ movdqa (ap), %xmm0
+ pxor %xmm1, %xmm1
+ punpcklqdq %xmm0, %xmm1
+ psllq %xmm4, %xmm0
+ psrlq %xmm5, %xmm1
+ por %xmm1, %xmm0
+ pxor %xmm7, %xmm0
+ movdqa %xmm0, (rp)
+ ret
+C *****************************************************************************
+
+C n <= 2. The flags tested here are still those of the cmp $2, n at the
+C top: not equal means n < 2 (presumably n = 1), so only limb 0 is produced.
+ ALIGN(16)
+L(le2): jne L(end8)
+
+C n = 2: rp[1] = ~((ap[1] << cnt) | (ap[0] >> (64-cnt))), then fall through.
+ movq 8(ap), %xmm0
+ movq (ap), %xmm1
+ psllq %xmm4, %xmm0
+ psrlq %xmm5, %xmm1
+ por %xmm1, %xmm0
+ pxor %xmm7, %xmm0
+ movq %xmm0, 8(rp)
+
+C Lowest limb: rp[0] = ~(ap[0] << cnt).
+L(end8):movq (ap), %xmm0
+ psllq %xmm4, %xmm0
+ pxor %xmm7, %xmm0
+ movq %xmm0, (rp)
+ ret
+EPILOGUE()
--- /dev/null
+dnl AMD64 mpn_rshift optimised for CPUs with fast SSE including fast movdqu.
+
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
+dnl Copyright 2010, 2011, 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C cycles/limb cycles/limb cycles/limb good
+C aligned unaligned best seen for cpu?
+C AMD K8,K9 3 3 2.35 no, use shl/shr
+C AMD K10 1.5-1.8 1.5-1.8 1.33 yes
+C AMD bd1 1.7-1.9 1.7-1.9 1.33 yes
+C AMD bobcat 3.17 3.17 yes, bad for n < 20
+C Intel P4 4.67 4.67 2.7 no, slow movdqu
+C Intel core2 2.15 2.15 1.25 no, use shld/shrd
+C Intel NHM 1.66 1.66 1.25 no, use shld/shrd
+C Intel SBR 1.3 1.3 1.25 yes, bad for n = 4-6
+C Intel atom 11.7 11.7 4.5 no
+C VIA nano 5.7 5.95 2.0 no, slow movdqu
+
+C We try to do as many aligned 16-byte operations as possible. The top-most
+C and bottom-most writes might need 8-byte operations.
+C
+C This variant relies on fast movdqu loads, and uses them even for aligned
+C operands, in order to avoid the need for two separate loops.
+C
+C TODO
+C * Could 2-limb wind-down code be simplified?
+C * Improve basecase code, using shld/shrd for SBR, discrete integer shifts
+C for other affected CPUs.
+
+C INPUT PARAMETERS
+define(`rp', `%rdi')
+define(`ap', `%rsi')
+define(`n', `%rdx')
+define(`cnt', `%rcx')
+
+ASM_START()
+ TEXT
+ ALIGN(64)
+PROLOGUE(mpn_rshift)
+C Shift {ap,n} right by cnt bits and store the result at {rp,n}, working from
+C the least significant limb upwards.
+C Register roles set up below: xmm4 = cnt, xmm5 = 64-cnt. %rcx and %r8 are
+C overwritten; ap, rp and n are modified in place.
+C In the notes added below, vectors are written {low limb, high limb}.
+C NOTE(review): the 64-cnt count and the shl by -cnt presumably rely on
+C 1 <= cnt <= 63 -- confirm against the documented mpn_rshift contract.
+ FUNC_ENTRY(4)
+ movd R32(%rcx), %xmm4
+ mov $64, R32(%rax)
+ sub R32(%rcx), R32(%rax)
+ movd R32(%rax), %xmm5
+
+C %rax = ap[0] << (64-cnt), i.e. the bits shifted out at the bottom. shl
+C uses only the low 6 bits of -cnt. None of the instructions visible below
+C writes %rax again.
+ neg R32(%rcx)
+ mov (ap), %rax
+ shl R8(%rcx), %rax
+
+C Operands of at most three limbs are handled by the basecase at L(bc).
+ cmp $3, n
+ jle L(bc)
+
+C Bit 3 of rp tells whether the destination starts on a 16-byte boundary.
+C NOTE(review): this assumes rp is 8-byte aligned -- confirm.
+ bt $3, R32(rp)
+ jnc L(rp_aligned)
+
+C Do one initial limb in order to make rp aligned
+C rp[0] = (ap[0] >> cnt) | (ap[1] << (64-cnt)), then step past that limb.
+ movq (ap), %xmm0
+ movq 8(ap), %xmm1
+ psrlq %xmm4, %xmm0
+ psllq %xmm5, %xmm1
+ por %xmm1, %xmm0
+ movq %xmm0, (rp)
+ lea 8(ap), ap
+ lea 8(rp), rp
+ dec n
+
+L(rp_aligned):
+C Point ap and rp just past the end of the operands and negate n, so that
+C (ap,n,8) and (rp,n,8) walk upwards as n counts towards zero.
+ lea 1(n), %r8d
+ lea (ap,n,8), ap
+ lea (rp,n,8), rp
+ neg n
+
+C Enter the 4-way unrolled loop at the group matching (n+1) & 6, with n the
+C remaining limb count before the neg: 2 -> L(top), 0 -> L(i70),
+C 6 -> L(i56), 4 -> L(i34). The constant added to n makes the entered
+C group's displacement address the first remaining limb.
+ and $6, R32(%r8)
+ jz L(bu0)
+ cmp $4, R32(%r8)
+ jz L(bu4)
+ jc L(bu2)
+L(bu6): add $4, n
+ jmp L(i56)
+L(bu0): add $6, n
+ jmp L(i70)
+L(bu4): add $2, n
+ jmp L(i34)
+L(bu2): add $8, n
+ jge L(end)
+
+C Each six-instruction group produces two limbs. With k the index of the
+C group's first limb: xmm1 = {ap[k], ap[k+1]}, xmm0 = {ap[k+1], ap[k+2]} and
+C {rp[k], rp[k+1]} = (xmm1 >> cnt) | (xmm0 << (64-cnt)).
+C Loads use movdqu, stores use movdqa. One full iteration handles 8 limbs.
+ ALIGN(16)
+L(top): movdqu -64(ap,n,8), %xmm1
+ movdqu -56(ap,n,8), %xmm0
+ psllq %xmm5, %xmm0
+ psrlq %xmm4, %xmm1
+ por %xmm1, %xmm0
+ movdqa %xmm0, -64(rp,n,8)
+L(i70):
+ movdqu -48(ap,n,8), %xmm1
+ movdqu -40(ap,n,8), %xmm0
+ psllq %xmm5, %xmm0
+ psrlq %xmm4, %xmm1
+ por %xmm1, %xmm0
+ movdqa %xmm0, -48(rp,n,8)
+L(i56):
+ movdqu -32(ap,n,8), %xmm1
+ movdqu -24(ap,n,8), %xmm0
+ psllq %xmm5, %xmm0
+ psrlq %xmm4, %xmm1
+ por %xmm1, %xmm0
+ movdqa %xmm0, -32(rp,n,8)
+L(i34):
+ movdqu -16(ap,n,8), %xmm1
+ movdqu -8(ap,n,8), %xmm0
+ psllq %xmm5, %xmm0
+ psrlq %xmm4, %xmm1
+ por %xmm1, %xmm0
+ movdqa %xmm0, -16(rp,n,8)
+ add $8, n
+ jl L(top)
+
+C Wind-down: bit 0 of n set means one limb remains, clear means two remain.
+L(end): bt $0, R32(n)
+ jc L(e1)
+
+C Last two limbs: xmm1 = {ap[-2], ap[-1]}; the movq load gives
+C xmm0 = {ap[-1], 0}, so zeros are shifted into the top limb.
+ movdqu -16(ap), %xmm1
+ movq -8(ap), %xmm0
+ psrlq %xmm4, %xmm1
+ psllq %xmm5, %xmm0
+ por %xmm1, %xmm0
+ movdqa %xmm0, -16(rp)
+ FUNC_EXIT()
+ ret
+
+C Last single limb: rp[-1] = ap[-1] >> cnt.
+L(e1): movq -8(ap), %xmm0
+ psrlq %xmm4, %xmm0
+ movq %xmm0, -8(rp)
+ FUNC_EXIT()
+ ret
+
+C Basecase
+C Reached with n <= 3; ap and rp still point at limb 0. Uses 8-byte
+C accesses only. The first dec leaves zero when n was 1.
+ ALIGN(16)
+L(bc): dec R32(n)
+ jnz 1f
+C n = 1: rp[0] = ap[0] >> cnt.
+ movq (ap), %xmm0
+ psrlq %xmm4, %xmm0
+ movq %xmm0, (rp)
+ FUNC_EXIT()
+ ret
+
+C n >= 2: rp[0] = (ap[0] >> cnt) | (ap[1] << (64-cnt)).
+1: movq (ap), %xmm1
+ movq 8(ap), %xmm0
+ psrlq %xmm4, %xmm1
+ psllq %xmm5, %xmm0
+ por %xmm1, %xmm0
+ movq %xmm0, (rp)
+ dec R32(n)
+ jnz 1f
+C n = 2: rp[1] = ap[1] >> cnt.
+ movq 8(ap), %xmm0
+ psrlq %xmm4, %xmm0
+ movq %xmm0, 8(rp)
+ FUNC_EXIT()
+ ret
+
+C n = 3: rp[1] = (ap[1] >> cnt) | (ap[2] << (64-cnt)), rp[2] = ap[2] >> cnt.
+1: movq 8(ap), %xmm1
+ movq 16(ap), %xmm0
+ psrlq %xmm4, %xmm1
+ psllq %xmm5, %xmm0
+ por %xmm1, %xmm0
+ movq %xmm0, 8(rp)
+ movq 16(ap), %xmm0
+ psrlq %xmm4, %xmm0
+ movq %xmm0, 16(rp)
+ FUNC_EXIT()
+ ret
+EPILOGUE()
+++ /dev/null
-/* Fat binary fallback mpn_divexact_by3c.
-
-Copyright 2003, 2009 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
-
-
-#include "mpn/generic/diveby3.c"
-/* x86 fat binary initializers.
+/* x86_64 fat binary initializers.
Contributed to the GNU project by Kevin Ryde (original x86_32 code) and
Torbjorn Granlund (port to x86_64)
THEY'RE ALMOST CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR
COMPLETELY IN FUTURE GNU MP RELEASES.
-Copyright 2003, 2004, 2009, 2011 Free Software Foundation, Inc.
+Copyright 2003, 2004, 2009, 2011, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
/* fat_entry.asm */
-long __gmpn_cpuid __GMP_PROTO ((char dst[12], int id));
+long __gmpn_cpuid (char [12], int);
#if WANT_FAKE_CPUID
{ "k8", "AuthenticAMD", MAKE_FMS (15, 0) },
{ "k10", "AuthenticAMD", MAKE_FMS (16, 0) },
{ "bobcat", "AuthenticAMD", MAKE_FMS (20, 1) },
+ { "bulldozer", "AuthenticAMD", MAKE_FMS (21, 1) },
{ "nano", "CentaurHauls", MAKE_FMS (6, 15) },
};
struct cpuvec_t __gmpn_cpuvec = {
__MPN(add_n_init),
+ __MPN(addlsh1_n_init),
+ __MPN(addlsh2_n_init),
__MPN(addmul_1_init),
+ __MPN(addmul_2_init),
+ __MPN(bdiv_dbm1c_init),
+ __MPN(com_init),
__MPN(copyd_init),
__MPN(copyi_init),
__MPN(divexact_1_init),
- __MPN(divexact_by3c_init),
__MPN(divrem_1_init),
__MPN(gcd_1_init),
__MPN(lshift_init),
+ __MPN(lshiftc_init),
__MPN(mod_1_init),
+ __MPN(mod_1_1p_init),
+ __MPN(mod_1_1p_cps_init),
+ __MPN(mod_1s_2p_init),
+ __MPN(mod_1s_2p_cps_init),
+ __MPN(mod_1s_4p_init),
+ __MPN(mod_1s_4p_cps_init),
__MPN(mod_34lsub1_init),
__MPN(modexact_1c_odd_init),
__MPN(mul_1_init),
__MPN(mul_basecase_init),
+ __MPN(mullo_basecase_init),
__MPN(preinv_divrem_1_init),
__MPN(preinv_mod_1_init),
+ __MPN(redc_1_init),
+ __MPN(redc_2_init),
__MPN(rshift_init),
__MPN(sqr_basecase_init),
__MPN(sub_n_init),
+ __MPN(sublsh1_n_init),
__MPN(submul_1_init),
0
};
+int __gmpn_cpuvec_initialized = 0;
/* The following setups start with generic x86, then overwrite with
specifics for a chip, and higher versions of that chip.
family = ((fms >> 8) & 0xf) + ((fms >> 20) & 0xff);
model = ((fms >> 4) & 0xf) + ((fms >> 12) & 0xf0);
+ /* Check extended feature flags */
+ __gmpn_cpuid (dummy_string, 0x80000001);
+ if ((dummy_string[4 + 29 / 8] & (1 << (29 % 8))) == 0)
+ abort (); /* longmode-capable-bit turned off! */
+
/*********************************************************/
/*** WARNING: keep this list in sync with config.guess ***/
/*********************************************************/
{
switch (family)
{
- case 4:
- case 5:
- abort (); /* 32-bit processors */
-
case 6:
switch (model)
{
- case 0x00:
- case 0x01:
- case 0x02:
- case 0x03:
- case 0x04:
- case 0x05:
- case 0x06:
- case 0x07:
- case 0x08:
- case 0x09: /* Banias */
- case 0x0a:
- case 0x0b:
- case 0x0c:
- case 0x0d: /* Dothan */
- case 0x0e: /* Yonah */
- abort (); /* 32-bit processors */
-
case 0x0f: /* Conroe Merom Kentsfield Allendale */
case 0x10:
case 0x11:
CPUVEC_SETUP_core2;
break;
- case 0x1c: /* Silverthorne */
- case 0x26: /* Lincroft */
- case 0x27: /* Saltwell */
+ case 0x1c: /* Atom Silverthorne */
+ case 0x26: /* Atom Lincroft */
+ case 0x27: /* Atom Saltwell? */
+ case 0x36: /* Atom Cedarview/Saltwell */
CPUVEC_SETUP_atom;
break;
case 0x2a: /* SB */
case 0x2d: /* SBC-EP */
+ case 0x3a: /* IBR */
+ case 0x3c: /* Haswell */
CPUVEC_SETUP_core2;
+ CPUVEC_SETUP_coreinhm;
CPUVEC_SETUP_coreisbr;
break;
}
{
switch (family)
{
- case 5:
- case 6:
- abort ();
+ case 0x0f: /* k8 */
+ case 0x11: /* "fam 11h", mix of k8 and k10 */
+ case 0x13:
+ case 0x16:
+ case 0x17:
+ CPUVEC_SETUP_k8;
+ break;
- case 15: /* k8 */
- case 16: /* k10 */
- /* CPUVEC_SETUP_athlon */
+ case 0x10: /* k10 */
+ case 0x12: /* k10 (llano) */
+ CPUVEC_SETUP_k8;
+ CPUVEC_SETUP_k10;
break;
+
+ case 0x14: /* bobcat */
+ CPUVEC_SETUP_k8;
+ CPUVEC_SETUP_k10;
+ CPUVEC_SETUP_bobcat;
+ break;
+
+ case 0x15: /* bulldozer */
+ CPUVEC_SETUP_k8;
+ CPUVEC_SETUP_k10;
+ CPUVEC_SETUP_bd1;
}
}
else if (strcmp (vendor_string, "CentaurHauls") == 0)
{
switch (family)
{
- case 5:
- abort (); /* 32-bit processors */
-
case 6:
- if (model < 15)
- abort (); /* 32-bit processors */
-
- CPUVEC_SETUP_nano;
+ if (model >= 15)
+ CPUVEC_SETUP_nano;
break;
}
}
/* Set this once the threshold fields are ready.
Use volatile to prevent it getting moved. */
- ((volatile struct cpuvec_t *) &__gmpn_cpuvec)->initialized = 1;
+ *((volatile int *) &__gmpn_cpuvec_initialized) = 1;
}
dnl Contributed to the GNU project by Kevin Ryde (original x86_32 code) and
dnl Torbjorn Granlund (port to x86_64)
-dnl Copyright 2003, 2009, 2011 Free Software Foundation, Inc.
-dnl
+dnl Copyright 2003, 2009, 2011, 2012 Free Software Foundation, Inc.
+
dnl This file is part of the GNU MP Library.
-dnl
+
dnl The GNU MP Library is free software; you can redistribute it and/or
dnl modify it under the terms of the GNU Lesser General Public License as
dnl published by the Free Software Foundation; either version 3 of the
dnl License, or (at your option) any later version.
-dnl
+
dnl The GNU MP Library is distributed in the hope that it will be useful,
dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
dnl Lesser General Public License for more details.
-dnl
+
dnl You should have received a copy of the GNU Lesser General Public License
dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
ifdef(`PIC',
`define(`PIC_OR_DARWIN')')
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
TEXT
-
dnl Usage: FAT_ENTRY(name, offset)
dnl
dnl Emit a fat binary entrypoint function of the given name. This is the
dnl For non-PIC, the jumps are 5 bytes each, aligning them to 8 should be
dnl fine for all x86s.
dnl
-dnl For PIC, the jumps are 20 bytes each, and are best aligned to 16 to
-dnl ensure at least the first two instructions don't cross a cache line
+dnl For ELF/DARWIN PIC, the jumps are 20 bytes each, and are best aligned to
+dnl 16 to ensure at least the first two instructions don't cross a cache line
dnl boundary.
dnl
+dnl For DOS64, the jumps are 6 bytes. The same form works also for GNU/Linux
+dnl (at least with certain assembler/linkers) but FreeBSD 8.2 crashes. Not
+dnl tested on Darwin, Slowaris, NetBSD, etc.
+dnl
dnl Note the extra `' ahead of PROLOGUE obscures it from the HAVE_NATIVE
dnl grepping in configure, stopping that code trying to eval something with
dnl $1 in it.
define(FAT_ENTRY,
m4_assert_numargs(2)
+`ifdef(`HOST_DOS64',
+` ALIGN(8)
+`'PROLOGUE($1)
+ jmp *$2+GSYM_PREFIX`'__gmpn_cpuvec(%rip)
+EPILOGUE()
+',
` ALIGN(ifdef(`PIC',16,8))
`'PROLOGUE($1)
ifdef(`PIC_OR_DARWIN',
jmp *GSYM_PREFIX`'__gmpn_cpuvec+$2
')
EPILOGUE()
-')
+')')
dnl FAT_ENTRY for each CPUVEC_FUNCS_LIST
EPILOGUE()
')
+dnl FAT_INIT for each CPUVEC_FUNCS_LIST
+dnl
+
+define(`CPUVEC_offset',0)
+foreach(i,
+`FAT_INIT(MPN(i`'_init),CPUVEC_offset)
+define(`CPUVEC_offset',eval(CPUVEC_offset + 1))',
+CPUVEC_FUNCS_LIST)
+
L(fat_init):
C al __gmpn_cpuvec byte offset
movzbl %al, %eax
- push %rdi
- push %rsi
+IFSTD(` push %rdi ')
+IFSTD(` push %rsi ')
push %rdx
push %rcx
push %r8
pop %r8
pop %rcx
pop %rdx
- pop %rsi
- pop %rdi
+IFSTD(` pop %rsi ')
+IFSTD(` pop %rdi ')
ifdef(`PIC_OR_DARWIN',`
LEA( GSYM_PREFIX`'__gmpn_cpuvec, %r10)
- jmp *(%r10,%rax)
+ jmp *(%r10,%rax,8)
',`dnl non-PIC
- jmp *GSYM_PREFIX`'__gmpn_cpuvec(%rax)
+ jmp *GSYM_PREFIX`'__gmpn_cpuvec(,%rax,8)
')
-dnl FAT_INIT for each CPUVEC_FUNCS_LIST
-dnl
-
-define(`CPUVEC_offset',0)
-foreach(i,
-`FAT_INIT(MPN(i`'_init),CPUVEC_offset)
-define(`CPUVEC_offset',eval(CPUVEC_offset + 8))',
-CPUVEC_FUNCS_LIST)
-
-
C long __gmpn_cpuid (char dst[12], int id);
C
-C This is called only once, so just something simple and compact is fine.
+C This is called only 3 times, so just something simple and compact is fine.
+
+define(`rp', `%rdi')
+define(`idx', `%rsi')
PROLOGUE(__gmpn_cpuid)
+ FUNC_ENTRY(2)
mov %rbx, %r8
- mov %esi, %eax
+ mov R32(idx), R32(%rax)
cpuid
- mov %ebx, (%rdi)
- mov %edx, 4(%rdi)
- mov %ecx, 8(%rdi)
+ mov %ebx, (rp)
+ mov %edx, 4(rp)
+ mov %ecx, 8(rp)
mov %r8, %rbx
+ FUNC_EXIT()
ret
EPILOGUE()
+++ /dev/null
-/* Fat binary fallback mpn_gcd_1.
-
-Copyright 2003 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
-
-
-#include "mpn/generic/gcd_1.c"
/* Fat binary x86_64 gmp-mparam.h -- Compiler/machine parameter header file.
-Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2003, 2009 Free Software
+Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2003, 2009, 2011 Free Software
Foundation, Inc.
This file is part of the GNU MP Library.
preinv. */
#define USE_PREINV_DIVREM_1 1
+#define BMOD_1_TO_MOD_1_THRESHOLD 20
+
/* mpn_sqr_basecase is faster than mpn_mul_basecase at all sizes, no need
for mpn_sqr to call the latter. */
#define SQR_BASECASE_THRESHOLD 0
+++ /dev/null
-/* Fat binary fallback mpn_modexact_1c_odd.
-
-Copyright 2003 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
-
-
-#include "mpn/generic/mode1o.c"
--- /dev/null
+/* Fat binary fallback mpn_redc_2.
+
+Copyright 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+
+#include "mpn/generic/redc_2.c"
dnl Based on the K7 gcd_1.asm, by Kevin Ryde. Rehacked for AMD64 by Torbjorn
dnl Granlund.
-dnl Copyright 2000, 2001, 2002, 2005, 2009 Free Software Foundation, Inc.
+dnl Copyright 2000, 2001, 2002, 2005, 2009, 2011, 2012 Free Software
+dnl Foundation, Inc.
dnl This file is part of the GNU MP Library.
include(`../config.m4')
-C K8: 6.75 cycles/bit (approx) 1x1 gcd
-C 10.0 cycles/limb Nx1 reduction (modexact_1_odd)
-
-
-dnl Reduce using x%y if x is more than DIV_THRESHOLD bits bigger than y,
-dnl where x is the larger of the two. See tune/README for more.
-dnl
-dnl div at 80 cycles compared to the gcd at about 7 cycles/bitpair
-dnl suggests 80/7*2=23
-
-deflit(DIV_THRESHOLD, 23)
-
+C cycles/bit (approx)
+C AMD K8,K9 5.21 (4.95)
+C AMD K10 5.15 (5.00)
+C AMD bd1 5.42 (5.14)
+C AMD bobcat 6.71 (6.56)
+C Intel P4 13.5 (12.75)
+C Intel core2 6.20 (6.16)
+C Intel NHM 6.49 (6.25)
+C Intel SBR 7.75 (7.57)
+C Intel atom 8.77 (8.54)
+C VIA nano 6.60 (6.20)
+C Numbers measured with: speed -CD -s16-64 -t48 mpn_gcd_1
C ctz_table[n] is the number of trailing zeros on n, or MAXSHIFT if n==0.
-
-deflit(MAXSHIFT, 6)
+deflit(MAXSHIFT, 7)
deflit(MASK, eval((m4_lshift(1,MAXSHIFT))-1))
DEF_OBJECT(ctz_table,64)
')
END_OBJECT(ctz_table)
-C mp_limb_t mpn_gcd_1 (mp_srcptr up, mp_size_t n, mp_limb_t vlimb);
-
+C Threshold of when to call bmod when U is one limb. Should be about
+C (time_in_cycles(bmod_1,1) + call_overhead) / (cycles/bit).
+define(`BMOD_THRES_LOG2', 8)
C INPUT PARAMETERS
define(`up', `%rdi')
define(`n', `%rsi')
-define(`vlimb', `%rdx')
+define(`v0', `%rdx')
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+IFDOS(`define(`STACK_ALLOC', 40)')
+IFSTD(`define(`STACK_ALLOC', 8)')
+
+ASM_START()
TEXT
ALIGN(16)
-
PROLOGUE(mpn_gcd_1)
- mov (%rdi), %r8 C src low limb
- or %rdx, %r8 C x | y
+ FUNC_ENTRY(3)
+ mov (up), %rax C U low limb
mov $-1, R32(%rcx)
+ or v0, %rax C x | y
L(twos):
inc R32(%rcx)
- shr %r8
+ shr %rax
jnc L(twos)
- shr R8(%rcx), %rdx
- mov R32(%rcx), R32(%r8) C common twos
+ shr R8(%rcx), v0
+ push %rcx C common twos
L(divide_strip_y):
- shr %rdx
+ shr v0
jnc L(divide_strip_y)
- adc %rdx, %rdx
-
- push %r8
- push %rdx
- sub $8, %rsp C maintain ABI required rsp alignment
-
+ adc v0, v0
+
+ cmp $1, n
+ jnz L(reduce_nby1)
+
+C Both U and V are single limbs, reduce with bmod if u0 >> v0.
+ mov (up), %r8
+ mov %r8, %rax
+ shr $BMOD_THRES_LOG2, %r8
+ cmp %r8, v0
+ ja L(noreduce)
+ push v0
+ sub $STACK_ALLOC, %rsp C maintain ABI required rsp alignment
+
+L(bmod):
+IFDOS(` mov %rdx, %r8 ')
+IFDOS(` mov %rsi, %rdx ')
+IFDOS(` mov %rdi, %rcx ')
CALL( mpn_modexact_1_odd)
- add $8, %rsp
+L(reduced):
+ add $STACK_ALLOC, %rsp
pop %rdx
- pop %r8
+L(noreduce):
+ LEA( ctz_table, %rsi)
test %rax, %rax
-
mov %rax, %rcx
- jnz L(strip_x)
-
+ jnz L(mid)
+ jmp L(end)
+
+L(reduce_nby1):
+ push v0
+ sub $STACK_ALLOC, %rsp C maintain ABI required rsp alignment
+
+ cmp $BMOD_1_TO_MOD_1_THRESHOLD, n
+ jl L(bmod)
+IFDOS(` mov %rdx, %r8 ')
+IFDOS(` mov %rsi, %rdx ')
+IFDOS(` mov %rdi, %rcx ')
+ CALL( mpn_mod_1)
+ jmp L(reduced)
+
+ ALIGN(16) C K8 BC P4 NHM SBR
+L(top): cmovc %rcx, %rax C if x-y < 0 0
+ cmovc %rdi, %rdx C use x,y-x 0
+L(mid): and $MASK, R32(%rcx) C 0
+ movzbl (%rsi,%rcx), R32(%rcx) C 1
+ jz L(shift_alot) C 1
+ shr R8(%rcx), %rax C 3
+ mov %rax, %rdi C 4
+ mov %rdx, %rcx C 3
+ sub %rax, %rcx C 4
+ sub %rdx, %rax C 4
+ jnz L(top) C 5
+
+L(end): pop %rcx
mov %rdx, %rax
- jmp L(done)
-
-L(strip_x):
- LEA( ctz_table, %r9)
- jmp L(strip_x_top)
-
- ALIGN(16)
-L(top):
- cmovc %r10, %rcx C if x-y gave carry, use x,y-x 0
- cmovc %rax, %rdx C 0
-
-L(strip_x_top):
- mov %rcx, %rax C 1
- and $MASK, R32(%rcx) C 1
-
- mov (%r9,%rcx), R8(%rcx) C 1
-
- shr R8(%rcx), %rax C 4
- cmp $MAXSHIFT, R8(%rcx) C 4
-
- mov %rax, %rcx C 5
- mov %rdx, %r10 C 5
- je L(strip_x_top) C 5
-
- sub %rax, %r10 C 6
- sub %rdx, %rcx C 6
- jnz L(top) C 6
-
-L(done):
- mov %r8, %rcx
shl R8(%rcx), %rax
+ FUNC_EXIT()
ret
+L(shift_alot):
+ shr $MAXSHIFT, %rax
+ mov %rax, %rcx
+ jmp L(mid)
EPILOGUE()
-/* AMD K8 gmp-mparam.h -- Compiler/machine parameter header file.
+/* AMD K8-K10 gmp-mparam.h -- Compiler/machine parameter header file.
Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007,
-2008, 2009, 2010 Free Software Foundation, Inc.
+2008, 2009, 2010, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
#define MOD_1_NORM_THRESHOLD 0 /* always */
#define MOD_1_UNNORM_THRESHOLD 0 /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD 7
-#define MOD_1U_TO_MOD_1_1_THRESHOLD 5
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD 6
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD 11
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 13
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 4
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 3
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 14
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD 28
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 7
#define USE_PREINV_DIVREM_1 1 /* native */
+#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
#define DIVEXACT_1_THRESHOLD 0 /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD 19
+#define BMOD_1_TO_MOD_1_THRESHOLD 15
-#define MUL_TOOM22_THRESHOLD 28
+#define MUL_TOOM22_THRESHOLD 27
#define MUL_TOOM33_THRESHOLD 81
-#define MUL_TOOM44_THRESHOLD 232
-#define MUL_TOOM6H_THRESHOLD 369
-#define MUL_TOOM8H_THRESHOLD 478
+#define MUL_TOOM44_THRESHOLD 234
+#define MUL_TOOM6H_THRESHOLD 418
+#define MUL_TOOM8H_THRESHOLD 466
#define MUL_TOOM32_TO_TOOM43_THRESHOLD 97
#define MUL_TOOM32_TO_TOOM53_THRESHOLD 160
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD 160
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD 187
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 145
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD 175
#define SQR_BASECASE_THRESHOLD 0 /* always (native) */
-#define SQR_TOOM2_THRESHOLD 32
-#define SQR_TOOM3_THRESHOLD 113
+#define SQR_TOOM2_THRESHOLD 36
+#define SQR_TOOM3_THRESHOLD 117
#define SQR_TOOM4_THRESHOLD 327
#define SQR_TOOM6_THRESHOLD 446
-#define SQR_TOOM8_THRESHOLD 597
+#define SQR_TOOM8_THRESHOLD 547
+
+#define MULMID_TOOM42_THRESHOLD 36
#define MULMOD_BNM1_THRESHOLD 17
#define SQRMOD_BNM1_THRESHOLD 17
+#define POWM_SEC_TABLE 2,67,322,991
+
#define MUL_FFT_MODF_THRESHOLD 570 /* k = 5 */
#define MUL_FFT_TABLE3 \
{ { 570, 5}, { 21, 6}, { 11, 5}, { 23, 6}, \
#define SQR_FFT_TABLE3_SIZE 203
#define SQR_FFT_THRESHOLD 5248
-#define MULLO_BASECASE_THRESHOLD 0
+#define MULLO_BASECASE_THRESHOLD 0 /* always */
#define MULLO_DC_THRESHOLD 35
-#define MULLO_MUL_N_THRESHOLD 14709
+#define MULLO_MUL_N_THRESHOLD 15604
#define DC_DIV_QR_THRESHOLD 56
#define DC_DIVAPPR_Q_THRESHOLD 220
#define DC_BDIV_QR_THRESHOLD 52
#define DC_BDIV_Q_THRESHOLD 152
-#define INV_MULMOD_BNM1_THRESHOLD 74
-#define INV_NEWTON_THRESHOLD 260
-#define INV_APPR_THRESHOLD 220
+#define INV_MULMOD_BNM1_THRESHOLD 54
+#define INV_NEWTON_THRESHOLD 226
+#define INV_APPR_THRESHOLD 214
-#define BINV_NEWTON_THRESHOLD 345
-#define REDC_1_TO_REDC_2_THRESHOLD 6
+#define BINV_NEWTON_THRESHOLD 327
+#define REDC_1_TO_REDC_2_THRESHOLD 4
#define REDC_2_TO_REDC_N_THRESHOLD 79
-#define MU_DIV_QR_THRESHOLD 1787
-#define MU_DIVAPPR_Q_THRESHOLD 1787
-#define MUPI_DIV_QR_THRESHOLD 126
-#define MU_BDIV_QR_THRESHOLD 1620
-#define MU_BDIV_Q_THRESHOLD 1787
-
-#define MATRIX22_STRASSEN_THRESHOLD 17
-#define HGCD_THRESHOLD 139
-#define GCD_DC_THRESHOLD 501
-#define GCDEXT_DC_THRESHOLD 474
-#define JACOBI_BASE_METHOD 1
-
-#define GET_STR_DC_THRESHOLD 17
-#define GET_STR_PRECOMPUTE_THRESHOLD 23
-#define SET_STR_DC_THRESHOLD 266
+#define MU_DIV_QR_THRESHOLD 1895
+#define MU_DIVAPPR_Q_THRESHOLD 1895
+#define MUPI_DIV_QR_THRESHOLD 106
+#define MU_BDIV_QR_THRESHOLD 1589
+#define MU_BDIV_Q_THRESHOLD 1718
+
+#define MATRIX22_STRASSEN_THRESHOLD 16
+#define HGCD_THRESHOLD 125
+#define HGCD_APPR_THRESHOLD 173
+#define HGCD_REDUCE_THRESHOLD 3524
+#define GCD_DC_THRESHOLD 555
+#define GCDEXT_DC_THRESHOLD 478
+#define JACOBI_BASE_METHOD 4
+
+#define GET_STR_DC_THRESHOLD 12
+#define GET_STR_PRECOMPUTE_THRESHOLD 28
+#define SET_STR_DC_THRESHOLD 248
#define SET_STR_PRECOMPUTE_THRESHOLD 1648
+
+#define FAC_DSC_THRESHOLD 1075
+#define FAC_ODD_THRESHOLD 0 /* always */
+
dnl Contributed to the GNU project by Torbjorn Granlund and Niels Möller.
-dnl Copyright 2004, 2007, 2008, 2009 Free Software Foundation, Inc.
+dnl Copyright 2004, 2007, 2008, 2009, 2011, 2012 Free Software Foundation,
+dnl Inc.
dnl This file is part of the GNU MP Library.
C cycles/limb (approx) div
-C K8,K9: 48 71
-C K10: 48 77
-C P4: 135 161
-C P6 core2: 69 116
-C P6 corei7: 55 89
-C P6 atom: 129 191
+C AMD K8,K9 48 71
+C AMD K10 48 77
+C Intel P4 135 161
+C Intel core2 69 116
+C Intel corei 55 89
+C Intel atom 129 191
+C VIA nano 79 157
C rax rcx rdx rdi rsi r8
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+PROTECT(`mpn_invert_limb_table')
ASM_START()
TEXT
ALIGN(16)
PROLOGUE(mpn_invert_limb) C Kn C2 Ci
+ FUNC_ENTRY(1)
mov %rdi, %rax C 0 0 0
shr $55, %rax C 1 1 1
ifdef(`PIC',`
ifdef(`DARWIN',`
- mov approx_tab@GOTPCREL(%rip), %r8
+ mov mpn_invert_limb_table@GOTPCREL(%rip), %r8
add $-512, %r8
',`
- lea -512+approx_tab(%rip), %r8
+ lea -512+mpn_invert_limb_table(%rip), %r8
')',`
- movabs $-512+approx_tab, %r8
+ movabs $-512+mpn_invert_limb_table, %r8
')
movzwl (%r8,%rax,2), R32(%rcx) C %rcx = v0
dec R32(%rax)
sub R32(%rcx), R32(%rax) C %rax = v1
- C v2 = (v1 << 13) + (v1 * (2^60 - v1*d40) >> 47
+ C v2 = (v1 << 13) + (v1 * (2^60 - v1*d40) >> 47)
mov $0x1000000000000000, %rcx
imul %rax, %rsi C 14 17 13
sub %rsi, %rcx
shr $47, %rcx
add %rax, %rcx C %rcx = v2
- C v3 = (v2 << 31) + (v2 * (2^96 - v2 * d63 + (v2>>1) & mask) >> 65
+ C v3 = (v2 << 31) + (v2 * (2^96 - v2 * d63 + ((v2 >> 1) & mask)) >> 65)
mov %rdi, %rsi C 0 0 0
- shr $1, %rsi C d/2
+ shr %rsi C d/2
sbb %rax, %rax C -d0 = -(d mod 2)
sub %rax, %rsi C d63 = ceil(d/2)
imul %rcx, %rsi C v2 * d63
and %rcx, %rax C v2 * d0
- shr $1, %rax C (v2>>1) * d0
+ shr %rax C (v2>>1) * d0
sub %rsi, %rax C (v2>>1) * d0 - v2 * d63
mul %rcx
sal $31, %rcx
- shr $1, %rdx
+ shr %rdx
add %rdx, %rcx C %rcx = v3
mov %rdi, %rax
adc %rdi, %rdx
sub %rdx, %rax
+ FUNC_EXIT()
ret
EPILOGUE()
-
- RODATA
- ALIGN(2)
-approx_tab:
- .value 0x7fd,0x7f5,0x7ed,0x7e5,0x7dd,0x7d5,0x7ce,0x7c6
- .value 0x7bf,0x7b7,0x7b0,0x7a8,0x7a1,0x79a,0x792,0x78b
- .value 0x784,0x77d,0x776,0x76f,0x768,0x761,0x75b,0x754
- .value 0x74d,0x747,0x740,0x739,0x733,0x72c,0x726,0x720
- .value 0x719,0x713,0x70d,0x707,0x700,0x6fa,0x6f4,0x6ee
- .value 0x6e8,0x6e2,0x6dc,0x6d6,0x6d1,0x6cb,0x6c5,0x6bf
- .value 0x6ba,0x6b4,0x6ae,0x6a9,0x6a3,0x69e,0x698,0x693
- .value 0x68d,0x688,0x683,0x67d,0x678,0x673,0x66e,0x669
- .value 0x664,0x65e,0x659,0x654,0x64f,0x64a,0x645,0x640
- .value 0x63c,0x637,0x632,0x62d,0x628,0x624,0x61f,0x61a
- .value 0x616,0x611,0x60c,0x608,0x603,0x5ff,0x5fa,0x5f6
- .value 0x5f1,0x5ed,0x5e9,0x5e4,0x5e0,0x5dc,0x5d7,0x5d3
- .value 0x5cf,0x5cb,0x5c6,0x5c2,0x5be,0x5ba,0x5b6,0x5b2
- .value 0x5ae,0x5aa,0x5a6,0x5a2,0x59e,0x59a,0x596,0x592
- .value 0x58e,0x58a,0x586,0x583,0x57f,0x57b,0x577,0x574
- .value 0x570,0x56c,0x568,0x565,0x561,0x55e,0x55a,0x556
- .value 0x553,0x54f,0x54c,0x548,0x545,0x541,0x53e,0x53a
- .value 0x537,0x534,0x530,0x52d,0x52a,0x526,0x523,0x520
- .value 0x51c,0x519,0x516,0x513,0x50f,0x50c,0x509,0x506
- .value 0x503,0x500,0x4fc,0x4f9,0x4f6,0x4f3,0x4f0,0x4ed
- .value 0x4ea,0x4e7,0x4e4,0x4e1,0x4de,0x4db,0x4d8,0x4d5
- .value 0x4d2,0x4cf,0x4cc,0x4ca,0x4c7,0x4c4,0x4c1,0x4be
- .value 0x4bb,0x4b9,0x4b6,0x4b3,0x4b0,0x4ad,0x4ab,0x4a8
- .value 0x4a5,0x4a3,0x4a0,0x49d,0x49b,0x498,0x495,0x493
- .value 0x490,0x48d,0x48b,0x488,0x486,0x483,0x481,0x47e
- .value 0x47c,0x479,0x477,0x474,0x472,0x46f,0x46d,0x46a
- .value 0x468,0x465,0x463,0x461,0x45e,0x45c,0x459,0x457
- .value 0x455,0x452,0x450,0x44e,0x44b,0x449,0x447,0x444
- .value 0x442,0x440,0x43e,0x43b,0x439,0x437,0x435,0x432
- .value 0x430,0x42e,0x42c,0x42a,0x428,0x425,0x423,0x421
- .value 0x41f,0x41d,0x41b,0x419,0x417,0x414,0x412,0x410
- .value 0x40e,0x40c,0x40a,0x408,0x406,0x404,0x402,0x400
ASM_END()
--- /dev/null
+dnl Table used for mpn_invert_limb
+
+dnl Contributed to the GNU project by Torbjorn Granlund and Niels Möller.
+
+dnl Copyright 2004, 2007, 2008, 2009, 2011, 2012 Free Software Foundation,
+dnl Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+PROTECT(`mpn_invert_limb_table')
+
+ASM_START()
+C Table entry X contains floor (0x7fd00 / (0x100 + X))
+
+ RODATA
+ ALIGN(2)
+ GLOBL mpn_invert_limb_table
+mpn_invert_limb_table:
+ .value 0x7fd,0x7f5,0x7ed,0x7e5,0x7dd,0x7d5,0x7ce,0x7c6
+ .value 0x7bf,0x7b7,0x7b0,0x7a8,0x7a1,0x79a,0x792,0x78b
+ .value 0x784,0x77d,0x776,0x76f,0x768,0x761,0x75b,0x754
+ .value 0x74d,0x747,0x740,0x739,0x733,0x72c,0x726,0x720
+ .value 0x719,0x713,0x70d,0x707,0x700,0x6fa,0x6f4,0x6ee
+ .value 0x6e8,0x6e2,0x6dc,0x6d6,0x6d1,0x6cb,0x6c5,0x6bf
+ .value 0x6ba,0x6b4,0x6ae,0x6a9,0x6a3,0x69e,0x698,0x693
+ .value 0x68d,0x688,0x683,0x67d,0x678,0x673,0x66e,0x669
+ .value 0x664,0x65e,0x659,0x654,0x64f,0x64a,0x645,0x640
+ .value 0x63c,0x637,0x632,0x62d,0x628,0x624,0x61f,0x61a
+ .value 0x616,0x611,0x60c,0x608,0x603,0x5ff,0x5fa,0x5f6
+ .value 0x5f1,0x5ed,0x5e9,0x5e4,0x5e0,0x5dc,0x5d7,0x5d3
+ .value 0x5cf,0x5cb,0x5c6,0x5c2,0x5be,0x5ba,0x5b6,0x5b2
+ .value 0x5ae,0x5aa,0x5a6,0x5a2,0x59e,0x59a,0x596,0x592
+ .value 0x58e,0x58a,0x586,0x583,0x57f,0x57b,0x577,0x574
+ .value 0x570,0x56c,0x568,0x565,0x561,0x55e,0x55a,0x556
+ .value 0x553,0x54f,0x54c,0x548,0x545,0x541,0x53e,0x53a
+ .value 0x537,0x534,0x530,0x52d,0x52a,0x526,0x523,0x520
+ .value 0x51c,0x519,0x516,0x513,0x50f,0x50c,0x509,0x506
+ .value 0x503,0x500,0x4fc,0x4f9,0x4f6,0x4f3,0x4f0,0x4ed
+ .value 0x4ea,0x4e7,0x4e4,0x4e1,0x4de,0x4db,0x4d8,0x4d5
+ .value 0x4d2,0x4cf,0x4cc,0x4ca,0x4c7,0x4c4,0x4c1,0x4be
+ .value 0x4bb,0x4b9,0x4b6,0x4b3,0x4b0,0x4ad,0x4ab,0x4a8
+ .value 0x4a5,0x4a3,0x4a0,0x49d,0x49b,0x498,0x495,0x493
+ .value 0x490,0x48d,0x48b,0x488,0x486,0x483,0x481,0x47e
+ .value 0x47c,0x479,0x477,0x474,0x472,0x46f,0x46d,0x46a
+ .value 0x468,0x465,0x463,0x461,0x45e,0x45c,0x459,0x457
+ .value 0x455,0x452,0x450,0x44e,0x44b,0x449,0x447,0x444
+ .value 0x442,0x440,0x43e,0x43b,0x439,0x437,0x435,0x432
+ .value 0x430,0x42e,0x42c,0x42a,0x428,0x425,0x423,0x421
+ .value 0x41f,0x41d,0x41b,0x419,0x417,0x414,0x412,0x410
+ .value 0x40e,0x40c,0x40a,0x408,0x406,0x404,0x402,0x400
+ASM_END()
--- /dev/null
+dnl AMD64 mpn_gcd_1.
+
+dnl Copyright 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+MULFUNC_PROLOGUE(mpn_gcd_1)
+include_mpn(`x86_64/core2/gcd_1.asm')
--- /dev/null
+/* AMD K10 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007,
+2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#define GMP_LIMB_BITS 64
+#define BYTES_PER_MP_LIMB 8
+
+#if 0
+#undef mpn_sublsh_n
+#define mpn_sublsh_n(rp,up,vp,n,c) \
+ (((rp) == (up)) ? mpn_submul_1 (rp, vp, n, CNST_LIMB(1) << (c)) \
+ : MPN(mpn_sublsh_n)(rp,up,vp,n,c))
+#endif
+
+#define MOD_1_NORM_THRESHOLD 0 /* always */
+#define MOD_1_UNNORM_THRESHOLD 0 /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 3
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 2
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 17
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD 28
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 7
+#define USE_PREINV_DIVREM_1 1 /* native */
+#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
+#define DIVEXACT_1_THRESHOLD 0 /* always (native) */
+#define BMOD_1_TO_MOD_1_THRESHOLD 15
+
+#define MUL_TOOM22_THRESHOLD 28
+#define MUL_TOOM33_THRESHOLD 81
+#define MUL_TOOM44_THRESHOLD 242
+#define MUL_TOOM6H_THRESHOLD 418
+#define MUL_TOOM8H_THRESHOLD 478
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD 97
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 155
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 149
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD 163
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD 226
+
+#define SQR_BASECASE_THRESHOLD 0 /* always (native) */
+#define SQR_TOOM2_THRESHOLD 34
+#define SQR_TOOM3_THRESHOLD 113
+#define SQR_TOOM4_THRESHOLD 336
+#define SQR_TOOM6_THRESHOLD 557
+#define SQR_TOOM8_THRESHOLD 0 /* always */
+
+#define MULMID_TOOM42_THRESHOLD 36
+
+#define MULMOD_BNM1_THRESHOLD 15
+#define SQRMOD_BNM1_THRESHOLD 18
+
+#define MUL_FFT_MODF_THRESHOLD 525 /* k = 5 */
+#define MUL_FFT_TABLE3 \
+ { { 570, 5}, { 21, 6}, { 11, 5}, { 23, 6}, \
+ { 25, 7}, { 13, 6}, { 27, 7}, { 15, 6}, \
+ { 31, 7}, { 25, 8}, { 13, 7}, { 29, 8}, \
+ { 15, 7}, { 31, 8}, { 17, 7}, { 35, 8}, \
+ { 19, 7}, { 39, 8}, { 21, 7}, { 43, 8}, \
+ { 23, 7}, { 47, 8}, { 25, 7}, { 51, 8}, \
+ { 29, 9}, { 15, 8}, { 37, 9}, { 19, 8}, \
+ { 43, 9}, { 23, 8}, { 51, 9}, { 27, 8}, \
+ { 55,10}, { 15, 9}, { 43,10}, { 23, 9}, \
+ { 55,10}, { 31, 9}, { 63, 5}, { 1023, 4}, \
+ { 2431, 5}, { 1279, 6}, { 671, 7}, { 367, 8}, \
+ { 189, 9}, { 95, 8}, { 195, 9}, { 111,11}, \
+ { 31, 9}, { 131,10}, { 71, 9}, { 155,10}, \
+ { 79, 9}, { 159,10}, { 87,11}, { 47,10}, \
+ { 111,11}, { 63,10}, { 135,11}, { 79,10}, \
+ { 167,11}, { 95,10}, { 191,11}, { 111,12}, \
+ { 63,11}, { 143,10}, { 287,11}, { 159,10}, \
+ { 319,11}, { 175,12}, { 95,11}, { 207,13}, \
+ { 63,12}, { 127,11}, { 255,10}, { 543,11}, \
+ { 287,12}, { 159,11}, { 319,10}, { 639,11}, \
+ { 335,10}, { 671,11}, { 351,10}, { 703,12}, \
+ { 191,11}, { 383,10}, { 767,11}, { 415,12}, \
+ { 223,13}, { 127,12}, { 255,11}, { 543,12}, \
+ { 287,11}, { 575,10}, { 1151,11}, { 607,12}, \
+ { 319,11}, { 639,10}, { 1279,11}, { 671,12}, \
+ { 351,11}, { 703,13}, { 191,12}, { 383,11}, \
+ { 767,12}, { 415,11}, { 831,12}, { 447,14}, \
+ { 127,13}, { 255,12}, { 543,11}, { 1087,12}, \
+ { 607,11}, { 1215,13}, { 319,12}, { 671,11}, \
+ { 1343,12}, { 735,13}, { 383,12}, { 767,11}, \
+ { 1535,12}, { 799,11}, { 1599,12}, { 831,13}, \
+ { 447,12}, { 895,11}, { 1791,12}, { 959,14}, \
+ { 255,13}, { 511,12}, { 1087,13}, { 575,12}, \
+ { 1215,13}, { 639,12}, { 1343,13}, { 703,12}, \
+ { 1407,14}, { 383,13}, { 767,12}, { 1599,13}, \
+ { 831,12}, { 1663,13}, { 895,12}, { 1791,13}, \
+ { 959,15}, { 255,14}, { 511,13}, { 1087,12}, \
+ { 2175,13}, { 1215,14}, { 639,13}, { 1471,14}, \
+ { 767,13}, { 1663,14}, { 895,13}, { 1855,15}, \
+ { 511,14}, { 1023,13}, { 2175,14}, { 1151,13}, \
+ { 2431,14}, { 1279,13}, { 2687,14}, { 1407,15}, \
+ { 767,14}, { 1535,13}, { 3071,14}, { 1791,16}, \
+ { 511,15}, { 1023,14}, { 2431,15}, { 1279,14}, \
+ { 2815,15}, { 1535,14}, { 3199,15}, { 1791,14}, \
+ { 3583,16}, { 65536,17}, { 131072,18}, { 262144,19}, \
+ { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \
+ {8388608,24} }
+#define MUL_FFT_TABLE3_SIZE 185
+#define MUL_FFT_THRESHOLD 7552
+
+#define SQR_FFT_MODF_THRESHOLD 444 /* k = 5 */
+#define SQR_FFT_TABLE3 \
+ { { 460, 5}, { 21, 6}, { 11, 5}, { 23, 6}, \
+ { 12, 5}, { 25, 6}, { 27, 7}, { 14, 6}, \
+ { 29, 7}, { 15, 6}, { 31, 7}, { 29, 8}, \
+ { 15, 7}, { 32, 8}, { 17, 7}, { 35, 8}, \
+ { 19, 7}, { 39, 8}, { 21, 7}, { 43, 8}, \
+ { 25, 7}, { 51, 8}, { 29, 9}, { 15, 8}, \
+ { 35, 9}, { 19, 8}, { 43, 9}, { 23, 8}, \
+ { 51, 9}, { 27, 8}, { 55,10}, { 15, 9}, \
+ { 31, 8}, { 63, 9}, { 43,10}, { 23, 9}, \
+ { 55,11}, { 15,10}, { 31, 9}, { 71,10}, \
+ { 39, 9}, { 83,10}, { 47, 6}, { 767, 4}, \
+ { 3263, 5}, { 1727, 4}, { 3455, 5}, { 1791, 6}, \
+ { 927, 7}, { 479, 6}, { 959, 7}, { 511, 8}, \
+ { 271, 9}, { 147,10}, { 87,11}, { 47,10}, \
+ { 95,12}, { 31,11}, { 63,10}, { 135,11}, \
+ { 79,10}, { 167,11}, { 95,10}, { 191,11}, \
+ { 111,12}, { 63,11}, { 127,10}, { 255,11}, \
+ { 143,10}, { 287, 9}, { 575,10}, { 303,11}, \
+ { 159,12}, { 95,11}, { 191,10}, { 383, 9}, \
+ { 767,10}, { 399,11}, { 207,13}, { 63,12}, \
+ { 127,11}, { 255,10}, { 511,11}, { 271,10}, \
+ { 543,11}, { 287,10}, { 575,12}, { 159,11}, \
+ { 319,10}, { 639,11}, { 335,10}, { 671,11}, \
+ { 351,10}, { 703,12}, { 191,11}, { 383,10}, \
+ { 767,11}, { 415,10}, { 831,11}, { 447,13}, \
+ { 127,12}, { 255,11}, { 511,10}, { 1023,11}, \
+ { 543,12}, { 287,11}, { 575,10}, { 1151,11}, \
+ { 607,10}, { 1215,12}, { 319,11}, { 639,10}, \
+ { 1279,11}, { 671,12}, { 351,11}, { 703,13}, \
+ { 191,12}, { 383,11}, { 767,12}, { 415,11}, \
+ { 831,12}, { 447,14}, { 127,13}, { 255,12}, \
+ { 511,11}, { 1023,12}, { 543,11}, { 1087,12}, \
+ { 575,11}, { 1151,12}, { 607,13}, { 319,12}, \
+ { 639,11}, { 1279,12}, { 671,11}, { 1343,12}, \
+ { 703,11}, { 1407,12}, { 735,13}, { 383,12}, \
+ { 767,11}, { 1535,12}, { 799,11}, { 1599,12}, \
+ { 831,13}, { 447,12}, { 959,14}, { 255,13}, \
+ { 511,12}, { 1087,13}, { 575,12}, { 1215,13}, \
+ { 639,12}, { 1343,13}, { 703,12}, { 1407,14}, \
+ { 383,13}, { 767,12}, { 1599,13}, { 831,12}, \
+ { 1663,13}, { 895,12}, { 1791,13}, { 959,15}, \
+ { 255,14}, { 511,13}, { 1087,12}, { 2175,13}, \
+ { 1215,14}, { 639,13}, { 1471,14}, { 767,13}, \
+ { 1663,14}, { 895,13}, { 1855,15}, { 511,14}, \
+ { 1023,13}, { 2175,14}, { 1151,13}, { 2303,14}, \
+ { 1279,13}, { 2559,14}, { 1407,15}, { 767,14}, \
+ { 1535,13}, { 3071,14}, { 1791,16}, { 511,15}, \
+ { 1023,14}, { 2303,15}, { 1279,14}, { 2687,15}, \
+ { 1535,14}, { 3199,15}, { 1791,16}, { 65536,17}, \
+ { 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \
+ {2097152,22}, {4194304,23}, {8388608,24} }
+#define SQR_FFT_TABLE3_SIZE 203
+#define SQR_FFT_THRESHOLD 5248
+
+#define MULLO_BASECASE_THRESHOLD 0 /* always */
+#define MULLO_DC_THRESHOLD 61
+#define MULLO_MUL_N_THRESHOLD 15150
+
+#define DC_DIV_QR_THRESHOLD 56
+#define DC_DIVAPPR_Q_THRESHOLD 220
+#define DC_BDIV_QR_THRESHOLD 52
+#define DC_BDIV_Q_THRESHOLD 44
+
+#define INV_MULMOD_BNM1_THRESHOLD 54
+#define INV_NEWTON_THRESHOLD 222
+#define INV_APPR_THRESHOLD 214
+
+#define BINV_NEWTON_THRESHOLD 324
+#define REDC_1_TO_REDC_2_THRESHOLD 19
+#define REDC_2_TO_REDC_N_THRESHOLD 71
+
+#define MU_DIV_QR_THRESHOLD 1718
+#define MU_DIVAPPR_Q_THRESHOLD 1652
+#define MUPI_DIV_QR_THRESHOLD 102
+#define MU_BDIV_QR_THRESHOLD 1528
+#define MU_BDIV_Q_THRESHOLD 1589
+
+#define POWM_SEC_TABLE 2,23,322,2080
+
+#define MATRIX22_STRASSEN_THRESHOLD 17
+#define HGCD_THRESHOLD 144
+#define HGCD_APPR_THRESHOLD 175
+#define HGCD_REDUCE_THRESHOLD 3389
+#define GCD_DC_THRESHOLD 501
+#define GCDEXT_DC_THRESHOLD 465
+#define JACOBI_BASE_METHOD 4
+
+#define GET_STR_DC_THRESHOLD 14
+#define GET_STR_PRECOMPUTE_THRESHOLD 29
+#define SET_STR_DC_THRESHOLD 248
+#define SET_STR_PRECOMPUTE_THRESHOLD 1648
+
+#define FAC_DSC_THRESHOLD 1105
+#define FAC_ODD_THRESHOLD 30
--- /dev/null
+dnl AMD64 mpn_hamdist -- hamming distance.
+
+dnl Copyright 2008, 2010, 2011, 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C AMD K8,K9 n/a
+C AMD K10 2
+C Intel P4 n/a
+C Intel core2 n/a
+C Intel corei 2.05
+C Intel atom n/a
+C VIA nano n/a
+
+C This is very straightforward 2-way unrolled code.
+
+C TODO
+C * Write something less basic. It should not be hard to reach 1.5 c/l with
+C 4-way unrolling.
+
+define(`ap', `%rdi')
+define(`bp', `%rsi')
+define(`n', `%rdx')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+ TEXT
+ ALIGN(32)
+PROLOGUE(mpn_hamdist)
+C Counts the bit positions in which the n-limb operands at ap and bp differ:
+C corresponding limbs are XORed and the set bits of each result are counted
+C with popcnt.  The running total is accumulated in %rax, which is what is
+C left there at every ret.
+C The first limb pair is loaded before n is examined, so n is presumably
+C required to be at least 1 -- TODO confirm against the callers.
+ FUNC_ENTRY(3)
+ mov (ap), %r8
+ xor (bp), %r8 C %r8 = first limb difference
+
+ lea (ap,n,8), ap C point at A operand end
+ lea (bp,n,8), bp C point at B operand end
+ neg n C negative index, counts up towards zero
+
+ bt $0, R32(n) C carry = low bit of n (parity of the count)
+ jnc L(2) C even count: start with two limbs
+
+C Odd count: consume a single limb, then fall into the 2-way loop if any
+C limbs remain.  %r10 is cleared because L(top) adds it to the total.
+L(1): .byte 0xf3,0x49,0x0f,0xb8,0xc0 C popcnt %r8, %rax
+ xor R32(%r10), R32(%r10)
+ add $1, n
+ js L(top) C index still negative: more limbs
+ FUNC_EXIT()
+ ret
+
+C Even count: consume two limbs; the second count stays pending in %r10.
+ ALIGN(16)
+L(2): mov 8(ap,n,8), %r9
+ .byte 0xf3,0x49,0x0f,0xb8,0xc0 C popcnt %r8, %rax
+ xor 8(bp,n,8), %r9
+ .byte 0xf3,0x4d,0x0f,0xb8,0xd1 C popcnt %r9, %r10
+ add $2, n
+ js L(top) C index still negative: more limbs
+ lea (%r10, %rax), %rax C fold in the pending count
+ FUNC_EXIT()
+ ret
+
+C Main loop, two limbs per iteration.  The count left in %r10 by the previous
+C step is added at the top of the next one.
+ ALIGN(16)
+L(top): mov (ap,n,8), %r8
+ lea (%r10, %rax), %rax C add pending count from previous step
+ mov 8(ap,n,8), %r9
+ xor (bp,n,8), %r8
+ xor 8(bp,n,8), %r9
+ .byte 0xf3,0x49,0x0f,0xb8,0xc8 C popcnt %r8, %rcx
+ lea (%rcx, %rax), %rax
+ .byte 0xf3,0x4d,0x0f,0xb8,0xd1 C popcnt %r9, %r10
+ add $2, n
+ js L(top)
+
+ lea (%r10, %rax), %rax C fold in the last pending count
+ FUNC_EXIT()
+ ret
+EPILOGUE()
--- /dev/null
+dnl X86-64 mpn_lshift optimised for Intel Sandy Bridge.
+
+dnl Copyright 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+MULFUNC_PROLOGUE(mpn_lshift)
+include_mpn(`x86_64/fastsse/lshift-movdqu2.asm')
--- /dev/null
+dnl X86-64 mpn_lshiftc optimised for Intel Sandy Bridge.
+
+dnl Copyright 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+MULFUNC_PROLOGUE(mpn_lshiftc)
+include_mpn(`x86_64/fastsse/lshiftc-movdqu2.asm')
--- /dev/null
+dnl AMD64 mpn_popcount -- population count.
+
+dnl Copyright 2008, 2010, 2011, 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C AMD K8,K9 n/a
+C AMD K10 1.125
+C Intel P4 n/a
+C Intel core2 n/a
+C Intel corei 1.25
+C Intel atom n/a
+C VIA nano n/a
+
+C NOTES
+C * A popcnt with a zero offset gets assembled in the offset-less form, which
+C is one byte shorter and would therefore break the switching code, whose
+C jump targets assume equally sized entries.
+C * The outdated gas used in FreeBSD and NetBSD cannot handle the POPCNT insn,
+C which is the main reason for our usage of '.byte'.
+
+C TODO
+C * Improve switching code, the current code sucks.
+
+define(`up', `%rdi')
+define(`n', `%rsi')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+ TEXT
+ ALIGN(32)
+PROLOGUE(mpn_popcount)
+C Accumulates in %rax the number of set bits in the n limbs at up.
+C The loop at L(top) is unrolled eight times.  The entry code computes
+C k = (-n) mod 8, moves up back by k limbs and jumps k entries into the
+C loop, so the first pass counts 8-k limbs and every later pass counts 8.
+C Each loop entry is a popcnt followed by an add and is addressed as
+C L(top) + 10*k, so every entry must occupy exactly 10 bytes.
+ FUNC_ENTRY(2)
+
+C ifelse(1,1,...) always expands its first branch; the second branch is
+C never assembled.
+ifelse(1,1,`
+ lea (up,n,8), up
+
+C mov R32(n), R32(%rcx)
+C neg R32(%rcx)
+ imul $-1, R32(n), R32(%rcx)
+ and $8-1, R32(%rcx)
+C rcx = k = (-n) mod 8, the number of loop entries to skip
+
+ neg n
+
+ mov R32(%rcx), R32(%rax)
+ neg %rax
+ lea (up,%rax,8),up
+C up now points k limbs before the operand end
+
+ xor R32(%rax), R32(%rax)
+C rax = 0, the running bit count
+
+ lea (%rcx,%rcx,4), %rcx
+C rcx = 5*k; the scale of 2 below makes the byte offset 10*k
+
+ lea L(top)(%rip), %rdx
+ lea (%rdx,%rcx,2), %rdx
+ jmp *%rdx
+',`
+ lea (up,n,8), up
+
+ mov R32(n), R32(%rcx)
+ neg R32(%rcx)
+ and $8-1, R32(%rcx)
+
+ neg n
+
+ mov R32(%rcx), R32(%rax)
+ shl $3, R32(%rax)
+ sub %rax, up
+
+ xor R32(%rax), R32(%rax)
+
+C add R32(%rcx), R32(%rcx) C 2x
+C lea (%rcx,%rcx,4), %rcx C 10x
+ imul $10, R32(%rcx)
+
+ lea L(top)(%rip), %rdx
+ add %rcx, %rdx
+ jmp *%rdx
+')
+
+C The labels below name the value of n mod 8 for which the first pass
+C enters the loop at that entry.
+ ALIGN(32)
+L(top):
+C 0 = n mod 8
+ .byte 0xf3,0x4c,0x0f,0xb8,0x44,0xf7,0x00 C popcnt 0(up,n,8), %r8
+ add %r8, %rax
+C 7 = n mod 8
+ .byte 0xf3,0x4c,0x0f,0xb8,0x4c,0xf7,0x08 C popcnt 8(up,n,8), %r9
+ add %r9, %rax
+C 6 = n mod 8
+ .byte 0xf3,0x4c,0x0f,0xb8,0x44,0xf7,0x10 C popcnt 16(up,n,8), %r8
+ add %r8, %rax
+C 5 = n mod 8
+ .byte 0xf3,0x4c,0x0f,0xb8,0x4c,0xf7,0x18 C popcnt 24(up,n,8), %r9
+ add %r9, %rax
+C 4 = n mod 8
+ .byte 0xf3,0x4c,0x0f,0xb8,0x44,0xf7,0x20 C popcnt 32(up,n,8), %r8
+ add %r8, %rax
+C 3 = n mod 8
+ .byte 0xf3,0x4c,0x0f,0xb8,0x4c,0xf7,0x28 C popcnt 40(up,n,8), %r9
+ add %r9, %rax
+C 2 = n mod 8
+ .byte 0xf3,0x4c,0x0f,0xb8,0x44,0xf7,0x30 C popcnt 48(up,n,8), %r8
+ add %r8, %rax
+C 1 = n mod 8
+ .byte 0xf3,0x4c,0x0f,0xb8,0x4c,0xf7,0x38 C popcnt 56(up,n,8), %r9
+ add %r9, %rax
+
+ add $8, n C advance the negative index by one pass
+ js L(top) C still negative: more limbs remain
+ FUNC_EXIT()
+ ret
+EPILOGUE()
--- /dev/null
+dnl X86-64 mpn_rshift optimised for Intel Sandy Bridge.
+
+dnl Copyright 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+MULFUNC_PROLOGUE(mpn_rshift)
+include_mpn(`x86_64/fastsse/rshift-movdqu2.asm')
--- /dev/null
+dnl AMD64 mpn_addlsh_n and mpn_rsblsh_n. R = V2^k +- U.
+
+dnl Copyright 2006, 2010, 2011, 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C AMD K8,K9 2.87 < 3.85 for lshift + add_n
+C AMD K10 2.75 < 3.85 for lshift + add_n
+C Intel P4 22 > 7.33 for lshift + add_n
+C Intel core2 4.1 > 3.27 for lshift + add_n
+C Intel NHM 4.4 > 3.75 for lshift + add_n
+C Intel SBR 3.17 < 3.46 for lshift + add_n
+C Intel atom ? ? 8.75 for lshift + add_n
+C VIA nano 4.7 < 6.25 for lshift + add_n
+
+C TODO
+C * Can we propagate carry into rdx instead of using a special carry register?
+C That could save enough insns to get to 10 cycles/iteration.
+
+define(`rp', `%rdi')
+define(`up', `%rsi')
+define(`vp_param', `%rdx')
+define(`n_param', `%rcx')
+define(`cnt', `%r8')
+
+define(`vp', `%r12')
+define(`n', `%rbp')
+
+ifdef(`OPERATION_addlsh_n',`
+ define(ADDSUB, `add')
+ define(ADCSBB, `adc')
+ define(func, mpn_addlsh_n)
+')
+ifdef(`OPERATION_rsblsh_n',`
+ define(ADDSUB, `sub')
+ define(ADCSBB, `sbb')
+ define(func, mpn_rsblsh_n)
+')
+
+MULFUNC_PROLOGUE(mpn_addlsh_n mpn_rsblsh_n)
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+ TEXT
+ ALIGN(16)
+PROLOGUE(func)
+C func is mpn_addlsh_n or mpn_rsblsh_n, depending on which OPERATION_ symbol
+C is defined.  For the n_param-limb operands U at up and V at vp_param it
+C stores V*2^cnt + U (addlsh) or V*2^cnt - U (rsblsh) at rp.
+C
+C The shift is done by multiplication: r8 holds 2^cnt, so after each mul
+C rax is a V limb shifted left by cnt and rdx holds the bits shifted out at
+C the top.  Those high bits are ORed into the low part of the next product.
+C
+C The carry (or borrow) between groups of limbs is parked in rcx as 0 or -1
+C by sbb rcx,rcx and moved back into the carry flag by add rcx,rcx.
+C
+C The value left in rax at ret is the high part of the last product,
+C adjusted by the final carry or borrow.
+ FUNC_ENTRY(4)
+C DOS64 only: cnt is fetched from the stack into r8 (presumably the fifth
+C argument slot of that ABI -- TODO confirm).
+IFDOS(` mov 56(%rsp), %r8d ')
+ push %r12
+ push %rbp
+ push %rbx
+
+ mov (vp_param), %rax C load first V limb early
+
+ mov $0, R32(n)
+ sub n_param, n C n = -n_param, counts up towards zero
+
+ lea -16(up,n_param,8), up
+ lea -16(rp,n_param,8), rp
+ lea 16(vp_param,n_param,8), vp
+
+ mov n_param, %r9
+
+ mov %r8, %rcx
+ mov $1, R32(%r8)
+ shl R8(%rcx), %r8 C r8 = 2^cnt
+
+ mul %r8 C initial multiply
+
+C Dispatch on n_param mod 4 to the matching loop entry point.
+ and $3, R32(%r9)
+ jz L(b0)
+ cmp $2, R32(%r9)
+ jc L(b1)
+ jz L(b2)
+
+C n_param mod 4 = 3
+L(b3): mov %rax, %r11
+ ADDSUB 16(up,n,8), %r11
+ mov -8(vp,n,8), %rax
+ sbb R32(%rcx), R32(%rcx) C park carry in rcx
+ mov %rdx, %rbx
+ mul %r8
+ or %rax, %rbx
+ mov (vp,n,8), %rax
+ mov %rdx, %r9
+ mul %r8
+ or %rax, %r9
+ add $3, n
+ jnz L(lo3)
+ jmp L(cj3)
+
+C n_param mod 4 = 2
+L(b2): mov %rax, %rbx
+ mov -8(vp,n,8), %rax
+ mov %rdx, %r9
+ mul %r8
+ or %rax, %r9
+ add $2, n
+ jz L(cj2)
+ mov %rdx, %r10
+ mov -16(vp,n,8), %rax
+ mul %r8
+ or %rax, %r10
+ xor R32(%rcx), R32(%rcx) C clear carry register
+ jmp L(lo2)
+
+C n_param mod 4 = 1
+L(b1): mov %rax, %r9
+ mov %rdx, %r10
+ add $1, n
+ jnz L(gt1)
+ ADDSUB 8(up,n,8), %r9
+ jmp L(cj1)
+L(gt1): mov -16(vp,n,8), %rax
+ mul %r8
+ or %rax, %r10
+ mov %rdx, %r11
+ mov -8(vp,n,8), %rax
+ mul %r8
+ or %rax, %r11
+ ADDSUB 8(up,n,8), %r9
+ ADCSBB 16(up,n,8), %r10
+ ADCSBB 24(up,n,8), %r11
+ mov (vp,n,8), %rax
+ sbb R32(%rcx), R32(%rcx) C park carry in rcx
+ jmp L(lo1)
+
+C n_param mod 4 = 0
+L(b0): mov %rax, %r10
+ mov %rdx, %r11
+ mov -8(vp,n,8), %rax
+ mul %r8
+ or %rax, %r11
+ ADDSUB 16(up,n,8), %r10
+ ADCSBB 24(up,n,8), %r11
+ mov (vp,n,8), %rax
+ sbb R32(%rcx), R32(%rcx) C park carry in rcx
+ mov %rdx, %rbx
+ mul %r8
+ or %rax, %rbx
+ mov 8(vp,n,8), %rax
+ add $4, n
+ jz L(end)
+
+C Main loop, four limbs per iteration.  Shifted V limbs rotate through
+C rbx, r9, r10 and r11; results are stored one group behind the multiplies.
+ ALIGN(8)
+L(top): mov %rdx, %r9
+ mul %r8
+ or %rax, %r9
+ mov %r10, -16(rp,n,8)
+L(lo3): mov %rdx, %r10
+ mov -16(vp,n,8), %rax
+ mul %r8
+ or %rax, %r10
+ mov %r11, -8(rp,n,8)
+L(lo2): mov %rdx, %r11
+ mov -8(vp,n,8), %rax
+ mul %r8
+ or %rax, %r11
+ add R32(%rcx), R32(%rcx) C restore carry flag from rcx
+ ADCSBB (up,n,8), %rbx
+ ADCSBB 8(up,n,8), %r9
+ ADCSBB 16(up,n,8), %r10
+ ADCSBB 24(up,n,8), %r11
+ mov (vp,n,8), %rax
+ sbb R32(%rcx), R32(%rcx) C park carry in rcx
+ mov %rbx, (rp,n,8)
+L(lo1): mov %rdx, %rbx
+ mul %r8
+ or %rax, %rbx
+ mov %r9, 8(rp,n,8)
+L(lo0): mov 8(vp,n,8), %rax
+ add $4, n
+ jnz L(top)
+
+C Wind-down: finish the last two limbs and form the return value.
+L(end): mov %rdx, %r9
+ mul %r8
+ or %rax, %r9
+ mov %r10, -16(rp,n,8)
+L(cj3): mov %r11, -8(rp,n,8)
+L(cj2): add R32(%rcx), R32(%rcx) C restore carry flag from rcx
+ ADCSBB (up,n,8), %rbx
+ ADCSBB 8(up,n,8), %r9
+ mov %rbx, (rp,n,8)
+L(cj1): mov %r9, 8(rp,n,8)
+ mov %rdx, %rax C high part of the last product
+ ADCSBB $0, %rax C apply the final carry or borrow
+ pop %rbx
+ pop %rbp
+ pop %r12
+ FUNC_EXIT()
+ ret
+EPILOGUE()
--- /dev/null
+/* AMD K8 gmp-mparam.h -- Compiler/machine parameter header file.
+
+Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007,
+2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#define GMP_LIMB_BITS 64
+#define BYTES_PER_MP_LIMB 8
+
+// #undef mpn_sublsh_n
+// #define mpn_sublsh_n(rp,up,vp,n,c) \
+// (((rp) == (up)) ? mpn_submul_1 (rp, vp, n, CNST_LIMB(1) << (c)) \
+// : MPN(mpn_sublsh_n)(rp,up,vp,n,c))
+
+#define MOD_1_NORM_THRESHOLD 0 /* always */
+#define MOD_1_UNNORM_THRESHOLD 0 /* always */
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 4
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 2
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 13
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD 35
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 7
+#define USE_PREINV_DIVREM_1 1 /* native */
+#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
+#define DIVEXACT_1_THRESHOLD 0 /* always (native) */
+#define BMOD_1_TO_MOD_1_THRESHOLD 17
+
+#define MUL_TOOM22_THRESHOLD 27
+#define MUL_TOOM33_THRESHOLD 81
+#define MUL_TOOM44_THRESHOLD 242
+#define MUL_TOOM6H_THRESHOLD 369
+#define MUL_TOOM8H_THRESHOLD 482
+
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD 97
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 153
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 113
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD 187
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD 166
+
+#define SQR_BASECASE_THRESHOLD 0 /* always (native) */
+#define SQR_TOOM2_THRESHOLD 34
+#define SQR_TOOM3_THRESHOLD 115
+#define SQR_TOOM4_THRESHOLD 527
+#define SQR_TOOM6_THRESHOLD 587
+#define SQR_TOOM8_THRESHOLD 0 /* always */
+
+#define MULMID_TOOM42_THRESHOLD 36
+
+#define MULMOD_BNM1_THRESHOLD 18
+#define SQRMOD_BNM1_THRESHOLD 19
+
+#define MUL_FFT_MODF_THRESHOLD 642 /* k = 5 */
+#define MUL_FFT_TABLE3 \
+ { { 570, 5}, { 21, 6}, { 11, 5}, { 23, 6}, \
+ { 25, 7}, { 13, 6}, { 27, 7}, { 15, 6}, \
+ { 31, 7}, { 25, 8}, { 13, 7}, { 29, 8}, \
+ { 15, 7}, { 31, 8}, { 17, 7}, { 35, 8}, \
+ { 19, 7}, { 39, 8}, { 21, 7}, { 43, 8}, \
+ { 23, 7}, { 47, 8}, { 25, 7}, { 51, 8}, \
+ { 29, 9}, { 15, 8}, { 37, 9}, { 19, 8}, \
+ { 43, 9}, { 23, 8}, { 51, 9}, { 27, 8}, \
+ { 55,10}, { 15, 9}, { 43,10}, { 23, 9}, \
+ { 55,10}, { 31, 9}, { 63, 5}, { 1023, 4}, \
+ { 2431, 5}, { 1279, 6}, { 671, 7}, { 367, 8}, \
+ { 189, 9}, { 95, 8}, { 195, 9}, { 111,11}, \
+ { 31, 9}, { 131,10}, { 71, 9}, { 155,10}, \
+ { 79, 9}, { 159,10}, { 87,11}, { 47,10}, \
+ { 111,11}, { 63,10}, { 135,11}, { 79,10}, \
+ { 167,11}, { 95,10}, { 191,11}, { 111,12}, \
+ { 63,11}, { 143,10}, { 287,11}, { 159,10}, \
+ { 319,11}, { 175,12}, { 95,11}, { 207,13}, \
+ { 63,12}, { 127,11}, { 255,10}, { 543,11}, \
+ { 287,12}, { 159,11}, { 319,10}, { 639,11}, \
+ { 335,10}, { 671,11}, { 351,10}, { 703,12}, \
+ { 191,11}, { 383,10}, { 767,11}, { 415,12}, \
+ { 223,13}, { 127,12}, { 255,11}, { 543,12}, \
+ { 287,11}, { 575,10}, { 1151,11}, { 607,12}, \
+ { 319,11}, { 639,10}, { 1279,11}, { 671,12}, \
+ { 351,11}, { 703,13}, { 191,12}, { 383,11}, \
+ { 767,12}, { 415,11}, { 831,12}, { 447,14}, \
+ { 127,13}, { 255,12}, { 543,11}, { 1087,12}, \
+ { 607,11}, { 1215,13}, { 319,12}, { 671,11}, \
+ { 1343,12}, { 735,13}, { 383,12}, { 767,11}, \
+ { 1535,12}, { 799,11}, { 1599,12}, { 831,13}, \
+ { 447,12}, { 895,11}, { 1791,12}, { 959,14}, \
+ { 255,13}, { 511,12}, { 1087,13}, { 575,12}, \
+ { 1215,13}, { 639,12}, { 1343,13}, { 703,12}, \
+ { 1407,14}, { 383,13}, { 767,12}, { 1599,13}, \
+ { 831,12}, { 1663,13}, { 895,12}, { 1791,13}, \
+ { 959,15}, { 255,14}, { 511,13}, { 1087,12}, \
+ { 2175,13}, { 1215,14}, { 639,13}, { 1471,14}, \
+ { 767,13}, { 1663,14}, { 895,13}, { 1855,15}, \
+ { 511,14}, { 1023,13}, { 2175,14}, { 1151,13}, \
+ { 2431,14}, { 1279,13}, { 2687,14}, { 1407,15}, \
+ { 767,14}, { 1535,13}, { 3071,14}, { 1791,16}, \
+ { 511,15}, { 1023,14}, { 2431,15}, { 1279,14}, \
+ { 2815,15}, { 1535,14}, { 3199,15}, { 1791,14}, \
+ { 3583,16}, { 65536,17}, { 131072,18}, { 262144,19}, \
+ { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \
+ {8388608,24} }
+/* Number of two-element entries in MUL_FFT_TABLE3 above: 46 rows of four
+   entries plus one final entry.  Keep in sync with the table. */
+#define MUL_FFT_TABLE3_SIZE 185
+#define MUL_FFT_THRESHOLD 11520
+
+#define SQR_FFT_MODF_THRESHOLD 565 /* k = 5 */
+#define SQR_FFT_TABLE3 \
+ { { 460, 5}, { 21, 6}, { 11, 5}, { 23, 6}, \
+ { 12, 5}, { 25, 6}, { 27, 7}, { 14, 6}, \
+ { 29, 7}, { 15, 6}, { 31, 7}, { 29, 8}, \
+ { 15, 7}, { 32, 8}, { 17, 7}, { 35, 8}, \
+ { 19, 7}, { 39, 8}, { 21, 7}, { 43, 8}, \
+ { 25, 7}, { 51, 8}, { 29, 9}, { 15, 8}, \
+ { 35, 9}, { 19, 8}, { 43, 9}, { 23, 8}, \
+ { 51, 9}, { 27, 8}, { 55,10}, { 15, 9}, \
+ { 31, 8}, { 63, 9}, { 43,10}, { 23, 9}, \
+ { 55,11}, { 15,10}, { 31, 9}, { 71,10}, \
+ { 39, 9}, { 83,10}, { 47, 6}, { 767, 4}, \
+ { 3263, 5}, { 1727, 4}, { 3455, 5}, { 1791, 6}, \
+ { 927, 7}, { 479, 6}, { 959, 7}, { 511, 8}, \
+ { 271, 9}, { 147,10}, { 87,11}, { 47,10}, \
+ { 95,12}, { 31,11}, { 63,10}, { 135,11}, \
+ { 79,10}, { 167,11}, { 95,10}, { 191,11}, \
+ { 111,12}, { 63,11}, { 127,10}, { 255,11}, \
+ { 143,10}, { 287, 9}, { 575,10}, { 303,11}, \
+ { 159,12}, { 95,11}, { 191,10}, { 383, 9}, \
+ { 767,10}, { 399,11}, { 207,13}, { 63,12}, \
+ { 127,11}, { 255,10}, { 511,11}, { 271,10}, \
+ { 543,11}, { 287,10}, { 575,12}, { 159,11}, \
+ { 319,10}, { 639,11}, { 335,10}, { 671,11}, \
+ { 351,10}, { 703,12}, { 191,11}, { 383,10}, \
+ { 767,11}, { 415,10}, { 831,11}, { 447,13}, \
+ { 127,12}, { 255,11}, { 511,10}, { 1023,11}, \
+ { 543,12}, { 287,11}, { 575,10}, { 1151,11}, \
+ { 607,10}, { 1215,12}, { 319,11}, { 639,10}, \
+ { 1279,11}, { 671,12}, { 351,11}, { 703,13}, \
+ { 191,12}, { 383,11}, { 767,12}, { 415,11}, \
+ { 831,12}, { 447,14}, { 127,13}, { 255,12}, \
+ { 511,11}, { 1023,12}, { 543,11}, { 1087,12}, \
+ { 575,11}, { 1151,12}, { 607,13}, { 319,12}, \
+ { 639,11}, { 1279,12}, { 671,11}, { 1343,12}, \
+ { 703,11}, { 1407,12}, { 735,13}, { 383,12}, \
+ { 767,11}, { 1535,12}, { 799,11}, { 1599,12}, \
+ { 831,13}, { 447,12}, { 959,14}, { 255,13}, \
+ { 511,12}, { 1087,13}, { 575,12}, { 1215,13}, \
+ { 639,12}, { 1343,13}, { 703,12}, { 1407,14}, \
+ { 383,13}, { 767,12}, { 1599,13}, { 831,12}, \
+ { 1663,13}, { 895,12}, { 1791,13}, { 959,15}, \
+ { 255,14}, { 511,13}, { 1087,12}, { 2175,13}, \
+ { 1215,14}, { 639,13}, { 1471,14}, { 767,13}, \
+ { 1663,14}, { 895,13}, { 1855,15}, { 511,14}, \
+ { 1023,13}, { 2175,14}, { 1151,13}, { 2303,14}, \
+ { 1279,13}, { 2559,14}, { 1407,15}, { 767,14}, \
+ { 1535,13}, { 3071,14}, { 1791,16}, { 511,15}, \
+ { 1023,14}, { 2303,15}, { 1279,14}, { 2687,15}, \
+ { 1535,14}, { 3199,15}, { 1791,16}, { 65536,17}, \
+ { 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \
+ {2097152,22}, {4194304,23}, {8388608,24} }
+/* Number of two-element entries in SQR_FFT_TABLE3 above: 50 rows of four
+   entries plus three final entries.  Keep in sync with the table. */
+#define SQR_FFT_TABLE3_SIZE 203
+#define SQR_FFT_THRESHOLD 5568
+
+#define MULLO_BASECASE_THRESHOLD 0 /* always */
+#define MULLO_DC_THRESHOLD 63
+#define MULLO_MUL_N_THRESHOLD 22239
+
+#define DC_DIV_QR_THRESHOLD 40
+#define DC_DIVAPPR_Q_THRESHOLD 252
+#define DC_BDIV_QR_THRESHOLD 38
+#define DC_BDIV_Q_THRESHOLD 168
+
+#define INV_MULMOD_BNM1_THRESHOLD 67
+#define INV_NEWTON_THRESHOLD 246
+#define INV_APPR_THRESHOLD 236
+
+#define BINV_NEWTON_THRESHOLD 252
+#define REDC_1_TO_REDC_2_THRESHOLD 11
+#define REDC_2_TO_REDC_N_THRESHOLD 84
+
+#define MU_DIV_QR_THRESHOLD 1932
+#define MU_DIVAPPR_Q_THRESHOLD 1895
+#define MUPI_DIV_QR_THRESHOLD 99
+#define MU_BDIV_QR_THRESHOLD 1528
+#define MU_BDIV_Q_THRESHOLD 1787
+
+#define POWM_SEC_TABLE 3,35,322,1926
+
+#define MATRIX22_STRASSEN_THRESHOLD 21
+#define HGCD_THRESHOLD 140
+#define HGCD_APPR_THRESHOLD 190
+#define HGCD_REDUCE_THRESHOLD 4120
+#define GCD_DC_THRESHOLD 606
+#define GCDEXT_DC_THRESHOLD 492
+#define JACOBI_BASE_METHOD 4
+
+#define GET_STR_DC_THRESHOLD 18
+#define GET_STR_PRECOMPUTE_THRESHOLD 32
+#define SET_STR_DC_THRESHOLD 266
+#define SET_STR_PRECOMPUTE_THRESHOLD 2105
+
+#define FAC_DSC_THRESHOLD 1474
+#define FAC_ODD_THRESHOLD 24
dnl AMD64 logops.
-dnl Copyright 2004, 2005, 2006 Free Software Foundation, Inc.
+dnl Copyright 2004, 2005, 2006, 2011, 2012 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
C cycles/limb
-C K8,K9: 1.5
-C K10: 1.75-2 (fluctuating)
-C P4: 2.8/3.35/3.60 (variant1/variant2/variant3)
-C P6-15: 2.0
+C AMD K8,K9 1.5 with fluctuations for variant 2 and 3
+C AMD K10 1.5 with fluctuations for all variants
+C Intel P4 2.8/3.35/3.60 (variant1/variant2/variant3)
+C Intel core2 2
+C Intel NHM 2
+C Intel SBR 1.5/1.75/1.75
+C Intel atom 3.75
+C VIA nano 3.25
ifdef(`OPERATION_and_n',`
define(`func',`mpn_and_n')
define(`vp',`%rdx')
define(`n',`%rcx')
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
ASM_START()
TEXT
ALIGN(32)
PROLOGUE(func)
+ FUNC_ENTRY(4)
movq (vp), %r8
- movl %ecx, %eax
+ movl R32(%rcx), R32(%rax)
leaq (vp,n,8), vp
leaq (up,n,8), up
leaq (rp,n,8), rp
negq n
- andl $3, %eax
+ andl $3, R32(%rax)
je L(b00)
- cmpl $2, %eax
+ cmpl $2, R32(%rax)
jc L(b01)
je L(b10)
movq %r9, 24(rp,n,8)
addq $4, n
jnc L(oop)
-L(ret): ret
+L(ret): FUNC_EXIT()
+ ret
EPILOGUE()
')
TEXT
ALIGN(32)
PROLOGUE(func)
+ FUNC_ENTRY(4)
movq (vp), %r8
notq %r8
- movl %ecx, %eax
+ movl R32(%rcx), R32(%rax)
leaq (vp,n,8), vp
leaq (up,n,8), up
leaq (rp,n,8), rp
negq n
- andl $3, %eax
+ andl $3, R32(%rax)
je L(b00)
- cmpl $2, %eax
+ cmpl $2, R32(%rax)
jc L(b01)
je L(b10)
movq %r9, 24(rp,n,8)
addq $4, n
jnc L(oop)
-L(ret): ret
+L(ret): FUNC_EXIT()
+ ret
EPILOGUE()
')
TEXT
ALIGN(32)
PROLOGUE(func)
+ FUNC_ENTRY(4)
movq (vp), %r8
- movl %ecx, %eax
+ movl R32(%rcx), R32(%rax)
leaq (vp,n,8), vp
leaq (up,n,8), up
leaq (rp,n,8), rp
negq n
- andl $3, %eax
+ andl $3, R32(%rax)
je L(b00)
- cmpl $2, %eax
+ cmpl $2, R32(%rax)
jc L(b01)
je L(b10)
movq %r9, 24(rp,n,8)
addq $4, n
jnc L(oop)
-L(ret): ret
+L(ret): FUNC_EXIT()
+ ret
EPILOGUE()
')
dnl AMD64 mpn_lshift -- mpn left shift.
-dnl Copyright 2003, 2005, 2007, 2009 Free Software Foundation, Inc.
+dnl Copyright 2003, 2005, 2007, 2009, 2011, 2012 Free Software Foundation,
+dnl Inc.
dnl
dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or
-dnl modify it under the terms of the GNU Lesser General Public License as
-dnl published by the Free Software Foundation; either version 3 of the
-dnl License, or (at your option) any later version.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful,
-dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
-dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-dnl Lesser General Public License for more details.
-dnl
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
dnl You should have received a copy of the GNU Lesser General Public License
dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
C cycles/limb cycles/limb cnt=1
-C K8,K9: 2.375 1.375
-C K10: 2.375 1.375
-C P4: 8 10.5
-C P6-15 (Core2): 2.11 4.28
-C P6-28 (Atom): 5.75 3.5
+C AMD K8,K9 2.375 1.375
+C AMD K10 2.375 1.375
+C Intel P4 8 10.5
+C Intel core2 2.11 4.28
+C Intel corei ? ?
+C Intel atom 5.75 3.5
+C VIA nano 3.5 2.25
C INPUT PARAMETERS
define(`n', `%rdx')
define(`cnt', `%rcx')
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
ASM_START()
TEXT
ALIGN(32)
PROLOGUE(mpn_lshift)
+ FUNC_ENTRY(4)
cmp $1, R8(%rcx)
jne L(gen)
C For cnt=1 we want to work from lowest limb towards higher limbs.
-C Check for bad overlap (up=rp is OK!) up=1..rp+n-1 is bad.
+C Check for bad overlap (up=rp is OK!) up=rp+1..rp+n-1 is bad.
C FIXME: this could surely be done more cleverly.
mov rp, %rax
dec R32(%rax)
jne L(n00)
adc R32(%rax), R32(%rax)
+ FUNC_EXIT()
ret
L(e1): test R32(%rax), R32(%rax) C clear cy
L(n00): mov (up), %r8
adc %r8, %r8
mov %r8, (rp)
L(ret): adc R32(%rax), R32(%rax)
+ FUNC_EXIT()
ret
L(n01): dec R32(%rax)
mov 8(up), %r9
mov %r8, (rp)
mov %r9, 8(rp)
adc R32(%rax), R32(%rax)
+ FUNC_EXIT()
ret
L(n10): mov 16(up), %r10
adc %r8, %r8
mov %r9, 8(rp)
mov %r10, 16(rp)
adc $-1, R32(%rax)
+ FUNC_EXIT()
ret
L(gen): neg R32(%rcx) C put rsh count in cl
L(ast): mov (up), %r10
shl R8(%rcx), %r10
mov %r10, (rp)
+ FUNC_EXIT()
ret
EPILOGUE()
dnl AMD64 mpn_lshiftc -- mpn left shift with complement.
-dnl Copyright 2003, 2005, 2006, 2009 Free Software Foundation, Inc.
+dnl Copyright 2003, 2005, 2006, 2009, 2011, 2012 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
C cycles/limb
-C K8,K9: 2.75
-C K10: 2.75
-C P4: ?
-C P6-15 (Core2): ?
-C P6-28 (Atom): ?
+C AMD K8,K9 2.75
+C AMD K10 2.75
+C Intel P4 ?
+C Intel core2 ?
+C Intel corei ?
+C Intel atom ?
+C VIA nano 3.75
C INPUT PARAMETERS
define(`n', `%rdx')
define(`cnt', `%rcx')
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
ASM_START()
TEXT
ALIGN(32)
PROLOGUE(mpn_lshiftc)
+ FUNC_ENTRY(4)
neg R32(%rcx) C put rsh count in cl
mov -8(up,n,8), %rax
shr R8(%rcx), %rax C function return value
jae L(top) C 2
L(end):
neg R32(%rcx) C put rsh count in cl
- mov 16(up,n,8), %r8
+ mov 8(up), %r8
shr R8(%rcx), %r8
or %r8, %r10
- mov 8(up,n,8), %r9
+ mov (up), %r9
shr R8(%rcx), %r9
or %r9, %r11
not %r10
not %r11
- mov %r10, 24(rp,n,8)
- mov %r11, 16(rp,n,8)
+ mov %r10, 16(rp)
+ mov %r11, 8(rp)
neg R32(%rcx) C put lsh count in cl
L(ast): mov (up), %r10
shl R8(%rcx), %r10
not %r10
mov %r10, (rp)
+ FUNC_EXIT()
ret
EPILOGUE()
dnl AMD64 mpn_lshsub_n. R = 2^k(U - V).
-dnl Copyright 2006 Free Software Foundation, Inc.
+dnl Copyright 2006, 2011, 2012 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
C cycles/limb
-C K8,K9: 3.15 (mpn_sub_n + mpn_lshift costs about 4 c/l)
-C K10: 3.15 (mpn_sub_n + mpn_lshift costs about 4 c/l)
-C P4: 16.5
-C P6-15: 4.35
+C AMD K8,K9 3.15 (mpn_sub_n + mpn_lshift costs about 4 c/l)
+C AMD K10 3.15 (mpn_sub_n + mpn_lshift costs about 4 c/l)
+C Intel P4 16.5
+C Intel core2 4.35
+C Intel corei ?
+C Intel atom ?
+C VIA nano ?
C This was written quickly and not optimized at all, but it runs very well on
C K8. But perhaps one could get under 3 c/l. Ideas:
define(`n', `%rcx')
define(`cnt', `%r8')
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
ASM_START()
TEXT
ALIGN(16)
PROLOGUE(mpn_lshsub_n)
+ FUNC_ENTRY(4)
+IFDOS(` mov 56(%rsp), %r8d ')
push %r12
push %r13
push %rbx
mov n, %rax
- xor %ebx, %ebx C clear carry save register
- mov %r8d, %ecx C shift count
- xor %r15d, %r15d C limb carry
+ xor R32(%rbx), R32(%rbx) C clear carry save register
+ mov R32(%r8), R32(%rcx) C shift count
+ xor R32(%r15), R32(%r15) C limb carry
- mov %eax, %r11d
- and $3, %r11d
+ mov R32(%rax), R32(%r11)
+ and $3, R32(%r11)
je L(4)
- sub $1, %r11d
+ sub $1, R32(%r11)
L(oopette):
- add %ebx, %ebx C restore carry flag
+ add R32(%rbx), R32(%rbx) C restore carry flag
mov 0(up), %r8
lea 8(up), up
sbb 0(vp), %r8
mov %r8, %r12
- sbb %ebx, %ebx C save carry flag
- shl %cl, %r8
+ sbb R32(%rbx), R32(%rbx) C save carry flag
+ shl R8(%rcx), %r8
or %r15, %r8
mov %r12, %r15
lea 8(vp), vp
- neg %cl
- shr %cl, %r15
- neg %cl
+ neg R8(%rcx)
+ shr R8(%rcx), %r15
+ neg R8(%rcx)
mov %r8, 0(rp)
lea 8(rp), rp
- sub $1, %r11d
+ sub $1, R32(%r11)
jnc L(oopette)
L(4):
ALIGN(16)
L(oop):
- add %ebx, %ebx C restore carry flag
+ add R32(%rbx), R32(%rbx) C restore carry flag
mov 0(up), %r8
mov 8(up), %r9
mov %r10, %r14
sbb 24(vp), %r11
- sbb %ebx, %ebx C save carry flag
+ sbb R32(%rbx), R32(%rbx) C save carry flag
- shl %cl, %r8
- shl %cl, %r9
- shl %cl, %r10
+ shl R8(%rcx), %r8
+ shl R8(%rcx), %r9
+ shl R8(%rcx), %r10
or %r15, %r8
mov %r11, %r15
- shl %cl, %r11
+ shl R8(%rcx), %r11
lea 32(vp), vp
- neg %cl
+ neg R8(%rcx)
- shr %cl, %r12
- shr %cl, %r13
- shr %cl, %r14
- shr %cl, %r15 C used next loop
+ shr R8(%rcx), %r12
+ shr R8(%rcx), %r13
+ shr R8(%rcx), %r14
+ shr R8(%rcx), %r15 C used next loop
or %r12, %r9
or %r13, %r10
or %r14, %r11
- neg %cl
+ neg R8(%rcx)
mov %r8, 0(rp)
mov %r9, 8(rp)
sub $4, %rax
jnc L(oop)
L(end):
- neg %ebx
- shl %cl, %rbx
+ neg R32(%rbx)
+ shl R8(%rcx), %rbx
adc %r15, %rbx
mov %rbx, %rax
pop %rbx
pop %r13
pop %r12
+ FUNC_EXIT()
ret
EPILOGUE()
--- /dev/null
+dnl AMD64 mpn_mod_1_1p
+
+dnl Contributed to the GNU project by Torbjörn Granlund and Niels Möller.
+
+dnl Copyright 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C AMD K8,K9 6
+C AMD K10 6
+C Intel P4 26
+C Intel core2 12.5
+C Intel NHM 11.3
+C Intel SBR 8.4 (slowdown, old code took 8.0)
+C Intel atom 26
+C VIA nano 13
+
+define(`B2mb', `%r10')
+define(`B2modb', `%r11')
+define(`ap', `%rdi')
+define(`n', `%rsi')
+define(`pre', `%r8')
+define(`b', `%rbx')
+
+define(`r0', `%rbp') C r1 kept in %rax
+define(`r2', `%rcx') C kept negated. Also used as shift count
+define(`t0', `%r9')
+
+C mp_limb_t
+C mpn_mod_1_1p (mp_srcptr ap, mp_size_t n, mp_limb_t b, mp_limb_t bmodb[4])
+C %rdi %rsi %rdx %rcx
+C The pre array contains bi, cnt, B1modb, B2modb
+C Note: This implementation needs B1modb only when cnt > 0
+
+C The iteration is almost as follows,
+C
+C r_2 B^3 + r_1 B^2 + r_0 B + u = r_1 B2modb + (r_0 + r_2 B2modb) B + u
+C
+C where r2 is a single bit represented as a mask. But to make sure that the
+C result fits in two limbs and a bit, carry from the addition
+C
+C r_0 + r_2 B2modb
+C
+C is handled specially. On carry, we subtract b to cancel the carry,
+C and we use instead the value
+C
+C r_0 + B2mb (mod B)
+C
+C This addition can be issued early since it doesn't depend on r2, and it is
+C the source of the cmov in the loop.
+C
+C We have the invariant that r_2 B^2 + r_1 B + r_0 < B^2 + B b
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+ TEXT
+ ALIGN(16)
+C mpn_mod_1_1p folds the n limbs at ap into a single-limb result in %rax,
+C using the table pre[] = {bi, cnt, B1modb, B2modb} described in the header
+C comment of this file.  Register roles come from the definitions earlier in
+C the file: ap=%rdi, n=%rsi, b=%rbx, pre=%r8, r0=%rbp, r1=%rax, r2=%rcx,
+C t0=%r9, B2mb=%r10, B2modb=%r11.
+C NOTE(review): the n < 3 path loads ap[n-2], so n >= 2 looks required --
+C TODO confirm against callers.
+PROLOGUE(mpn_mod_1_1p)
+ FUNC_ENTRY(4)
+ C r0 is %rbp and b is %rbx; both are saved here and restored before ret.
+ push %rbp
+ push %rbx
+ C Move b and the table pointer out of %rdx and %rcx: mul overwrites
+ C %rdx, and %rcx serves as r2 and later as the shift count.
+ mov %rdx, b
+ mov %rcx, pre
+
+ C %rax = ap[n-1], the most significant limb.  With n < 3 there is
+ C nothing to fold: r0 = ap[n-2] and control goes to the final reduction.
+ mov -8(ap, n, 8), %rax
+ cmp $3, n
+ jnc L(first)
+ mov -16(ap, n, 8), r0
+ jmp L(reduce_two)
+
+L(first):
+ C First iteration, no r2
+ C %rax:r0 = ap[n-2]:ap[n-3] + ap[n-1] * B2modb, where B2modb = pre[3].
+ C The carry out of that sum is captured in r2 as a 0 / all-ones mask.
+ mov 24(pre), B2modb
+ mul B2modb
+ mov -24(ap, n, 8), r0
+ add %rax, r0
+ mov -16(ap, n, 8), %rax
+ adc %rdx, %rax
+ sbb r2, r2
+ C Three limbs are consumed; n becomes the index of the next lower limb.
+ C A borrow here means n was exactly 3, so the loop is skipped.
+ sub $4, n
+ jc L(reduce_three)
+
+ C B2mb = B2modb - b, the addend used in place of B2modb when
+ C r0 + B2modb carries.
+ mov B2modb, B2mb
+ sub b, B2mb
+
+ C Loop body, one limb per pass:
+ C   r2 = r0 + (B2modb if the r2 mask is set, else 0); if that addition
+ C        carries, r2 = r0 + B2mb instead (t0, selected by cmovc)
+ C   %rdx:%rax = r1 * B2modb
+ C   r0 = ap[n] + low product word
+ C   r1 = r2 + high product word + carry
+ C   r2 = 0 / all-ones mask of the carry out of r1
+ C The mov between add and cmovc does not modify flags.  The loop ends
+ C when n steps below zero.
+ ALIGN(16)
+L(top): and B2modb, r2
+ lea (B2mb, r0), t0
+ mul B2modb
+ add r0, r2
+ mov (ap, n, 8), r0
+ cmovc t0, r2
+ add %rax, r0
+ mov r2, %rax
+ adc %rdx, %rax
+ sbb r2, r2
+ sub $1, n
+ jnc L(top)
+
+L(reduce_three):
+ C Eliminate r2
+ C The mask selects either b or 0, which is then subtracted from r1.
+ and b, r2
+ sub r2, %rax
+
+L(reduce_two):
+ C %ecx = cnt = pre[1].  Zero means no normalization shift is needed.
+ mov 8(pre), R32(%rcx)
+ test R32(%rcx), R32(%rcx)
+ jz L(normalized)
+
+ C Unnormalized, use B1modb to reduce to size < B (b+1)
+ C %rax:r0 = r1 * B1modb + r0, where B1modb = pre[2].
+ mulq 16(pre)
+ xor t0, t0
+ add %rax, r0
+ adc %rdx, t0
+ mov t0, %rax
+
+ C Left-shift to normalize
+ C Both variants shift the pair %rax:r0 left by cnt.  The SHLD_SLOW
+ C variant negates the count to obtain a right shift by 64 - cnt and
+ C negates it back, so %rcx holds cnt again afterwards.
+ifdef(`SHLD_SLOW',`
+ shl R8(%rcx), %rax
+ mov r0, t0
+ neg R32(%rcx)
+ shr R8(%rcx), t0
+ or t0, %rax
+ neg R32(%rcx)
+',`
+ shld R8(%rcx), r0, %rax
+')
+ shl R8(%rcx), r0
+ jmp L(udiv)
+
+L(normalized):
+ C Subtract b from r1 once, keeping the difference only when it does not
+ C borrow.
+ mov %rax, t0
+ sub b, t0
+ cmovnc t0, %rax
+
+L(udiv):
+ C Division step using bi = pre[0]:
+ C   q = high(r1 * bi) + r1 + 1 + carry of (low(r1 * bi) + r0)
+ C   r0 -= q * b
+ C   %rax = r0 if r0 does not exceed the low sum, else r0 + b
+ C   if %rax >= b, L(fix) subtracts b once more
+ C NOTE(review): b is used here as the divisor of the shifted value, so
+ C callers presumably pass b already shifted left by cnt -- TODO confirm.
+ lea 1(%rax), t0
+ mulq (pre)
+ add r0, %rax
+ adc t0, %rdx
+ imul b, %rdx
+ sub %rdx, r0
+ cmp r0, %rax
+ lea (b, r0), %rax
+ cmovnc r0, %rax
+ cmp b, %rax
+ jnc L(fix)
+ C Undo the normalization shift; %rcx holds cnt (0 on the normalized path).
+L(ok): shr R8(%rcx), %rax
+
+ pop %rbx
+ pop %rbp
+ FUNC_EXIT()
+ ret
+L(fix): sub b, %rax
+ jmp L(ok)
+EPILOGUE()
+
+ ALIGN(16)
+C mpn_mod_1_1p_cps fills the four-entry table read by mpn_mod_1_1p.
+C After FUNC_ENTRY, %rdi is the table pointer and %rsi is the divisor b.
+C Byte offsets written: 0 = bi, 8 = cnt, 16 = B1modb (only when cnt != 0),
+C 24 = B2modb.
+C NOTE(review): bsr leaves its destination undefined for a zero source, so
+C b is presumably required to be nonzero -- TODO confirm.
+PROLOGUE(mpn_mod_1_1p_cps)
+ FUNC_ENTRY(2)
+ C %rbp, %rbx and %r12 carry cnt, the table pointer and the shifted b
+ C across the call to mpn_invert_limb; all three are restored at L(z).
+ push %rbp
+ C %rcx = index of the highest set bit of b; the xor with 63 below turns
+ C it into the number of leading zero bits (cnt).
+ bsr %rsi, %rcx
+ push %rbx
+ mov %rdi, %rbx
+ push %r12
+ xor $63, R32(%rcx)
+ mov %rsi, %r12
+ mov R32(%rcx), R32(%rbp)
+ sal R8(%rcx), %r12
+IFSTD(` mov %r12, %rdi ') C pass parameter
+IFDOS(` mov %r12, %rcx ') C pass parameter
+ CALL( mpn_invert_limb)
+ C %rax = bi.  %r12 and %r8 become -(b << cnt); B2modb is the low word
+ C of -(b << cnt) * bi.
+ neg %r12
+ mov %r12, %r8
+ mov %rax, (%rbx) C store bi
+ mov %rbp, 8(%rbx) C store cnt
+ imul %rax, %r12
+ mov %r12, 24(%rbx) C store B2modb
+ C A zero cnt skips B1modb; the header comment of this file notes that it
+ C is needed only when cnt > 0.
+ mov R32(%rbp), R32(%rcx)
+ test R32(%rcx), R32(%rcx)
+ jz L(z)
+
+ C %rdx = (1 << cnt) | (bi >> (64 - cnt)), by shld or by the emulation
+ C below.  B1modb = (-(b << cnt) * %rdx) >> cnt.
+ mov $1, R32(%rdx)
+ifdef(`SHLD_SLOW',`
+ C Destroys %rax, unlike shld. Otherwise, we could do B1modb
+ C before B2modb, and get rid of the move %r12, %r8 above.
+
+ shl R8(%rcx), %rdx
+ neg R32(%rcx)
+ shr R8(%rcx), %rax
+ or %rax, %rdx
+ neg R32(%rcx)
+',`
+ shld R8(%rcx), %rax, %rdx
+')
+ imul %rdx, %r8
+ shr R8(%rcx), %r8
+ mov %r8, 16(%rbx) C store B1modb
+L(z):
+ pop %r12
+ pop %rbx
+ pop %rbp
+ FUNC_EXIT()
+ ret
+EPILOGUE()
+ASM_END()
--- /dev/null
+dnl AMD64 mpn_mod_1s_2p
+
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
+dnl Copyright 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C cycles/limb
+C AMD K8,K9 4
+C AMD K10 4
+C Intel P4 19
+C Intel core2 8
+C Intel NHM 6.5
+C Intel SBR 4.5
+C Intel atom 28
+C VIA nano 8
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+ TEXT
+ ALIGN(16)
+C mpn_mod_1s_2p reduces the n limbs at ap two at a time and returns the
+C result in %rax.  After FUNC_ENTRY: %rdi = ap, %rsi = n, %rdx = b (kept in
+C %r14), %rcx = constants table (kept in %r13).  Table byte offsets, as
+C written by mpn_mod_1s_2p_cps: 0 = bi, 8 = cnt, 16 = B1modb, 24 = B2modb,
+C 32 = B3modb.
+PROLOGUE(mpn_mod_1s_2p)
+ FUNC_ENTRY(4)
+ push %r14
+ C Parity of n.  The flags are consumed by je L(b0) below; the mov and
+ C push instructions in between do not modify flags.
+ test $1, R8(%rsi)
+ mov %rdx, %r14
+ push %r13
+ mov %rcx, %r13
+ push %r12
+ push %rbp
+ push %rbx
+ C %r10 = B1modb, %rbx = B2modb, %rbp = B3modb.
+ mov 16(%rcx), %r10
+ mov 24(%rcx), %rbx
+ mov 32(%rcx), %rbp
+ je L(b0)
+ C Odd n.  n == 1 is handled at L(one).  Otherwise start from the top
+ C three limbs: %r8:%r9 = ap[n-3] + ap[n-2] * B1modb + ap[n-1] * B2modb
+ C (indices relative to the n passed in; %rsi is n-1 from here on).
+ dec %rsi
+ je L(one)
+ mov -8(%rdi,%rsi,8), %rax
+ mul %r10
+ mov %rax, %r9
+ mov %rdx, %r8
+ mov (%rdi,%rsi,8), %rax
+ add -16(%rdi,%rsi,8), %r9
+ adc $0, %r8
+ mul %rbx
+ add %rax, %r9
+ adc %rdx, %r8
+ jmp L(11)
+
+ C Even n: start from the top two limbs, %r8 = ap[n-1], %r9 = ap[n-2].
+L(b0): mov -8(%rdi,%rsi,8), %r8
+ mov -16(%rdi,%rsi,8), %r9
+
+ C A borrow here means no limbs remain beyond those already loaded.
+ C Otherwise point %rdi five limbs above index %rsi and load the next two
+ C limbs into %r11 (low) and %rax (high).
+L(11): sub $4, %rsi
+ jb L(ed2)
+ lea 40(%rdi,%rsi,8), %rdi
+ mov -40(%rdi), %r11
+ mov -32(%rdi), %rax
+ jmp L(m0)
+
+ C Main loop.  The two-limb accumulator alternates between %r8:%r9 and
+ C %r12:%r11 (high:low).  Each half, L(top) and L(m0), computes
+ C   new = low limb + high limb * B1modb
+ C         + old low * B2modb + old high * B3modb
+ C from the next two source limbs.  The old high * B3modb product is left
+ C in %rdx:%rax and added at the start of the following half or at L(ed1).
+ ALIGN(16)
+L(top): mov -24(%rdi), %r9
+ add %rax, %r11
+ mov -16(%rdi), %rax
+ adc %rdx, %r12
+ mul %r10
+ add %rax, %r9
+ mov %r11, %rax
+ mov %rdx, %r8
+ adc $0, %r8
+ mul %rbx
+ add %rax, %r9
+ mov %r12, %rax
+ adc %rdx, %r8
+ mul %rbp
+ sub $2, %rsi
+ jb L(ed1)
+ mov -40(%rdi), %r11
+ add %rax, %r9
+ mov -32(%rdi), %rax
+ adc %rdx, %r8
+L(m0): mul %r10
+ add %rax, %r11
+ mov %r9, %rax
+ mov %rdx, %r12
+ adc $0, %r12
+ mul %rbx
+ add %rax, %r11
+ lea -32(%rdi), %rdi C ap -= 4
+ mov %r8, %rax
+ adc %rdx, %r12
+ mul %rbp
+ sub $2, %rsi
+ jae L(top)
+
+ C L(ed0): the loop ended after the L(m0) half, so the accumulator is in
+ C %r12:%r11; move it to %r8:%r9.  L(ed1): add the pending product.
+L(ed0): mov %r11, %r9
+ mov %r12, %r8
+L(ed1): add %rax, %r9
+ adc %rdx, %r8
+ C %edi = cnt.  Then %rdx:%r8 = low limb + high limb * B1modb.
+L(ed2): mov 8(%r13), R32(%rdi) C cnt
+ mov %r8, %rax
+ mov %r9, %r8
+ mul %r10
+ add %rax, %r8
+ adc $0, %rdx
+ C Shift the two-limb value %rdx:%r8 left by cnt: %r9 = new high limb,
+ C %r8 = new low limb.  %ecx = -cnt makes shr shift by 64 - cnt, since
+ C the count is taken modulo 64.
+ C NOTE(review): with cnt == 0 the shr would shift by 0 and OR the whole
+ C low limb into %r9, so cnt > 0 is presumably guaranteed -- TODO confirm.
+L(1): xor R32(%rcx), R32(%rcx)
+ mov %r8, %r9
+ sub R32(%rdi), R32(%rcx)
+ shr R8(%rcx), %r9
+ mov R32(%rdi), R32(%rcx)
+ sal R8(%rcx), %rdx
+ or %rdx, %r9
+ sal R8(%rcx), %r8
+ C Division step with bi = table[0] and b = %r14:
+ C   q = high(%r9 * bi) + %r9 + 1 + carry of (low(%r9 * bi) + %r8)
+ C   %r8 -= q * b; b is added back if %r8 then exceeds %rsi (the low sum)
+ C   %rax = %r8 - b unless that borrows, in which case %rax = %r8
+ mov %r9, %rax
+ mulq (%r13)
+ mov %rax, %rsi
+ inc %r9
+ add %r8, %rsi
+ adc %r9, %rdx
+ imul %r14, %rdx
+ sub %rdx, %r8
+ lea (%r8,%r14), %rax
+ cmp %r8, %rsi
+ cmovc %rax, %r8
+ mov %r8, %rax
+ sub %r14, %rax
+ cmovc %r8, %rax
+ C Undo the left shift by cnt.
+ mov R32(%rdi), R32(%rcx)
+ shr R8(%rcx), %rax
+ pop %rbx
+ pop %rbp
+ pop %r12
+ pop %r13
+ pop %r14
+ FUNC_EXIT()
+ ret
+ C n == 1: the value is 0:ap[0].  %rcx still holds the table pointer here,
+ C so cnt is loaded through it before joining the common tail.
+L(one):
+ mov (%rdi), %r8
+ mov 8(%rcx), R32(%rdi)
+ xor %rdx, %rdx
+ jmp L(1)
+EPILOGUE()
+
+ ALIGN(16)
+C mpn_mod_1s_2p_cps fills the five-entry table read by mpn_mod_1s_2p.
+C After FUNC_ENTRY, %rdi is the table pointer and %rsi is the divisor b.
+C Byte offsets written: 0 = bi, 8 = cnt, 16 = B1modb, 24 = B2modb,
+C 32 = B3modb.
+C NOTE(review): bsr leaves its destination undefined for a zero source, so
+C b is presumably required to be nonzero -- TODO confirm.
+PROLOGUE(mpn_mod_1s_2p_cps)
+ FUNC_ENTRY(2)
+ push %rbp
+ C %rcx = index of the highest set bit of b; the xor with 63 below turns
+ C it into the number of leading zero bits (cnt).
+ bsr %rsi, %rcx
+ push %rbx
+ mov %rdi, %rbx
+ push %r12
+ xor $63, R32(%rcx)
+ mov %rsi, %r12
+ mov R32(%rcx), R32(%rbp) C preserve cnt over call
+ sal R8(%rcx), %r12 C b << cnt
+IFSTD(` mov %r12, %rdi ') C pass parameter
+IFDOS(` mov %r12, %rcx ') C pass parameter
+ CALL( mpn_invert_limb)
+ C %rax = bi, also kept in %r11.  %r8 becomes -(b << cnt).
+ mov %r12, %r8
+ mov %rax, %r11
+ mov %rax, (%rbx) C store bi
+ mov %rbp, 8(%rbx) C store cnt
+ neg %r8
+ mov R32(%rbp), R32(%rcx)
+ C %rsi = (1 << cnt) | (bi >> (64 - cnt)), by shld or by the emulation
+ C below.
+ mov $1, R32(%rsi)
+ifdef(`SHLD_SLOW',`
+ C %rbp is free as scratch here: cnt is already stored and copied to
+ C %rcx.  It saves bi around the shr, which would otherwise destroy it.
+ shl R8(%rcx), %rsi
+ neg R32(%rcx)
+ mov %rax, %rbp
+ shr R8(%rcx), %rax
+ or %rax, %rsi
+ mov %rbp, %rax
+ neg R32(%rcx)
+',`
+ shld R8(%rcx), %rax, %rsi C FIXME: Slow on Atom and Nano
+')
+ C x1 = -(b << cnt) * %rsi (low word).  The stored constant is x1 >> cnt.
+ C The mul starts the step that derives the next value from x1.
+ imul %r8, %rsi
+ mul %rsi
+
+ add %rsi, %rdx
+ shr R8(%rcx), %rsi
+ mov %rsi, 16(%rbx) C store B1modb
+
+ C Step from x to the next value, with d = b << cnt in %r12:
+ C   t = ~(high(bi * x) + x) * d
+ C   next = t if low(bi * x) >= t, else t + d
+ C NOTE(review): this looks like one reduction of x * B modulo d using the
+ C precomputed inverse -- TODO confirm.
+ not %rdx
+ imul %r12, %rdx
+ lea (%rdx,%r12), %rsi
+ cmp %rdx, %rax
+ cmovnc %rdx, %rsi
+ mov %r11, %rax
+ mul %rsi
+
+ add %rsi, %rdx
+ shr R8(%rcx), %rsi
+ mov %rsi, 24(%rbx) C store B2modb
+
+ C Same step once more; the result is built in %r12 because d is not
+ C needed afterwards.
+ not %rdx
+ imul %r12, %rdx
+ add %rdx, %r12
+ cmp %rdx, %rax
+ cmovnc %rdx, %r12
+
+ shr R8(%rcx), %r12
+ mov %r12, 32(%rbx) C store B3modb
+
+ pop %r12
+ pop %rbx
+ pop %rbp
+ FUNC_EXIT()
+ ret
+EPILOGUE()
dnl Contributed to the GNU project by Torbjorn Granlund.
-dnl Copyright 2009 Free Software Foundation, Inc.
+dnl Copyright 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
include(`../config.m4')
C cycles/limb
-C K8,K9: 3.0
-C K10: 3.0
-C P4: 14.5
-C P6 core2: 5.0
-C P6 corei7: 4.3
-C P6 atom: 25.0
+C AMD K8,K9 3
+C AMD K10 3
+C Intel P4 15.5
+C Intel core2 5
+C Intel corei 4
+C Intel atom 23
+C VIA nano 4.75
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
ASM_START()
TEXT
ALIGN(16)
PROLOGUE(mpn_mod_1s_4p)
+ FUNC_ENTRY(4)
+ push %r15
push %r14
push %r13
push %r12
push %rbp
push %rbx
- mov %rdx, -16(%rsp)
+ mov %rdx, %r15
mov %rcx, %r14
- mov 16(%rcx), %r11
- mov 24(%rcx), %rbx
- mov 32(%rcx), %rbp
- mov 40(%rcx), %r13
- mov 48(%rcx), %r12
+ mov 16(%rcx), %r11 C B1modb
+ mov 24(%rcx), %rbx C B2modb
+ mov 32(%rcx), %rbp C B3modb
+ mov 40(%rcx), %r13 C B4modb
+ mov 48(%rcx), %r12 C B5modb
xor R32(%r8), R32(%r8)
mov R32(%rsi), R32(%rdx)
and $3, R32(%rdx)
ALIGN(8)
L(b2): lea -16(%rdi,%rsi,8), %rdi
- mov 8(%rdi), %rax
- mul %r11
+ mov 8(%rdi), %r8
mov (%rdi), %r9
- jmp L(m0)
+ jmp L(m1)
ALIGN(16)
L(top): mov -24(%rdi), %rax
mov -32(%rdi), %r10
- mul %r11
+ mul %r11 C up[1] * B1modb
add %rax, %r10
mov -16(%rdi), %rax
- mov %rdx, %rcx
- adc $0, %rcx
- mul %rbx
+ mov $0, R32(%rcx)
+ adc %rdx, %rcx
+ mul %rbx C up[2] * B2modb
add %rax, %r10
mov -8(%rdi), %rax
adc %rdx, %rcx
sub $32, %rdi
- mul %rbp
+ mul %rbp C up[3] * B3modb
add %rax, %r10
- mov %r9, %rax
+ mov %r13, %rax
adc %rdx, %rcx
- mul %r13
+ mul %r9 C rl * B4modb
add %rax, %r10
- mov %r8, %rax
+ mov %r12, %rax
adc %rdx, %rcx
- mul %r12
+ mul %r8 C rh * B5modb
mov %r10, %r9
mov %rcx, %r8
L(m0): add %rax, %r9
or %rdx, %rdi
mov %rdi, %rax
mulq (%r14)
- mov -16(%rsp), %rbx
+ mov %r15, %rbx
mov %rax, %r9
sal R8(%rcx), %r8
inc %rdi
sub %rdx, %r8
lea (%r8,%rbx), %rax
cmp %r8, %r9
- cmovb %rax, %r8
+ cmovc %rax, %r8
mov %r8, %rax
sub %rbx, %rax
- cmovb %r8, %rax
+ cmovc %r8, %rax
shr R8(%rcx), %rax
pop %rbx
pop %rbp
pop %r12
pop %r13
pop %r14
+ pop %r15
+ FUNC_EXIT()
ret
EPILOGUE()
ALIGN(16)
PROLOGUE(mpn_mod_1s_4p_cps)
- push %r12
- bsr %rsi, %rcx
+ FUNC_ENTRY(2)
push %rbp
- xor $63, R32(%rcx)
- mov %rsi, %rbp
- mov R32(%rcx), R32(%r12)
- sal R8(%rcx), %rbp
+ bsr %rsi, %rcx
push %rbx
mov %rdi, %rbx
- mov %rbp, %rdi
+ push %r12
+ xor $63, R32(%rcx)
+ mov %rsi, %r12
+ mov R32(%rcx), R32(%rbp) C preserve cnt over call
+ sal R8(%rcx), %r12 C b << cnt
+IFSTD(` mov %r12, %rdi ') C pass parameter
+IFDOS(` mov %r12, %rcx ') C pass parameter
CALL( mpn_invert_limb)
- mov R32(%r12), R32(%rcx)
- mov $1, R32(%r10)
- sal R8(%rcx), %r10
- mov $64, R32(%rcx)
- mov %rax, %r9
- sub R32(%r12), R32(%rcx)
- mov %r9, (%rbx)
+ mov %r12, %r8
+ mov %rax, %r11
+ mov %rax, (%rbx) C store bi
+ mov %rbp, 8(%rbx) C store cnt
+ neg %r8
+ mov R32(%rbp), R32(%rcx)
+ mov $1, R32(%rsi)
+ifdef(`SHLD_SLOW',`
+ shl R8(%rcx), %rsi
+ neg R32(%rcx)
+ mov %rax, %rbp
shr R8(%rcx), %rax
- mov R32(%r12), R32(%rcx)
- or %rax, %r10
+ or %rax, %rsi
mov %rbp, %rax
- neg %rax
- imul %rax, %r10
- mov %r10, %rax
- mul %r9
- lea 1(%r10,%rdx), %r8
- neg %r8
- imul %rbp, %r8
- cmp %r8, %rax
- lea (%r8,%rbp), %rdx
- cmovb %rdx, %r8
- mov %r8, %rax
- mul %r9
- lea 1(%r8,%rdx), %rdi
- neg %rdi
- imul %rbp, %rdi
- cmp %rdi, %rax
- lea (%rdi,%rbp), %rdx
- cmovb %rdx, %rdi
- mov %rdi, %rax
- mul %r9
- lea 1(%rdi,%rdx), %rsi
- neg %rsi
- imul %rbp, %rsi
- cmp %rsi, %rax
- lea (%rsi,%rbp), %rdx
- cmovb %rdx, %rsi
- mov %rsi, %rax
- mul %r9
- lea 1(%rsi,%rdx), %rdx
- neg %rdx
- imul %rbp, %rdx
+ neg R32(%rcx)
+',`
+ shld R8(%rcx), %rax, %rsi C FIXME: Slow on Atom and Nano
+')
+ imul %r8, %rsi
+ mul %rsi
+
+ add %rsi, %rdx
+ shr R8(%rcx), %rsi
+ mov %rsi, 16(%rbx) C store B1modb
+
+ not %rdx
+ imul %r12, %rdx
+ lea (%rdx,%r12), %rsi
cmp %rdx, %rax
- lea (%rdx,%rbp), %rbp
- movslq R32(%r12), %rax
- cmovae %rdx, %rbp
- shr R8(%rcx), %r10
- shr R8(%rcx), %r8
- shr R8(%rcx), %rbp
- shr R8(%rcx), %rdi
+ cmovnc %rdx, %rsi
+ mov %r11, %rax
+ mul %rsi
+
+ add %rsi, %rdx
+ shr R8(%rcx), %rsi
+ mov %rsi, 24(%rbx) C store B2modb
+
+ not %rdx
+ imul %r12, %rdx
+ lea (%rdx,%r12), %rsi
+ cmp %rdx, %rax
+ cmovnc %rdx, %rsi
+ mov %r11, %rax
+ mul %rsi
+
+ add %rsi, %rdx
shr R8(%rcx), %rsi
- mov %rbp, 48(%rbx)
- mov %rax, 8(%rbx)
- mov %r10, 16(%rbx)
- mov %r8, 24(%rbx)
- mov %rdi, 32(%rbx)
- mov %rsi, 40(%rbx)
+ mov %rsi, 32(%rbx) C store B3modb
+
+ not %rdx
+ imul %r12, %rdx
+ lea (%rdx,%r12), %rsi
+ cmp %rdx, %rax
+ cmovnc %rdx, %rsi
+ mov %r11, %rax
+ mul %rsi
+
+ add %rsi, %rdx
+ shr R8(%rcx), %rsi
+ mov %rsi, 40(%rbx) C store B4modb
+
+ not %rdx
+ imul %r12, %rdx
+ add %rdx, %r12
+ cmp %rdx, %rax
+ cmovnc %rdx, %r12
+
+ shr R8(%rcx), %r12
+ mov %r12, 48(%rbx) C store B5modb
+
+ pop %r12
pop %rbx
pop %rbp
- pop %r12
+ FUNC_EXIT()
ret
EPILOGUE()
dnl AMD64 mpn_mod_34lsub1 -- remainder modulo 2^48-1.
-dnl Copyright 2000, 2001, 2002, 2004, 2005, 2007 Free Software Foundation,
-dnl Inc.
-dnl
+dnl Copyright 2000, 2001, 2002, 2004, 2005, 2007, 2009, 2010, 2011, 2012 Free
+dnl Software Foundation, Inc.
+
dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or
-dnl modify it under the terms of the GNU Lesser General Public License as
-dnl published by the Free Software Foundation; either version 3 of the
-dnl License, or (at your option) any later version.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful,
-dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
-dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-dnl Lesser General Public License for more details.
-dnl
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
dnl You should have received a copy of the GNU Lesser General Public License
dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
-C cycles/limb
-C K8,K9: 1.0
-C K10: 1.12
-C P4: 3.25
-C P6-15 (Core2): 1.5
-C P6-28 (Atom): 2.5
-
+C cycles/limb
+C AMD K8,K9 0.67 0.583 is possible with zero-reg instead of $0, 4-way
+C AMD K10 0.67 this seems hard to beat
+C AMD bd1 1
+C AMD bobcat 1.07
+C Intel P4 7.35 terrible, use old code
+C Intel core2 1.25 1+epsilon with huge unrolling
+C Intel NHM 1.15 this seems hard to beat
+C Intel SBR 0.93
+C Intel atom 2.5
+C VIA nano 1.25 this seems hard to beat
C INPUT PARAMETERS
-C up rdi
-C n rsi
+define(`ap', %rdi)
+define(`n', %rsi)
C mp_limb_t mpn_mod_34lsub1 (mp_srcptr up, mp_size_t n)
C TODO
-C * Apply the movzwl tricks to the x86/k7 code
-C * Review feed-in and wind-down code. In particular, try to avoid adc and
-C sbb to placate Pentium4.
-C * More unrolling and/or index addressing could bring time to under 1 c/l
-C for Athlon64, approaching 0.67 c/l seems possible.
-C * There are recurrencies on the carry registers (r8, r9, r10) that might
-C be the limiting factor for the Pentium4 speed. Splitting these into 6
-C registers would help.
-C * For ultimate Athlon64 performance, a sequence like this might be best.
-C It should reach 0.5 c/l (limited by L1 cache bandwidth).
-C
-C add (%rdi), %rax
-C adc 8(%rdi), %rcx
-C adc 16(%rdi), %rdx
-C adc $0, %r8
-C add 24(%rdi), %rax
-C adc 32(%rdi), %rcx
-C adc 40(%rdi), %rdx
-C adc $0, %r8
-C ...
+C * Review feed-in and wind-down code.
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
ASM_START()
TEXT
ALIGN(32)
PROLOGUE(mpn_mod_34lsub1)
+ FUNC_ENTRY(2)
mov $0x0000FFFFFFFFFFFF, %r11
- sub $2, %rsi
+ mov (ap), %rax
+
+ cmp $2, %rsi
ja L(gt2)
- mov (%rdi), %rax
- nop
- jb L(1)
+ jb L(one)
- mov 8(%rdi), %rsi
+ mov 8(ap), %rsi
mov %rax, %rdx
shr $48, %rax C src[0] low
and %r11, %rdx C src[0] high
add %rdx, %rax
- mov %esi, %edx
+ mov R32(%rsi), R32(%rdx)
shr $32, %rsi C src[1] high
add %rsi, %rax
shl $16, %rdx C src[1] low
add %rdx, %rax
+L(one): FUNC_EXIT()
+ ret
-L(1): ret
+C Don't change this, the wind-down code is not able to handle greater values
+define(UNROLL,3)
- ALIGN(16)
-L(gt2): xor %eax, %eax
- xor %ecx, %ecx
- xor %edx, %edx
- xor %r8, %r8
+L(gt2): mov 8(ap), %rcx
+ mov 16(ap), %rdx
xor %r9, %r9
- xor %r10, %r10
-
-L(top): add (%rdi), %rax
- adc $0, %r10
- add 8(%rdi), %rcx
- adc $0, %r8
- add 16(%rdi), %rdx
+ add $24, ap
+ sub $eval(UNROLL*3+3), %rsi
+ jc L(end)
+ ALIGN(16)
+L(top):
+ add (ap), %rax
+ adc 8(ap), %rcx
+ adc 16(ap), %rdx
adc $0, %r9
-
- sub $3,%rsi
- jng L(end)
-
- add 24(%rdi), %rax
- adc $0, %r10
- add 32(%rdi), %rcx
- adc $0, %r8
- add 40(%rdi), %rdx
- lea 48(%rdi), %rdi
+forloop(i,1,UNROLL-1,`dnl
+ add eval(i*24)(ap), %rax
+ adc eval(i*24+8)(ap), %rcx
+ adc eval(i*24+16)(ap), %rdx
adc $0, %r9
+')dnl
+ add $eval(UNROLL*24), ap
+ sub $eval(UNROLL*3), %rsi
+ jnc L(top)
+
+L(end):
+ lea L(tab)(%rip), %r8
+ifdef(`PIC',
+` movslq 36(%r8,%rsi,4), %r10
+ add %r10, %r8
+ jmp *%r8
+',`
+ jmp *72(%r8,%rsi,8)
+')
+ JUMPTABSECT
+ ALIGN(8)
+L(tab): JMPENT( L(0), L(tab))
+ JMPENT( L(1), L(tab))
+ JMPENT( L(2), L(tab))
+ JMPENT( L(3), L(tab))
+ JMPENT( L(4), L(tab))
+ JMPENT( L(5), L(tab))
+ JMPENT( L(6), L(tab))
+ JMPENT( L(7), L(tab))
+ JMPENT( L(8), L(tab))
+ TEXT
- sub $3,%rsi
- jg L(top)
-
-
- add $-24, %rdi
-L(end): add %r9, %rax
- adc %r10, %rcx
- adc %r8, %rdx
-
- inc %rsi
- mov $0x1, %r10d
- js L(combine)
-
- mov $0x10000, %r10d
- adc 24(%rdi), %rax
- dec %rsi
- js L(combine)
+L(6): add (ap), %rax
+ adc 8(ap), %rcx
+ adc 16(ap), %rdx
+ adc $0, %r9
+ add $24, ap
+L(3): add (ap), %rax
+ adc 8(ap), %rcx
+ adc 16(ap), %rdx
+ jmp L(cj1)
+
+L(7): add (ap), %rax
+ adc 8(ap), %rcx
+ adc 16(ap), %rdx
+ adc $0, %r9
+ add $24, ap
+L(4): add (ap), %rax
+ adc 8(ap), %rcx
+ adc 16(ap), %rdx
+ adc $0, %r9
+ add $24, ap
+L(1): add (ap), %rax
+ adc $0, %rcx
+ jmp L(cj2)
+
+L(8): add (ap), %rax
+ adc 8(ap), %rcx
+ adc 16(ap), %rdx
+ adc $0, %r9
+ add $24, ap
+L(5): add (ap), %rax
+ adc 8(ap), %rcx
+ adc 16(ap), %rdx
+ adc $0, %r9
+ add $24, ap
+L(2): add (ap), %rax
+ adc 8(ap), %rcx
- adc 32(%rdi), %rcx
- mov $0x100000000, %r10
+L(cj2): adc $0, %rdx
+L(cj1): adc $0, %r9
+L(0): add %r9, %rax
+ adc $0, %rcx
+ adc $0, %rdx
+ adc $0, %rax
-L(combine):
- sbb %rsi, %rsi C carry
mov %rax, %rdi C 0mod3
shr $48, %rax C 0mod3 high
- and %r10, %rsi C carry masked
and %r11, %rdi C 0mod3 low
- mov %ecx, %r10d C 1mod3
+ mov R32(%rcx), R32(%r10) C 1mod3
- add %rsi, %rax C apply carry
shr $32, %rcx C 1mod3 high
add %rdi, %rax C apply 0mod3 low
- movzwl %dx, %edi C 2mod3
+ movzwl %dx, R32(%rdi) C 2mod3
shl $16, %r10 C 1mod3 low
add %rcx, %rax C apply 1mod3 high
add %rdx, %rax C apply 2mod3 high
add %rdi, %rax C apply 2mod3 low
+ FUNC_EXIT()
ret
EPILOGUE()
-dnl AMD64 mpn_modexact_1_odd -- exact division style remainder.
+dnl AMD64 mpn_modexact_1_odd -- Hensel norm remainder.
+
+dnl Copyright 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2011, 2012 Free
+dnl Software Foundation, Inc.
-dnl Copyright 2000, 2001, 2002, 2003, 2004, 2005, 2006 Free Software
-dnl Foundation, Inc.
-dnl
dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or
-dnl modify it under the terms of the GNU Lesser General Public License as
-dnl published by the Free Software Foundation; either version 3 of the
-dnl License, or (at your option) any later version.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful,
-dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
-dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-dnl Lesser General Public License for more details.
-dnl
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
dnl You should have received a copy of the GNU Lesser General Public License
dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
C cycles/limb
-C K8,K9: 10
-C K10: 10
-C P4: 33
-C P6 core2: 13
-C P6 corei7: 14.5
-C P6 Atom: 35
-
-
-C mp_limb_t mpn_modexact_1_odd (mp_srcptr src, mp_size_t size,
-C mp_limb_t divisor);
-C mp_limb_t mpn_modexact_1c_odd (mp_srcptr src, mp_size_t size,
-C mp_limb_t divisor, mp_limb_t carry);
-C
-C
+C AMD K8,K9 10
+C AMD K10 10
+C Intel P4 33
+C Intel core2 13
+C Intel corei 14.5
+C Intel atom 35
+C VIA nano ?
+
+
C The dependent chain in the main loop is
C
C cycles
-C subq %rdx, %rax 1
-C imulq %r9, %rax 4
-C mulq %r8 5
+C sub %rdx, %rax 1
+C imul %r9, %rax 4
+C mul %r8 5
C ----
C total 10
C
-C The movq load from src seems to need to be scheduled back before the jz to
-C achieve this speed, out-of-order execution apparently can't completely
-C hide the latency otherwise.
+C The mov load from src seems to need to be scheduled back before the jz to
+C achieve this speed, out-of-order execution apparently can't completely hide
+C the latency otherwise.
C
-C The l=src[i]-cbit step is rotated back too, since that allows us to avoid
-C it for the first iteration (where there's no cbit).
+C The l=src[i]-cbit step is rotated back too, since that allows us to avoid it
+C for the first iteration (where there's no cbit).
C
-C The code alignment used (32-byte) for the loop also seems necessary.
-C Without that the non-PIC case has adcq crossing the 0x60 offset,
-C apparently making it run at 11 cycles instead of 10.
-C
-C Not done:
-C
-C divq for size==1 was measured at about 79 cycles, compared to the inverse
-C at about 25 cycles (both including function call overheads), so that's not
-C used.
-C
-C Enhancements:
-C
-C For PIC, we shouldn't really need the GOT fetch for binvert_limb_table,
-C it'll be in rodata or text in libgmp.so and can be accessed directly %rip
-C relative. This would be for small model only (something we don't
-C presently detect, but which is all that gcc 3.3.3 supports), since 8-byte
-C PC-relative relocations are apparently not available. Some rough
-C experiments with binutils 2.13 looked worrylingly like it might come out
-C with an unwanted text segment relocation though, even with ".protected".
+C The code alignment used (32-byte) for the loop also seems necessary. Without
+C that the non-PIC case has adc crossing the 0x60 offset, apparently making it
+C run at 11 cycles instead of 10.
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
ASM_START()
TEXT
ALIGN(32)
PROLOGUE(mpn_modexact_1_odd)
-
- movl $0, %ecx
+ FUNC_ENTRY(3)
+ mov $0, R32(%rcx)
+IFDOS(` jmp L(ent) ')
PROLOGUE(mpn_modexact_1c_odd)
-
+ FUNC_ENTRY(4)
+L(ent):
C rdi src
C rsi size
C rdx divisor
C rcx carry
- movq %rdx, %r8 C d
- shrl %edx C d/2
-ifdef(`PIC',`
- movq binvert_limb_table@GOTPCREL(%rip), %r9
-',`
- movabsq $binvert_limb_table, %r9
-')
+ mov %rdx, %r8 C d
+ shr R32(%rdx) C d/2
+
+ LEA( binvert_limb_table, %r9)
- andl $127, %edx
- movq %rcx, %r10 C initial carry
+ and $127, R32(%rdx)
+ mov %rcx, %r10 C initial carry
- movzbl (%r9,%rdx), %edx C inv 8 bits
+ movzbl (%r9,%rdx), R32(%rdx) C inv 8 bits
- movq (%rdi), %rax C src[0]
- leaq (%rdi,%rsi,8), %r11 C src end
- movq %r8, %rdi C d, made available to imull
+ mov (%rdi), %rax C src[0]
+ lea (%rdi,%rsi,8), %r11 C src end
+ mov %r8, %rdi C d, made available to imull
- leal (%rdx,%rdx), %ecx C 2*inv
- imull %edx, %edx C inv*inv
+ lea (%rdx,%rdx), R32(%rcx) C 2*inv
+ imul R32(%rdx), R32(%rdx) C inv*inv
- negq %rsi C -size
+ neg %rsi C -size
- imull %edi, %edx C inv*inv*d
+ imul R32(%rdi), R32(%rdx) C inv*inv*d
- subl %edx, %ecx C inv = 2*inv - inv*inv*d, 16 bits
+ sub R32(%rdx), R32(%rcx) C inv = 2*inv - inv*inv*d, 16 bits
- leal (%rcx,%rcx), %edx C 2*inv
- imull %ecx, %ecx C inv*inv
+ lea (%rcx,%rcx), R32(%rdx) C 2*inv
+ imul R32(%rcx), R32(%rcx) C inv*inv
- imull %edi, %ecx C inv*inv*d
+ imul R32(%rdi), R32(%rcx) C inv*inv*d
- subl %ecx, %edx C inv = 2*inv - inv*inv*d, 32 bits
- xorl %ecx, %ecx C initial cbit
+ sub R32(%rcx), R32(%rdx) C inv = 2*inv - inv*inv*d, 32 bits
+ xor R32(%rcx), R32(%rcx) C initial cbit
- leaq (%rdx,%rdx), %r9 C 2*inv
- imulq %rdx, %rdx C inv*inv
+ lea (%rdx,%rdx), %r9 C 2*inv
+ imul %rdx, %rdx C inv*inv
- imulq %r8, %rdx C inv*inv*d
+ imul %r8, %rdx C inv*inv*d
- subq %rdx, %r9 C inv = 2*inv - inv*inv*d, 64 bits
- movq %r10, %rdx C initial climb
+ sub %rdx, %r9 C inv = 2*inv - inv*inv*d, 64 bits
+ mov %r10, %rdx C initial climb
ASSERT(e,` C d*inv == 1 mod 2^64
- movq %r8, %r10
- imulq %r9, %r10
- cmpq $1, %r10')
+ mov %r8, %r10
+ imul %r9, %r10
+ cmp $1, %r10')
- incq %rsi
+ inc %rsi
jz L(one)
C r9 inverse
C r11 src end ptr
- subq %rdx, %rax C l = src[i]-cbit - climb
+ sub %rdx, %rax C l = src[i]-cbit - climb
- adcq $0, %rcx C more cbit
- imulq %r9, %rax C q = l * inverse
+ adc $0, %rcx C more cbit
+ imul %r9, %rax C q = l * inverse
- mulq %r8 C climb = high (q * d)
+ mul %r8 C climb = high (q * d)
- movq (%r11,%rsi,8), %rax C src[i+1]
- subq %rcx, %rax C next l = src[i+1] - cbit
- setc %cl C new cbit
+ mov (%r11,%rsi,8), %rax C src[i+1]
+ sub %rcx, %rax C next l = src[i+1] - cbit
+ setc R8(%rcx) C new cbit
- incq %rsi
+ inc %rsi
jnz L(top)
L(one):
- subq %rdx, %rax C l = src[i]-cbit - climb
+ sub %rdx, %rax C l = src[i]-cbit - climb
- adcq $0, %rcx C more cbit
- imulq %r9, %rax C q = l * inverse
+ adc $0, %rcx C more cbit
+ imul %r9, %rax C q = l * inverse
- mulq %r8 C climb = high (q * d)
+ mul %r8 C climb = high (q * d)
- leaq (%rcx,%rdx), %rax C climb+cbit
+ lea (%rcx,%rdx), %rax C climb+cbit
+ FUNC_EXIT()
ret
EPILOGUE(mpn_modexact_1c_odd)
dnl AMD64 mpn_mul_1.
-dnl Copyright 2003, 2004, 2005, 2007, 2008 Free Software Foundation, Inc.
+dnl Copyright 2003, 2004, 2005, 2007, 2008, 2012 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
include(`../config.m4')
C cycles/limb
-C K8,K9: 2.5
-C K10: 2.5
-C P4: 12.3
-C P6 core2: 4.0
-C P6 corei7: 3.8
-C Atom: 19.8
-
-C The inner loop of this code is the result of running a code generation and
+C AMD K8,K9 2.5
+C AMD K10 2.5
+C AMD bd1 5.0
+C AMD bobcat 5.5
+C Intel P4 12.3
+C Intel core2 4.0
+C Intel NHM 3.75
+C Intel SBR 2.95
+C Intel atom 19.8
+C VIA nano 4.25
+
+C The loop of this code is the result of running a code generation and
C optimization tool suite written by David Harvey and Torbjorn Granlund.
-C TODO:
-C * The inner loop is great, but the prologue and epilogue code was
-C quickly written. Tune it!
+C TODO
+C * The loop is great, but the prologue and epilogue code was quickly written.
+C Tune it!
-C INPUT PARAMETERS
-define(`rp', `%rdi')
-define(`up', `%rsi')
-define(`n_param',`%rdx')
-define(`vl', `%rcx')
+define(`rp', `%rdi') C rcx
+define(`up', `%rsi') C rdx
+define(`n_param', `%rdx') C r8
+define(`vl', `%rcx') C r9
-define(`n', `%r11')
+define(`n', `%r11')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+IFDOS(` define(`up', ``%rsi'') ') dnl
+IFDOS(` define(`rp', ``%rcx'') ') dnl
+IFDOS(` define(`vl', ``%r9'') ') dnl
+IFDOS(` define(`r9', ``rdi'') ') dnl
+IFDOS(` define(`n', ``%r8'') ') dnl
+IFDOS(` define(`r8', ``r11'') ') dnl
ASM_START()
TEXT
ALIGN(16)
PROLOGUE(mpn_mul_1c)
+IFDOS(``push %rsi '')
+IFDOS(``push %rdi '')
+IFDOS(``mov %rdx, %rsi '')
push %rbx
- mov %r8, %r10
+IFSTD(` mov %r8, %r10')
+IFDOS(` mov 64(%rsp), %r10') C 40 + 3*8 (3 push insns)
jmp L(common)
EPILOGUE()
PROLOGUE(mpn_mul_1)
+
+IFDOS(``push %rsi '')
+IFDOS(``push %rdi '')
+IFDOS(``mov %rdx, %rsi '')
+
push %rbx
xor %r10, %r10
L(common):
mov (up), %rax C read first u limb early
- mov n_param, %rbx C move away n from rdx, mul uses it
+IFSTD(` mov n_param, %rbx ') C move away n from rdx, mul uses it
+IFDOS(` mov n, %rbx ')
mul vl
- mov %rbx, %r11
+IFSTD(` mov %rbx, n ')
add %r10, %rax
adc $0, %rdx
add %rax, %r9
mov (up,n,8), %rax
adc %rdx, %r8
- mov $0, %r10d
+ mov $0, R32(%r10)
L(L1): mul vl
mov %r9, 8(rp,n,8)
add %rax, %r8
L(L3): mov 16(up,n,8), %rax
mul vl
mov %rbx, 24(rp,n,8)
- mov $0, %r8d # zero
- mov %r8, %rbx # zero
+ mov $0, R32(%r8) C zero
+ mov %r8, %rbx C zero
add %rax, %r10
mov 24(up,n,8), %rax
- mov %r8, %r9 # zero
+ mov %r8, %r9 C zero
adc %rdx, %r9
L(L2): mul vl
add $4, n
L(ret): mov %rdx, %rax
pop %rbx
+IFDOS(``pop %rdi '')
+IFDOS(``pop %rsi '')
ret
EPILOGUE()
dnl AMD64 mpn_mul_2 -- Multiply an n-limb vector with a 2-limb vector and
dnl store the result in a third limb vector.
-dnl Copyright 2008 Free Software Foundation, Inc.
+dnl Copyright 2008, 2011, 2012 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
include(`../config.m4')
C cycles/limb
-C K8,K9: 2.275
-C K10: 2.275
-C P4: ?
-C P6 core2: 4.0
-C P6 corei7: 3.8
+C AMD K8,K9 2.275
+C AMD K10 2.275
+C Intel P4 13.5
+C Intel core2 4.0
+C Intel corei 3.8
+C Intel atom ?
+C VIA nano ?
C This code is the result of running a code generation and optimization tool
C suite written by David Harvey and Torbjorn Granlund.
define(`w3', `%r10')
define(`n', `%r11')
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
ASM_START()
TEXT
ALIGN(16)
PROLOGUE(mpn_mul_2)
+ FUNC_ENTRY(4)
push %rbx
push %rbp
pop %rbp
pop %rbx
+ FUNC_EXIT()
ret
EPILOGUE()
dnl Contributed to the GNU project by Torbjorn Granlund and David Harvey.
-dnl Copyright 2008 Free Software Foundation, Inc.
+dnl Copyright 2008, 2012 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
include(`../config.m4')
C cycles/limb
-C K8,K9: 2.375
-C K10: 2.375
-C P4: ?
-C P6-15: 4.45
+C AMD K8,K9 2.375
+C AMD K10 2.375
+C Intel P4 15-16
+C Intel core2 4.45
+C Intel corei 4.35
+C Intel atom ?
+C VIA nano 4.5
C The inner loops of this code are the result of running a code generation and
C optimization tool suite written by David Harvey and Torbjorn Granlund.
define(`outer_addr', `%r14')
define(`un', `%r13')
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
ASM_START()
TEXT
ALIGN(16)
PROLOGUE(mpn_mul_basecase)
+ FUNC_ENTRY(4)
+IFDOS(` mov 56(%rsp), %r8d ')
push %rbx
push %rbp
push %r12
cmp $2, R32(w0)
jc L(mul_1_prologue_1)
jz L(mul_1_prologue_2)
- jmp L(mul_1_prologue_3)
+
+L(mul_1_prologue_3):
+ add $-1, n
+ lea L(addmul_outer_3)(%rip), outer_addr
+ mov %rax, w3
+ mov %rdx, w0
+ jmp L(mul_1_entry_3)
L(mul_1_prologue_0):
mov %rax, w2
xor R32(w3), R32(w3)
jmp L(mul_1_entry_2)
-L(mul_1_prologue_3):
- add $-1, n
- lea L(addmul_outer_3)(%rip), outer_addr
- mov %rax, w3
- mov %rdx, w0
- jmp L(mul_1_entry_3)
-
C this loop is 10 c/loop = 2.5 c/l on K8, for all up/rp alignments
mov w3, -32(rp,n,8)
js L(mul_2_top)
- mov -32(up,n,8), %rax
+ mov -32(up,n,8), %rax C FIXME: n is constant
mul v1
add %rax, w0
mov w0, (rp)
pop %r12
pop %rbp
pop %rbx
+ FUNC_EXIT()
ret
EPILOGUE()
--- /dev/null
+dnl AMD64 mpn_mullo_basecase.
+
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
+dnl Copyright 2008, 2009, 2011, 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+C The inner loops of this code are the result of running a code generation and
+C optimisation tool suite written by David Harvey and Torbjorn Granlund.
+
+C NOTES
+C * There is a major stupidity in that we call mpn_mul_1 initially, for a
+C large trip count. Instead, we should start with mul_2 for any operand
+C size congruence class.
+C * Stop iterating addmul_2 earlier, falling into straight-line triangle code
+C for the last 2-3 iterations.
+C * Perhaps implement n=4 special code.
+C * The reload of the outer loop jump address hurts branch prediction.
+C * The addmul_2 loop ends with an MUL whose high part is not used upon loop
+C exit.
+
+C INPUT PARAMETERS
+define(`rp', `%rdi')
+define(`up', `%rsi')
+define(`vp_param', `%rdx')
+define(`n', `%rcx')
+
+define(`vp', `%r11')
+define(`outer_addr', `%r8')
+define(`j', `%r9')
+define(`v0', `%r13')
+define(`v1', `%r14')
+define(`w0', `%rbx')
+define(`w1', `%r15')
+define(`w2', `%rbp')
+define(`w3', `%r10')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+ TEXT
+ ALIGN(16)
+C Entry point.  Operand sizes n >= 4 branch to the general code at gen.
+C Sizes 1, 2 and 3 are dispatched through a jump table to the straight-line
+C code below.  Table index 0 is marked as not allowed.
+PROLOGUE(mpn_mullo_basecase)
+ FUNC_ENTRY(4)
+ cmp $4, n
+ jge L(gen)
+ mov (up), %rax C u0
+ mov (vp_param), %r8 C v0
+
+C Dispatch on n.  The PIC variant reads a 4-byte table entry, sign-extends it
+C and adds it to the table address.  The non-PIC variant jumps indirectly
+C through an 8-byte table entry.
+ lea L(tab)(%rip), %r9
+ifdef(`PIC',
+` movslq (%r9,%rcx,4), %r10
+ add %r10, %r9
+ jmp *%r9
+',`
+ jmp *(%r9,n,8)
+')
+ JUMPTABSECT
+ ALIGN(8)
+L(tab): JMPENT( L(tab), L(tab)) C not allowed
+ JMPENT( L(1), L(tab)) C 1
+ JMPENT( L(2), L(tab)) C 2
+ JMPENT( L(3), L(tab)) C 3
+dnl JMPENT( L(0m4), L(tab)) C 4
+dnl JMPENT( L(1m4), L(tab)) C 5
+dnl JMPENT( L(2m4), L(tab)) C 6
+dnl JMPENT( L(3m4), L(tab)) C 7
+dnl JMPENT( L(0m4), L(tab)) C 8
+dnl JMPENT( L(1m4), L(tab)) C 9
+dnl JMPENT( L(2m4), L(tab)) C 10
+dnl JMPENT( L(3m4), L(tab)) C 11
+ TEXT
+
+C n = 1: store the low limb of u0 x v0.
+L(1): imul %r8, %rax
+ mov %rax, (rp)
+ FUNC_EXIT()
+ ret
+
+C n = 2: the first result limb is the low limb of u0 x v0.  The second is the
+C high limb of u0 x v0 plus the low limbs of u0 x v1 and u1 x v0.
+L(2): mov 8(vp_param), %r11
+ imul %rax, %r11 C u0 x v1
+ mul %r8 C u0 x v0
+ mov %rax, (rp)
+ imul 8(up), %r8 C u1 x v0
+ lea (%r11, %rdx), %rax
+ add %r8, %rax
+ mov %rax, 8(rp)
+ FUNC_EXIT()
+ ret
+
+C n = 3: r0, r1 and r2 in the comments below are the three result limbs.
+C Products that only feed r2 use imul since their high halves are not needed.
+L(3): mov 8(vp_param), %r9 C v1
+ mov 16(vp_param), %r11
+ mul %r8 C u0 x v0 -> <r1,r0>
+ mov %rax, (rp) C r0
+ mov (up), %rax C u0
+ mov %rdx, %rcx C r1
+ mul %r9 C u0 x v1 -> <r2,r1>
+ imul 8(up), %r9 C u1 x v1 -> r2
+ mov 16(up), %r10
+ imul %r8, %r10 C u2 x v0 -> r2
+ add %rax, %rcx
+ adc %rdx, %r9
+ add %r10, %r9
+ mov 8(up), %rax C u1
+ mul %r8 C u1 x v0 -> <r2,r1>
+ add %rax, %rcx
+ adc %rdx, %r9
+ mov %r11, %rax
+ imul (up), %rax C u0 x v2 -> r2
+ add %rax, %r9
+ mov %rcx, 8(rp)
+ mov %r9, 16(rp)
+ FUNC_EXIT()
+ ret
+
+C General case, reached for n >= 4.  The 0m4 to 3m4 labels alias gen and are
+C named by the dnl-disabled jump table entries.
+L(0m4):
+L(1m4):
+L(2m4):
+L(3m4):
+C Save the registers used for w0, w2, v0, v1 and w1.
+L(gen): push %rbx
+ push %rbp
+ push %r13
+ push %r14
+ push %r15
+
+C vp_param lives in rdx, which mul overwrites, so the pointer is copied to vp.
+ mov (up), %rax
+ mov (vp_param), v0
+ mov vp_param, vp
+
+C Point rp and up past the end of the operands and negate n, so that indexed
+C accesses use a negative index that counts up toward zero.
+ lea (rp,n,8), rp
+ lea (up,n,8), up
+ neg n
+
+C rdx:rax = u0 x v0
+ mul v0
+
+C Low bit of n clear: start with a mul_2 pass.  Otherwise start with mul_1.
+ test $1, R8(n)
+ jz L(mul_2)
+
+C mul_1 pass: multiply by v0 only.  Bit 1 of the negated n selects the loop
+C entry point, and outer_addr records where to continue after this pass.
+L(mul_1):
+ lea -8(rp), rp
+ lea -8(up), up
+ test $2, R8(n)
+ jnz L(mul_1_prologue_3)
+
+L(mul_1_prologue_2): C n = 7, 11, 15, ...
+ lea -1(n), j
+ lea L(addmul_outer_1)(%rip), outer_addr
+ mov %rax, w0
+ mov %rdx, w1
+ xor R32(w2), R32(w2)
+ xor R32(w3), R32(w3)
+ mov 16(up,n,8), %rax
+ jmp L(mul_1_entry_2)
+
+L(mul_1_prologue_3): C n = 5, 9, 13, ...
+ lea 1(n), j
+ lea L(addmul_outer_3)(%rip), outer_addr
+ mov %rax, w2
+ mov %rdx, w3
+ xor R32(w0), R32(w0)
+ jmp L(mul_1_entry_0)
+
+C Four-way unrolled loop.  j is negative and is stepped by 4 until it is no
+C longer negative.  w0 to w3 rotate as the product and carry limbs.
+ ALIGN(16)
+L(mul_1_top):
+ mov w0, -16(rp,j,8)
+ add %rax, w1
+ mov (up,j,8), %rax
+ adc %rdx, w2
+ xor R32(w0), R32(w0)
+ mul v0
+ mov w1, -8(rp,j,8)
+ add %rax, w2
+ adc %rdx, w3
+L(mul_1_entry_0):
+ mov 8(up,j,8), %rax
+ mul v0
+ mov w2, (rp,j,8)
+ add %rax, w3
+ adc %rdx, w0
+ mov 16(up,j,8), %rax
+ mul v0
+ mov w3, 8(rp,j,8)
+ xor R32(w2), R32(w2) C zero
+ mov w2, w3 C zero
+ add %rax, w0
+ mov 24(up,j,8), %rax
+ mov w2, w1 C zero
+ adc %rdx, w1
+L(mul_1_entry_2):
+ mul v0
+ add $4, j
+ js L(mul_1_top)
+
+C Wind-down: store the last two full limbs.
+ mov w0, -16(rp)
+ add %rax, w1
+ mov w1, -8(rp)
+ adc %rdx, w2
+
+C The top limb only takes the low half of its product, hence imul.
+ imul (up), v0
+ add v0, w2
+ mov w2, (rp)
+
+C Step n by one and return if it reached zero.
+ add $1, n
+ jz L(ret)
+
+C Load the next two multiplier limbs, adjust the pointers and continue at the
+C addmul entry chosen in the prologue.
+ mov 8(vp), v0
+ mov 16(vp), v1
+
+ lea 16(up), up
+ lea 8(vp), vp
+ lea 24(rp), rp
+
+ jmp *outer_addr
+
+
+C mul_2 pass: multiply by the two limbs v0 and v1.  On entry rdx:rax already
+C holds u0 x v0.  Bit 1 of the negated n selects the loop entry point, and
+C outer_addr records which addmul entry follows this pass.
+L(mul_2):
+ mov 8(vp), v1
+ test $2, R8(n)
+ jz L(mul_2_prologue_3)
+
+ ALIGN(16)
+L(mul_2_prologue_1):
+ lea 0(n), j
+ mov %rax, w3
+ mov %rdx, w0
+ xor R32(w1), R32(w1)
+ mov (up,n,8), %rax
+ lea L(addmul_outer_3)(%rip), outer_addr
+ jmp L(mul_2_entry_1)
+
+ ALIGN(16)
+L(mul_2_prologue_3):
+ lea 2(n), j
+ mov $0, R32(w3)
+ mov %rax, w1
+ mov (up,n,8), %rax
+ mov %rdx, w2
+ lea L(addmul_outer_1)(%rip), outer_addr
+ jmp L(mul_2_entry_3)
+
+C Four-way unrolled loop.  Each u limb is multiplied by both v0 and v1, and
+C j is stepped by 4 until it is no longer negative.
+ ALIGN(16)
+L(mul_2_top):
+ mov -32(up,j,8), %rax
+ mul v1
+ add %rax, w0
+ adc %rdx, w1
+ mov -24(up,j,8), %rax
+ xor R32(w2), R32(w2)
+ mul v0
+ add %rax, w0
+ mov -24(up,j,8), %rax
+ adc %rdx, w1
+ adc $0, R32(w2)
+ mul v1
+ add %rax, w1
+ mov w0, -24(rp,j,8)
+ adc %rdx, w2
+ mov -16(up,j,8), %rax
+ mul v0
+ mov $0, R32(w3)
+ add %rax, w1
+ adc %rdx, w2
+ mov -16(up,j,8), %rax
+ adc $0, R32(w3)
+L(mul_2_entry_3):
+ mov $0, R32(w0)
+ mov w1, -16(rp,j,8)
+ mul v1
+ add %rax, w2
+ mov -8(up,j,8), %rax
+ adc %rdx, w3
+ mov $0, R32(w1)
+ mul v0
+ add %rax, w2
+ mov -8(up,j,8), %rax
+ adc %rdx, w3
+ adc R32(w1), R32(w0)
+ mul v1
+ add %rax, w3
+ mov w2, -8(rp,j,8)
+ adc %rdx, w0
+ mov (up,j,8), %rax
+ mul v0
+ add %rax, w3
+ adc %rdx, w0
+ adc $0, R32(w1)
+L(mul_2_entry_1):
+ add $4, j
+ mov w3, -32(rp,j,8)
+ js L(mul_2_top)
+
+C Wind-down: the top limb only takes the low halves of the two remaining
+C products, hence imul.
+ imul -16(up), v1
+ add v1, w0
+ imul -8(up), v0
+ add v0, w0
+ mov w0, -8(rp)
+
+C Step n by two and return if it reached zero.
+ add $2, n
+ jz L(ret)
+
+C Load the next two multiplier limbs, adjust the pointers and continue at the
+C addmul entry chosen in the prologue.
+ mov 16(vp), v0
+ mov 24(vp), v1
+
+ lea 16(vp), vp
+ lea 16(rp), rp
+
+ jmp *outer_addr
+
+
+C addmul_2 passes: multiply by v0 and v1 and accumulate into the result
+C already stored at rp.  The two outer entries differ in the loop entry point
+C they use, and each one sets outer_addr to the other, so successive passes
+C alternate between them.
+L(addmul_outer_1):
+ lea -2(n), j
+ mov -16(up,n,8), %rax
+ mul v0
+ mov %rax, w3
+ mov -16(up,n,8), %rax
+ mov %rdx, w0
+ xor R32(w1), R32(w1)
+ lea L(addmul_outer_3)(%rip), outer_addr
+ jmp L(addmul_entry_1)
+
+L(addmul_outer_3):
+ lea 0(n), j
+ mov -16(up,n,8), %rax
+ xor R32(w3), R32(w3)
+ mul v0
+ mov %rax, w1
+ mov -16(up,n,8), %rax
+ mov %rdx, w2
+ lea L(addmul_outer_1)(%rip), outer_addr
+ jmp L(addmul_entry_3)
+
+C Four-way unrolled loop.  Sums are added directly into memory at rp, and j
+C is stepped by 4 until it is no longer negative.
+ ALIGN(16)
+L(addmul_top):
+ add w3, -32(rp,j,8)
+ adc %rax, w0
+ mov -24(up,j,8), %rax
+ adc %rdx, w1
+ xor R32(w2), R32(w2)
+ mul v0
+ add %rax, w0
+ mov -24(up,j,8), %rax
+ adc %rdx, w1
+ adc R32(w2), R32(w2)
+ mul v1
+ xor R32(w3), R32(w3)
+ add w0, -24(rp,j,8)
+ adc %rax, w1
+ mov -16(up,j,8), %rax
+ adc %rdx, w2
+ mul v0
+ add %rax, w1
+ mov -16(up,j,8), %rax
+ adc %rdx, w2
+ adc $0, R32(w3)
+L(addmul_entry_3):
+ mul v1
+ add w1, -16(rp,j,8)
+ adc %rax, w2
+ mov -8(up,j,8), %rax
+ adc %rdx, w3
+ mul v0
+ xor R32(w0), R32(w0)
+ add %rax, w2
+ adc %rdx, w3
+ mov $0, R32(w1)
+ mov -8(up,j,8), %rax
+ adc R32(w1), R32(w0)
+ mul v1
+ add w2, -8(rp,j,8)
+ adc %rax, w3
+ adc %rdx, w0
+ mov (up,j,8), %rax
+ mul v0
+ add %rax, w3
+ mov (up,j,8), %rax
+ adc %rdx, w0
+ adc $0, R32(w1)
+L(addmul_entry_1):
+ mul v1
+ add $4, j
+ js L(addmul_top)
+
+C Wind-down: add in the last two limbs.  The high half in rdx of the final
+C mul is not used here.
+ add w3, -32(rp)
+ adc %rax, w0
+
+C The top limb only takes the low half of this product, hence imul.
+ imul -24(up), v0
+ add v0, w0
+ add w0, -24(rp)
+
+C Step n by two and return once it is no longer negative.
+ add $2, n
+ jns L(ret)
+
+C Advance to the next two multiplier limbs, move up back by two limbs and
+C run the other outer entry.
+ lea 16(vp), vp
+
+ mov (vp), v0
+ mov 8(vp), v1
+
+ lea -16(up), up
+
+ jmp *outer_addr
+
+C Common exit: restore the saved registers in reverse order of the pushes.
+L(ret): pop %r15
+ pop %r14
+ pop %r13
+ pop %rbp
+ pop %rbx
+ FUNC_EXIT()
+ ret
+EPILOGUE()
--- /dev/null
+dnl AMD64 mpn_mulmid_basecase
+
+dnl Contributed by David Harvey.
+
+dnl Copyright 2011, 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+C cycles/limb
+C K8,K9: 2.375 (2.5 when un - vn is "small")
+C K10: ?
+C P4: ?
+C P6-15: ?
+
+C INPUT PARAMETERS
+define(`rp', `%rdi')
+define(`up', `%rsi')
+define(`un_param',`%rdx')
+define(`vp_param',`%rcx')
+define(`vn', `%r8')
+
+define(`v0', `%r12')
+define(`v1', `%r9')
+
+define(`w0', `%rbx')
+define(`w1', `%rcx')
+define(`w2', `%rbp')
+define(`w3', `%r10')
+
+define(`n', `%r11')
+define(`outer_addr', `%r14')
+define(`un', `%r13')
+define(`vp', `%r15')
+
+define(`vp_inner', `%r10')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+ TEXT
+ ALIGN(16)
+C Entry point.  Saves the registers used below, computes the row length
+C un = un_param - vn + 1, and picks one of three strategies: the diagonal
+C code when un < 4, otherwise a mul_1 or mul_2 first pass depending on the
+C low bit of vn.
+PROLOGUE(mpn_mulmid_basecase)
+ FUNC_ENTRY(4)
+C DOS64 only: vn is loaded from the stack, 32 bits wide.
+C NOTE(review): presumably the fifth argument slot after the two pushes done
+C by FUNC_ENTRY - confirm against the FUNC_ENTRY definition.
+IFDOS(` mov 56(%rsp), %r8d ')
+ push %rbx
+ push %rbp
+ push %r12
+ push %r13
+ push %r14
+ push %r15
+
+C vp_param shares rcx with w1, so the pointer is kept in vp instead.
+ mov vp_param, vp
+
+ C use un for row length (= un_param - vn + 1)
+ lea 1(un_param), un
+ sub vn, un
+
+C Advance rp by un limbs.
+ lea (rp,un,8), rp
+
+ cmp $4, un C TODO: needs tuning
+ jc L(diagonal)
+
+C Advance up by un_param limbs.
+ lea (up,un_param,8), up
+
+C Even vn starts with a mul_2 pass, odd vn falls through to mul_1.
+ test $1, vn
+ jz L(mul_2)
+
+C ===========================================================
+C mul_1 for vp[0] if vn is odd
+
+C First pass with the single multiplier limb v0.  The low two bits of un
+C select one of four loop entry points.  Each prologue also stores the
+C matching addmul prologue address in outer_addr.
+L(mul_1):
+ mov R32(un), R32(w0)
+
+ neg un
+ mov (up,un,8), %rax
+ mov (vp), v0
+ mul v0
+
+ and $-4, un C round down to multiple of 4
+ mov un, n
+
+C w0 = original un mod 4
+ and $3, R32(w0)
+ jz L(mul_1_prologue_0)
+ cmp $2, R32(w0)
+ jc L(mul_1_prologue_1)
+ jz L(mul_1_prologue_2)
+
+L(mul_1_prologue_3):
+ mov %rax, w3
+ mov %rdx, w0
+ lea L(addmul_prologue_3)(%rip), outer_addr
+ jmp L(mul_1_entry_3)
+
+ ALIGN(16)
+L(mul_1_prologue_0):
+ mov %rax, w2
+ mov %rdx, w3 C note already w0 == 0
+ lea L(addmul_prologue_0)(%rip), outer_addr
+ jmp L(mul_1_entry_0)
+
+ ALIGN(16)
+L(mul_1_prologue_1):
+ add $4, n
+ mov %rax, w1
+ mov %rdx, w2
+ mov $0, R32(w3)
+ mov (up,n,8), %rax
+ lea L(addmul_prologue_1)(%rip), outer_addr
+ jmp L(mul_1_entry_1)
+
+ ALIGN(16)
+L(mul_1_prologue_2):
+ mov %rax, w0
+ mov %rdx, w1
+ mov 24(up,n,8), %rax
+ mov $0, R32(w2)
+ mov $0, R32(w3)
+ lea L(addmul_prologue_2)(%rip), outer_addr
+ jmp L(mul_1_entry_2)
+
+
+ C this loop is 10 c/loop = 2.5 c/l on K8
+
+C n is negative and is stepped by 4 until it is no longer negative.
+ ALIGN(16)
+L(mul_1_top):
+ mov w0, -16(rp,n,8)
+ add %rax, w1
+ mov (up,n,8), %rax
+ adc %rdx, w2
+L(mul_1_entry_1):
+ mov $0, R32(w0)
+ mul v0
+ mov w1, -8(rp,n,8)
+ add %rax, w2
+ adc %rdx, w3
+L(mul_1_entry_0):
+ mov 8(up,n,8), %rax
+ mul v0
+ mov w2, (rp,n,8)
+ add %rax, w3
+ adc %rdx, w0
+L(mul_1_entry_3):
+ mov 16(up,n,8), %rax
+ mul v0
+ mov w3, 8(rp,n,8)
+ mov $0, R32(w2) C zero
+ mov w2, w3 C zero
+ add %rax, w0
+ mov 24(up,n,8), %rax
+ mov w2, w1 C zero
+ adc %rdx, w1
+L(mul_1_entry_2):
+ mul v0
+ add $4, n
+ js L(mul_1_top)
+
+C Wind-down.  w2 is still zero when it is stored to 8(rp), since the store
+C comes before the final adc into w2.
+ mov w0, -16(rp)
+ add %rax, w1
+ mov w1, -8(rp)
+ mov w2, 8(rp) C zero last limb of output
+ adc %rdx, w2
+ mov w2, (rp)
+
+C One limb of vp has been used.  Return if none remain.
+ dec vn
+ jz L(ret)
+
+C Step up back and vp forward by one limb, reload n and the next two
+C multiplier limbs, then continue at the addmul prologue chosen above.
+ lea -8(up), up
+ lea 8(vp), vp
+
+ mov un, n
+ mov (vp), v0
+ mov 8(vp), v1
+
+ jmp *outer_addr
+
+C ===========================================================
+C mul_2 for vp[0], vp[1] if vn is even
+
+C First pass with the two multiplier limbs v0 and v1.  The low two bits of
+C un select one of four loop entry points.  Each prologue also stores the
+C matching addmul prologue address in outer_addr.
+ ALIGN(16)
+L(mul_2):
+ mov R32(un), R32(w0)
+
+ neg un
+ mov -8(up,un,8), %rax
+ mov (vp), v0
+ mov 8(vp), v1
+ mul v1
+
+ and $-4, un C round down to multiple of 4
+ mov un, n
+
+C w0 = original un mod 4
+ and $3, R32(w0)
+ jz L(mul_2_prologue_0)
+ cmp $2, R32(w0)
+ jc L(mul_2_prologue_1)
+ jz L(mul_2_prologue_2)
+
+L(mul_2_prologue_3):
+ mov %rax, w1
+ mov %rdx, w2
+ lea L(addmul_prologue_3)(%rip), outer_addr
+ jmp L(mul_2_entry_3)
+
+ ALIGN(16)
+L(mul_2_prologue_0):
+ mov %rax, w0
+ mov %rdx, w1
+ lea L(addmul_prologue_0)(%rip), outer_addr
+ jmp L(mul_2_entry_0)
+
+ ALIGN(16)
+L(mul_2_prologue_1):
+ mov %rax, w3
+ mov %rdx, w0
+ mov $0, R32(w1)
+ lea L(addmul_prologue_1)(%rip), outer_addr
+ jmp L(mul_2_entry_1)
+
+ ALIGN(16)
+L(mul_2_prologue_2):
+ mov %rax, w2
+ mov %rdx, w3
+ mov $0, R32(w0)
+ mov 16(up,n,8), %rax
+ lea L(addmul_prologue_2)(%rip), outer_addr
+ jmp L(mul_2_entry_2)
+
+
+ C this loop is 18 c/loop = 2.25 c/l on K8
+
+C n is negative and is stepped by 4 until it reaches zero.
+ ALIGN(16)
+L(mul_2_top):
+ mov -8(up,n,8), %rax
+ mul v1
+ add %rax, w0
+ adc %rdx, w1
+L(mul_2_entry_0):
+ mov $0, R32(w2)
+ mov (up,n,8), %rax
+ mul v0
+ add %rax, w0
+ mov (up,n,8), %rax
+ adc %rdx, w1
+ adc $0, R32(w2)
+ mul v1
+ add %rax, w1
+ mov w0, (rp,n,8)
+ adc %rdx, w2
+L(mul_2_entry_3):
+ mov 8(up,n,8), %rax
+ mul v0
+ mov $0, R32(w3)
+ add %rax, w1
+ adc %rdx, w2
+ mov $0, R32(w0)
+ adc $0, R32(w3)
+ mov 8(up,n,8), %rax
+ mov w1, 8(rp,n,8)
+ mul v1
+ add %rax, w2
+ mov 16(up,n,8), %rax
+ adc %rdx, w3
+L(mul_2_entry_2):
+ mov $0, R32(w1)
+ mul v0
+ add %rax, w2
+ mov 16(up,n,8), %rax
+ adc %rdx, w3
+ adc $0, R32(w0)
+ mul v1
+ add %rax, w3
+ mov w2, 16(rp,n,8)
+ adc %rdx, w0
+L(mul_2_entry_1):
+ mov 24(up,n,8), %rax
+ mul v0
+ add %rax, w3
+ adc %rdx, w0
+ adc $0, R32(w1)
+ add $4, n
+ mov w3, -8(rp,n,8)
+ jnz L(mul_2_top)
+
+C Store the two top limbs of this pass.
+ mov w0, (rp)
+ mov w1, 8(rp)
+
+C Two limbs of vp have been used.  Return if none remain.
+ sub $2, vn
+ jz L(ret)
+
+C Step vp forward and up back by two limbs, reload n and the next two
+C multiplier limbs, then continue at the addmul prologue chosen above.
+ lea 16(vp), vp
+ lea -16(up), up
+
+ mov un, n
+ mov (vp), v0
+ mov 8(vp), v1
+
+ jmp *outer_addr
+
+C ===========================================================
+C addmul_2 for remaining vp's
+
+C addmul_2 passes: multiply by v0 and v1 and accumulate into the result
+C already stored at rp.  One of the four prologues below is reached through
+C outer_addr.  Each performs the first multiply by v1 and jumps to the
+C matching entry point of the loop.
+ ALIGN(16)
+L(addmul_prologue_0):
+ mov -8(up,n,8), %rax
+ mul v1
+ mov %rax, w1
+ mov %rdx, w2
+ mov $0, R32(w3)
+ jmp L(addmul_entry_0)
+
+ ALIGN(16)
+L(addmul_prologue_1):
+ mov 16(up,n,8), %rax
+ mul v1
+ mov %rax, w0
+ mov %rdx, w1
+ mov $0, R32(w2)
+ mov 24(up,n,8), %rax
+ jmp L(addmul_entry_1)
+
+ ALIGN(16)
+L(addmul_prologue_2):
+ mov 8(up,n,8), %rax
+ mul v1
+ mov %rax, w3
+ mov %rdx, w0
+ mov $0, R32(w1)
+ jmp L(addmul_entry_2)
+
+ ALIGN(16)
+L(addmul_prologue_3):
+ mov (up,n,8), %rax
+ mul v1
+ mov %rax, w2
+ mov %rdx, w3
+ mov $0, R32(w0)
+ mov $0, R32(w1)
+ jmp L(addmul_entry_3)
+
+ C this loop is 19 c/loop = 2.375 c/l on K8
+
+C Sums are added directly into memory at rp.  n is negative and is stepped
+C by 4 until it reaches zero.
+ ALIGN(16)
+L(addmul_top):
+ mov $0, R32(w3)
+ add %rax, w0
+ mov -8(up,n,8), %rax
+ adc %rdx, w1
+ adc $0, R32(w2)
+ mul v1
+ add w0, -8(rp,n,8)
+ adc %rax, w1
+ adc %rdx, w2
+L(addmul_entry_0):
+ mov (up,n,8), %rax
+ mul v0
+ add %rax, w1
+ mov (up,n,8), %rax
+ adc %rdx, w2
+ adc $0, R32(w3)
+ mul v1
+ add w1, (rp,n,8)
+ mov $0, R32(w1)
+ adc %rax, w2
+ mov $0, R32(w0)
+ adc %rdx, w3
+L(addmul_entry_3):
+ mov 8(up,n,8), %rax
+ mul v0
+ add %rax, w2
+ mov 8(up,n,8), %rax
+ adc %rdx, w3
+ adc $0, R32(w0)
+ mul v1
+ add w2, 8(rp,n,8)
+ adc %rax, w3
+ adc %rdx, w0
+L(addmul_entry_2):
+ mov 16(up,n,8), %rax
+ mul v0
+ add %rax, w3
+ mov 16(up,n,8), %rax
+ adc %rdx, w0
+ adc $0, R32(w1)
+ mul v1
+ add w3, 16(rp,n,8)
+ nop C don't ask...
+ adc %rax, w0
+ mov $0, R32(w2)
+ mov 24(up,n,8), %rax
+ adc %rdx, w1
+L(addmul_entry_1):
+ mul v0
+ add $4, n
+ jnz L(addmul_top)
+
+C Wind-down: fold in the last product, then add the three remaining limbs
+C into the result with carry propagation.
+ add %rax, w0
+ adc %rdx, w1
+ adc $0, R32(w2)
+
+ add w0, -8(rp)
+ adc w1, (rp)
+ adc w2, 8(rp)
+
+C Two limbs of vp have been used.  Return if none remain.
+ sub $2, vn
+ jz L(ret)
+
+C Step vp forward and up back by two limbs, reload n and the next two
+C multiplier limbs, then run the same prologue again through outer_addr.
+ lea 16(vp), vp
+ lea -16(up), up
+
+ mov un, n
+ mov (vp), v0
+ mov 8(vp), v1
+
+ jmp *outer_addr
+
+C ===========================================================
+C accumulate along diagonals if un - vn is small
+
+C Diagonal path, taken when un < 4.  w0, w1 and w2 form a three-limb
+C accumulator.  Each outer iteration sums one diagonal of products, stores
+C the low accumulator limb as a result limb and shifts the accumulator down
+C by one limb.  vp_inner shares its register with w3, which is not used on
+C this path.
+ ALIGN(16)
+L(diagonal):
+ xor R32(w0), R32(w0)
+ xor R32(w1), R32(w1)
+ xor R32(w2), R32(w2)
+
+ neg un
+
+C vn mod 4 selects the entry point into the four-way unrolled inner loop.
+C For residues 1 to 3, vp is offset and vn is rounded up to the next
+C multiple of 4, so the inner loop can always count down in steps of 4.
+ mov R32(vn), %eax
+ and $3, %eax
+ jz L(diag_prologue_0)
+ cmp $2, %eax
+ jc L(diag_prologue_1)
+ jz L(diag_prologue_2)
+
+L(diag_prologue_3):
+ lea -8(vp), vp
+ mov vp, vp_inner
+ add $1, vn
+ mov vn, n
+ lea L(diag_entry_3)(%rip), outer_addr
+ jmp L(diag_entry_3)
+
+C In the next three prologues, lea 0(%rip) records the address of the
+C instruction that follows it.  A later jump through outer_addr therefore
+C repeats the initial load and the jump to the loop entry.
+L(diag_prologue_0):
+ mov vp, vp_inner
+ mov vn, n
+ lea 0(%rip), outer_addr
+ mov -8(up,n,8), %rax
+ jmp L(diag_entry_0)
+
+L(diag_prologue_1):
+ lea 8(vp), vp
+ mov vp, vp_inner
+ add $3, vn
+ mov vn, n
+ lea 0(%rip), outer_addr
+ mov -8(vp_inner), %rax
+ jmp L(diag_entry_1)
+
+L(diag_prologue_2):
+ lea -16(vp), vp
+ mov vp, vp_inner
+ add $2, vn
+ mov vn, n
+ lea 0(%rip), outer_addr
+ mov 16(vp_inner), %rax
+ jmp L(diag_entry_2)
+
+
+ C this loop is 10 c/loop = 2.5 c/l on K8
+
+C Inner loop: vp_inner walks forward while the index n into up counts down
+C by 4 until it reaches zero.
+ ALIGN(16)
+L(diag_top):
+ add %rax, w0
+ adc %rdx, w1
+ mov -8(up,n,8), %rax
+ adc $0, w2
+L(diag_entry_0):
+ mulq (vp_inner)
+ add %rax, w0
+ adc %rdx, w1
+ adc $0, w2
+L(diag_entry_3):
+ mov -16(up,n,8), %rax
+ mulq 8(vp_inner)
+ add %rax, w0
+ mov 16(vp_inner), %rax
+ adc %rdx, w1
+ adc $0, w2
+L(diag_entry_2):
+ mulq -24(up,n,8)
+ add %rax, w0
+ mov 24(vp_inner), %rax
+ adc %rdx, w1
+ lea 32(vp_inner), vp_inner
+ adc $0, w2
+L(diag_entry_1):
+ mulq -32(up,n,8)
+ sub $4, n
+ jnz L(diag_top)
+
+C Fold in the last product of this diagonal.
+ add %rax, w0
+ adc %rdx, w1
+ adc $0, w2
+
+C Store one result limb.  un is negative and counts up to zero.
+ mov w0, (rp,un,8)
+
+ inc un
+ jz L(diag_end)
+
+C Reset the inner loop state, advance up by one limb and shift the
+C accumulator down by one limb before the next diagonal.
+ mov vn, n
+ mov vp, vp_inner
+
+ lea 8(up), up
+ mov w1, w0
+ mov w2, w1
+ xor R32(w2), R32(w2)
+
+ jmp *outer_addr
+
+C Store the two remaining accumulator limbs as the top of the result.
+L(diag_end):
+ mov w1, (rp)
+ mov w2, 8(rp)
+
+C Common exit: restore the saved registers in reverse order of the pushes.
+L(ret): pop %r15
+ pop %r14
+ pop %r13
+ pop %r12
+ pop %rbp
+ pop %rbx
+ FUNC_EXIT()
+ ret
+EPILOGUE()
--- /dev/null
+dnl X86-64 mpn_copyd optimised for Intel Sandy Bridge.
+
+dnl Copyright 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+dnl No instructions live in this file.  The mpn_copyd code is supplied by
+dnl the file named on the include_mpn line at the bottom.
+dnl NOTE(review): ABI_SUPPORT presumably declares the calling conventions
+dnl (DOS64, STD64) this file may be built for -- confirm against the macro.
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+MULFUNC_PROLOGUE(mpn_copyd)
+include_mpn(`x86_64/fastsse/copyd-palignr.asm')
--- /dev/null
+dnl X86-64 mpn_copyi optimised for Intel Sandy Bridge.
+
+dnl Copyright 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+dnl No instructions live in this file.  The mpn_copyi code is supplied by
+dnl the file named on the include_mpn line at the bottom.
+dnl NOTE(review): ABI_SUPPORT presumably declares the calling conventions
+dnl (DOS64, STD64) this file may be built for -- confirm against the macro.
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+MULFUNC_PROLOGUE(mpn_copyi)
+include_mpn(`x86_64/fastsse/copyi-palignr.asm')
--- /dev/null
+dnl AMD64 mpn_divexact_1 -- mpn by limb exact division.
+
+dnl Copyright 2001, 2002, 2004, 2005, 2006, 2010, 2011, 2012 Free Software
+dnl Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C cycles/limb
+C norm unorm
+C AMD K8,K9 11 11
+C AMD K10 11 11
+C Intel P4 ?
+C Intel core2 13.5 13.25
+C Intel corei 14.25
+C Intel atom 34 36
+C VIA nano 19.25 19.25
+
+
+C INPUT PARAMETERS
+C rp rdi
+C up rsi
+C n rdx
+C divisor rcx
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+ TEXT
+ ALIGN(16)
+C mpn_divexact_1 -- divide the n-limb number at up by the single-limb
+C divisor and store n quotient limbs at rp.  Arguments arrive in the
+C registers listed under INPUT PARAMETERS.  Limbs are produced from the
+C low end upwards by multiplying with an inverse of the divisor that is
+C correct to 64 bits.
+C
+C Register use once setup is done:
+C   rcx  count of low zero bits removed from the divisor
+C   r11  divisor with those zero bits shifted out
+C   r10  inverse of r11, grown from a table byte by the formula
+C        inv = 2*inv - inv*inv*d applied three times
+C   r8   negative limb index, incremented until it reaches zero
+C   rbx  carry (borrow) bit passed from one limb to the next
+C   r9   unorm path only: a source limb shifted right by the count,
+C        waiting to be merged with the following limb
+PROLOGUE(mpn_divexact_1)
+ FUNC_ENTRY(4)
+ push %rbx
+
+ mov %rcx, %rax
+ xor R32(%rcx), R32(%rcx) C shift count
+ mov %rdx, %r8
+
+ bt $0, R32(%rax)
+ jc L(odd) C skip bsfq unless divisor is even
+ bsf %rax, %rcx
+ shr R8(%rcx), %rax
+L(odd): mov %rax, %rbx
+ shr R32(%rax)
+ and $127, R32(%rax) C d/2, 7 bits
+
+ LEA( binvert_limb_table, %rdx)
+
+ movzbl (%rdx,%rax), R32(%rax) C inv 8 bits
+
+ mov %rbx, %r11 C d without twos
+
+ C Each of the three groups below doubles the number of valid low bits
+ C of the inverse: 8 to 16, 16 to 32, then 32 to 64.
+ lea (%rax,%rax), R32(%rdx) C 2*inv
+ imul R32(%rax), R32(%rax) C inv*inv
+ imul R32(%rbx), R32(%rax) C inv*inv*d
+ sub R32(%rax), R32(%rdx) C inv = 2*inv - inv*inv*d, 16 bits
+
+ lea (%rdx,%rdx), R32(%rax) C 2*inv
+ imul R32(%rdx), R32(%rdx) C inv*inv
+ imul R32(%rbx), R32(%rdx) C inv*inv*d
+ sub R32(%rdx), R32(%rax) C inv = 2*inv - inv*inv*d, 32 bits
+
+ lea (%rax,%rax), %r10 C 2*inv
+ imul %rax, %rax C inv*inv
+ imul %rbx, %rax C inv*inv*d
+ sub %rax, %r10 C inv = 2*inv - inv*inv*d, 64 bits
+
+ C Point rsi and rdi at the operand ends and index with a negative r8,
+ C so the loops below can finish on r8 becoming zero.
+ lea (%rsi,%r8,8), %rsi C up end
+ lea -8(%rdi,%r8,8), %rdi C rp end
+ neg %r8 C -n
+
+ mov (%rsi,%r8,8), %rax C up[0]
+
+ inc %r8
+ jz L(one)
+
+ test R32(%rcx), R32(%rcx)
+ jnz L(unorm) C branch if count != 0
+ xor R32(%rbx), R32(%rbx)
+ jmp L(nent)
+
+ C Loop used when the shift count is zero (divisor was odd).
+ ALIGN(8)
+L(ntop):mul %r11 C carry limb in rdx 0 10
+ mov -8(%rsi,%r8,8), %rax C
+ sub %rbx, %rax C apply carry bit
+ setc %bl C
+ sub %rdx, %rax C apply carry limb 5
+ adc $0, %rbx C 6
+L(nent):imul %r10, %rax C 6
+ mov %rax, (%rdi,%r8,8) C
+ inc %r8 C
+ jnz L(ntop)
+
+ mov -8(%rsi), %r9 C up high limb
+ jmp L(com)
+
+ C Nonzero shift count: the dividend is shifted right by the count as
+ C it is read.  Each neg flips cl between count and -count.  A 64-bit
+ C shift looks only at the low 6 bits of cl, so -count acts as
+ C 64-count for the left shifts.
+L(unorm):
+ mov (%rsi,%r8,8), %r9 C up[1]
+ shr R8(%rcx), %rax C
+ neg R32(%rcx)
+ shl R8(%rcx), %r9 C
+ neg R32(%rcx)
+ or %r9, %rax
+ xor R32(%rbx), R32(%rbx)
+ jmp L(uent)
+
+ ALIGN(8)
+L(utop):mul %r11 C carry limb in rdx 0 10
+ mov (%rsi,%r8,8), %rax C
+ shl R8(%rcx), %rax C
+ neg R32(%rcx)
+ or %r9, %rax
+ sub %rbx, %rax C apply carry bit
+ setc %bl C
+ sub %rdx, %rax C apply carry limb 5
+ adc $0, %rbx C 6
+L(uent):imul %r10, %rax C 6
+ mov (%rsi,%r8,8), %r9 C
+ shr R8(%rcx), %r9 C
+ neg R32(%rcx)
+ mov %rax, (%rdi,%r8,8) C
+ inc %r8 C
+ jnz L(utop)
+
+ C Last quotient limb, shared by both loops.  r9 holds the top source
+ C limb (already shifted right when coming from the unorm loop).
+L(com): mul %r11 C carry limb in rdx
+ sub %rbx, %r9 C apply carry bit
+ sub %rdx, %r9 C apply carry limb
+ imul %r10, %r9
+ mov %r9, (%rdi)
+ pop %rbx
+ FUNC_EXIT()
+ ret
+
+ C n = 1: shift the only limb by the count and multiply by the inverse.
+L(one): shr R8(%rcx), %rax
+ imul %r10, %rax
+ mov %rax, (%rdi)
+ pop %rbx
+ FUNC_EXIT()
+ ret
+EPILOGUE()
--- /dev/null
+dnl AMD64 mpn_gcd_1.
+
+dnl Copyright 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+dnl No instructions live in this file.  The mpn_gcd_1 code is supplied by
+dnl the file named on the include_mpn line at the bottom.
+dnl NOTE(review): ABI_SUPPORT presumably declares the calling conventions
+dnl (DOS64, STD64) this file may be built for -- confirm against the macro.
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+MULFUNC_PROLOGUE(mpn_gcd_1)
+include_mpn(`x86_64/core2/gcd_1.asm')
/* VIA Nano gmp-mparam.h -- Compiler/machine parameter header file.
Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007,
-2008, 2009, 2010 Free Software Foundation, Inc.
+2008, 2009, 2010, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
#define GMP_LIMB_BITS 64
#define BYTES_PER_MP_LIMB 8
+#define SHLD_SLOW 1
+#define SHRD_SLOW 1
+
/* 1600 MHz Nano 2xxx */
#define MOD_1_NORM_THRESHOLD 0 /* always */
#define MOD_1_UNNORM_THRESHOLD 0 /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD 7
-#define MOD_1U_TO_MOD_1_1_THRESHOLD 5
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD 7
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD 14
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 13
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 4
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 3
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 18
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD 20
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 8
#define USE_PREINV_DIVREM_1 1 /* native */
+#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
#define DIVEXACT_1_THRESHOLD 0 /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD 24
+#define BMOD_1_TO_MOD_1_THRESHOLD 22
-#define MUL_TOOM22_THRESHOLD 28
+#define MUL_TOOM22_THRESHOLD 27
#define MUL_TOOM33_THRESHOLD 33
-#define MUL_TOOM44_THRESHOLD 292
-#define MUL_TOOM6H_THRESHOLD 746
-#define MUL_TOOM8H_THRESHOLD 866
+#define MUL_TOOM44_THRESHOLD 290
+#define MUL_TOOM6H_THRESHOLD 718
+#define MUL_TOOM8H_THRESHOLD 915
-#define MUL_TOOM32_TO_TOOM43_THRESHOLD 73
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD 201
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD 211
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD 219
+#define MUL_TOOM32_TO_TOOM43_THRESHOLD 67
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 184
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 193
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD 193
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD 287
#define SQR_BASECASE_THRESHOLD 0 /* always (native) */
-#define SQR_TOOM2_THRESHOLD 38
-#define SQR_TOOM3_THRESHOLD 77
-#define SQR_TOOM4_THRESHOLD 620
-#define SQR_TOOM6_THRESHOLD 996
-#define SQR_TOOM8_THRESHOLD 1138
+#define SQR_TOOM2_THRESHOLD 34
+#define SQR_TOOM3_THRESHOLD 93
+#define SQR_TOOM4_THRESHOLD 587
+#define SQR_TOOM6_THRESHOLD 1095
+#define SQR_TOOM8_THRESHOLD 0 /* always */
+
+#define MULMID_TOOM42_THRESHOLD 28
-#define MULMOD_BNM1_THRESHOLD 15
+#define MULMOD_BNM1_THRESHOLD 13
#define SQRMOD_BNM1_THRESHOLD 17
-#define MUL_FFT_MODF_THRESHOLD 468 /* k = 5 */
+#define MUL_FFT_MODF_THRESHOLD 376 /* k = 5 */
#define MUL_FFT_TABLE3 \
- { { 468, 5}, { 21, 6}, { 11, 5}, { 23, 6}, \
+ { { 376, 5}, { 21, 6}, { 11, 5}, { 23, 6}, \
{ 12, 5}, { 25, 6}, { 13, 5}, { 27, 6}, \
{ 15, 5}, { 31, 6}, { 21, 7}, { 11, 6}, \
{ 24, 7}, { 13, 6}, { 27, 7}, { 15, 6}, \
{ 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \
{2097152,22}, {4194304,23}, {8388608,24} }
#define MUL_FFT_TABLE3_SIZE 215
-#define MUL_FFT_THRESHOLD 3712
+#define MUL_FFT_THRESHOLD 3200
-#define SQR_FFT_MODF_THRESHOLD 432 /* k = 5 */
+#define SQR_FFT_MODF_THRESHOLD 400 /* k = 5 */
#define SQR_FFT_TABLE3 \
- { { 432, 5}, { 21, 6}, { 11, 5}, { 23, 6}, \
+ { { 400, 5}, { 21, 6}, { 11, 5}, { 23, 6}, \
{ 12, 5}, { 25, 6}, { 21, 7}, { 11, 6}, \
{ 25, 7}, { 13, 6}, { 27, 7}, { 25, 8}, \
{ 13, 7}, { 28, 8}, { 15, 7}, { 32, 8}, \
{ 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \
{2097152,22}, {4194304,23}, {8388608,24} }
#define SQR_FFT_TABLE3_SIZE 215
-#define SQR_FFT_THRESHOLD 3264
+#define SQR_FFT_THRESHOLD 2880
-#define MULLO_BASECASE_THRESHOLD 11
-#define MULLO_DC_THRESHOLD 0 /* never mpn_mullo_basecase */
+#define MULLO_BASECASE_THRESHOLD 0 /* always */
+#define MULLO_DC_THRESHOLD 79
#define MULLO_MUL_N_THRESHOLD 6253
-#define DC_DIV_QR_THRESHOLD 53
-#define DC_DIVAPPR_Q_THRESHOLD 151
+#define DC_DIV_QR_THRESHOLD 54
+#define DC_DIVAPPR_Q_THRESHOLD 153
#define DC_BDIV_QR_THRESHOLD 51
-#define DC_BDIV_Q_THRESHOLD 79
+#define DC_BDIV_Q_THRESHOLD 52
-#define INV_MULMOD_BNM1_THRESHOLD 82
-#define INV_NEWTON_THRESHOLD 149
-#define INV_APPR_THRESHOLD 155
+#define INV_MULMOD_BNM1_THRESHOLD 52
+#define INV_NEWTON_THRESHOLD 150
+#define INV_APPR_THRESHOLD 151
-#define BINV_NEWTON_THRESHOLD 228
-#define REDC_1_TO_REDC_2_THRESHOLD 12
-#define REDC_2_TO_REDC_N_THRESHOLD 77
+#define BINV_NEWTON_THRESHOLD 232
+#define REDC_1_TO_REDC_2_THRESHOLD 13
+#define REDC_2_TO_REDC_N_THRESHOLD 55
-#define MU_DIV_QR_THRESHOLD 1787
-#define MU_DIVAPPR_Q_THRESHOLD 1970
-#define MUPI_DIV_QR_THRESHOLD 74
-#define MU_BDIV_QR_THRESHOLD 1334
-#define MU_BDIV_Q_THRESHOLD 1652
+#define MU_DIV_QR_THRESHOLD 1499
+#define MU_DIVAPPR_Q_THRESHOLD 1620
+#define MUPI_DIV_QR_THRESHOLD 75
+#define MU_BDIV_QR_THRESHOLD 1142
+#define MU_BDIV_Q_THRESHOLD 1499
+
+#define POWM_SEC_TABLE 4,29,387,1421
#define MATRIX22_STRASSEN_THRESHOLD 17
-#define HGCD_THRESHOLD 93
-#define GCD_DC_THRESHOLD 245
-#define GCDEXT_DC_THRESHOLD 456
-#define JACOBI_BASE_METHOD 1
+#define HGCD_THRESHOLD 112
+#define HGCD_APPR_THRESHOLD 185
+#define HGCD_REDUCE_THRESHOLD 3134
+#define GCD_DC_THRESHOLD 492
+#define GCDEXT_DC_THRESHOLD 465
+#define JACOBI_BASE_METHOD 4
#define GET_STR_DC_THRESHOLD 11
-#define GET_STR_PRECOMPUTE_THRESHOLD 24
-#define SET_STR_DC_THRESHOLD 552
-#define SET_STR_PRECOMPUTE_THRESHOLD 1898
+#define GET_STR_PRECOMPUTE_THRESHOLD 25
+#define SET_STR_DC_THRESHOLD 414
+#define SET_STR_PRECOMPUTE_THRESHOLD 1945
+
+#define FAC_DSC_THRESHOLD 1517
+#define FAC_ODD_THRESHOLD 44
--- /dev/null
+dnl x86-64 mpn_popcount.
+
+dnl Copyright 2007, 2011 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+
+include(`../config.m4')
+
+dnl No instructions live in this file.  The mpn_popcount code is supplied
+dnl by the x86/pentium4/sse2 file named on the include_mpn line below.
+MULFUNC_PROLOGUE(mpn_popcount)
+include_mpn(`x86/pentium4/sse2/popcount.asm')
dnl x86-64 mpn_add_n/mpn_sub_n optimized for Pentium 4.
-dnl Copyright 2007, 2008 Free Software Foundation, Inc.
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
+dnl Copyright 2007, 2008, 2010, 2011, 2012 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
C cycles/limb
-C K8,K9: 2.8
-C K10: 2.8
-C P4: 4
-C P6-15: 3.6-5 (fluctuating)
+C AMD K8,K9 2.8
+C AMD K10 2.8
+C Intel P4 4
+C Intel core2 3.6-5 (fluctuating)
+C Intel corei ?
+C Intel atom ?
+C VIA nano ?
C INPUT PARAMETERS
define(func, mpn_sub_n)
define(func_nc, mpn_sub_nc)')
-MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)
ASM_START()
-
TEXT
- ALIGN(16)
-
-PROLOGUE(func_nc)
- jmp L(ent)
-EPILOGUE()
-
PROLOGUE(func)
+ FUNC_ENTRY(4)
xor %r8, %r8
+IFDOS(` jmp L(ent) ')
+EPILOGUE()
+PROLOGUE(func_nc)
+ FUNC_ENTRY(4)
+IFDOS(` mov 56(%rsp), %r8 ')
L(ent): push %rbx
push %r12
L(ret): mov R32(%rbx), R32(%rax)
pop %r12
pop %rbx
+ FUNC_EXIT()
ret
EPILOGUE()
dnl AMD64 mpn_addlsh1_n, mpn_sublsh1_n -- rp[] = up[] +- (vp[] << 1),
-dnl optimized for Pentium 4.
+dnl optimised for Pentium 4.
-dnl Copyright 2008 Free Software Foundation, Inc.
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
+dnl Copyright 2008, 2010, 2011, 2012 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
include(`../config.m4')
-C cycles/limb
-C K8,K9: 3.8
-C K10: 4.8
-C P4: 5.8
-C P6-15: ?
-
-
-C INPUT PARAMETERS
-define(`rp',`%rdi')
-define(`up',`%rsi')
-define(`vp',`%rdx')
-define(`n', `%rcx')
+define(LSH, 1)
+define(RSH, 31) C 31, not 63, since we use 32-bit ops
ifdef(`OPERATION_addlsh1_n', `
- define(ADDSUB, add)
- define(func, mpn_addlsh1_n)')
+ define(ADDSUB, add)
+ define(func, mpn_addlsh1_n)')
ifdef(`OPERATION_sublsh1_n', `
- define(ADDSUB, sub)
- define(func, mpn_sublsh1_n)')
-
-MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_sublsh1_n)
-
-ASM_START()
- TEXT
- ALIGN(16)
-PROLOGUE(func)
- push %rbx
- push %r12
- push %rbp
-
- mov (vp), %r9
- shl %r9
- mov 4(vp), R32(%rbp)
-
- xor R32(%rbx), R32(%rbx)
+ define(ADDSUB, sub)
+ define(func, mpn_sublsh1_n)')
- mov R32(n), R32(%rax)
- and $3, R32(%rax)
- jne L(n00) C n = 0, 4, 8, ...
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
- mov (up), %r8
- mov 8(up), %r10
- shr $31, R32(%rbp)
- ADDSUB %r9, %r8
- mov 8(vp), %r9
- lea (%rbp,%r9,2), %r9
- setc R8(%rax)
- mov 12(vp), R32(%rbp)
- lea -16(rp), rp
- jmp L(L00)
-
-L(n00): cmp $2, R32(%rax)
- jnc L(n01) C n = 1, 5, 9, ...
- mov (up), %r11
- lea -8(rp), rp
- shr $31, R32(%rbp)
- ADDSUB %r9, %r11
- setc R8(%rbx)
- dec n
- jz L(1) C jump for n = 1
- mov 8(up), %r8
- mov 8(vp), %r9
- lea (%rbp,%r9,2), %r9
- mov 12(vp), R32(%rbp)
- lea 8(up), up
- lea 8(vp), vp
- jmp L(L01)
-
-L(n01): jne L(n10) C n = 2, 6, 10, ...
- mov (up), %r12
- mov 8(up), %r11
- shr $31, R32(%rbp)
- ADDSUB %r9, %r12
- mov 8(vp), %r9
- lea (%rbp,%r9,2), %r9
- setc R8(%rax)
- mov 12(vp), R32(%rbp)
- lea 16(up), up
- lea 16(vp), vp
- jmp L(L10)
-
-L(n10): mov (up), %r10
- mov 8(up), %r12
- shr $31, R32(%rbp)
- ADDSUB %r9, %r10
- mov 8(vp), %r9
- lea (%rbp,%r9,2), %r9
- setc R8(%rbx)
- mov 12(vp), R32(%rbp)
- lea -24(rp), rp
- lea -8(up), up
- lea -8(vp), vp
- jmp L(L11)
-
-L(c0): mov $1, R8(%rbx)
- jmp L(rc0)
-L(c1): mov $1, R8(%rax)
- jmp L(rc1)
-L(c2): mov $1, R8(%rbx)
- jmp L(rc2)
-
- ALIGN(16)
-L(top): mov (up), %r8 C not on critical path
- shr $31, R32(%rbp)
- ADDSUB %r9, %r11 C not on critical path
- mov (vp), %r9
- lea (%rbp,%r9,2), %r9
- setc R8(%rbx) C save carry out
- mov 4(vp), R32(%rbp)
- mov %r12, (rp)
- ADDSUB %rax, %r11 C apply previous carry out
- jc L(c0) C jump if ripple
-L(rc0):
-L(L01): mov 8(up), %r10
- shr $31, R32(%rbp)
- ADDSUB %r9, %r8
- mov 8(vp), %r9
- lea (%rbp,%r9,2), %r9
- setc R8(%rax)
- mov 12(vp), R32(%rbp)
- mov %r11, 8(rp)
- ADDSUB %rbx, %r8
- jc L(c1)
-L(rc1):
-L(L00): mov 16(up), %r12
- shr $31, R32(%rbp)
- ADDSUB %r9, %r10
- mov 16(vp), %r9
- lea (%rbp,%r9,2), %r9
- setc R8(%rbx)
- mov 20(vp), R32(%rbp)
- mov %r8, 16(rp)
- ADDSUB %rax, %r10
- jc L(c2)
-L(rc2):
-L(L11): mov 24(up), %r11
- shr $31, R32(%rbp)
- ADDSUB %r9, %r12
- mov 24(vp), %r9
- lea (%rbp,%r9,2), %r9
- lea 32(up), up
- lea 32(vp), vp
- setc R8(%rax)
- mov -4(vp), R32(%rbp)
- mov %r10, 24(rp)
- ADDSUB %rbx, %r12
- jc L(c3)
-L(rc3): lea 32(rp), rp
-L(L10): sub $4, n
- ja L(top)
-
-L(end):
- shr $31, R32(%rbp)
- ADDSUB %r9, %r11
- setc R8(%rbx)
- mov %r12, (rp)
- ADDSUB %rax, %r11
- jnc L(1)
- mov $1, R8(%rbx)
-L(1): mov %r11, 8(rp)
- lea (%rbx,%rbp), R32(%rax)
- pop %rbp
- pop %r12
- pop %rbx
- emms
- ret
-L(c3): mov $1, R8(%rax)
- jmp L(rc3)
-EPILOGUE()
-ASM_END()
+MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_sublsh1_n)
+include_mpn(`x86_64/pentium4/aorslshC_n.asm')
--- /dev/null
+dnl AMD64 mpn_addlsh2_n, mpn_sublsh2_n -- rp[] = up[] +- (vp[] << 2),
+dnl optimised for Pentium 4.
+
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
+dnl Copyright 2008, 2010, 2011, 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+dnl Parameters for the shared body in aorslshC_n.asm, included at the end.
+dnl LSH is the left shift applied to each vp limb.  RSH is the right shift
+dnl that body applies to a 32-bit load of a limb's upper half in order to
+dnl obtain the LSH bits that carry over into the next limb.
+define(LSH, 2)
+define(RSH, 30) C 30, not 62, since we use 32-bit ops
+
+dnl ADDSUB selects the arithmetic instruction and func the entry point name,
+dnl depending on which OPERATION_ symbol is defined.
+ifdef(`OPERATION_addlsh2_n', `
+ define(ADDSUB, add)
+ define(func, mpn_addlsh2_n)')
+ifdef(`OPERATION_sublsh2_n', `
+ define(ADDSUB, sub)
+ define(func, mpn_sublsh2_n)')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+MULFUNC_PROLOGUE(mpn_addlsh2_n mpn_sublsh2_n)
+include_mpn(`x86_64/pentium4/aorslshC_n.asm')
--- /dev/null
+dnl AMD64 mpn_addlshC_n, mpn_sublshC_n -- rp[] = up[] +- (vp[] << C), where
+dnl C is 1, 2, 3. Optimized for Pentium 4.
+
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
+dnl Copyright 2008, 2010, 2011, 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+C cycles/limb
+C AMD K8,K9 3.8
+C AMD K10 3.8
+C Intel P4 5.8
+C Intel core2 4.75
+C Intel corei 4.75
+C Intel atom ?
+C VIA nano 4.75
+
+
+C INPUT PARAMETERS
+define(`rp',`%rdi')
+define(`up',`%rsi')
+define(`vp',`%rdx')
+define(`n', `%rcx')
+
+define(M, eval(m4_lshift(1,LSH)))
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+ TEXT
+ ALIGN(16)
+C func -- rp[] = up[] ADDSUB (vp[] << LSH) over n limbs.
+C func, ADDSUB, LSH and RSH are not defined in this file; they are expected
+C from whichever file includes it.  M is 1 << LSH and serves as lea scale.
+C
+C The shifted vp limb is assembled in r9 as vp[i]*M plus the LSH bits that
+C spilled out of vp[i-1].  Those bits come from a 32-bit load of the upper
+C half of vp[i-1] into ebp, followed by a right shift by RSH.
+C rax and rbx take turns holding the carry (or borrow) out of one limb for
+C use in the next.  The loop handles four limbs per pass and n mod 4 picks
+C the entry point.
+C The value returned in eax is the final carry (or borrow) plus the bits
+C shifted out of the top vp limb.
+PROLOGUE(func)
+ FUNC_ENTRY(4)
+ push %rbx
+ push %r12
+ push %rbp
+
+ mov (vp), %r9
+ shl $LSH, %r9
+ mov 4(vp), R32(%rbp)
+
+ xor R32(%rbx), R32(%rbx)
+
+ mov R32(n), R32(%rax)
+ and $3, R32(%rax)
+ jne L(n00) C n = 0, 4, 8, ...
+
+ mov (up), %r8
+ mov 8(up), %r10
+ shr $RSH, R32(%rbp)
+ ADDSUB %r9, %r8
+ mov 8(vp), %r9
+ lea (%rbp,%r9,M), %r9
+ setc R8(%rax)
+ mov 12(vp), R32(%rbp)
+ lea -16(rp), rp
+ jmp L(L00)
+
+L(n00): cmp $2, R32(%rax)
+ jnc L(n01) C n = 1, 5, 9, ...
+ mov (up), %r11
+ lea -8(rp), rp
+ shr $RSH, R32(%rbp)
+ ADDSUB %r9, %r11
+ setc R8(%rbx)
+ dec n
+ jz L(1) C jump for n = 1
+ mov 8(up), %r8
+ mov 8(vp), %r9
+ lea (%rbp,%r9,M), %r9
+ mov 12(vp), R32(%rbp)
+ lea 8(up), up
+ lea 8(vp), vp
+ jmp L(L01)
+
+L(n01): jne L(n10) C n = 2, 6, 10, ...
+ mov (up), %r12
+ mov 8(up), %r11
+ shr $RSH, R32(%rbp)
+ ADDSUB %r9, %r12
+ mov 8(vp), %r9
+ lea (%rbp,%r9,M), %r9
+ setc R8(%rax)
+ mov 12(vp), R32(%rbp)
+ lea 16(up), up
+ lea 16(vp), vp
+ jmp L(L10)
+
+ C Remaining case: n = 3, 7, 11, ...
+L(n10): mov (up), %r10
+ mov 8(up), %r12
+ shr $RSH, R32(%rbp)
+ ADDSUB %r9, %r10
+ mov 8(vp), %r9
+ lea (%rbp,%r9,M), %r9
+ setc R8(%rbx)
+ mov 12(vp), R32(%rbp)
+ lea -24(rp), rp
+ lea -8(up), up
+ lea -8(vp), vp
+ jmp L(L11)
+
+ C Out-of-line fixups, reached when applying the previous carry itself
+ C overflowed.  Each sets the current carry register to 1 and rejoins
+ C the main path.  The matching stub for rc3 sits after the final ret.
+L(c0): mov $1, R8(%rbx)
+ jmp L(rc0)
+L(c1): mov $1, R8(%rax)
+ jmp L(rc1)
+L(c2): mov $1, R8(%rbx)
+ jmp L(rc2)
+
+ ALIGN(16)
+L(top): mov (up), %r8 C not on critical path
+ shr $RSH, R32(%rbp)
+ ADDSUB %r9, %r11 C not on critical path
+ mov (vp), %r9
+ lea (%rbp,%r9,M), %r9
+ setc R8(%rbx) C save carry out
+ mov 4(vp), R32(%rbp)
+ mov %r12, (rp)
+ ADDSUB %rax, %r11 C apply previous carry out
+ jc L(c0) C jump if ripple
+L(rc0):
+L(L01): mov 8(up), %r10
+ shr $RSH, R32(%rbp)
+ ADDSUB %r9, %r8
+ mov 8(vp), %r9
+ lea (%rbp,%r9,M), %r9
+ setc R8(%rax)
+ mov 12(vp), R32(%rbp)
+ mov %r11, 8(rp)
+ ADDSUB %rbx, %r8
+ jc L(c1)
+L(rc1):
+L(L00): mov 16(up), %r12
+ shr $RSH, R32(%rbp)
+ ADDSUB %r9, %r10
+ mov 16(vp), %r9
+ lea (%rbp,%r9,M), %r9
+ setc R8(%rbx)
+ mov 20(vp), R32(%rbp)
+ mov %r8, 16(rp)
+ ADDSUB %rax, %r10
+ jc L(c2)
+L(rc2):
+L(L11): mov 24(up), %r11
+ shr $RSH, R32(%rbp)
+ ADDSUB %r9, %r12
+ mov 24(vp), %r9
+ lea (%rbp,%r9,M), %r9
+ lea 32(up), up
+ lea 32(vp), vp
+ setc R8(%rax)
+ mov -4(vp), R32(%rbp)
+ mov %r10, 24(rp)
+ ADDSUB %rbx, %r12
+ jc L(c3)
+L(rc3): lea 32(rp), rp
+L(L10): sub $4, n
+ ja L(top)
+
+ C Wind down: finish the two limbs still held in r12 and r11.
+L(end):
+ shr $RSH, R32(%rbp)
+ ADDSUB %r9, %r11
+ setc R8(%rbx)
+ mov %r12, (rp)
+ ADDSUB %rax, %r11
+ jnc L(1)
+ mov $1, R8(%rbx)
+L(1): mov %r11, 8(rp)
+ lea (%rbx,%rbp), R32(%rax)
+ pop %rbp
+ pop %r12
+ pop %rbx
+ C NOTE(review): no MMX instructions are visible in this function, so
+ C the emms below looks unnecessary -- confirm before removing.
+ emms
+ FUNC_EXIT()
+ ret
+L(c3): mov $1, R8(%rax)
+ jmp L(rc3)
+EPILOGUE()
+ASM_END()
/* Pentium 4-64 gmp-mparam.h -- Compiler/machine parameter header file.
Copyright 1991, 1993, 1994, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007,
-2008, 2009, 2012 Free Software Foundation, Inc.
+2008, 2009, 2010 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
/* These routines exist for all x86_64 chips, but they are slower on Pentium4
than separate add/sub and shift. Make sure they are not really used. */
-#undef HAVE_NATIVE_mpn_rsh1add_n
-#undef HAVE_NATIVE_mpn_rsh1sub_n
+#undef HAVE_NATIVE_mpn_rsblsh1_n
+#undef HAVE_NATIVE_mpn_rsblsh2_n
+#undef HAVE_NATIVE_mpn_addlsh_n
+#undef HAVE_NATIVE_mpn_rsblsh_n
-/* 3200 MHz Pentium / 2048 Kibyte cache / socket 775 */
+/* 3400 MHz Pentium / 1024 Kibyte cache */
#define MOD_1_NORM_THRESHOLD 0 /* always */
#define MOD_1_UNNORM_THRESHOLD 0 /* always */
-#define MOD_1N_TO_MOD_1_1_THRESHOLD 6
-#define MOD_1U_TO_MOD_1_1_THRESHOLD 4
-#define MOD_1_1_TO_MOD_1_2_THRESHOLD 9
-#define MOD_1_2_TO_MOD_1_4_THRESHOLD 16
-#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 11
+#define MOD_1N_TO_MOD_1_1_THRESHOLD 4
+#define MOD_1U_TO_MOD_1_1_THRESHOLD 2
+#define MOD_1_1_TO_MOD_1_2_THRESHOLD 14
+#define MOD_1_2_TO_MOD_1_4_THRESHOLD 36
+#define PREINV_MOD_1_TO_MOD_1_THRESHOLD 8
#define USE_PREINV_DIVREM_1 1 /* native */
+#define DIV_QR_2_PI2_THRESHOLD MP_SIZE_T_MAX /* never */
#define DIVEXACT_1_THRESHOLD 0 /* always (native) */
-#define BMOD_1_TO_MOD_1_THRESHOLD 24
+#define BMOD_1_TO_MOD_1_THRESHOLD 20
-#define MUL_TOOM22_THRESHOLD 12
-#define MUL_TOOM33_THRESHOLD 81
-#define MUL_TOOM44_THRESHOLD 121
-#define MUL_TOOM6H_THRESHOLD 270
-#define MUL_TOOM8H_THRESHOLD 430
+#define MUL_TOOM22_THRESHOLD 11
+#define MUL_TOOM33_THRESHOLD 68
+#define MUL_TOOM44_THRESHOLD 120
+#define MUL_TOOM6H_THRESHOLD 157
+#define MUL_TOOM8H_THRESHOLD 236
#define MUL_TOOM32_TO_TOOM43_THRESHOLD 81
-#define MUL_TOOM32_TO_TOOM53_THRESHOLD 138
-#define MUL_TOOM42_TO_TOOM53_THRESHOLD 144
-#define MUL_TOOM42_TO_TOOM63_THRESHOLD 88
+#define MUL_TOOM32_TO_TOOM53_THRESHOLD 131
+#define MUL_TOOM42_TO_TOOM53_THRESHOLD 122
+#define MUL_TOOM42_TO_TOOM63_THRESHOLD 80
+#define MUL_TOOM43_TO_TOOM54_THRESHOLD 106
#define SQR_BASECASE_THRESHOLD 0 /* always (native) */
-#define SQR_TOOM2_THRESHOLD 20
+#define SQR_TOOM2_THRESHOLD 18
#define SQR_TOOM3_THRESHOLD 81
-#define SQR_TOOM4_THRESHOLD 226
-#define SQR_TOOM6_THRESHOLD 303
-#define SQR_TOOM8_THRESHOLD 454
+#define SQR_TOOM4_THRESHOLD 214
+#define SQR_TOOM6_THRESHOLD 238
+#define SQR_TOOM8_THRESHOLD 430
-#define MULMOD_BNM1_THRESHOLD 9
-#define SQRMOD_BNM1_THRESHOLD 11
+#define MULMID_TOOM42_THRESHOLD 16
+#define MULMOD_BNM1_THRESHOLD 9
+#define SQRMOD_BNM1_THRESHOLD 9
-#define MUL_FFT_MODF_THRESHOLD 240 /* k = 5 */
+#define MUL_FFT_MODF_THRESHOLD 236 /* k = 5 */
#define MUL_FFT_TABLE3 \
- { { 240, 5}, { 9, 4}, { 19, 5}, { 11, 6}, \
- { 6, 5}, { 13, 6}, { 7, 5}, { 15, 6}, \
- { 8, 5}, { 17, 6}, { 9, 5}, { 19, 6}, \
- { 13, 7}, { 7, 6}, { 15, 7}, { 8, 6}, \
- { 17, 7}, { 9, 6}, { 19, 7}, { 11, 6}, \
- { 23, 7}, { 13, 8}, { 7, 7}, { 17, 8}, \
- { 9, 7}, { 21, 8}, { 11, 7}, { 23, 8}, \
- { 13, 9}, { 7, 8}, { 15, 7}, { 31, 8}, \
+ { { 236, 5}, { 9, 4}, { 19, 5}, { 13, 6}, \
+ { 9, 5}, { 19, 6}, { 13, 7}, { 7, 6}, \
+ { 15, 7}, { 8, 6}, { 17, 7}, { 9, 6}, \
+ { 19, 7}, { 11, 6}, { 23, 7}, { 13, 8}, \
+ { 7, 7}, { 17, 8}, { 9, 7}, { 21, 8}, \
+ { 11, 7}, { 23, 8}, { 13, 9}, { 7, 8}, \
{ 21, 9}, { 11, 8}, { 25,10}, { 7, 9}, \
{ 15, 8}, { 33, 9}, { 19, 8}, { 39, 9}, \
{ 23, 8}, { 47, 9}, { 27,10}, { 15, 9}, \
{ 39,10}, { 23, 9}, { 51,11}, { 15,10}, \
- { 31, 9}, { 63,10}, { 39, 9}, { 79,10}, \
- { 47,11}, { 31,10}, { 63, 9}, { 127, 8}, \
- { 255,10}, { 79, 9}, { 159,11}, { 47,10}, \
- { 95,12}, { 31,11}, { 63,10}, { 127, 9}, \
+ { 31, 9}, { 67,10}, { 39, 9}, { 79,10}, \
+ { 47, 9}, { 95,10}, { 55,11}, { 31,10}, \
+ { 79,11}, { 47, 9}, { 191,12}, { 31,11}, \
+ { 63,10}, { 127, 9}, { 255,10}, { 143, 9}, \
{ 287,11}, { 79,10}, { 159, 9}, { 319,10}, \
- { 175,11}, { 95,10}, { 191, 9}, { 383,10}, \
- { 207, 9}, { 415,11}, { 111,10}, { 223,12}, \
- { 63,11}, { 127,10}, { 255,11}, { 143,10}, \
- { 287,11}, { 159,10}, { 319,11}, { 175,12}, \
- { 95,11}, { 223,13}, { 63,12}, { 127,11}, \
- { 287,10}, { 575,12}, { 159,11}, { 319,10}, \
- { 639,11}, { 351,12}, { 191,11}, { 383,12}, \
- { 223,11}, { 447,13}, { 127,12}, { 255,11}, \
- { 511,12}, { 287,11}, { 575,12}, { 319,11}, \
- { 639,12}, { 351,13}, { 191,12}, { 415,11}, \
- { 831,12}, { 447,14}, { 127,13}, { 255,12}, \
- { 511,11}, { 1023,12}, { 543,11}, { 1087,10}, \
- { 2175,12}, { 575,13}, { 319,12}, { 639,11}, \
- { 1279,12}, { 703,11}, { 1407,13}, { 383,12}, \
- { 767,11}, { 1535,12}, { 831,11}, { 1663,13}, \
- { 447,14}, { 255,13}, { 511,12}, { 1023,11}, \
- { 2047,12}, { 1087,11}, { 2175,13}, { 575,12}, \
- { 1151,11}, { 2303,12}, { 1215,11}, { 2431,10}, \
- { 4863,13}, { 639,12}, { 1279,11}, { 2559,13}, \
- { 703,12}, { 1407,14}, { 383,13}, { 767,12}, \
- { 1535,13}, { 831,12}, { 1663,13}, { 895,15}, \
- { 255,14}, { 511,13}, { 1023,12}, { 2047,13}, \
- { 1087,12}, { 2175,13}, { 1215,12}, { 2431,11}, \
- { 4863,14}, { 639,13}, { 1407,12}, { 2815,13}, \
- { 1471,14}, { 767,13}, { 1663,14}, { 895,13}, \
- { 1791,12}, { 3583,13}, { 1919,12}, { 3839,15}, \
- { 511,14}, { 1023,13}, { 2175,14}, { 1151,13}, \
- { 2303,12}, { 4607,13}, { 2431,12}, { 4863,14}, \
- { 1279,13}, { 2687,14}, { 1407,13}, { 2815,15}, \
- { 767,14}, { 1791,13}, { 3583,14}, { 1919,13}, \
- { 3839,12}, { 7679,16}, { 511,15}, { 1023,14}, \
- { 2303,13}, { 4607,14}, { 2431,13}, { 4863,15}, \
- { 1279,14}, { 2943,13}, { 5887,15}, { 1535,14}, \
- { 3199,15}, { 1791,14}, { 3839,13}, { 7679,16}, \
- { 1023,15}, { 2047,14}, { 4351,15}, { 2303,14}, \
- { 4863,15}, { 2815,14}, { 5887,13}, { 11775,16}, \
- { 1535,15}, { 3071,14}, { 6655,15}, { 32768,16}, \
- { 65536,17}, { 131072,18}, { 262144,19}, { 524288,20}, \
- {1048576,21}, {2097152,22}, {4194304,23}, {8388608,24} }
-#define MUL_FFT_TABLE3_SIZE 224
-#define MUL_FFT_THRESHOLD 2752
-
-#define SQR_FFT_MODF_THRESHOLD 240 /* k = 5 */
+ { 175, 9}, { 351,11}, { 95,10}, { 191, 9}, \
+ { 383,10}, { 223,12}, { 63,11}, { 127,10}, \
+ { 255,11}, { 143,10}, { 287, 9}, { 575,10}, \
+ { 303,11}, { 159,10}, { 319,11}, { 175,12}, \
+ { 95,11}, { 191,10}, { 383,11}, { 223,13}, \
+ { 8192,14}, { 16384,15}, { 32768,16}, { 65536,17}, \
+ { 131072,18}, { 262144,19}, { 524288,20}, {1048576,21}, \
+ {2097152,22}, {4194304,23}, {8388608,24} }
+#define MUL_FFT_TABLE3_SIZE 91
+#define MUL_FFT_THRESHOLD 2240
+
+#define SQR_FFT_MODF_THRESHOLD 216 /* k = 5 */
#define SQR_FFT_TABLE3 \
- { { 240, 5}, { 11, 6}, { 6, 5}, { 13, 6}, \
- { 15, 7}, { 8, 6}, { 19, 7}, { 10, 6}, \
- { 21, 7}, { 13, 8}, { 7, 7}, { 21, 8}, \
- { 11, 7}, { 25, 8}, { 13, 9}, { 7, 8}, \
- { 15, 7}, { 31, 8}, { 21, 9}, { 11, 8}, \
- { 25,10}, { 7, 9}, { 15, 8}, { 33, 9}, \
- { 19, 8}, { 39, 9}, { 23, 8}, { 47, 9}, \
- { 27,10}, { 15, 9}, { 39,10}, { 23, 9}, \
- { 51,11}, { 15,10}, { 31, 9}, { 63, 8}, \
- { 127,10}, { 39, 9}, { 79,10}, { 47,11}, \
- { 31,10}, { 63, 9}, { 127, 8}, { 255,10}, \
- { 71, 9}, { 143, 7}, { 575,10}, { 79,11}, \
- { 47,12}, { 31,11}, { 63,10}, { 127, 9}, \
- { 255,10}, { 143, 9}, { 287,11}, { 79, 9}, \
- { 319,10}, { 191, 9}, { 383,10}, { 207,12}, \
+ { { 216, 5}, { 13, 6}, { 9, 5}, { 19, 6}, \
+ { 15, 7}, { 8, 6}, { 17, 7}, { 9, 6}, \
+ { 19, 7}, { 10, 6}, { 21, 7}, { 17, 8}, \
+ { 9, 7}, { 21, 8}, { 11, 7}, { 24, 8}, \
+ { 13, 9}, { 7, 8}, { 15, 7}, { 31, 8}, \
+ { 21, 9}, { 11, 8}, { 27,10}, { 7, 9}, \
+ { 15, 8}, { 33, 9}, { 19, 8}, { 39, 9}, \
+ { 23, 8}, { 47, 9}, { 27,10}, { 15, 9}, \
+ { 39,10}, { 23, 9}, { 47,11}, { 15,10}, \
+ { 31, 9}, { 63, 8}, { 127,10}, { 39, 9}, \
+ { 79,10}, { 55,11}, { 31,10}, { 63, 9}, \
+ { 127, 8}, { 255,10}, { 71, 9}, { 143, 8}, \
+ { 287,10}, { 79,11}, { 47,10}, { 95, 9}, \
+ { 191,12}, { 31,11}, { 63, 9}, { 255,10}, \
+ { 143, 9}, { 287,11}, { 79,10}, { 159, 9}, \
+ { 319,10}, { 175, 9}, { 351,11}, { 95,10}, \
+ { 191, 9}, { 383,10}, { 207,11}, { 111,12}, \
{ 63,11}, { 127,10}, { 255,11}, { 143,10}, \
- { 287, 9}, { 575,11}, { 159,10}, { 319,11}, \
- { 175,10}, { 351,12}, { 95,11}, { 191,10}, \
- { 383,11}, { 223,13}, { 63,12}, { 127,11}, \
- { 287,12}, { 159,11}, { 351,12}, { 191,11}, \
- { 383,10}, { 767,11}, { 415,12}, { 223,11}, \
- { 447,13}, { 127,12}, { 255,11}, { 511,12}, \
- { 287,11}, { 575,12}, { 319,11}, { 639,12}, \
- { 351,13}, { 191,12}, { 383,11}, { 767,12}, \
- { 415,11}, { 831,12}, { 447,14}, { 127,13}, \
- { 255,12}, { 511,11}, { 1023,12}, { 543,11}, \
- { 1087,10}, { 2175,12}, { 575,13}, { 319,12}, \
- { 639,11}, { 1279,12}, { 671,11}, { 1343,12}, \
- { 703,13}, { 383,12}, { 767,11}, { 1535,12}, \
- { 831,13}, { 447,14}, { 255,13}, { 511,12}, \
- { 1023,11}, { 2047,12}, { 1087,11}, { 2175,13}, \
- { 575,12}, { 1151,11}, { 2303,12}, { 1215,13}, \
- { 639,12}, { 1279,11}, { 2559,12}, { 1343,13}, \
- { 703,14}, { 383,13}, { 767,12}, { 1535,13}, \
- { 831,12}, { 1663,15}, { 255,14}, { 511,13}, \
- { 1023,12}, { 2047,13}, { 1087,12}, { 2175,13}, \
- { 1151,12}, { 2303,13}, { 1215,14}, { 639,13}, \
- { 1343,12}, { 2687,13}, { 1407,12}, { 2815,14}, \
- { 767,13}, { 1663,14}, { 895,13}, { 1791,12}, \
- { 3583,13}, { 1919,12}, { 3839,15}, { 511,14}, \
- { 1023,13}, { 2175,14}, { 1151,13}, { 2303,12}, \
- { 4607,13}, { 2431,12}, { 4863,14}, { 1279,13}, \
- { 2687,14}, { 1407,13}, { 2815,15}, { 767,14}, \
- { 1535,13}, { 3071,14}, { 1791,13}, { 3583,14}, \
- { 1919,13}, { 3839,12}, { 7679,16}, { 511,15}, \
- { 1023,14}, { 2175,13}, { 4351,14}, { 2303,13}, \
- { 4607,14}, { 2431,13}, { 4863,15}, { 1279,14}, \
- { 2815,13}, { 5631,14}, { 2943,13}, { 5887,12}, \
- { 11775,15}, { 1535,14}, { 3199,15}, { 1791,14}, \
- { 3583,13}, { 7167,14}, { 3839,13}, { 7679,16}, \
- { 1023,15}, { 2047,14}, { 4351,15}, { 2303,14}, \
- { 4863,15}, { 2815,14}, { 5887,13}, { 11775,16}, \
- { 1535,15}, { 3071,14}, { 6655,15}, { 3583,14}, \
- { 7167,15}, { 32768,16}, { 65536,17}, { 131072,18}, \
- { 262144,19}, { 524288,20}, {1048576,21}, {2097152,22}, \
- {4194304,23}, {8388608,24} }
-#define SQR_FFT_TABLE3_SIZE 222
-#define SQR_FFT_THRESHOLD 2240
+ { 287,11}, { 159,10}, { 319, 9}, { 639,11}, \
+ { 175,10}, { 351,12}, { 95,11}, { 207,10}, \
+ { 415,11}, { 223,13}, { 8192,14}, { 16384,15}, \
+ { 32768,16}, { 65536,17}, { 131072,18}, { 262144,19}, \
+ { 524288,20}, {1048576,21}, {2097152,22}, {4194304,23}, \
+ {8388608,24} }
+#define SQR_FFT_TABLE3_SIZE 93
+#define SQR_FFT_THRESHOLD 1984
#define MULLO_BASECASE_THRESHOLD 0 /* always */
-#define MULLO_DC_THRESHOLD 27
-#define MULLO_MUL_N_THRESHOLD 5240
+#define MULLO_DC_THRESHOLD 33
+#define MULLO_MUL_N_THRESHOLD 4392
-#define DC_DIV_QR_THRESHOLD 28
+#define DC_DIV_QR_THRESHOLD 27
#define DC_DIVAPPR_Q_THRESHOLD 60
-#define DC_BDIV_QR_THRESHOLD 31
-#define DC_BDIV_Q_THRESHOLD 49
+#define DC_BDIV_QR_THRESHOLD 27
+#define DC_BDIV_Q_THRESHOLD 38
-#define INV_MULMOD_BNM1_THRESHOLD 22
-#define INV_NEWTON_THRESHOLD 226
-#define INV_APPR_THRESHOLD 108
+#define INV_MULMOD_BNM1_THRESHOLD 20
+#define INV_NEWTON_THRESHOLD 202
+#define INV_APPR_THRESHOLD 106
-#define BINV_NEWTON_THRESHOLD 262
-#define REDC_1_TO_REDC_2_THRESHOLD 11
-#define REDC_2_TO_REDC_N_THRESHOLD 44
+#define BINV_NEWTON_THRESHOLD 198
+#define REDC_1_TO_REDC_2_THRESHOLD 16
+#define REDC_2_TO_REDC_N_THRESHOLD 43
#define MU_DIV_QR_THRESHOLD 979
-#define MU_DIVAPPR_Q_THRESHOLD 1078
-#define MUPI_DIV_QR_THRESHOLD 91
-#define MU_BDIV_QR_THRESHOLD 792
+#define MU_DIVAPPR_Q_THRESHOLD 979
+#define MUPI_DIV_QR_THRESHOLD 92
+#define MU_BDIV_QR_THRESHOLD 807
#define MU_BDIV_Q_THRESHOLD 942
-#define MATRIX22_STRASSEN_THRESHOLD 21
-#define HGCD_THRESHOLD 97
-#define GCD_DC_THRESHOLD 217
-#define GCDEXT_DC_THRESHOLD 237
-#define JACOBI_BASE_METHOD 1
+#define POWM_SEC_TABLE 6,65,192,792,2578
+
+#define MATRIX22_STRASSEN_THRESHOLD 17
+#define HGCD_THRESHOLD 99
+#define HGCD_APPR_THRESHOLD 121
+#define HGCD_REDUCE_THRESHOLD 1679
+#define GCD_DC_THRESHOLD 205
+#define GCDEXT_DC_THRESHOLD 225
+#define JACOBI_BASE_METHOD 4
+
+#define GET_STR_DC_THRESHOLD 13
+#define GET_STR_PRECOMPUTE_THRESHOLD 25
+#define SET_STR_DC_THRESHOLD 232
+#define SET_STR_PRECOMPUTE_THRESHOLD 1585
-#define GET_STR_DC_THRESHOLD 12
-#define GET_STR_PRECOMPUTE_THRESHOLD 23
-#define SET_STR_DC_THRESHOLD 572
-#define SET_STR_PRECOMPUTE_THRESHOLD 1588
+#define FAC_DSC_THRESHOLD 1127
+#define FAC_ODD_THRESHOLD 0 /* always */
dnl x86-64 mpn_lshift optimized for Pentium 4.
-dnl Copyright 2003, 2005, 2007, 2008 Free Software Foundation, Inc.
-dnl
+dnl Copyright 2003, 2005, 2007, 2008, 2012 Free Software Foundation, Inc.
+
dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or
-dnl modify it under the terms of the GNU Lesser General Public License as
-dnl published by the Free Software Foundation; either version 3 of the
-dnl License, or (at your option) any later version.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful,
-dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
-dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-dnl Lesser General Public License for more details.
-dnl
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
dnl You should have received a copy of the GNU Lesser General Public License
dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
C cycles/limb
-C K8,K9: 2.5
-C K10: ?
-C P4: 3.29
-C P6-15 (Core2): 2.1 (fluctuates, presumably cache related)
-C P6-28 (Atom): 14.3
+C AMD K8,K9 2.5
+C AMD K10 ?
+C Intel P4 3.29
+C Intel core2 2.1 (fluctuates, presumably cache related)
+C Intel corei ?
+C Intel atom 14.3
+C VIA nano ?
C INPUT PARAMETERS
define(`rp',`%rdi')
define(`n',`%rdx')
define(`cnt',`%cl')
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
ASM_START()
TEXT
ALIGN(32)
PROLOGUE(mpn_lshift)
+ FUNC_ENTRY(4)
mov -8(up,n,8), %rax
- movd %ecx, %mm4
- neg %ecx C put rsh count in cl
- and $63, %ecx
- movd %ecx, %mm5
+ movd R32(%rcx), %mm4
+ neg R32(%rcx) C put rsh count in cl
+ and $63, R32(%rcx)
+ movd R32(%rcx), %mm5
- lea 1(n), %r8d
+ lea 1(n), R32(%r8)
- shr %cl, %rax C function return value
+ shr R8(%rcx), %rax C function return value
- and $3, %r8d
+ and $3, R32(%r8)
je L(rol) C jump for n = 3, 7, 11, ...
- dec %r8d
+ dec R32(%r8)
jne L(1)
C n = 4, 8, 12, ...
movq -8(up,n,8), %mm2
dec n
jmp L(rol)
-L(1): dec %r8d
+L(1): dec R32(%r8)
je L(1x) C jump for n = 1, 5, 9, 13, ...
C n = 2, 6, 10, 14, ...
movq -8(up,n,8), %mm2
jae L(top) C 2
L(end):
- movq 16(up,n,8), %mm0
+ movq 8(up), %mm0
psrlq %mm5, %mm0
por %mm0, %mm2
- movq 8(up,n,8), %mm1
+ movq (up), %mm1
psrlq %mm5, %mm1
por %mm1, %mm3
- movq %mm2, 24(rp,n,8)
- movq %mm3, 16(rp,n,8)
+ movq %mm2, 16(rp)
+ movq %mm3, 8(rp)
L(ast): movq (up), %mm2
psllq %mm4, %mm2
movq %mm2, (rp)
emms
+ FUNC_EXIT()
ret
EPILOGUE()
--- /dev/null
+dnl x86-64 mpn_lshiftc optimized for Pentium 4.
+
+dnl Copyright 2003, 2005, 2007, 2008, 2010, 2012 Free Software Foundation,
+dnl Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C cycles/limb
+C AMD K8,K9 ?
+C AMD K10 ?
+C Intel P4 4.15
+C Intel core2 ?
+C Intel corei ?
+C Intel atom ?
+C VIA nano ?
+
+C INPUT PARAMETERS
+define(`rp',`%rdi')
+define(`up',`%rsi')
+define(`n',`%rdx')
+define(`cnt',`%cl')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+ TEXT
+ ALIGN(32)
+C mpn_lshiftc: shift the n-limb operand at up left by cnt bits and store the
+C one's complement of the shifted limbs at rp.  Limbs are processed from the
+C most significant end downwards.  The value returned in %rax is the top
+C source limb shifted right by (-cnt) & 63; it is not complemented.
+C
+C MMX register use:
+C   %mm4  left shift count (cnt)
+C   %mm5  right shift count, (-cnt) & 63
+C   %mm6  all ones, used with pxor to complement a partial result
+C
+C Each result limb is ~((hi << cnt) | (lo >> rsh)).  It is formed without a
+C por: one partial is complemented with pxor, and pandn then computes
+C ~other & complemented_partial, which is the same value by De Morgan.
+PROLOGUE(mpn_lshiftc)
+ FUNC_ENTRY(4)
+ mov -8(up,n,8), %rax
+ pcmpeqd %mm6, %mm6 C 0xffff...fff
+ movd R32(%rcx), %mm4
+ neg R32(%rcx) C put rsh count in cl
+ and $63, R32(%rcx)
+ movd R32(%rcx), %mm5
+
+C r8 = (n + 1) mod 4 selects the feed-in path that brings the remaining limb
+C count to 3 mod 4 before entering the 4-way unrolled loop.
+ lea 1(n), R32(%r8)
+
+ shr R8(%rcx), %rax C function return value
+
+ and $3, R32(%r8)
+ je L(rol) C jump for n = 3, 7, 11, ...
+
+ dec R32(%r8)
+ jne L(1)
+C n = 4, 8, 12, ...
+C Peel off the top limb, leaving a count that is 3 mod 4.
+ movq -8(up,n,8), %mm2
+ psllq %mm4, %mm2
+ movq -16(up,n,8), %mm0
+ pxor %mm6, %mm2
+ psrlq %mm5, %mm0
+ pandn %mm2, %mm0
+ movq %mm0, -8(rp,n,8)
+ dec n
+ jmp L(rol)
+
+L(1): dec R32(%r8)
+ je L(1x) C jump for n = 1, 5, 9, 13, ...
+C n = 2, 6, 10, 14, ...
+C Peel off the top limb, then fall into the n = 1 mod 4 handling.
+ movq -8(up,n,8), %mm2
+ psllq %mm4, %mm2
+ movq -16(up,n,8), %mm0
+ pxor %mm6, %mm2
+ psrlq %mm5, %mm0
+ pandn %mm2, %mm0
+ movq %mm0, -8(rp,n,8)
+ dec n
+L(1x):
+C With a single limb left only the final store at L(ast) is needed;
+C otherwise peel off two more limbs so the count becomes 3 mod 4.
+ cmp $1, n
+ je L(ast)
+ movq -8(up,n,8), %mm2
+ psllq %mm4, %mm2
+ movq -16(up,n,8), %mm3
+ psllq %mm4, %mm3
+ movq -16(up,n,8), %mm0
+ movq -24(up,n,8), %mm1
+ pxor %mm6, %mm2
+ psrlq %mm5, %mm0
+ pandn %mm2, %mm0
+ pxor %mm6, %mm3
+ psrlq %mm5, %mm1
+ pandn %mm3, %mm1
+ movq %mm0, -8(rp,n,8)
+ movq %mm1, -16(rp,n,8)
+ sub $2, n
+
+C Start the left shifts for the two highest remaining limbs; they are
+C completed either in L(top) or, when fewer than four limbs remain, in L(end).
+L(rol): movq -8(up,n,8), %mm2
+ psllq %mm4, %mm2
+ movq -16(up,n,8), %mm3
+ psllq %mm4, %mm3
+
+ sub $4, n
+ jb L(end)
+ ALIGN(32)
+L(top):
+ C finish stuff from lsh block
+ movq 16(up,n,8), %mm0
+ pxor %mm6, %mm2
+ movq 8(up,n,8), %mm1
+ psrlq %mm5, %mm0
+ psrlq %mm5, %mm1
+ pandn %mm2, %mm0
+ pxor %mm6, %mm3
+ movq %mm0, 24(rp,n,8)
+ movq (up,n,8), %mm0
+ pandn %mm3, %mm1
+ movq %mm1, 16(rp,n,8)
+ movq -8(up,n,8), %mm1
+ C start two new rsh
+ psrlq %mm5, %mm0
+ psrlq %mm5, %mm1
+
+ C finish stuff from rsh block
+ movq 8(up,n,8), %mm2
+ pxor %mm6, %mm0
+ movq (up,n,8), %mm3
+ psllq %mm4, %mm2
+ psllq %mm4, %mm3
+ pandn %mm0, %mm2
+ pxor %mm6, %mm1
+ movq %mm2, 8(rp,n,8)
+ movq -8(up,n,8), %mm2
+ pandn %mm1, %mm3
+ movq %mm3, (rp,n,8)
+ movq -16(up,n,8), %mm3
+ C start two new lsh
+ sub $4, n
+ psllq %mm4, %mm2
+ psllq %mm4, %mm3
+
+ jae L(top)
+
+C Wind-down: complete the two pending limbs with the right-shifted parts of
+C up[1] and up[0], storing them at rp[2] and rp[1].
+L(end): pxor %mm6, %mm2
+ movq 8(up), %mm0
+ psrlq %mm5, %mm0
+ pandn %mm2, %mm0
+ pxor %mm6, %mm3
+ movq (up), %mm1
+ psrlq %mm5, %mm1
+ pandn %mm3, %mm1
+ movq %mm0, 16(rp)
+ movq %mm1, 8(rp)
+
+C The lowest result limb has no lower neighbour: rp[0] = ~(up[0] << cnt).
+L(ast): movq (up), %mm2
+ psllq %mm4, %mm2
+ pxor %mm6, %mm2
+ movq %mm2, (rp)
+ emms
+ FUNC_EXIT()
+ ret
+EPILOGUE()
--- /dev/null
+dnl AMD64 mpn_mod_34lsub1 -- remainder modulo 2^48-1.
+
+dnl Copyright 2000, 2001, 2002, 2004, 2005, 2007, 2010, 2011, 2012 Free
+dnl Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C cycles/limb
+C AMD K8,K9 1.0
+C AMD K10 1.12
+C Intel P4 3.25
+C Intel core2 1.5
+C Intel corei 1.5
+C Intel atom 2.5
+C VIA nano 1.75
+
+
+C INPUT PARAMETERS
+define(`ap', %rdi)
+define(`n', %rsi)
+
+C mp_limb_t mpn_mod_34lsub1 (mp_srcptr up, mp_size_t n)
+
+C TODO
+C * Review feed-in and wind-down code. In particular, try to avoid adc and
+C sbb to placate Pentium4.
+C * It seems possible to reach 2.67 c/l by using a cleaner 6-way unrolling,
+C without the dual loop exits.
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+ TEXT
+ ALIGN(32)
+C mpn_mod_34lsub1: fold the n-limb operand at ap into a single limb that is
+C congruent to it modulo 2^48-1.  The result in %rax is a plain sum of
+C sub-limb fields; no final reduction to a canonical residue is done here.
+C
+C Since 2^192 = (2^48)^4, limbs repeat their weight with period 3:
+C   limb index 0 mod 3 has weight 2^0, 1 mod 3 has 2^16, 2 mod 3 has 2^32.
+C The main loop therefore keeps three limb accumulators (%rax, %rcx, %rdx)
+C and three carry counters (%r10, %r8, %r9), and combines them at the end.
+C %r11 holds the 48-bit mask throughout.
+PROLOGUE(mpn_mod_34lsub1)
+ FUNC_ENTRY(2)
+
+ mov $0x0000FFFFFFFFFFFF, %r11
+
+ sub $2, %rsi
+ ja L(gt2)
+
+C n <= 2.  The flags from the sub above are still live here, since mov and
+C nop leave them untouched: jb is taken when n < 2 and returns ap[0] as is.
+ mov (ap), %rax
+ nop
+ jb L(1)
+
+C n = 2: split both limbs at the 48-bit boundaries and add the pieces.
+ mov 8(ap), %rsi
+ mov %rax, %rdx
+ shr $48, %rax C src[0] high
+
+ and %r11, %rdx C src[0] low
+ add %rdx, %rax
+ mov R32(%rsi), R32(%rdx)
+
+ shr $32, %rsi C src[1] high
+ add %rsi, %rax
+
+ shl $16, %rdx C src[1] low
+ add %rdx, %rax
+
+L(1): FUNC_EXIT()
+ ret
+
+
+ ALIGN(16)
+C n > 2: clear the three accumulators and the three carry counters.
+L(gt2): xor R32(%rax), R32(%rax)
+ xor R32(%rcx), R32(%rcx)
+ xor R32(%rdx), R32(%rdx)
+ xor %r8, %r8
+ xor %r9, %r9
+ xor %r10, %r10
+
+C Each pass handles two groups of three limbs.  The loop can exit after the
+C first group (jng) with ap not yet advanced, or after the second group, in
+C which case ap is pulled back by 24 so that 24(ap) addresses the first
+C unprocessed limb on both exit paths.
+L(top): add (ap), %rax
+ adc $0, %r10
+ add 8(ap), %rcx
+ adc $0, %r8
+ add 16(ap), %rdx
+ adc $0, %r9
+
+ sub $3, %rsi
+ jng L(end)
+
+ add 24(ap), %rax
+ adc $0, %r10
+ add 32(ap), %rcx
+ adc $0, %r8
+ add 40(ap), %rdx
+ lea 48(ap), ap
+ adc $0, %r9
+
+ sub $3, %rsi
+ jg L(top)
+
+
+ add $-24, ap
+C Fold each carry counter into the accumulator one position up (the carries
+C out of %rdx wrap around to %rax).
+L(end): add %r9, %rax
+ adc %r10, %rcx
+ adc %r8, %rdx
+
+C Up to two leftover limbs remain.  %r10 is set to the weight that the carry
+C flag will have at L(combine): 1, 2^16 or 2^32 depending on which
+C accumulator was added to last.  inc, dec and mov leave the carry flag
+C intact, so it survives from the adc above (or below) to the sbb.
+ inc %rsi
+ mov $0x1, R32(%r10)
+ js L(combine)
+
+ mov $0x10000, R32(%r10)
+ adc 24(ap), %rax
+ dec %rsi
+ js L(combine)
+
+ adc 32(ap), %rcx
+ mov $0x100000000, %r10
+
+L(combine):
+ sbb %rsi, %rsi C carry
+ mov %rax, %rdi C 0mod3
+ shr $48, %rax C 0mod3 high
+
+ and %r10, %rsi C carry masked
+ and %r11, %rdi C 0mod3 low
+ mov R32(%rcx), R32(%r10) C 1mod3
+
+ add %rsi, %rax C apply carry
+ shr $32, %rcx C 1mod3 high
+
+ add %rdi, %rax C apply 0mod3 low
+ movzwl %dx, R32(%rdi) C 2mod3
+ shl $16, %r10 C 1mod3 low
+
+ add %rcx, %rax C apply 1mod3 high
+ shr $16, %rdx C 2mod3 high
+
+ add %r10, %rax C apply 1mod3 low
+ shl $32, %rdi C 2mod3 low
+
+ add %rdx, %rax C apply 2mod3 high
+ add %rdi, %rax C apply 2mod3 low
+
+ FUNC_EXIT()
+ ret
+EPILOGUE()
--- /dev/null
+dnl x86-64 mpn_rsh1add_n/mpn_rsh1sub_n optimized for Pentium 4.
+
+dnl Contributed to the GNU project by Torbjorn Granlund.
+
+dnl Copyright 2007, 2008, 2010, 2011, 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C cycles/limb
+C AMD K8,K9 4.13
+C AMD K10 4.13
+C Intel P4 5.70
+C Intel core2 4.75
+C Intel corei 5
+C Intel atom 8.75
+C VIA nano 5.25
+
+C TODO
+C * Try to make this smaller, 746 bytes seem excessive for this 2nd class
+C function. Less sw pipelining would help, and since we now probably
+C pipeline somewhat too deeply, it might not affect performance too much.
+C * A separate small-n loop might speed things as well as make things smaller.
+C That loop should be selected before pushing registers.
+
+C INPUT PARAMETERS
+define(`rp', `%rdi')
+define(`up', `%rsi')
+define(`vp', `%rdx')
+define(`n', `%rcx')
+define(`cy', `%r8')
+
+ifdef(`OPERATION_rsh1add_n', `
+ define(ADDSUB, add)
+ define(func, mpn_rsh1add_n)
+ define(func_nc, mpn_rsh1add_nc)')
+ifdef(`OPERATION_rsh1sub_n', `
+ define(ADDSUB, sub)
+ define(func, mpn_rsh1sub_n)
+ define(func_nc, mpn_rsh1sub_nc)')
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+MULFUNC_PROLOGUE(mpn_rsh1add_n mpn_rsh1add_nc mpn_rsh1sub_n mpn_rsh1sub_nc)
+
+ASM_START()
+ TEXT
+C func (mpn_rsh1add_n / mpn_rsh1sub_n) and func_nc (the variants taking a
+C carry-in): compute up[] ADDSUB vp[] over n limbs, shift the result right
+C by one bit and store it at rp.  The bit shifted out at the bottom is the
+C return value, and the final carry/borrow becomes the top bit of the most
+C significant result limb.
+C
+C func clears the carry-in register and continues at L(ent) inside func_nc:
+C by an explicit jump under IFDOS, otherwise by falling through the
+C EPILOGUE/PROLOGUE pair (NOTE(review): this relies on FUNC_ENTRY emitting
+C no code for STD64 -- confirm against the macro definitions).
+C
+C Carry handling: no adc/sbb is used.  The carry out of each limb operation
+C is captured with setc into the low byte of %rax or %rbx (alternating), and
+C applied to the next limb with a second ADDSUB.  If that second operation
+C itself carries, the saved carry is forced to 1 (the jnc/mov $1 pairs here
+C and the out-of-line L(c0)..L(c3) fixups in the loop).
+C NOTE(review): the carry-in in %r8 is used as a full 32-bit operand, so it
+C is presumably expected to be 0 or 1 -- confirm with callers.
+PROLOGUE(func)
+ FUNC_ENTRY(4)
+ xor %r8, %r8
+IFDOS(` jmp L(ent) ')
+EPILOGUE()
+PROLOGUE(func_nc)
+ FUNC_ENTRY(4)
+IFDOS(` mov 56(%rsp), %r8 ')
+L(ent): push %rbx
+ push %r12
+ push %r13
+ push %r14
+ push %r15
+
+C %r15 receives the lowest sum limb and is kept until the end, where its low
+C bit is returned.
+ mov (vp), %r9
+ mov (up), %r15
+
+C Dispatch on n mod 4 to one of four feed-in sequences.  Each one primes the
+C software pipeline and enters the unrolled loop at the matching label
+C (L(L00), L(L01), L(L10), L(L11)), or goes straight to the wind-down code
+C when n is too small for the loop.
+ mov R32(n), R32(%rax)
+ and $3, R32(%rax)
+ jne L(n00)
+
+ mov R32(%r8), R32(%rbx) C n = 0, 4, 8, ...
+ mov 8(up), %r10
+ ADDSUB %r9, %r15
+ mov 8(vp), %r9
+ setc R8(%rax)
+ ADDSUB %rbx, %r15 C return bit
+ jnc 1f
+ mov $1, R8(%rax)
+1: mov 16(up), %r12
+ ADDSUB %r9, %r10
+ mov 16(vp), %r9
+ setc R8(%rbx)
+ mov %r15, %r13
+ ADDSUB %rax, %r10
+ jnc 1f
+ mov $1, R8(%rbx)
+1: mov 24(up), %r11
+ ADDSUB %r9, %r12
+ lea 32(up), up
+ mov 24(vp), %r9
+ lea 32(vp), vp
+ setc R8(%rax)
+ mov %r10, %r14
+ shl $63, %r10
+ shr %r13
+ jmp L(L00)
+
+L(n00): cmp $2, R32(%rax)
+ jnc L(n01)
+ xor R32(%rbx), R32(%rbx) C n = 1, 5, 9, ...
+ lea -24(rp), rp
+ mov R32(%r8), R32(%rax)
+ dec n
+ jnz L(gt1)
+C n = 1: a single limb; finish directly in the wind-down code.
+ ADDSUB %r9, %r15
+ setc R8(%rbx)
+ ADDSUB %rax, %r15
+ jnc 1f
+ mov $1, R8(%rbx)
+1: mov %r15, %r14
+ shl $63, %rbx
+ shr %r14
+ jmp L(cj1)
+L(gt1): mov 8(up), %r8
+ ADDSUB %r9, %r15
+ mov 8(vp), %r9
+ setc R8(%rbx)
+ ADDSUB %rax, %r15
+ jnc 1f
+ mov $1, R8(%rbx)
+1: mov 16(up), %r10
+ ADDSUB %r9, %r8
+ mov 16(vp), %r9
+ setc R8(%rax)
+ mov %r15, %r14
+ ADDSUB %rbx, %r8
+ jnc 1f
+ mov $1, R8(%rax)
+1: mov 24(up), %r12
+ ADDSUB %r9, %r10
+ mov 24(vp), %r9
+ setc R8(%rbx)
+ mov %r8, %r13
+ shl $63, %r8
+ shr %r14
+ lea 8(up), up
+ lea 8(vp), vp
+ jmp L(L01)
+
+C The jne below still tests the flags of the cmp $2 at L(n00); the
+C intervening jnc does not modify them.
+L(n01): jne L(n10)
+ lea -16(rp), rp C n = 2, 6, 10, ...
+ mov R32(%r8), R32(%rbx)
+ mov 8(up), %r11
+ ADDSUB %r9, %r15
+ mov 8(vp), %r9
+ setc R8(%rax)
+ ADDSUB %rbx, %r15
+ jnc 1f
+ mov $1, R8(%rax)
+1: sub $2, n
+ jnz L(gt2)
+C n = 2: finish directly in the wind-down code.
+ ADDSUB %r9, %r11
+ setc R8(%rbx)
+ mov %r15, %r13
+ ADDSUB %rax, %r11
+ jnc 1f
+ mov $1, R8(%rbx)
+1: mov %r11, %r14
+ shl $63, %r11
+ shr %r13
+ jmp L(cj2)
+L(gt2): mov 16(up), %r8
+ ADDSUB %r9, %r11
+ mov 16(vp), %r9
+ setc R8(%rbx)
+ mov %r15, %r13
+ ADDSUB %rax, %r11
+ jnc 1f
+ mov $1, R8(%rbx)
+1: mov 24(up), %r10
+ ADDSUB %r9, %r8
+ mov 24(vp), %r9
+ setc R8(%rax)
+ mov %r11, %r14
+ shl $63, %r11
+ shr %r13
+ lea 16(up), up
+ lea 16(vp), vp
+ jmp L(L10)
+
+L(n10): xor R32(%rbx), R32(%rbx) C n = 3, 7, 11, ...
+ lea -8(rp), rp
+ mov R32(%r8), R32(%rax)
+ mov 8(up), %r12
+ ADDSUB %r9, %r15
+ mov 8(vp), %r9
+ setc R8(%rbx)
+ ADDSUB %rax, %r15
+ jnc 1f
+ mov $1, R8(%rbx)
+1: mov 16(up), %r11
+ ADDSUB %r9, %r12
+ mov 16(vp), %r9
+ setc R8(%rax)
+ mov %r15, %r14
+ ADDSUB %rbx, %r12
+ jnc 1f
+ mov $1, R8(%rax)
+1: sub $3, n
+ jnz L(gt3)
+C n = 3: finish directly in the wind-down code.
+ ADDSUB %r9, %r11
+ setc R8(%rbx)
+ mov %r12, %r13
+ shl $63, %r12
+ shr %r14
+ jmp L(cj3)
+L(gt3): mov 24(up), %r8
+ ADDSUB %r9, %r11
+ mov 24(vp), %r9
+ setc R8(%rbx)
+ mov %r12, %r13
+ shl $63, %r12
+ shr %r14
+ lea 24(up), up
+ lea 24(vp), vp
+ jmp L(L11)
+
+C Out-of-line carry fixups for the loop: applying the previous carry produced
+C a carry of its own, so force the saved carry to 1 and resume.
+L(c0): mov $1, R8(%rbx)
+ jmp L(rc0)
+L(c1): mov $1, R8(%rax)
+ jmp L(rc1)
+L(c2): mov $1, R8(%rbx)
+ jmp L(rc2)
+
+ ALIGN(16)
+C Main loop, four limbs per iteration.  Each stage adds/subtracts one limb
+C pair, and forms a result limb from a sum limb shifted right by one (shr)
+C with the low bit of the next sum limb moved into bit 63 (shl $63, or).
+L(top): mov (up), %r8 C not on critical path
+ or %r13, %r10
+ ADDSUB %r9, %r11 C not on critical path
+ mov (vp), %r9 C not on critical path
+ setc R8(%rbx) C save carry out
+ mov %r12, %r13 C new for later
+ shl $63, %r12 C shift new right
+ shr %r14 C shift old left
+ mov %r10, (rp)
+L(L11): ADDSUB %rax, %r11 C apply previous carry out
+ jc L(c0) C jump if ripple
+L(rc0): mov 8(up), %r10
+ or %r14, %r12
+ ADDSUB %r9, %r8
+ mov 8(vp), %r9
+ setc R8(%rax)
+ mov %r11, %r14
+ shl $63, %r11
+ shr %r13
+ mov %r12, 8(rp)
+L(L10): ADDSUB %rbx, %r8
+ jc L(c1)
+L(rc1): mov 16(up), %r12
+ or %r13, %r11
+ ADDSUB %r9, %r10
+ mov 16(vp), %r9
+ setc R8(%rbx)
+ mov %r8, %r13
+ shl $63, %r8
+ shr %r14
+ mov %r11, 16(rp)
+L(L01): ADDSUB %rax, %r10
+ jc L(c2)
+L(rc2): mov 24(up), %r11
+ or %r14, %r8
+ ADDSUB %r9, %r12
+ lea 32(up), up
+ mov 24(vp), %r9
+ lea 32(vp), vp
+ setc R8(%rax)
+ mov %r10, %r14
+ shl $63, %r10
+ shr %r13
+ mov %r8, 24(rp)
+ lea 32(rp), rp
+L(L00): ADDSUB %rbx, %r12
+ jc L(c3)
+L(rc3): sub $4, n
+ ja L(top)
+
+C Wind-down: drain the pipeline and store the last four result limbs.  The
+C small-n feed-in paths join at L(cj3), L(cj2) and L(cj1).
+L(end): or %r13, %r10
+ ADDSUB %r9, %r11
+ setc R8(%rbx)
+ mov %r12, %r13
+ shl $63, %r12
+ shr %r14
+ mov %r10, (rp)
+L(cj3): ADDSUB %rax, %r11
+ jnc 1f
+ mov $1, R8(%rbx)
+1: or %r14, %r12
+ mov %r11, %r14
+ shl $63, %r11
+ shr %r13
+ mov %r12, 8(rp)
+L(cj2): or %r13, %r11
+ shl $63, %rbx
+ shr %r14
+ mov %r11, 16(rp)
+C The final carry/borrow (in %rbx, moved to bit 63) is the top bit of the
+C most significant result limb.
+L(cj1): or %r14, %rbx
+ mov %rbx, 24(rp)
+
+C Return the low bit of the lowest sum limb, i.e. the bit shifted out.
+ mov R32(%r15), R32(%rax)
+ and $1, R32(%rax)
+ pop %r15
+ pop %r14
+ pop %r13
+ pop %r12
+ pop %rbx
+ FUNC_EXIT()
+ ret
+C Fourth out-of-line carry fixup, placed after the return.
+L(c3): mov $1, R8(%rax)
+ jmp L(rc3)
+EPILOGUE()
dnl x86-64 mpn_rshift optimized for Pentium 4.
-dnl Copyright 2003, 2005, 2007, 2008 Free Software Foundation, Inc.
-dnl
+dnl Copyright 2003, 2005, 2007, 2008, 2012 Free Software Foundation, Inc.
+
dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or
-dnl modify it under the terms of the GNU Lesser General Public License as
-dnl published by the Free Software Foundation; either version 3 of the
-dnl License, or (at your option) any later version.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful,
-dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
-dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-dnl Lesser General Public License for more details.
-dnl
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
dnl You should have received a copy of the GNU Lesser General Public License
dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
C cycles/limb
-C K8,K9: 2.5
-C K10: ?
-C P4: 3.29
-C P6-15 (Core2): 2.1 (fluctuates, presumably cache related)
-C P6-28 (Atom): 14.3
+C AMD K8,K9 2.5
+C AMD K10 ?
+C Intel P4 3.29
+C Intel core2 2.1 (fluctuates, presumably cache related)
+C Intel corei ?
+C Intel atom 14.3
+C VIA nano ?
C INPUT PARAMETERS
define(`rp',`%rdi')
define(`n',`%rdx')
define(`cnt',`%cl')
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
ASM_START()
TEXT
ALIGN(32)
PROLOGUE(mpn_rshift)
+ FUNC_ENTRY(4)
mov (up), %rax
- movd %ecx, %mm4
- neg %ecx C put lsh count in cl
- and $63, %ecx
- movd %ecx, %mm5
+ movd R32(%rcx), %mm4
+ neg R32(%rcx) C put lsh count in cl
+ and $63, R32(%rcx)
+ movd R32(%rcx), %mm5
lea -8(up,n,8), up
lea -8(rp,n,8), rp
- lea 1(n), %r8d
+ lea 1(n), R32(%r8)
neg n
- shl %cl, %rax C function return value
+ shl R8(%rcx), %rax C function return value
- and $3, %r8d
+ and $3, R32(%r8)
je L(rol) C jump for n = 3, 7, 11, ...
- dec %r8d
+ dec R32(%r8)
jne L(1)
C n = 4, 8, 12, ...
movq 8(up,n,8), %mm2
inc n
jmp L(rol)
-L(1): dec %r8d
+L(1): dec R32(%r8)
je L(1x) C jump for n = 1, 5, 9, 13, ...
C n = 2, 6, 10, 14, ...
movq 8(up,n,8), %mm2
jae L(top) C 2
L(end):
- movq -16(up,n,8), %mm0
+ movq -8(up), %mm0
psllq %mm5, %mm0
por %mm0, %mm2
- movq -8(up,n,8), %mm1
+ movq (up), %mm1
psllq %mm5, %mm1
por %mm1, %mm3
- movq %mm2, -24(rp,n,8)
- movq %mm3, -16(rp,n,8)
+ movq %mm2, -16(rp)
+ movq %mm3, -8(rp)
L(ast): movq (up), %mm2
psrlq %mm4, %mm2
movq %mm2, (rp)
emms
+ FUNC_EXIT()
ret
EPILOGUE()
dnl AMD64 mpn_popcount, mpn_hamdist -- population count and hamming distance.
-dnl Copyright 2004, 2005, 2007 Free Software Foundation, Inc.
+dnl Copyright 2004, 2005, 2007, 2010, 2011, 2012 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
C popcount hamdist
C cycles/limb cycles/limb
-C K8,K9: 6 7
-C K10: 6 7
-C P4: 12 14.3
-C P6-15: 7 8
+C AMD K8,K9 6 7
+C AMD K10 6 7
+C Intel P4 12 14.3
+C Intel core2 7 8
+C Intel corei ? 7.3
+C Intel atom 16.5 17.5
+C VIA nano 8.75 10.4
C TODO
C * Tune. It should be possible to reach 5 c/l for popcount and 6 c/l for
define(`h33333333', `%r11')
define(`h0f0f0f0f', `%rcx')
define(`h01010101', `%rdx')
+ define(`POP', `$1')
define(`HAM', `dnl')
')
ifdef(`OPERATION_hamdist',`
define(`h33333333', `%r11')
define(`h0f0f0f0f', `%rcx')
define(`h01010101', `%r14')
+ define(`POP', `dnl')
define(`HAM', `$1')
')
MULFUNC_PROLOGUE(mpn_popcount mpn_hamdist)
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
ASM_START()
TEXT
ALIGN(32)
PROLOGUE(func)
+ POP(` FUNC_ENTRY(2) ')
+ HAM(` FUNC_ENTRY(3) ')
+ push %r12
+ push %r13
+ HAM(` push %r14 ')
- pushq %r12
- pushq %r13
- HAM(` pushq %r14 ')
-
- movq $0x5555555555555555, h55555555
- movq $0x3333333333333333, h33333333
- movq $0x0f0f0f0f0f0f0f0f, h0f0f0f0f
- movq $0x0101010101010101, h01010101
+ mov $0x5555555555555555, h55555555
+ mov $0x3333333333333333, h33333333
+ mov $0x0f0f0f0f0f0f0f0f, h0f0f0f0f
+ mov $0x0101010101010101, h01010101
- leaq (up,n,8), up
- HAM(` leaq (vp,n,8), vp ')
- negq n
+ lea (up,n,8), up
+ HAM(` lea (vp,n,8), vp ')
+ neg n
- xorl %eax, %eax
+ xor R32(%rax), R32(%rax)
- btq $0, n
- jnc L(oop)
+ bt $0, R32(n)
+ jnc L(top)
- movq (up,n,8), %r8
- HAM(` xorq (vp,n,8), %r8 ')
+ mov (up,n,8), %r8
+ HAM(` xor (vp,n,8), %r8 ')
- movq %r8, %r9
- shrq %r8
- andq h55555555, %r8
- subq %r8, %r9
+ mov %r8, %r9
+ shr %r8
+ and h55555555, %r8
+ sub %r8, %r9
- movq %r9, %r8
- shrq $2, %r9
- andq h33333333, %r8
- andq h33333333, %r9
- addq %r8, %r9 C 16 4-bit fields (0..4)
+ mov %r9, %r8
+ shr $2, %r9
+ and h33333333, %r8
+ and h33333333, %r9
+ add %r8, %r9 C 16 4-bit fields (0..4)
- movq %r9, %r8
- shrq $4, %r9
- andq h0f0f0f0f, %r8
- andq h0f0f0f0f, %r9
- addq %r8, %r9 C 8 8-bit fields (0..16)
+ mov %r9, %r8
+ shr $4, %r9
+ and h0f0f0f0f, %r8
+ and h0f0f0f0f, %r9
+ add %r8, %r9 C 8 8-bit fields (0..16)
- imulq h01010101, %r9 C sum the 8 fields in high 8 bits
- shrq $56, %r9
+ imul h01010101, %r9 C sum the 8 fields in high 8 bits
+ shr $56, %r9
- addq %r9, %rax C add to total
- addq $1, n
- jz L(done)
+ mov %r9, %rax C add to total
+ add $1, n
+ jz L(end)
ALIGN(16)
-L(oop): movq (up,n,8), %r8
- movq 8(up,n,8), %r12
- HAM(` xorq (vp,n,8), %r8 ')
- HAM(` xorq 8(vp,n,8), %r12 ')
-
- movq %r8, %r9
- movq %r12, %r13
- shrq %r8
- shrq %r12
- andq h55555555, %r8
- andq h55555555, %r12
- subq %r8, %r9
- subq %r12, %r13
-
- movq %r9, %r8
- movq %r13, %r12
- shrq $2, %r9
- shrq $2, %r13
- andq h33333333, %r8
- andq h33333333, %r9
- andq h33333333, %r12
- andq h33333333, %r13
- addq %r8, %r9 C 16 4-bit fields (0..4)
- addq %r12, %r13 C 16 4-bit fields (0..4)
-
- addq %r13, %r9 C 16 4-bit fields (0..8)
- movq %r9, %r8
- shrq $4, %r9
- andq h0f0f0f0f, %r8
- andq h0f0f0f0f, %r9
- addq %r8, %r9 C 8 8-bit fields (0..16)
-
- imulq h01010101, %r9 C sum the 8 fields in high 8 bits
- shrq $56, %r9
-
- addq %r9, %rax C add to total
- addq $2, n
- jnc L(oop)
-
-L(done):
- HAM(` popq %r14 ')
- popq %r13
- popq %r12
+L(top): mov (up,n,8), %r8
+ mov 8(up,n,8), %r12
+ HAM(` xor (vp,n,8), %r8 ')
+ HAM(` xor 8(vp,n,8), %r12 ')
+
+ mov %r8, %r9
+ mov %r12, %r13
+ shr %r8
+ shr %r12
+ and h55555555, %r8
+ and h55555555, %r12
+ sub %r8, %r9
+ sub %r12, %r13
+
+ mov %r9, %r8
+ mov %r13, %r12
+ shr $2, %r9
+ shr $2, %r13
+ and h33333333, %r8
+ and h33333333, %r9
+ and h33333333, %r12
+ and h33333333, %r13
+ add %r8, %r9 C 16 4-bit fields (0..4)
+ add %r12, %r13 C 16 4-bit fields (0..4)
+
+ add %r13, %r9 C 16 4-bit fields (0..8)
+ mov %r9, %r8
+ shr $4, %r9
+ and h0f0f0f0f, %r8
+ and h0f0f0f0f, %r9
+ add %r8, %r9 C 8 8-bit fields (0..16)
+
+ imul h01010101, %r9 C sum the 8 fields in high 8 bits
+ shr $56, %r9
+
+ add %r9, %rax C add to total
+ add $2, n
+ jnc L(top)
+
+L(end):
+ HAM(` pop %r14 ')
+ pop %r13
+ pop %r12
+ FUNC_EXIT()
ret
-
EPILOGUE()
dnl AMD64 mpn_redc_1 -- Montgomery reduction with a one-limb modular inverse.
-dnl Copyright 2004, 2008 Free Software Foundation, Inc.
+dnl Copyright 2004, 2008, 2011, 2012 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
C cycles/limb
C cycles/limb
-C K8,K9: 2.5
-C K10: 2.5
-C P4: ?
-C P6-15 (Core2): 5.3
-C P6-28 (Atom): ?
+C AMD K8,K9 2.5
+C AMD K10 2.5
+C Intel P4 ?
+C Intel core2 5.3
+C Intel corei ?
+C Intel atom ?
+C VIA nano ?
C TODO
C * Handle certain sizes, e.g., 1, 2, 3, 4, 8, with single-loop code.
C The code for 1, 2, 3, 4 should perhaps be completely register based.
C * Perhaps align outer loops.
-C * The sub_n at the end leaks side-channel data. How do we fix that?
-C * Write mpn_add_n_sub_n computing R = A + B - C. It should run at 2 c/l.
C * We could software pipeline the IMUL stuff, by putting it before the
C outer loops and before the end of the outer loops. The last outer
C loop iteration would then compute an unneeded product, but it is at
C least not a stray read from up[], since it is at up[n].
-C * Can we combine both the add_n and sub_n into the loops, somehow?
+C * Make a tail call to mpn_add_n.
C INPUT PARAMETERS
-define(`rp', `%rdi')
-define(`up', `%rsi')
-define(`param_mp',`%rdx')
-define(`n', `%rcx')
-define(`invm', `%r8')
+define(`rp', `%rdi') C rcx
+define(`up', `%rsi') C rdx
+define(`mp_param',`%rdx') C r8
+define(`n', `%rcx') C r9
+define(`invm', `%r8') C stack
define(`mp', `%r13')
define(`i', `%r11')
define(`nneg', `%r12')
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
ASM_START()
TEXT
ALIGN(32)
PROLOGUE(mpn_redc_1)
+ FUNC_ENTRY(4)
+IFDOS(` mov 56(%rsp), %r8 ')
push %rbp
push %rbx
push %r12
push %r13
push %r14
- push n
- sub $8, %rsp C maintain ABI required rsp alignment
- lea (param_mp,n,8), mp C mp += n
+ lea (mp_param,n,8), mp C mp += n
lea (up,n,8), up C up += n
mov n, nneg
imul invm, %rbp
mov (mp,i,8), %rax
- xor %ebx, %ebx
+ xor R32(%rbx), R32(%rbx)
mul %rbp
add $1, i
jnz 1f
adc %rax, %r9
mov (mp,i,8), %rax
adc %rdx, %r14
-L(mi1): xor %r10d, %r10d
+L(mi1): xor R32(%r10), R32(%r10)
mul %rbp
add %r9, 8(up,i,8)
adc %rax, %r14
adc %rdx, %r10
mov 16(mp,i,8), %rax
mul %rbp
- xor %r9d, %r9d
- xor %r14d, %r14d
+ xor R32(%r9), R32(%r9)
+ xor R32(%r14), R32(%r14)
add %rbx, 24(up,i,8)
adc %rax, %r10
mov 24(mp,i,8), %rax
adc %rdx, %r9
- xor %ebx, %ebx
+ xor R32(%rbx), R32(%rbx)
mul %rbp
add $4, i
js L(lo1)
L(ed1): add %r10, (up)
adc %rax, %r9
adc %rdx, %r14
- xor %r10d, %r10d
+ xor R32(%r10), R32(%r10)
add %r9, 8(up)
adc $0, %r14
L(n1): mov %r14, 16(up,nneg,8) C up[0]
add $8, up
dec n
jnz L(o1)
-C lea (mp), mp
- lea 16(up), up
+ lea 16(up,nneg,8), up
jmp L(common)
L(b0): C lea (mp), mp
imul invm, %rbp
mov (mp,i,8), %rax
- xor %r10d, %r10d
+ xor R32(%r10), R32(%r10)
mul %rbp
mov %rax, %r14
mov %rdx, %rbx
adc %rax, %r9
mov (mp,i,8), %rax
adc %rdx, %r14
- xor %r10d, %r10d
+ xor R32(%r10), R32(%r10)
mul %rbp
add %r9, 8(up,i,8)
adc %rax, %r14
adc %rdx, %r10
mov 16(mp,i,8), %rax
mul %rbp
- xor %r9d, %r9d
- xor %r14d, %r14d
+ xor R32(%r9), R32(%r9)
+ xor R32(%r14), R32(%r14)
add %rbx, 24(up,i,8)
adc %rax, %r10
mov 24(mp,i,8), %rax
adc %rdx, %r9
- xor %ebx, %ebx
+ xor R32(%rbx), R32(%rbx)
mul %rbp
add $4, i
js L(lo0)
L(ed0): add %r10, (up)
adc %rax, %r9
adc %rdx, %r14
- xor %r10d, %r10d
+ xor R32(%r10), R32(%r10)
add %r9, 8(up)
adc $0, %r14
mov %r14, 16(up,nneg,8) C up[0]
add $8, up
dec n
jnz L(o0)
-C lea (mp), mp
- lea 16(up), up
+ lea 16(up,nneg,8), up
jmp L(common)
-
L(b3): lea -8(mp), mp
lea -24(up), up
L(o3): mov nneg, i
adc %rax, %r9
mov (mp,i,8), %rax
adc %rdx, %r14
- xor %r10d, %r10d
+ xor R32(%r10), R32(%r10)
mul %rbp
add %r9, 8(up,i,8)
adc %rax, %r14
adc %rdx, %r10
L(mi3): mov 16(mp,i,8), %rax
mul %rbp
- xor %r9d, %r9d
- xor %r14d, %r14d
+ xor R32(%r9), R32(%r9)
+ xor R32(%r14), R32(%r14)
add %rbx, 24(up,i,8)
adc %rax, %r10
mov 24(mp,i,8), %rax
adc %rdx, %r9
- xor %ebx, %ebx
+ xor R32(%rbx), R32(%rbx)
mul %rbp
add $4, i
js L(lo3)
L(ed3): add %r10, 8(up)
adc %rax, %r9
adc %rdx, %r14
- xor %r10d, %r10d
+ xor R32(%r10), R32(%r10)
add %r9, 16(up)
adc $0, %r14
mov %r14, 24(up,nneg,8) C up[0]
add $8, up
dec n
jnz L(o3)
- lea 8(mp), mp
- lea 24(up), up
+ lea 24(up,nneg,8), up
jmp L(common)
L(b2): lea -16(mp), mp
mov 16(mp,i,8), %rax
mul %rbp
- xor %r14d, %r14d
+ xor R32(%r14), R32(%r14)
mov %rax, %r10
mov 24(mp,i,8), %rax
mov %rdx, %r9
adc %rax, %r9
mov (mp,i,8), %rax
adc %rdx, %r14
- xor %r10d, %r10d
+ xor R32(%r10), R32(%r10)
mul %rbp
add %r9, 8(up,i,8)
adc %rax, %r14
adc %rdx, %r10
mov 16(mp,i,8), %rax
mul %rbp
- xor %r9d, %r9d
- xor %r14d, %r14d
+ xor R32(%r9), R32(%r9)
+ xor R32(%r14), R32(%r14)
add %rbx, 24(up,i,8)
adc %rax, %r10
mov 24(mp,i,8), %rax
adc %rdx, %r9
-L(mi2): xor %ebx, %ebx
+L(mi2): xor R32(%rbx), R32(%rbx)
mul %rbp
add $4, i
js L(lo2)
L(ed2): add %r10, 16(up)
adc %rax, %r9
adc %rdx, %r14
- xor %r10d, %r10d
+ xor R32(%r10), R32(%r10)
add %r9, 24(up)
adc $0, %r14
mov %r14, 32(up,nneg,8) C up[0]
add $8, up
dec n
jnz L(o2)
- lea 16(mp), mp
- lea 32(up), up
+ lea 32(up,nneg,8), up
L(common):
- lea (mp,nneg,8), mp C restore entry mp
C cy = mpn_add_n (rp, up, up - n, n);
-C rdi rsi rdx rcx
- lea (up,nneg,8), up C up -= n
- lea (up,nneg,8), %rdx C rdx = up - n [up entry value]
- mov rp, nneg C preserve rp over first call
- mov 8(%rsp), %rcx C pass entry n
-C mov rp, %rdi
+C rdi rsi rdx rcx STD
+C rcx rdx r8 r9 DOS
+
+IFSTD(` lea (up,nneg,8), %rdx ') C rdx = up - n [up entry value]
+IFSTD(` mov nneg, %rcx ')
+IFSTD(` neg %rcx ') C rcx = -nneg = n
+
+IFDOS(` lea (up,nneg,8), %r8 ') C r8 = up - n
+IFDOS(` mov up, %rdx ') C rdx = up
+IFDOS(` mov nneg, %r9 ')
+IFDOS(` neg %r9 ') C r9 = -nneg = n
+IFDOS(` mov rp, %rcx ') C rcx = rp
+
CALL( mpn_add_n)
- test R32(%rax), R32(%rax)
- jz L(ret)
-
-C mpn_sub_n (rp, rp, mp, n);
-C rdi rsi rdx rcx
- mov nneg, %rdi
- mov nneg, %rsi
- mov mp, %rdx
- mov 8(%rsp), %rcx C pass entry n
- CALL( mpn_sub_n)
-
-L(ret):
- add $8, %rsp
- pop n C just increment rsp
+
pop %r14
pop %r13
pop %r12
pop %rbx
pop %rbp
+ FUNC_EXIT()
ret
EPILOGUE()
dnl AMD64 mpn_rsh1add_n -- rp[] = (up[] + vp[]) >> 1
+dnl AMD64 mpn_rsh1sub_n -- rp[] = (up[] - vp[]) >> 1
-dnl Copyright 2003, 2005, 2009 Free Software Foundation, Inc.
+dnl Copyright 2003, 2005, 2009, 2011, 2012 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
include(`../config.m4')
-
C cycles/limb
-C K8,K9: 2.14 (mpn_add_n + mpn_rshift need 4.125)
-C K10: 2.14 (mpn_add_n + mpn_rshift need 4.125)
-C P4: 12.75
-C P6-15: 3.75
+C AMD K8,K9 2.14 (mpn_add_n + mpn_rshift need 4.125)
+C AMD K10 2.14 (mpn_add_n + mpn_rshift need 4.125)
+C Intel P4 12.75
+C Intel core2 3.75
+C Intel NHM 4.4
+C Intel SBR ?
+C Intel atom ?
+C VIA nano 3.25
C TODO
C * Rewrite to use indexed addressing, like addlsh1.asm and sublsh1.asm.
-C * Try to approach the cache bandwidth 1.5 c/l. It should be possible.
C INPUT PARAMETERS
-define(`rp',`%rdi')
-define(`up',`%rsi')
-define(`vp',`%rdx')
-define(`n',`%rcx')
-define(`n32',`%ecx')
+define(`rp', `%rdi')
+define(`up', `%rsi')
+define(`vp', `%rdx')
+define(`n',` %rcx')
ifdef(`OPERATION_rsh1add_n', `
define(ADDSUB, add)
MULFUNC_PROLOGUE(mpn_rsh1add_n mpn_rsh1add_nc mpn_rsh1sub_n mpn_rsh1sub_nc)
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
ASM_START()
TEXT
-
ALIGN(16)
PROLOGUE(func_nc)
+ FUNC_ENTRY(4)
+IFDOS(` mov 56(%rsp), %r8 ')
push %rbx
- xor %eax, %eax
+ xor R32(%rax), R32(%rax)
neg %r8 C set C flag from parameter
mov (up), %rbx
ADCSBB (vp), %rbx
ALIGN(16)
PROLOGUE(func_n)
+ FUNC_ENTRY(4)
push %rbx
- xor %eax, %eax
+ xor R32(%rax), R32(%rax)
mov (up), %rbx
ADDSUB (vp), %rbx
L(ent):
rcr %rbx C rotate, save acy
- adc %eax, %eax C return value
+ adc R32(%rax), R32(%rax) C return value
- mov n32, R32(%r11)
+ mov R32(n), R32(%r11)
and $3, R32(%r11)
cmp $1, R32(%r11)
L(end): mov %rbx, (rp)
pop %rbx
+ FUNC_EXIT()
ret
EPILOGUE()
dnl AMD64 mpn_rshift -- mpn right shift.
-dnl Copyright 2003, 2005, 2009 Free Software Foundation, Inc.
-dnl
+dnl Copyright 2003, 2005, 2009, 2011, 2012 Free Software Foundation, Inc.
+
dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or
-dnl modify it under the terms of the GNU Lesser General Public License as
-dnl published by the Free Software Foundation; either version 3 of the
-dnl License, or (at your option) any later version.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful,
-dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
-dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-dnl Lesser General Public License for more details.
-dnl
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
dnl You should have received a copy of the GNU Lesser General Public License
dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
C cycles/limb
-C K8,K9: 2.375
-C K10: 2.375
-C P4: 8
-C P6-15 (Core2): 2.11
-C P6-28 (Atom): 5.75
+C AMD K8,K9 2.375
+C AMD K10 2.375
+C Intel P4 8
+C Intel core2 2.11
+C Intel corei ?
+C Intel atom 5.75
+C VIA nano 3.5
C INPUT PARAMETERS
define(`n', `%rdx')
define(`cnt', `%rcx')
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
ASM_START()
TEXT
ALIGN(32)
PROLOGUE(mpn_rshift)
+ FUNC_ENTRY(4)
neg R32(%rcx) C put rsh count in cl
mov (up), %rax
shl R8(%rcx), %rax C function return value
L(ast): mov (up), %r10
shr R8(%rcx), %r10
mov %r10, (rp)
+ FUNC_EXIT()
ret
EPILOGUE()
dnl Contributed to the GNU project by Torbjorn Granlund.
-dnl Copyright 2008, 2009 Free Software Foundation, Inc.
+dnl Copyright 2008, 2009, 2011, 2012 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
C optimization tool suite written by David Harvey and Torbjorn Granlund.
C NOTES
-C * This code only handles operands up to SQR_TOOM2_THRESHOLD_MAX. That
-C means we can safely use 32-bit operations for all sizes, unlike in e.g.,
-C mpn_addmul_1.
+C * There is a major stupidity in that we call mpn_mul_1 initially, for a
+C large trip count. Instead, we should follow the generic/sqr_basecase.c
+C code which uses addmul_2s from the start, conditionally leaving a 1x1
+C multiply to the end. (In assembly code, one would stop invoking
+C addmul_2s loops when perhaps 3x2s respectively a 2x2s remains.)
+C * Another stupidity is in the sqr_diag_addlsh1 code. It does not need to
+C save/restore carry, instead it can propagate into the high product word.
+C * Align more labels, should shave off a few cycles.
+C * We can safely use 32-bit size operations, since operands with (2^32)
+C limbs will lead to non-termination in practice.
C * The jump table could probably be optimized, at least for non-pic.
-C * The special code for n=1,2,3 was quickly written. It is probably too
+C * The special code for n <= 4 was quickly written. It is probably too
C large and unnecessarily slow.
-C * Consider combining small cases code so that the n=k-1 code jumps into
-C the middle of the n=k code.
+C * Consider combining small cases code so that the n=k-1 code jumps into the
+C middle of the n=k code.
C * Avoid saving registers for small cases code.
C * Needed variables:
C n r11 input size
define(`up', `%rsi')
define(`n_param', `%rdx')
-C We should really trim this, for better spatial locality. Alternatively,
-C we could grab the upper part of the stack area, leaving the lower part
-C instead of the upper part unused.
-deflit(SQR_TOOM2_THRESHOLD_MAX, 80)
-define(`STACK_ALLOC', eval(8*2*SQR_TOOM2_THRESHOLD_MAX))
-
define(`n', `%r11')
define(`tp', `%r12')
define(`i', `%r8')
define(`w2', `%rbp')
define(`w3', `%r10')
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
ASM_START()
TEXT
ALIGN(16)
-
PROLOGUE(mpn_sqr_basecase)
- add $-48, %rsp
- mov %rbx, 40(%rsp)
- mov %rbp, 32(%rsp)
- mov %r12, 24(%rsp)
- mov %r13, 16(%rsp)
- mov %r14, 8(%rsp)
-
- mov R32(n_param), R32(n) C free original n register (rdx)
+ FUNC_ENTRY(3)
mov R32(n_param), R32(%rcx)
+ mov R32(n_param), R32(n) C free original n register (rdx)
+
+ add $-40, %rsp
+
and $3, R32(%rcx)
- lea 4(%rcx), %rbx
cmp $4, R32(n_param)
- cmovg %rbx, %rcx
- lea L(jmptab)(%rip), %rax
+ lea 4(%rcx), %r8
+
+ mov %rbx, 32(%rsp)
+ mov %rbp, 24(%rsp)
+ mov %r12, 16(%rsp)
+ mov %r13, 8(%rsp)
+ mov %r14, (%rsp)
+
+ cmovg %r8, %rcx
+
+ lea L(tab)(%rip), %rax
+ifdef(`PIC',
+` movslq (%rax,%rcx,4), %r10
+ add %r10, %rax
+ jmp *%rax
+',`
jmp *(%rax,%rcx,8)
+')
JUMPTABSECT
ALIGN(8)
-L(jmptab):
- .quad L(4)
- .quad L(1)
- .quad L(2)
- .quad L(3)
- .quad L(0m4)
- .quad L(1m4)
- .quad L(2m4)
- .quad L(3m4)
+L(tab): JMPENT( L(4), L(tab))
+ JMPENT( L(1), L(tab))
+ JMPENT( L(2), L(tab))
+ JMPENT( L(3), L(tab))
+ JMPENT( L(0m4), L(tab))
+ JMPENT( L(1m4), L(tab))
+ JMPENT( L(2m4), L(tab))
+ JMPENT( L(3m4), L(tab))
TEXT
L(1): mov (up), %rax
mul %rax
+ add $40, %rsp
mov %rax, (rp)
mov %rdx, 8(rp)
- add $40, %rsp
- pop %rbx
+ FUNC_EXIT()
ret
L(2): mov (up), %rax
+ mov %rax, %r8
mul %rax
+ mov 8(up), %r11
mov %rax, (rp)
+ mov %r11, %rax
mov %rdx, %r9
- mov 8(up), %rax
mul %rax
+ add $40, %rsp
mov %rax, %r10
+ mov %r11, %rax
mov %rdx, %r11
- mov 8(up), %rax
- mov (up), %rbx
- mul %rbx
+ mul %r8
+ xor %r8, %r8
add %rax, %r9
adc %rdx, %r10
- adc $0, %r11
+ adc %r8, %r11
add %rax, %r9
mov %r9, 8(rp)
adc %rdx, %r10
mov %r10, 16(rp)
- adc $0, %r11
+ adc %r8, %r11
mov %r11, 24(rp)
- add $40, %rsp
- pop %rbx
+ FUNC_EXIT()
ret
L(3): mov (up), %rax
+ mov %rax, %r10
mul %rax
+ mov 8(up), %r11
mov %rax, (rp)
+ mov %r11, %rax
mov %rdx, 8(rp)
- mov 8(up), %rax
mul %rax
+ mov 16(up), %rcx
mov %rax, 16(rp)
+ mov %rcx, %rax
mov %rdx, 24(rp)
- mov 16(up), %rax
mul %rax
mov %rax, 32(rp)
mov %rdx, 40(rp)
- mov (up), %rbx
- mov 8(up), %rax
- mul %rbx
+ mov %r11, %rax
+ mul %r10
mov %rax, %r8
+ mov %rcx, %rax
mov %rdx, %r9
- mov 16(up), %rax
- mul %rbx
- xor R32(%r10), R32(%r10)
+ mul %r10
+ xor %r10, %r10
add %rax, %r9
+ mov %r11, %rax
+ mov %r10, %r11
adc %rdx, %r10
- mov 8(up), %rbx
- mov 16(up), %rax
- mul %rbx
- xor R32(%r11), R32(%r11)
+ mul %rcx
+ add $40, %rsp
add %rax, %r10
- adc %rdx, %r11
+ adc %r11, %rdx
add %r8, %r8
adc %r9, %r9
adc %r10, %r10
+ adc %rdx, %rdx
adc %r11, %r11
- mov $0, R32(%rbx)
- adc %rbx, %rbx
add %r8, 8(rp)
adc %r9, 16(rp)
adc %r10, 24(rp)
- adc %r11, 32(rp)
- adc %rbx, 40(rp)
- add $40, %rsp
- pop %rbx
+ adc %rdx, 32(rp)
+ adc %r11, 40(rp)
+ FUNC_EXIT()
ret
L(4): mov (up), %rax
+ mov %rax, %r11
mul %rax
+ mov 8(up), %rbx
mov %rax, (rp)
+ mov %rbx, %rax
mov %rdx, 8(rp)
- mov 8(up), %rax
mul %rax
mov %rax, 16(rp)
mov %rdx, 24(rp)
mov 24(up), %rax
mul %rax
mov %rax, 48(rp)
+ mov %rbx, %rax
mov %rdx, 56(rp)
- mov (up), %rbx
- mov 8(up), %rax
- mul %rbx
+ mul %r11
+ add $32, %rsp
mov %rax, %r8
mov %rdx, %r9
mov 16(up), %rax
- mul %rbx
- xor R32(%r10), R32(%r10)
+ mul %r11
+ xor %r10, %r10
add %rax, %r9
adc %rdx, %r10
mov 24(up), %rax
- mul %rbx
- xor R32(%r11), R32(%r11)
+ mul %r11
+ xor %r11, %r11
add %rax, %r10
adc %rdx, %r11
- mov 8(up), %rbx
mov 16(up), %rax
mul %rbx
- xor R32(%r12), R32(%r12)
+ xor %rcx, %rcx
add %rax, %r10
adc %rdx, %r11
- adc $0, %r12
+ adc $0, %rcx
mov 24(up), %rax
mul %rbx
+ pop %rbx
add %rax, %r11
- adc %rdx, %r12
- mov 16(up), %rbx
+ adc %rdx, %rcx
+ mov 16(up), %rdx
mov 24(up), %rax
- mul %rbx
- xor R32(%rbp), R32(%rbp)
- add %rax, %r12
- adc %rdx, %rbp
+ mul %rdx
+ add %rax, %rcx
+ adc $0, %rdx
add %r8, %r8
adc %r9, %r9
adc %r10, %r10
adc %r11, %r11
- adc %r12, %r12
- mov $0, R32(%rbx)
- adc %rbp, %rbp
+ adc %rcx, %rcx
+ mov $0, R32(%rax)
+ adc %rdx, %rdx
- adc %rbx, %rbx
+ adc %rax, %rax
add %r8, 8(rp)
adc %r9, 16(rp)
adc %r10, 24(rp)
adc %r11, 32(rp)
- adc %r12, 40(rp)
- adc %rbp, 48(rp)
- adc %rbx, 56(rp)
- add $24, %rsp
- pop %r12
- pop %rbp
- pop %rbx
+ adc %rcx, 40(rp)
+ adc %rdx, 48(rp)
+ adc %rax, 56(rp)
+ FUNC_EXIT()
ret
-L(0m4): add $-STACK_ALLOC, %rsp
- lea -24(%rsp,n,8), tp C point tp in middle of result operand
+L(0m4):
+ lea -16(rp,n,8), tp C point tp in middle of result operand
mov (up), v0
mov 8(up), %rax
lea (up,n,8), up C point up at end of input operand
jmp L(dowhile)
-L(1m4): add $-STACK_ALLOC, %rsp
- lea (%rsp,n,8), tp C point tp in middle of result operand
+L(1m4):
+ lea 8(rp,n,8), tp C point tp in middle of result operand
mov (up), v0 C u0
mov 8(up), %rax C u1
lea 8(up,n,8), up C point up at end of input operand
mul v0 C u0 * u1
mov %rdx, w1
xor R32(w2), R32(w2)
- mov %rax, (%rsp)
+ mov %rax, 8(rp)
jmp L(m0)
ALIGN(16)
jmp L(dowhile_end)
-L(2m4): add $-STACK_ALLOC, %rsp
- lea -24(%rsp,n,8), tp C point tp in middle of result operand
+L(2m4):
+ lea -16(rp,n,8), tp C point tp in middle of result operand
mov (up), v0
mov 8(up), %rax
lea (up,n,8), up C point up at end of input operand
jmp L(dowhile_mid)
-L(3m4): add $-STACK_ALLOC, %rsp
- lea (%rsp,n,8), tp C point tp in middle of result operand
+L(3m4):
+ lea 8(rp,n,8), tp C point tp in middle of result operand
mov (up), v0 C u0
mov 8(up), %rax C u1
lea 8(up,n,8), up C point up at end of input operand
mov %rdx, w3
xor R32(w0), R32(w0)
xor R32(w1), R32(w1)
- mov %rax, (%rsp)
+ mov %rax, 8(rp)
jmp L(m2)
ALIGN(16)
C Function mpn_sqr_diag_addlsh1
lea -4(n,n), j
- mov (%rsp), %r11
-
- lea (rp,j,8), rp
+ mov 8(rp), %r11
lea -8(up), up
- lea 8(%rsp,j,8), tp
+ lea (rp,j,8), rp
neg j
mov (up,j,4), %rax
mul %rax
adc %rdx, %r11
mov %r10, (rp,j,8)
L(d0): mov %r11, 8(rp,j,8)
- mov (tp,j,8), %r10
+ mov 16(rp,j,8), %r10
adc %r10, %r10
- mov 8(tp,j,8), %r11
+ mov 24(rp,j,8), %r11
adc %r11, %r11
nop
sbb R32(%rbp), R32(%rbp) C save CF
adc %rdx, %r11
mov %r10, 16(rp,j,8)
L(d1): mov %r11, 24(rp,j,8)
- mov 16(tp,j,8), %r10
+ mov 32(rp,j,8), %r10
adc %r10, %r10
- mov 24(tp,j,8), %r11
+ mov 40(rp,j,8), %r11
adc %r11, %r11
sbb R32(%rbx), R32(%rbx) C save CF
add $4, j
adc %rdx, %r11
mov %r10, (rp)
mov %r11, 8(rp)
- mov (tp), %r10
+ mov 16(rp), %r10
adc %r10, %r10
sbb R32(%rbp), R32(%rbp) C save CF
neg R32(%rbp)
mov %r10, 16(rp)
mov %rdx, 24(rp)
- add $eval(8+STACK_ALLOC), %rsp
pop %r14
pop %r13
pop %r12
pop %rbp
pop %rbx
+ FUNC_EXIT()
ret
EPILOGUE()
dnl AMD64 mpn_sublsh1_n -- rp[] = up[] - (vp[] << 1)
-dnl Copyright 2003, 2005, 2006, 2007 Free Software Foundation, Inc.
+dnl Copyright 2003, 2005, 2006, 2007, 2011, 2012 Free Software Foundation,
+dnl Inc.
dnl This file is part of the GNU MP Library.
C cycles/limb
-C K8,K9: 2.2
-C K10: 2.2
-C P4: 12.75
-C P6 core2: 3.45
-C P6 corei7: 3.45
-C P6 atom: ?
-
+C AMD K8,K9 2.2
+C AMD K10 2.2
+C Intel P4 12.75
+C Intel core2 3.45
+C Intel corei ?
+C Intel atom ?
+C VIA nano 3.25
C Sometimes speed degenerates, supposedly related to that some operand
C alignments cause cache conflicts.
define(`vp',`%rdx')
define(`n', `%rcx')
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
ASM_START()
TEXT
ALIGN(16)
PROLOGUE(mpn_sublsh1_n)
+ FUNC_ENTRY(4)
push %rbx
push %rbp
pop %rbp
pop %rbx
+ FUNC_EXIT()
ret
EPILOGUE()
--- /dev/null
+dnl AMD64 mpn_tabselect.
+
+dnl Copyright 2011, 2012 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library.
+
+dnl The GNU MP Library is free software; you can redistribute it and/or modify
+dnl it under the terms of the GNU Lesser General Public License as published
+dnl by the Free Software Foundation; either version 3 of the License, or (at
+dnl your option) any later version.
+
+dnl The GNU MP Library is distributed in the hope that it will be useful, but
+dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+dnl License for more details.
+
+dnl You should have received a copy of the GNU Lesser General Public License
+dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+include(`../config.m4')
+
+
+C cycles/limb
+C AMD K8,K9 2.5
+C AMD K10 2.5
+C AMD bobcat 3.5
+C Intel P4 4
+C Intel core2 2.33
+C Intel NHM 2.5
+C Intel SBR 2.2
+C Intel atom 5
+C VIA nano 3.5
+
+C NOTES
+C * This has not been tuned for any specific processor. Its speed should not
+C be too bad, though.
+C * Using SSE2/AVX2 could result in many-fold speedup.
+
+C mpn_tabselect (mp_limb_t *rp, mp_limb_t *tp, mp_size_t n, mp_size_t nents, mp_size_t which)
+define(`rp', `%rdi')
+define(`tp', `%rsi')
+define(`n', `%rdx')
+define(`nents', `%rcx')
+define(`which', `%r8')
+
+define(`i', `%rbp')
+define(`maskp', `%r11')
+define(`maskn', `%r12')
+
+C rax rbx rcx rdx rdi rsi rbp (rsp) r8 r9 r10 r11 r12 r13 r14 r15
+C nents n rp tab which
+
+ABI_SUPPORT(DOS64)
+ABI_SUPPORT(STD64)
+
+ASM_START()
+ TEXT
+ ALIGN(16)
+PROLOGUE(mpn_tabselect)
+C Copy entry number which of the table at tp into the n limbs at rp. The
+C table is walked as nents entries of n limbs each, stored back to back.
+C Every entry is read and rp is rewritten on every pass; no branch and no
+C address computed here depends on which.
+ FUNC_ENTRY(4)
+C Under the DOS64 ABI, fetch which from the stack. Only the low 32 bits are
+C loaded.
+IFDOS(` mov 56(%rsp), %r8d ')
+C Preserve the registers used below as scratch (rbx), as the limb index i
+C (rbp) and as maskn (r12).
+ push %rbx
+ push %rbp
+ push %r12
+
+C Point rp and tp just past their n-limb areas, so that a negative index
+C counting up towards zero walks them from the first limb to the last.
+ lea (rp,n,8), rp
+ lea (tp,n,8), tp
+C Bias which by -nents. Since nents drops by one per pass, which+nents is
+C zero on pass k (counting from 0) exactly when k equals the entry value of
+C which.
+ sub nents, which
+L(outer):
+ lea (which,nents), %rax
+ neg %rax C set CF iff 'which' != k
+C maskn = all ones when this entry is not the selected one, else zero.
+ sbb maskn, maskn
+C maskp = complement of maskn, i.e. all ones only for the selected entry.
+ mov maskn, maskp
+ not maskp
+
+C i = -n, the index of the first limb relative to the end pointers.
+ mov n, i
+ neg i
+C If n is odd, handle a single limb first so that the main loop can work on
+C pairs: rp[i] = (tp[i] & maskp) | (rp[i] & maskn).
+ test $1, R32(n)
+ je L(top)
+ mov (tp,i,8), %rax
+ and maskp, %rax
+ mov (rp,i,8), %r9
+ and maskn, %r9
+ or %r9, %rax
+ mov %rax, (rp,i,8)
+ add $1, i
+C i reached zero, so n was 1 and there are no pairs left for this entry.
+ jns L(end)
+
+ ALIGN(16)
+C Main loop, two limbs per iteration, same merge as for the single limb.
+L(top): mov (tp,i,8), %rax
+ mov 8(tp,i,8), %rbx
+ and maskp, %rax
+ and maskp, %rbx
+ mov (rp,i,8), %r9
+ mov 8(rp,i,8), %r10
+ and maskn, %r9
+ and maskn, %r10
+ or %r9, %rax
+ or %r10, %rbx
+ mov %rax, (rp,i,8)
+ mov %rbx, 8(rp,i,8)
+ add $2, i
+ js L(top)
+
+C Step tp forward by n limbs to the end of the next entry and go on to the
+C next pass until all nents entries have been visited.
+L(end): lea (tp,n,8), tp
+ dec nents
+ jne L(outer)
+
+L(outer_end):
+C Restore the saved registers in reverse order of the pushes above.
+ pop %r12
+ pop %rbp
+ pop %rbx
+ FUNC_EXIT()
+ ret
+EPILOGUE()
dnl m4 macros for amd64 assembler.
-dnl Copyright 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2008, 2009 Free
-dnl Software Foundation, Inc.
+dnl Copyright 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2008, 2009, 2011,
+dnl 2012, 2013 Free Software Foundation, Inc.
dnl
dnl This file is part of the GNU MP Library.
dnl
define(CPUVEC_FUNCS_LIST,
``add_n',
+`addlsh1_n',
+`addlsh2_n',
`addmul_1',
+`addmul_2',
+`bdiv_dbm1c',
+`com',
`copyd',
`copyi',
`divexact_1',
-`divexact_by3c',
`divrem_1',
`gcd_1',
`lshift',
+`lshiftc',
`mod_1',
+`mod_1_1p',
+`mod_1_1p_cps',
+`mod_1s_2p',
+`mod_1s_2p_cps',
+`mod_1s_4p',
+`mod_1s_4p_cps',
`mod_34lsub1',
`modexact_1c_odd',
`mul_1',
`mul_basecase',
+`mullo_basecase',
`preinv_divrem_1',
`preinv_mod_1',
+`redc_1',
+`redc_2',
`rshift',
`sqr_basecase',
`sub_n',
+`sublsh1_n',
`submul_1'')
define(ASSERT_counter,1)
-define(`LEA',`
- mov $1@GOTPCREL(%rip), $2
+define(`LEA',`dnl
+ifdef(`PIC',
+ `mov $1@GOTPCREL(%rip), $2'
+,
+ `movabs `$'$1, $2')
')
define(`JUMPTABSECT', `.section .data.rel.ro.local,"aw",@progbits')
+
+dnl Usage: JMPENT(targlabel,tablabel)
+dnl
+dnl Emit one jump table entry. When PIC is defined the entry is a .long
+dnl holding targlabel-tablabel; otherwise it is a .quad holding targlabel
+dnl itself and tablabel is not used.
+
+define(`JMPENT',`dnl
+ifdef(`PIC',
+ `.long $1-$2'
+,
+ `.quad $1'
+)')
+
+
+dnl These macros are defined just for DOS64, where they provide calling
+dnl sequence glue code.
+
+define(`FUNC_ENTRY',`')
+define(`FUNC_EXIT',`')
+
+
+dnl Target ABI macros.
+
+define(`IFDOS', `')
+define(`IFSTD', `$1')
+define(`IFELF', `$1')
+
+
+dnl Usage: PROTECT(symbol)
+dnl
+dnl Used for private GMP symbols that should never be overridden by users.
+dnl This can save reloc entries and improve shlib sharing as well as
+dnl application startup times
+
+define(`PROTECT', `.hidden $1')
+
+
+dnl Usage: x86_lookup(target, key,value, key,value, ...)
+dnl
+dnl Look for `target' among the `key' parameters.
+dnl
+dnl x86_lookup expands to the corresponding `value', or generates an error
+dnl if `target' isn't found.
+
+define(x86_lookup,
+m4_assert_numargs_range(1,999)
+`ifelse(eval($#<3),1,
+`m4_error(`unrecognised part of x86 instruction: $1
+')',
+`ifelse(`$1',`$2', `$3',
+`x86_lookup(`$1',shift(shift(shift($@))))')')')
+
+
+dnl Usage: x86_opcode_regxmm(reg)
+dnl
+dnl Validate the given xmm register, and return its number, 0 to 15.
+
+define(x86_opcode_regxmm,
+m4_assert_numargs(1)
+`x86_lookup(`$1',x86_opcode_regxmm_list)')
+
+define(x86_opcode_regxmm_list,
+``%xmm0',0,
+`%xmm1',1,
+`%xmm2',2,
+`%xmm3',3,
+`%xmm4',4,
+`%xmm5',5,
+`%xmm6',6,
+`%xmm7',7,
+`%xmm8',8,
+`%xmm9',9,
+`%xmm10',10,
+`%xmm11',11,
+`%xmm12',12,
+`%xmm13',13,
+`%xmm14',14,
+`%xmm15',15')
+
+dnl Usage: palignr($imm,%srcreg,%dstreg)
+dnl
+dnl Emit a palignr instruction, using a .byte sequence, since obsolete but
+dnl still distributed versions of gas don't know SSSE3 instructions.
+dnl
+dnl The bytes emitted are, in order: the 0x66 prefix; a REX prefix, only when
+dnl srcreg or dstreg is xmm8 or above (0x40, plus 4 if dstreg is xmm8 or
+dnl above, plus 1 if srcreg is xmm8 or above); the opcode 0x0f,0x3a,0x0f; a
+dnl register-direct ModRM byte 0xc0 + 8*(dstreg mod 8) + (srcreg mod 8); and
+dnl the immediate, which is the first argument with its first character (the
+dnl leading dollar sign) dropped.
+
+define(`palignr',
+m4_assert_numargs(3)
+`.byte 0x66,dnl
+ifelse(eval(x86_opcode_regxmm($3) >= 8 || x86_opcode_regxmm($2) >= 8),1,
+ `eval(0x40+x86_opcode_regxmm($3)/8*4+x86_opcode_regxmm($2)/8),')dnl
+0x0f,0x3a,0x0f,dnl
+eval(0xc0+x86_opcode_regxmm($3)%8*8+x86_opcode_regxmm($2)%8),dnl
+substr($1,1)')
+
+
divert`'dnl
mpn/z8000 uses a 16-bit limb, it's possible this doesn't really work, on
account of various bits of C code assuming limb>=long and of course long is
invariably at least 32 bits.
-
-mpn/z8000x uses a 32-bit limb, this could perhaps be an ABI choice.
-Currently it's reached only by an MPN_PATH override.
-
-
-
-----------------
-Local variables:
-mode: text
-fill-column: 76
-End:
+++ /dev/null
-! Z8000 (32 bit limb version) __gmpn_add_n -- Add two limb vectors of equal,
-! non-zero length.
-
-! Copyright 1993, 1994, 2000 Free Software Foundation, Inc.
-
-! This file is part of the GNU MP Library.
-
-! The GNU MP Library is free software; you can redistribute it and/or modify
-! it under the terms of the GNU Lesser General Public License as published by
-! the Free Software Foundation; either version 3 of the License, or (at your
-! option) any later version.
-
-! The GNU MP Library is distributed in the hope that it will be useful, but
-! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-! License for more details.
-
-! You should have received a copy of the GNU Lesser General Public License
-! along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
-
-
-! INPUT PARAMETERS
-! res_ptr r7
-! s1_ptr r6
-! s2_ptr r5
-! size r4
-
-! If we are really crazy, we can use push to write a few result words
-! backwards, using push just because it is faster than reg+disp. We'd
-! then add 2x the number of words written to r7...
-
- segm
- .text
- even
- global ___gmpn_add_n
-___gmpn_add_n:
- popl rr0,@r6
- popl rr8,@r5
- addl rr0,rr8
- ldl @r7,rr0
- dec r4
- jr eq,Lend
-Loop: popl rr0,@r6
- popl rr8,@r5
- adc r1,r9
- adc r0,r8
- inc r7,#4
- ldl @r7,rr0
- dec r4
- jr ne,Loop
-Lend: ld r2,r4 ! use 0 already in r4
- ld r3,r4
- adc r2,r2
- ret t
+++ /dev/null
-! Z8000 (32 bit limb version) __gmpn_sub_n -- Subtract two limb vectors of the
-! same length > 0 and store difference in a third limb vector.
-
-! Copyright 1993, 1994, 2000 Free Software Foundation, Inc.
-
-! This file is part of the GNU MP Library.
-
-! The GNU MP Library is free software; you can redistribute it and/or modify
-! it under the terms of the GNU Lesser General Public License as published by
-! the Free Software Foundation; either version 3 of the License, or (at your
-! option) any later version.
-
-! The GNU MP Library is distributed in the hope that it will be useful, but
-! WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-! or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-! License for more details.
-
-! You should have received a copy of the GNU Lesser General Public License
-! along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
-
-
-! INPUT PARAMETERS
-! res_ptr r7
-! s1_ptr r6
-! s2_ptr r5
-! size r4
-
-! If we are really crazy, we can use push to write a few result words
-! backwards, using push just because it is faster than reg+disp. We'd
-! then add 2x the number of words written to r7...
-
- segm
- .text
- even
- global ___gmpn_sub_n
-___gmpn_sub_n:
- popl rr0,@r6
- popl rr8,@r5
- subl rr0,rr8
- ldl @r7,rr0
- dec r4
- jr eq,Lend
-Loop: popl rr0,@r6
- popl rr8,@r5
- sbc r1,r9
- sbc r0,r8
- inc r7,#4
- ldl @r7,rr0
- dec r4
- jr ne,Loop
-Lend: ld r2,r4 ! use 0 already in r4
- ld r3,r4
- adc r2,r2
- ret t
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.6 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
# along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
VPATH = @srcdir@
+am__make_dryrun = \
+ { \
+ am__dry=no; \
+ case $$MAKEFLAGS in \
+ *\\[\ \ ]*) \
+ echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \
+ | grep '^AM OK$$' >/dev/null || am__dry=yes;; \
+ *) \
+ for am__flg in $$MAKEFLAGS; do \
+ case $$am__flg in \
+ *=*|--*) ;; \
+ *n*) am__dry=yes; break;; \
+ esac; \
+ done;; \
+ esac; \
+ test $$am__dry = yes; \
+ }
pkgdatadir = $(datadir)/@PACKAGE@
pkgincludedir = $(includedir)/@PACKAGE@
pkglibdir = $(libdir)/@PACKAGE@
POST_UNINSTALL = :
build_triplet = @build@
host_triplet = @host@
-ANSI2KNR = $(top_builddir)/ansi2knr
subdir = mpq
DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
- $(top_srcdir)/configure.in
+ $(top_srcdir)/configure.ac
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
$(ACLOCAL_M4)
mkinstalldirs = $(install_sh) -d
CONFIG_CLEAN_VPATH_FILES =
LTLIBRARIES = $(noinst_LTLIBRARIES)
libmpq_la_LIBADD =
-am_libmpq_la_OBJECTS = abs$U.lo aors$U.lo canonicalize$U.lo clear$U.lo \
- clears$U.lo cmp$U.lo cmp_si$U.lo cmp_ui$U.lo div$U.lo \
- equal$U.lo get_d$U.lo get_den$U.lo get_num$U.lo get_str$U.lo \
- init$U.lo inits$U.lo inp_str$U.lo inv$U.lo md_2exp$U.lo \
- mul$U.lo neg$U.lo out_str$U.lo set$U.lo set_den$U.lo \
- set_num$U.lo set_si$U.lo set_str$U.lo set_ui$U.lo set_z$U.lo \
- set_d$U.lo set_f$U.lo swap$U.lo
+am_libmpq_la_OBJECTS = abs.lo aors.lo canonicalize.lo clear.lo \
+ clears.lo cmp.lo cmp_si.lo cmp_ui.lo div.lo equal.lo get_d.lo \
+ get_den.lo get_num.lo get_str.lo init.lo inits.lo inp_str.lo \
+ inv.lo md_2exp.lo mul.lo neg.lo out_str.lo set.lo set_den.lo \
+ set_num.lo set_si.lo set_str.lo set_ui.lo set_z.lo set_d.lo \
+ set_f.lo swap.lo
libmpq_la_OBJECTS = $(am_libmpq_la_OBJECTS)
DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
depcomp =
$(LDFLAGS) -o $@
SOURCES = $(libmpq_la_SOURCES)
DIST_SOURCES = $(libmpq_la_SOURCES)
+am__can_run_installinfo = \
+ case $$AM_UPDATE_INFO_DIR in \
+ n|no|NO) false;; \
+ *) (install-info --version) >/dev/null 2>&1;; \
+ esac
ETAGS = etags
CTAGS = ctags
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
SPEED_CYCLECOUNTER_OBJ = @SPEED_CYCLECOUNTER_OBJ@
STRIP = @STRIP@
TAL_OBJECT = @TAL_OBJECT@
+TUNE_LIBS = @TUNE_LIBS@
TUNE_SQR_OBJ = @TUNE_SQR_OBJ@
-U = @U@
U_FOR_BUILD = @U_FOR_BUILD@
VERSION = @VERSION@
WITH_READLINE_01 = @WITH_READLINE_01@
mkdir_p = @mkdir_p@
mpn_objects = @mpn_objects@
mpn_objs_in_libgmp = @mpn_objs_in_libgmp@
-mpn_objs_in_libmp = @mpn_objs_in_libmp@
oldincludedir = @oldincludedir@
pdfdir = @pdfdir@
prefix = @prefix@
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-libmpq.la: $(libmpq_la_OBJECTS) $(libmpq_la_DEPENDENCIES)
+libmpq.la: $(libmpq_la_OBJECTS) $(libmpq_la_DEPENDENCIES) $(EXTRA_libmpq_la_DEPENDENCIES)
$(LINK) $(libmpq_la_OBJECTS) $(libmpq_la_LIBADD) $(LIBS)
mostlyclean-compile:
distclean-compile:
-rm -f *.tab.c
-$(top_builddir)/ansi2knr:
- $(am__cd) $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) ./ansi2knr
-
-mostlyclean-kr:
- -test "$U" = "" || rm -f *_.c
.c.o:
$(COMPILE) -c $<
.c.lo:
$(LTCOMPILE) -c -o $@ $<
-abs_.c: abs.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/abs.c; then echo $(srcdir)/abs.c; else echo abs.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-aors_.c: aors.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/aors.c; then echo $(srcdir)/aors.c; else echo aors.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-canonicalize_.c: canonicalize.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/canonicalize.c; then echo $(srcdir)/canonicalize.c; else echo canonicalize.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-clear_.c: clear.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/clear.c; then echo $(srcdir)/clear.c; else echo clear.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-clears_.c: clears.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/clears.c; then echo $(srcdir)/clears.c; else echo clears.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-cmp_.c: cmp.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/cmp.c; then echo $(srcdir)/cmp.c; else echo cmp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-cmp_si_.c: cmp_si.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/cmp_si.c; then echo $(srcdir)/cmp_si.c; else echo cmp_si.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-cmp_ui_.c: cmp_ui.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/cmp_ui.c; then echo $(srcdir)/cmp_ui.c; else echo cmp_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-div_.c: div.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/div.c; then echo $(srcdir)/div.c; else echo div.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-equal_.c: equal.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/equal.c; then echo $(srcdir)/equal.c; else echo equal.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-get_d_.c: get_d.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/get_d.c; then echo $(srcdir)/get_d.c; else echo get_d.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-get_den_.c: get_den.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/get_den.c; then echo $(srcdir)/get_den.c; else echo get_den.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-get_num_.c: get_num.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/get_num.c; then echo $(srcdir)/get_num.c; else echo get_num.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-get_str_.c: get_str.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/get_str.c; then echo $(srcdir)/get_str.c; else echo get_str.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-init_.c: init.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/init.c; then echo $(srcdir)/init.c; else echo init.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-inits_.c: inits.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/inits.c; then echo $(srcdir)/inits.c; else echo inits.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-inp_str_.c: inp_str.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/inp_str.c; then echo $(srcdir)/inp_str.c; else echo inp_str.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-inv_.c: inv.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/inv.c; then echo $(srcdir)/inv.c; else echo inv.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-md_2exp_.c: md_2exp.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/md_2exp.c; then echo $(srcdir)/md_2exp.c; else echo md_2exp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mul_.c: mul.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mul.c; then echo $(srcdir)/mul.c; else echo mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-neg_.c: neg.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/neg.c; then echo $(srcdir)/neg.c; else echo neg.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-out_str_.c: out_str.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/out_str.c; then echo $(srcdir)/out_str.c; else echo out_str.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-set_.c: set.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set.c; then echo $(srcdir)/set.c; else echo set.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-set_d_.c: set_d.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_d.c; then echo $(srcdir)/set_d.c; else echo set_d.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-set_den_.c: set_den.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_den.c; then echo $(srcdir)/set_den.c; else echo set_den.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-set_f_.c: set_f.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_f.c; then echo $(srcdir)/set_f.c; else echo set_f.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-set_num_.c: set_num.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_num.c; then echo $(srcdir)/set_num.c; else echo set_num.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-set_si_.c: set_si.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_si.c; then echo $(srcdir)/set_si.c; else echo set_si.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-set_str_.c: set_str.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_str.c; then echo $(srcdir)/set_str.c; else echo set_str.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-set_ui_.c: set_ui.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_ui.c; then echo $(srcdir)/set_ui.c; else echo set_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-set_z_.c: set_z.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_z.c; then echo $(srcdir)/set_z.c; else echo set_z.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-swap_.c: swap.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/swap.c; then echo $(srcdir)/swap.c; else echo swap.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-abs_.$(OBJEXT) abs_.lo aors_.$(OBJEXT) aors_.lo \
-canonicalize_.$(OBJEXT) canonicalize_.lo clear_.$(OBJEXT) clear_.lo \
-clears_.$(OBJEXT) clears_.lo cmp_.$(OBJEXT) cmp_.lo cmp_si_.$(OBJEXT) \
-cmp_si_.lo cmp_ui_.$(OBJEXT) cmp_ui_.lo div_.$(OBJEXT) div_.lo \
-equal_.$(OBJEXT) equal_.lo get_d_.$(OBJEXT) get_d_.lo \
-get_den_.$(OBJEXT) get_den_.lo get_num_.$(OBJEXT) get_num_.lo \
-get_str_.$(OBJEXT) get_str_.lo init_.$(OBJEXT) init_.lo \
-inits_.$(OBJEXT) inits_.lo inp_str_.$(OBJEXT) inp_str_.lo \
-inv_.$(OBJEXT) inv_.lo md_2exp_.$(OBJEXT) md_2exp_.lo mul_.$(OBJEXT) \
-mul_.lo neg_.$(OBJEXT) neg_.lo out_str_.$(OBJEXT) out_str_.lo \
-set_.$(OBJEXT) set_.lo set_d_.$(OBJEXT) set_d_.lo set_den_.$(OBJEXT) \
-set_den_.lo set_f_.$(OBJEXT) set_f_.lo set_num_.$(OBJEXT) set_num_.lo \
-set_si_.$(OBJEXT) set_si_.lo set_str_.$(OBJEXT) set_str_.lo \
-set_ui_.$(OBJEXT) set_ui_.lo set_z_.$(OBJEXT) set_z_.lo \
-swap_.$(OBJEXT) swap_.lo : $(ANSI2KNR)
mostlyclean-libtool:
-rm -f *.lo
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
mostlyclean: mostlyclean-am
-mostlyclean-am: mostlyclean-compile mostlyclean-generic mostlyclean-kr \
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
mostlyclean-libtool
pdf: pdf-am
uninstall-am:
-.MAKE: $(top_builddir)/ansi2knr install-am install-strip
+.MAKE: install-am install-strip
.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
clean-libtool clean-noinstLTLIBRARIES ctags distclean \
install-pdf install-pdf-am install-ps install-ps-am \
install-strip installcheck installcheck-am installdirs \
maintainer-clean maintainer-clean-generic mostlyclean \
- mostlyclean-compile mostlyclean-generic mostlyclean-kr \
- mostlyclean-libtool pdf pdf-am ps ps-am tags uninstall \
- uninstall-am
+ mostlyclean-compile mostlyclean-generic mostlyclean-libtool \
+ pdf pdf-am ps ps-am tags uninstall uninstall-am
# Tell versions [3.59,3.63) of GNU make to not export all variables.
/* mpq_abs -- absolute value of a rational.
-Copyright 2000, 2001 Free Software Foundation, Inc.
+Copyright 2000, 2001, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
void
mpq_abs (mpq_ptr dst, mpq_srcptr src)
{
- mp_size_t num_size = src->_mp_num._mp_size;
- mp_size_t num_abs_size = ABS (num_size);
+ mp_size_t num_abs_size = ABSIZ(NUM(src));
if (dst != src)
{
- mp_size_t den_size = src->_mp_den._mp_size;
+ mp_size_t den_size = SIZ(DEN(src));
+ mp_ptr dp;
- MPZ_REALLOC (mpq_numref(dst), num_abs_size);
- MPZ_REALLOC (mpq_denref(dst), den_size);
+ dp = MPZ_NEWALLOC (NUM(dst), num_abs_size);
+ MPN_COPY (dp, PTR(NUM(src)), num_abs_size);
- MPN_COPY (dst->_mp_num._mp_d, src->_mp_num._mp_d, num_abs_size);
- MPN_COPY (dst->_mp_den._mp_d, src->_mp_den._mp_d, den_size);
-
- dst->_mp_den._mp_size = den_size;
+ dp = MPZ_NEWALLOC (DEN(dst), den_size);
+ SIZ(DEN(dst)) = den_size;
+ MPN_COPY (dp, PTR(DEN(src)), den_size);
}
- dst->_mp_num._mp_size = num_abs_size;
+ SIZ(NUM(dst)) = num_abs_size;
}
#include "gmp-impl.h"
-static void __gmpq_aors __GMP_PROTO ((REGPARM_3_1 (mpq_ptr, mpq_srcptr, mpq_srcptr, void (*) __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr))))) REGPARM_ATTR (1);
+static void __gmpq_aors (REGPARM_3_1 (mpq_ptr, mpq_srcptr, mpq_srcptr, void (*) (mpz_ptr, mpz_srcptr, mpz_srcptr))) REGPARM_ATTR (1);
#define mpq_aors(w,x,y,fun) __gmpq_aors (REGPARM_3_1 (w, x, y, fun))
REGPARM_ATTR (1) static void
mpq_aors (mpq_ptr rop, mpq_srcptr op1, mpq_srcptr op2,
- void (*fun) __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr)))
+ void (*fun) (mpz_ptr, mpz_srcptr, mpz_srcptr))
{
mpz_t gcd;
mpz_t tmp1, tmp2;
- mp_size_t op1_num_size = ABS (op1->_mp_num._mp_size);
- mp_size_t op1_den_size = op1->_mp_den._mp_size;
- mp_size_t op2_num_size = ABS (op2->_mp_num._mp_size);
- mp_size_t op2_den_size = op2->_mp_den._mp_size;
+ mp_size_t op1_num_size = ABSIZ(NUM(op1));
+ mp_size_t op1_den_size = SIZ(DEN(op1));
+ mp_size_t op2_num_size = ABSIZ(NUM(op2));
+ mp_size_t op2_den_size = SIZ(DEN(op2));
TMP_DECL;
TMP_MARK;
dare to overwrite the numerator of ROP when we are finished
with the numerators of OP1 and OP2. */
- mpz_gcd (gcd, &(op1->_mp_den), &(op2->_mp_den));
+ mpz_gcd (gcd, DEN(op1), DEN(op2));
if (! MPZ_EQUAL_1_P (gcd))
{
mpz_t t;
- mpz_divexact_gcd (tmp1, &(op2->_mp_den), gcd);
- mpz_mul (tmp1, &(op1->_mp_num), tmp1);
+ MPZ_TMP_INIT (t, MAX (op1_num_size + op2_den_size,
+ op2_num_size + op1_den_size) + 2 - SIZ(gcd));
- mpz_divexact_gcd (tmp2, &(op1->_mp_den), gcd);
- mpz_mul (tmp2, &(op2->_mp_num), tmp2);
+ mpz_divexact_gcd (t, DEN(op2), gcd);
+ mpz_divexact_gcd (tmp2, DEN(op1), gcd);
- MPZ_TMP_INIT (t, MAX (ABS (tmp1->_mp_size), ABS (tmp2->_mp_size)) + 1);
+ mpz_mul (tmp1, NUM(op1), t);
+ mpz_mul (t, NUM(op2), tmp2);
- (*fun) (t, tmp1, tmp2);
- mpz_divexact_gcd (tmp2, &(op1->_mp_den), gcd);
+ (*fun) (t, tmp1, t);
mpz_gcd (gcd, t, gcd);
if (MPZ_EQUAL_1_P (gcd))
{
- mpz_set (&(rop->_mp_num), t);
- mpz_mul (&(rop->_mp_den), &(op2->_mp_den), tmp2);
+ mpz_set (NUM(rop), t);
+ mpz_mul (DEN(rop), DEN(op2), tmp2);
}
else
{
- mpz_divexact_gcd (&(rop->_mp_num), t, gcd);
- mpz_divexact_gcd (tmp1, &(op2->_mp_den), gcd);
- mpz_mul (&(rop->_mp_den), tmp1, tmp2);
+ mpz_divexact_gcd (NUM(rop), t, gcd);
+ mpz_divexact_gcd (tmp1, DEN(op2), gcd);
+ mpz_mul (DEN(rop), tmp1, tmp2);
}
}
else
{
/* The common divisor is 1. This is the case (for random input) with
probability 6/(pi**2), which is about 60.8%. */
- mpz_mul (tmp1, &(op1->_mp_num), &(op2->_mp_den));
- mpz_mul (tmp2, &(op2->_mp_num), &(op1->_mp_den));
- (*fun) (&(rop->_mp_num), tmp1, tmp2);
- mpz_mul (&(rop->_mp_den), &(op1->_mp_den), &(op2->_mp_den));
+ mpz_mul (tmp1, NUM(op1), DEN(op2));
+ mpz_mul (tmp2, NUM(op2), DEN(op1));
+ (*fun) (NUM(rop), tmp1, tmp2);
+ mpz_mul (DEN(rop), DEN(op1), DEN(op2));
}
TMP_FREE;
}
mpz_t gcd;
TMP_DECL;
- if (op->_mp_den._mp_size == 0)
+ if (UNLIKELY (SIZ(DEN(op)) == 0))
DIVIDE_BY_ZERO;
TMP_MARK;
/* ??? Dunno if the 1+ is needed. */
- MPZ_TMP_INIT (gcd, 1 + MAX (ABS (op->_mp_num._mp_size),
- ABS (op->_mp_den._mp_size)));
+ MPZ_TMP_INIT (gcd, 1 + MAX (ABSIZ(NUM(op)),
+ ABSIZ(DEN(op))));
- mpz_gcd (gcd, &(op->_mp_num), &(op->_mp_den));
+ mpz_gcd (gcd, NUM(op), DEN(op));
if (! MPZ_EQUAL_1_P (gcd))
{
- mpz_divexact_gcd (&(op->_mp_num), &(op->_mp_num), gcd);
- mpz_divexact_gcd (&(op->_mp_den), &(op->_mp_den), gcd);
+ mpz_divexact_gcd (NUM(op), NUM(op), gcd);
+ mpz_divexact_gcd (DEN(op), DEN(op), gcd);
}
- if (op->_mp_den._mp_size < 0)
+ if (SIZ(DEN(op)) < 0)
{
- op->_mp_num._mp_size = -op->_mp_num._mp_size;
- op->_mp_den._mp_size = -op->_mp_den._mp_size;
+ SIZ(NUM(op)) = -SIZ(NUM(op));
+ SIZ(DEN(op)) = -SIZ(DEN(op));
}
TMP_FREE;
}
void
mpq_clear (MP_RAT *m)
{
- (*__gmp_free_func) (m->_mp_num._mp_d,
- m->_mp_num._mp_alloc * BYTES_PER_MP_LIMB);
- (*__gmp_free_func) (m->_mp_den._mp_d,
- m->_mp_den._mp_alloc * BYTES_PER_MP_LIMB);
+ (*__gmp_free_func) (PTR(NUM(m)),
+ ALLOC(NUM(m)) * BYTES_PER_MP_LIMB);
+ (*__gmp_free_func) (PTR(DEN(m)),
+ ALLOC(DEN(m)) * BYTES_PER_MP_LIMB);
}
int
mpq_cmp (const MP_RAT *op1, const MP_RAT *op2)
{
- mp_size_t num1_size = op1->_mp_num._mp_size;
- mp_size_t den1_size = op1->_mp_den._mp_size;
- mp_size_t num2_size = op2->_mp_num._mp_size;
- mp_size_t den2_size = op2->_mp_den._mp_size;
+ mp_size_t num1_size = SIZ(NUM(op1));
+ mp_size_t den1_size = SIZ(DEN(op1));
+ mp_size_t num2_size = SIZ(NUM(op2));
+ mp_size_t den2_size = SIZ(DEN(op2));
mp_size_t tmp1_size, tmp2_size;
mp_ptr tmp1_ptr, tmp2_ptr;
mp_size_t num1_sign;
int cnt1, cnt2;
mp_bitcnt_t bits1, bits2;
- count_leading_zeros (cnt1, op1->_mp_num._mp_d[num1_size - 1]);
- count_leading_zeros (cnt2, op2->_mp_den._mp_d[den2_size - 1]);
+ count_leading_zeros (cnt1, PTR(NUM(op1))[num1_size - 1]);
+ count_leading_zeros (cnt2, PTR(DEN(op2))[den2_size - 1]);
bits1 = tmp1_size * GMP_NUMB_BITS - cnt1 - cnt2 + 2 * GMP_NAIL_BITS;
- count_leading_zeros (cnt1, op2->_mp_num._mp_d[num2_size - 1]);
- count_leading_zeros (cnt2, op1->_mp_den._mp_d[den1_size - 1]);
+ count_leading_zeros (cnt1, PTR(NUM(op2))[num2_size - 1]);
+ count_leading_zeros (cnt2, PTR(DEN(op1))[den1_size - 1]);
bits2 = tmp2_size * GMP_NUMB_BITS - cnt1 - cnt2 + 2 * GMP_NAIL_BITS;
if (bits1 > bits2 + 1)
if (num1_size >= den2_size)
tmp1_size -= 0 == mpn_mul (tmp1_ptr,
- op1->_mp_num._mp_d, num1_size,
- op2->_mp_den._mp_d, den2_size);
+ PTR(NUM(op1)), num1_size,
+ PTR(DEN(op2)), den2_size);
else
tmp1_size -= 0 == mpn_mul (tmp1_ptr,
- op2->_mp_den._mp_d, den2_size,
- op1->_mp_num._mp_d, num1_size);
+ PTR(DEN(op2)), den2_size,
+ PTR(NUM(op1)), num1_size);
if (num2_size >= den1_size)
tmp2_size -= 0 == mpn_mul (tmp2_ptr,
- op2->_mp_num._mp_d, num2_size,
- op1->_mp_den._mp_d, den1_size);
+ PTR(NUM(op2)), num2_size,
+ PTR(DEN(op1)), den1_size);
else
tmp2_size -= 0 == mpn_mul (tmp2_ptr,
- op1->_mp_den._mp_d, den1_size,
- op2->_mp_num._mp_d, num2_size);
+ PTR(DEN(op1)), den1_size,
+ PTR(NUM(op2)), num2_size);
cc = tmp1_size - tmp2_size != 0
_mpq_cmp_si (mpq_srcptr q, long n, unsigned long d)
{
/* need canonical sign to get right result */
- ASSERT (q->_mp_den._mp_size > 0);
+ ASSERT (SIZ(DEN(q)) > 0);
- if (q->_mp_num._mp_size >= 0)
+ if (SIZ(NUM(q)) >= 0)
{
if (n >= 0)
return _mpq_cmp_ui (q, n, d); /* >=0 cmp >=0 */
else
{
mpq_t qabs;
- qabs->_mp_num._mp_size = ABS (q->_mp_num._mp_size);
- qabs->_mp_num._mp_d = q->_mp_num._mp_d;
- qabs->_mp_den._mp_size = q->_mp_den._mp_size;
- qabs->_mp_den._mp_d = q->_mp_den._mp_d;
+ SIZ(NUM(qabs)) = ABSIZ(NUM(q));
+ PTR(NUM(qabs)) = PTR(NUM(q));
+ SIZ(DEN(qabs)) = SIZ(DEN(q));
+ PTR(DEN(qabs)) = PTR(DEN(q));
return - _mpq_cmp_ui (qabs, -n, d); /* <0 cmp <0 */
}
int
_mpq_cmp_ui (const MP_RAT *op1, unsigned long int num2, unsigned long int den2)
{
- mp_size_t num1_size = op1->_mp_num._mp_size;
- mp_size_t den1_size = op1->_mp_den._mp_size;
+ mp_size_t num1_size = SIZ(NUM(op1));
+ mp_size_t den1_size = SIZ(DEN(op1));
mp_size_t tmp1_size, tmp2_size;
mp_ptr tmp1_ptr, tmp2_ptr;
mp_limb_t cy_limb;
/* need canonical sign to get right result */
ASSERT (den1_size > 0);
- if (den2 == 0)
+ if (UNLIKELY (den2 == 0))
DIVIDE_BY_ZERO;
if (num1_size == 0)
tmp1_ptr = TMP_ALLOC_LIMBS (num1_size + 1);
tmp2_ptr = TMP_ALLOC_LIMBS (den1_size + 1);
- cy_limb = mpn_mul_1 (tmp1_ptr, op1->_mp_num._mp_d, num1_size,
+ cy_limb = mpn_mul_1 (tmp1_ptr, PTR(NUM(op1)), num1_size,
(mp_limb_t) den2);
tmp1_ptr[num1_size] = cy_limb;
tmp1_size = num1_size + (cy_limb != 0);
- cy_limb = mpn_mul_1 (tmp2_ptr, op1->_mp_den._mp_d, den1_size,
+ cy_limb = mpn_mul_1 (tmp2_ptr, PTR(DEN(op1)), den1_size,
(mp_limb_t) num2);
tmp2_ptr[den1_size] = cy_limb;
tmp2_size = den1_size + (cy_limb != 0);
mp_size_t alloc;
TMP_DECL;
- op1_num_size = ABS (op1->_mp_num._mp_size);
- op1_den_size = op1->_mp_den._mp_size;
- op2_num_size = ABS (op2->_mp_num._mp_size);
- op2_den_size = op2->_mp_den._mp_size;
+ op2_num_size = ABSIZ(NUM(op2));
- if (op2_num_size == 0)
+ if (UNLIKELY (op2_num_size == 0))
DIVIDE_BY_ZERO;
+ op1_num_size = ABSIZ(NUM(op1));
+
if (op1_num_size == 0)
{
/* We special case this to simplify allocation logic; gcd(0,x) = x
is a singular case for the allocations. */
- quot->_mp_num._mp_size = 0;
- quot->_mp_den._mp_d[0] = 1;
- quot->_mp_den._mp_size = 1;
+ SIZ(NUM(quot)) = 0;
+ PTR(DEN(quot))[0] = 1;
+ SIZ(DEN(quot)) = 1;
return;
}
+ op2_den_size = SIZ(DEN(op2));
+ op1_den_size = SIZ(DEN(op1));
+
TMP_MARK;
alloc = MIN (op1_num_size, op2_num_size);
numerator of QUOT when we are finished with the numerators of OP1 and
OP2. */
- mpz_gcd (gcd1, &(op1->_mp_num), &(op2->_mp_num));
- mpz_gcd (gcd2, &(op2->_mp_den), &(op1->_mp_den));
+ mpz_gcd (gcd1, NUM(op1), NUM(op2));
+ mpz_gcd (gcd2, DEN(op2), DEN(op1));
- mpz_divexact_gcd (tmp1, &(op1->_mp_num), gcd1);
- mpz_divexact_gcd (tmp2, &(op2->_mp_den), gcd2);
+ mpz_divexact_gcd (tmp1, NUM(op1), gcd1);
+ mpz_divexact_gcd (tmp2, DEN(op2), gcd2);
mpz_mul (numtmp, tmp1, tmp2);
- mpz_divexact_gcd (tmp1, &(op2->_mp_num), gcd1);
- mpz_divexact_gcd (tmp2, &(op1->_mp_den), gcd2);
+ mpz_divexact_gcd (tmp1, NUM(op2), gcd1);
+ mpz_divexact_gcd (tmp2, DEN(op1), gcd2);
- mpz_mul (&(quot->_mp_den), tmp1, tmp2);
+ mpz_mul (DEN(quot), tmp1, tmp2);
/* We needed to go via NUMTMP to take care of QUOT being the same as OP2.
Now move NUMTMP to QUOT->_mp_num. */
- mpz_set (&(quot->_mp_num), numtmp);
+ mpz_set (NUM(quot), numtmp);
/* Keep the denominator positive. */
- if (quot->_mp_den._mp_size < 0)
+ if (SIZ(DEN(quot)) < 0)
{
- quot->_mp_den._mp_size = -quot->_mp_den._mp_size;
- quot->_mp_num._mp_size = -quot->_mp_num._mp_size;
+ SIZ(DEN(quot)) = -SIZ(DEN(quot));
+ SIZ(NUM(quot)) = -SIZ(NUM(quot));
}
TMP_FREE;
ASSERT_MPQ_CANONICAL (op1);
ASSERT_MPQ_CANONICAL (op2);
- num1_size = op1->_mp_num._mp_size;
- num2_size = op2->_mp_num._mp_size;
+ num1_size = SIZ(NUM(op1));
+ num2_size = SIZ(NUM(op2));
if (num1_size != num2_size)
return 0;
- num1_ptr = op1->_mp_num._mp_d;
- num2_ptr = op2->_mp_num._mp_d;
+ num1_ptr = PTR(NUM(op1));
+ num2_ptr = PTR(NUM(op2));
num1_size = ABS (num1_size);
for (i = 0; i < num1_size; i++)
if (num1_ptr[i] != num2_ptr[i])
return 0;
- den1_size = op1->_mp_den._mp_size;
- den2_size = op2->_mp_den._mp_size;
+ den1_size = SIZ(DEN(op1));
+ den2_size = SIZ(DEN(op2));
if (den1_size != den2_size)
return 0;
- den1_ptr = op1->_mp_den._mp_d;
- den2_ptr = op2->_mp_den._mp_d;
+ den1_ptr = PTR(DEN(op1));
+ den2_ptr = PTR(DEN(op2));
for (i = 0; i < den1_size; i++)
if (den1_ptr[i] != den2_ptr[i])
return 0;
double res;
mp_srcptr np, dp;
mp_ptr remp, tp;
- mp_size_t nsize = src->_mp_num._mp_size;
- mp_size_t dsize = src->_mp_den._mp_size;
+ mp_size_t nsize = SIZ(NUM(src));
+ mp_size_t dsize = SIZ(DEN(src));
mp_size_t qsize, prospective_qsize, zeros, chop, tsize;
mp_size_t sign_quotient = nsize;
long exp;
TMP_MARK;
nsize = ABS (nsize);
dsize = ABS (dsize);
- np = src->_mp_num._mp_d;
- dp = src->_mp_den._mp_d;
+ np = PTR(NUM(src));
+ dp = PTR(DEN(src));
prospective_qsize = nsize - dsize + 1; /* from using given n,d */
qsize = N_QLIMBS + 1; /* desired qsize */
/* mpq_get_den(den,rat_src) -- Set DEN to the denominator of RAT_SRC.
-Copyright 1991, 1994, 1995, 2001 Free Software Foundation, Inc.
+Copyright 1991, 1994, 1995, 2001, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
#include "gmp-impl.h"
void
-mpq_get_den (MP_INT *den, const MP_RAT *src)
+mpq_get_den (mpz_ptr den, mpq_srcptr src)
{
- mp_size_t size = src->_mp_den._mp_size;
+ mp_size_t size = SIZ(DEN(src));
+ mp_ptr dp;
- if (den->_mp_alloc < size)
- _mpz_realloc (den, size);
-
- MPN_COPY (den->_mp_d, src->_mp_den._mp_d, size);
- den->_mp_size = size;
+ dp = MPZ_NEWALLOC (den, size);
+ SIZ(den) = size;
+ MPN_COPY (dp, PTR(DEN(src)), size);
}
/* mpq_get_num(num,rat_src) -- Set NUM to the numerator of RAT_SRC.
-Copyright 1991, 1994, 1995, 2001 Free Software Foundation, Inc.
+Copyright 1991, 1994, 1995, 2001, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
#include "gmp-impl.h"
void
-mpq_get_num (MP_INT *num, const MP_RAT *src)
+mpq_get_num (mpz_ptr num, mpq_srcptr src)
{
- mp_size_t size = src->_mp_num._mp_size;
+ mp_size_t size = SIZ(NUM(src));
mp_size_t abs_size = ABS (size);
+ mp_ptr dp;
- if (num->_mp_alloc < abs_size)
- _mpz_realloc (num, abs_size);
+ dp = MPZ_NEWALLOC (num, abs_size);
+ SIZ(num) = size;
- MPN_COPY (num->_mp_d, src->_mp_num._mp_d, abs_size);
- num->_mp_size = size;
+ MPN_COPY (dp, PTR(NUM(src)), abs_size);
}
/* mpq_get_str -- mpq to string conversion.
-Copyright 2001, 2002, 2006 Free Software Foundation, Inc.
+Copyright 2001, 2002, 2006, 2011 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
#include <string.h>
#include "gmp.h"
#include "gmp-impl.h"
+#include "longlong.h"
char *
mpq_get_str (char *str, int base, mpq_srcptr q)
{
size_t str_alloc, len;
- ASSERT (ABS(base) >= 2);
- ASSERT (ABS(base) <= 62);
+ if (base > 62 || base < -36)
+ return NULL;
str_alloc = 0;
if (str == NULL)
{
/* This is an overestimate since we don't bother checking how much of
- the high limbs of num and den are used. +2 for rounding up the
- chars per bit of num and den. +3 for sign, slash and '\0'. */
- str_alloc = ((size_t) ((ABS (q->_mp_num._mp_size) + q->_mp_den._mp_size)
- * GMP_LIMB_BITS
- * mp_bases[ABS(base)].chars_per_bit_exactly))
- + 5;
+ the high limbs of num and den are used. +2 for rounding up the
+ chars per bit of num and den. +3 for sign, slash and '\0'. */
+ DIGITS_IN_BASE_PER_LIMB (str_alloc, ABSIZ(NUM(q)) + SIZ(DEN(q)), ABS(base));
+ str_alloc += 6;
+
str = (char *) (*__gmp_allocate_func) (str_alloc);
}
ASSERT (len == strlen(str));
ASSERT (str_alloc == 0 || len+1 <= str_alloc);
ASSERT (len+1 <= /* size recommended to applications */
- mpz_sizeinbase (mpq_numref(q), ABS(base)) +
- mpz_sizeinbase (mpq_denref(q), ABS(base)) + 3);
+ mpz_sizeinbase (mpq_numref(q), ABS(base)) +
+ mpz_sizeinbase (mpq_denref(q), ABS(base)) + 3);
if (str_alloc != 0)
__GMP_REALLOCATE_FUNC_MAYBE_TYPE (str, str_alloc, len+1, char);
void
mpq_init (MP_RAT *x)
{
- x->_mp_num._mp_alloc = 1;
- x->_mp_num._mp_d = (mp_ptr) (*__gmp_allocate_func) (BYTES_PER_MP_LIMB);
- x->_mp_num._mp_size = 0;
- x->_mp_den._mp_alloc = 1;
- x->_mp_den._mp_d = (mp_ptr) (*__gmp_allocate_func) (BYTES_PER_MP_LIMB);
- x->_mp_den._mp_d[0] = 1;
- x->_mp_den._mp_size = 1;
+ ALLOC(NUM(x)) = 1;
+ PTR(NUM(x)) = (mp_ptr) (*__gmp_allocate_func) (BYTES_PER_MP_LIMB);
+ SIZ(NUM(x)) = 0;
+ ALLOC(DEN(x)) = 1;
+ PTR(DEN(x)) = (mp_ptr) (*__gmp_allocate_func) (BYTES_PER_MP_LIMB);
+ PTR(DEN(x))[0] = 1;
+ SIZ(DEN(x)) = 1;
#ifdef __CHECKER__
/* let the low limb look initialized, for the benefit of mpz_get_ui etc */
- x->_mp_num._mp_d[0] = 0;
+ PTR(NUM(x))[0] = 0;
#endif
}
if (fp == NULL)
fp = stdin;
- q->_mp_den._mp_size = 1;
- q->_mp_den._mp_d[0] = 1;
+ SIZ(DEN(q)) = 1;
+ PTR(DEN(q))[0] = 1;
nread = mpz_inp_str (mpq_numref(q), fp, base);
if (nread == 0)
nread = mpz_inp_str_nowhite (mpq_denref(q), fp, base, c, nread);
if (nread == 0)
- {
- q->_mp_num._mp_size = 0;
- q->_mp_den._mp_size = 1;
- q->_mp_den._mp_d[0] = 1;
- }
+ {
+ SIZ(NUM(q)) = 0;
+ SIZ(DEN(q)) = 1;
+ PTR(DEN(q))[0] = 1;
+ }
}
else
{
/* mpq_inv(dest,src) -- invert a rational number, i.e. set DEST to SRC
with the numerator and denominator swapped.
-Copyright 1991, 1994, 1995, 2000, 2001 Free Software Foundation, Inc.
+Copyright 1991, 1994, 1995, 2000, 2001, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
#include "gmp-impl.h"
void
-mpq_inv (MP_RAT *dest, const MP_RAT *src)
+mpq_inv (mpq_ptr dest, mpq_srcptr src)
{
- mp_size_t num_size = src->_mp_num._mp_size;
- mp_size_t den_size = src->_mp_den._mp_size;
-
- if (num_size == 0)
- DIVIDE_BY_ZERO;
+ mp_size_t num_size = SIZ(NUM(src));
+ mp_size_t den_size = SIZ(DEN(src));
if (num_size < 0)
{
num_size = -num_size;
den_size = -den_size;
}
- dest->_mp_den._mp_size = num_size;
- dest->_mp_num._mp_size = den_size;
+ else if (UNLIKELY (num_size == 0))
+ DIVIDE_BY_ZERO;
+
+ SIZ(DEN(dest)) = num_size;
+ SIZ(NUM(dest)) = den_size;
- /* If dest == src we may just swap the numerator and denominator, but
- we have to ensure the new denominator is positive. */
+ /* If dest == src we may just swap the numerator and denominator;
+ we ensured that the new denominator is positive. */
if (dest == src)
{
- mp_size_t alloc = dest->_mp_num._mp_alloc;
- mp_ptr limb_ptr = dest->_mp_num._mp_d;
-
- dest->_mp_num._mp_alloc = dest->_mp_den._mp_alloc;
- dest->_mp_num._mp_d = dest->_mp_den._mp_d;
-
- dest->_mp_den._mp_alloc = alloc;
- dest->_mp_den._mp_d = limb_ptr;
+ MP_PTR_SWAP (PTR(NUM(dest)), PTR(DEN(dest)));
+ MP_SIZE_T_SWAP (ALLOC(NUM(dest)), ALLOC(DEN(dest)));
}
else
{
- den_size = ABS (den_size);
- if (dest->_mp_num._mp_alloc < den_size)
- _mpz_realloc (&(dest->_mp_num), den_size);
+ mp_ptr dp;
- if (dest->_mp_den._mp_alloc < num_size)
- _mpz_realloc (&(dest->_mp_den), num_size);
+ den_size = ABS (den_size);
+ dp = MPZ_NEWALLOC (NUM(dest), den_size);
+ MPN_COPY (dp, PTR(DEN(src)), den_size);
- MPN_COPY (dest->_mp_num._mp_d, src->_mp_den._mp_d, den_size);
- MPN_COPY (dest->_mp_den._mp_d, src->_mp_num._mp_d, num_size);
+ dp = MPZ_NEWALLOC (DEN(dest), num_size);
+ MPN_COPY (dp, PTR(NUM(src)), num_size);
}
}
/* mpq_mul_2exp, mpq_div_2exp - multiply or divide by 2^N */
/*
-Copyright 2000, 2002 Free Software Foundation, Inc.
+Copyright 2000, 2002, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
/* no realloc here if rsrc==rdst, so p and rsrc_ptr remain valid */
len -= (p - rsrc_ptr);
- MPZ_REALLOC (rdst, len);
- rdst_ptr = PTR(rdst);
+ rdst_ptr = MPZ_REALLOC (rdst, len);
if ((plow & 1) || n == 0)
{
- /* need DECR when src==dst */
+ /* need INCR when src==dst */
if (p != rdst_ptr)
- MPN_COPY_DECR (rdst_ptr, p, len);
+ MPN_COPY_INCR (rdst_ptr, p, len);
}
else
{
void
mpq_mul_2exp (mpq_ptr dst, mpq_srcptr src, mp_bitcnt_t n)
{
- mord_2exp (mpq_numref (dst), mpq_denref (dst),
- mpq_numref (src), mpq_denref (src), n);
+ mord_2exp (NUM(dst), DEN(dst), NUM(src), DEN(src), n);
}
void
mpq_div_2exp (mpq_ptr dst, mpq_srcptr src, mp_bitcnt_t n)
{
- if (SIZ (mpq_numref(src)) == 0)
+ if (SIZ(NUM(src)) == 0)
{
- dst->_mp_num._mp_size = 0;
- dst->_mp_den._mp_size = 1;
- dst->_mp_den._mp_d[0] = 1;
+ SIZ(NUM(dst)) = 0;
+ SIZ(DEN(dst)) = 1;
+ PTR(DEN(dst))[0] = 1;
return;
}
- mord_2exp (mpq_denref (dst), mpq_numref (dst),
- mpq_denref (src), mpq_numref (src), n);
+ mord_2exp (DEN(dst), NUM(dst), DEN(src), NUM(src), n);
}
return;
}
- op1_num_size = ABS (op1->_mp_num._mp_size);
- op1_den_size = op1->_mp_den._mp_size;
- op2_num_size = ABS (op2->_mp_num._mp_size);
- op2_den_size = op2->_mp_den._mp_size;
+ op1_num_size = ABSIZ(NUM(op1));
+ op1_den_size = SIZ(DEN(op1));
+ op2_num_size = ABSIZ(NUM(op2));
+ op2_den_size = SIZ(DEN(op2));
if (op1_num_size == 0 || op2_num_size == 0)
{
/* We special case this to simplify allocation logic; gcd(0,x) = x
is a singular case for the allocations. */
- prod->_mp_num._mp_size = 0;
- prod->_mp_den._mp_d[0] = 1;
- prod->_mp_den._mp_size = 1;
+ SIZ(NUM(prod)) = 0;
+ PTR(DEN(prod))[0] = 1;
+ SIZ(DEN(prod)) = 1;
return;
}
numerator of PROD when we are finished with the numerators of OP1 and
OP2. */
- mpz_gcd (gcd1, &(op1->_mp_num), &(op2->_mp_den));
- mpz_gcd (gcd2, &(op2->_mp_num), &(op1->_mp_den));
+ mpz_gcd (gcd1, NUM(op1), DEN(op2));
+ mpz_gcd (gcd2, NUM(op2), DEN(op1));
- mpz_divexact_gcd (tmp1, &(op1->_mp_num), gcd1);
- mpz_divexact_gcd (tmp2, &(op2->_mp_num), gcd2);
+ mpz_divexact_gcd (tmp1, NUM(op1), gcd1);
+ mpz_divexact_gcd (tmp2, NUM(op2), gcd2);
- mpz_mul (&(prod->_mp_num), tmp1, tmp2);
+ mpz_mul (NUM(prod), tmp1, tmp2);
- mpz_divexact_gcd (tmp1, &(op2->_mp_den), gcd1);
- mpz_divexact_gcd (tmp2, &(op1->_mp_den), gcd2);
+ mpz_divexact_gcd (tmp1, DEN(op2), gcd1);
+ mpz_divexact_gcd (tmp2, DEN(op1), gcd2);
- mpz_mul (&(prod->_mp_den), tmp1, tmp2);
+ mpz_mul (DEN(prod), tmp1, tmp2);
TMP_FREE;
}
/* mpq_neg -- negate a rational.
-Copyright 2000, 2001 Free Software Foundation, Inc.
+Copyright 2000, 2001, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
void
mpq_neg (mpq_ptr dst, mpq_srcptr src)
{
- mp_size_t num_size = src->_mp_num._mp_size;
+ mp_size_t num_size = SIZ(NUM(src));
if (src != dst)
{
- mp_size_t num_abs_size = ABS(num_size);
- mp_size_t den_size = src->_mp_den._mp_size;
+ mp_size_t size;
+ mp_ptr dp;
- MPZ_REALLOC (mpq_numref(dst), num_abs_size);
- MPZ_REALLOC (mpq_denref(dst), den_size);
+ size = ABS(num_size);
+ dp = MPZ_NEWALLOC (NUM(dst), size);
+ MPN_COPY (dp, PTR(NUM(src)), size);
- MPN_COPY (dst->_mp_num._mp_d, src->_mp_num._mp_d, num_abs_size);
- MPN_COPY (dst->_mp_den._mp_d, src->_mp_den._mp_d, den_size);
-
- dst->_mp_den._mp_size = den_size;
+ size = SIZ(DEN(src));
+ dp = MPZ_NEWALLOC (DEN(dst), size);
+ SIZ(DEN(dst)) = size;
+ MPN_COPY (dp, PTR(DEN(src)), size);
}
- dst->_mp_num._mp_size = -num_size;
+ SIZ(NUM(dst)) = -num_size;
}
/* mpq_set(dest,src) -- Set DEST to SRC.
-Copyright 1991, 1994, 1995, 2001 Free Software Foundation, Inc.
+Copyright 1991, 1994, 1995, 2001, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
#include "gmp-impl.h"
void
-mpq_set (MP_RAT *dest, const MP_RAT *src)
+mpq_set (mpq_ptr dest, mpq_srcptr src)
{
mp_size_t num_size, den_size;
mp_size_t abs_num_size;
+ mp_ptr dp;
- num_size = src->_mp_num._mp_size;
+ num_size = SIZ(NUM(src));
abs_num_size = ABS (num_size);
- if (dest->_mp_num._mp_alloc < abs_num_size)
- _mpz_realloc (&(dest->_mp_num), abs_num_size);
- MPN_COPY (dest->_mp_num._mp_d, src->_mp_num._mp_d, abs_num_size);
- dest->_mp_num._mp_size = num_size;
-
- den_size = src->_mp_den._mp_size;
- if (dest->_mp_den._mp_alloc < den_size)
- _mpz_realloc (&(dest->_mp_den), den_size);
- MPN_COPY (dest->_mp_den._mp_d, src->_mp_den._mp_d, den_size);
- dest->_mp_den._mp_size = den_size;
+ dp = MPZ_NEWALLOC (NUM(dest), abs_num_size);
+ SIZ(NUM(dest)) = num_size;
+ MPN_COPY (dp, PTR(NUM(src)), abs_num_size);
+
+ den_size = SIZ(DEN(src));
+ dp = MPZ_NEWALLOC (DEN(dest), den_size);
+ SIZ(DEN(dest)) = den_size;
+ MPN_COPY (dp, PTR(DEN(src)), den_size);
}
/* mpq_set_d(mpq_t q, double d) -- Set q to d without rounding.
-Copyright 2000, 2002, 2003 Free Software Foundation, Inc.
+Copyright 2000, 2002, 2003, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
{
if (d == 0.0)
{
- SIZ(&(dest->_mp_num)) = 0;
- SIZ(&(dest->_mp_den)) = 1;
- PTR(&(dest->_mp_den))[0] = 1;
+ SIZ(NUM(dest)) = 0;
+ SIZ(DEN(dest)) = 1;
+ PTR(DEN(dest))[0] = 1;
return;
}
dn = -exp;
- MPZ_REALLOC (&(dest->_mp_num), 3);
- np = PTR(&(dest->_mp_num));
+ np = MPZ_NEWALLOC (NUM(dest), 3);
#if LIMBS_PER_DOUBLE == 4
if ((tp[0] | tp[1] | tp[2]) == 0)
np[0] = tp[3], nn = 1;
#endif
dn += nn + 1;
ASSERT_ALWAYS (dn > 0);
- MPZ_REALLOC (&(dest->_mp_den), dn);
- dp = PTR(&(dest->_mp_den));
+ dp = MPZ_NEWALLOC (DEN(dest), dn);
MPN_ZERO (dp, dn - 1);
dp[dn - 1] = 1;
count_trailing_zeros (c, np[0] | dp[0]);
mpn_rshift (dp, dp, dn, c);
dn -= dp[dn - 1] == 0;
}
- SIZ(&(dest->_mp_den)) = dn;
- SIZ(&(dest->_mp_num)) = negative ? -nn : nn;
+ SIZ(DEN(dest)) = dn;
+ SIZ(NUM(dest)) = negative ? -nn : nn;
}
else
{
nn = exp;
- MPZ_REALLOC (&(dest->_mp_num), nn);
- np = PTR(&(dest->_mp_num));
+ np = MPZ_NEWALLOC (NUM(dest), nn);
switch (nn)
{
default:
break;
#endif
}
- dp = PTR(&(dest->_mp_den));
+ dp = PTR(DEN(dest));
dp[0] = 1;
- SIZ(&(dest->_mp_den)) = 1;
- SIZ(&(dest->_mp_num)) = negative ? -nn : nn;
+ SIZ(DEN(dest)) = 1;
+ SIZ(NUM(dest)) = negative ? -nn : nn;
}
}
/* mpq_set_den(dest,den) -- Set the denominator of DEST from DEN.
-Copyright 1991, 1994, 1995, 1996, 2000, 2001 Free Software Foundation, Inc.
+Copyright 1991, 1994, 1995, 1996, 2000, 2001, 2012 Free Software
+Foundation, Inc.
This file is part of the GNU MP Library.
#include "gmp-impl.h"
void
-mpq_set_den (MP_RAT *dest, const MP_INT *den)
+mpq_set_den (mpq_ptr dest, mpz_srcptr den)
{
- mp_size_t size = den->_mp_size;
+ mp_size_t size = SIZ (den);
mp_size_t abs_size = ABS (size);
+ mp_ptr dp;
- if (dest->_mp_den._mp_alloc < abs_size)
- _mpz_realloc (&(dest->_mp_den), abs_size);
+ dp = MPZ_NEWALLOC (DEN(dest), abs_size);
- MPN_COPY (dest->_mp_den._mp_d, den->_mp_d, abs_size);
- dest->_mp_den._mp_size = size;
+ SIZ(DEN(dest)) = size;
+ MPN_COPY (dp, PTR(den), abs_size);
}
if (fsize == 0)
{
/* set q=0 */
- q->_mp_num._mp_size = 0;
- q->_mp_den._mp_size = 1;
- q->_mp_den._mp_d[0] = 1;
+ SIZ(NUM(q)) = 0;
+ SIZ(DEN(q)) = 1;
+ PTR(DEN(q))[0] = 1;
return;
}
/* radix point is to the right of the limbs, no denominator */
mp_ptr num_ptr;
- MPZ_REALLOC (mpq_numref (q), fexp);
- num_ptr = q->_mp_num._mp_d;
+ num_ptr = MPZ_NEWALLOC (mpq_numref (q), fexp);
MPN_ZERO (num_ptr, fexp - abs_fsize);
MPN_COPY (num_ptr + fexp - abs_fsize, fptr, abs_fsize);
- q->_mp_num._mp_size = fsize >= 0 ? fexp : -fexp;
- q->_mp_den._mp_size = 1;
- q->_mp_den._mp_d[0] = 1;
+ SIZ(NUM(q)) = fsize >= 0 ? fexp : -fexp;
+ SIZ(DEN(q)) = 1;
+ PTR(DEN(q))[0] = 1;
}
else
{
mp_size_t den_size;
den_size = abs_fsize - fexp;
- MPZ_REALLOC (mpq_numref (q), abs_fsize);
- MPZ_REALLOC (mpq_denref (q), den_size+1);
- num_ptr = q->_mp_num._mp_d;
- den_ptr = q->_mp_den._mp_d;
+ num_ptr = MPZ_NEWALLOC (mpq_numref (q), abs_fsize);
+ den_ptr = MPZ_NEWALLOC (mpq_denref (q), den_size+1);
if (flow & 1)
{
den_ptr[den_size] = GMP_LIMB_HIGHBIT >> (shift-1);
}
- q->_mp_num._mp_size = fsize >= 0 ? abs_fsize : -abs_fsize;
- q->_mp_den._mp_size = den_size + 1;
+ SIZ(NUM(q)) = fsize >= 0 ? abs_fsize : -abs_fsize;
+ SIZ(DEN(q)) = den_size + 1;
}
}
/* mpq_set_num(dest,num) -- Set the numerator of DEST from NUM.
-Copyright 1991, 1994, 1995, 2001 Free Software Foundation, Inc.
+Copyright 1991, 1994, 1995, 2001, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
#include "gmp-impl.h"
void
-mpq_set_num (MP_RAT *dest, const MP_INT *num)
+mpq_set_num (mpq_ptr dest, mpz_srcptr num)
{
- mp_size_t size = num->_mp_size;
+ mp_size_t size = SIZ (num); /* signed size field of NUM */
mp_size_t abs_size = ABS (size);
+ mp_ptr dp;
- if (dest->_mp_num._mp_alloc < abs_size)
- _mpz_realloc (&(dest->_mp_num), abs_size);
+ dp = MPZ_NEWALLOC (NUM(dest), abs_size); /* limb pointer of DEST's numerator; presumably grown to hold abs_size limbs when needed -- confirm against MPZ_NEWALLOC */
- MPN_COPY (dest->_mp_num._mp_d, num->_mp_d, abs_size);
- dest->_mp_num._mp_size = size;
+ SIZ(NUM(dest)) = size; /* the signed size is stored unchanged, so NUM's sign is carried over */
+ MPN_COPY (dp, PTR(num), abs_size); /* copy abs_size limbs from NUM into the numerator */
}
{
/* Canonicalize 0/d to 0/1. */
den = 1;
- dest->_mp_num._mp_size = 0;
+ SIZ(NUM(dest)) = 0;
}
else
{
- dest->_mp_num._mp_d[0] = abs_num;
- dest->_mp_num._mp_size = num > 0 ? 1 : -1;
+ PTR(NUM(dest))[0] = abs_num;
+ SIZ(NUM(dest)) = num > 0 ? 1 : -1;
}
- dest->_mp_den._mp_d[0] = den;
- dest->_mp_den._mp_size = (den != 0);
+ PTR(DEN(dest))[0] = den;
+ SIZ(DEN(dest)) = (den != 0);
}
slash = strchr (str, '/');
if (slash == NULL)
{
- q->_mp_den._mp_size = 1;
- q->_mp_den._mp_d[0] = 1;
+ SIZ(DEN(q)) = 1;
+ PTR(DEN(q))[0] = 1;
return mpz_set_str (mpq_numref(q), str, base);
}
{
/* Canonicalize 0/n to 0/1. */
den = 1;
- dest->_mp_num._mp_size = 0;
+ SIZ(NUM(dest)) = 0;
}
else
{
- dest->_mp_num._mp_d[0] = num;
- dest->_mp_num._mp_size = 1;
+ PTR(NUM(dest))[0] = num;
+ SIZ(NUM(dest)) = 1;
}
- dest->_mp_den._mp_d[0] = den;
- dest->_mp_den._mp_size = (den != 0);
+ PTR(DEN(dest))[0] = den;
+ SIZ(DEN(dest)) = (den != 0);
}
/* mpq_set_z (dest,src) -- Set DEST to SRC.
-Copyright 1996, 2001 Free Software Foundation, Inc.
+Copyright 1996, 2001, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
{
mp_size_t num_size;
mp_size_t abs_num_size;
+ mp_ptr dp;
- num_size = src->_mp_size;
+ num_size = SIZ (src);
abs_num_size = ABS (num_size);
- if (dest->_mp_num._mp_alloc < abs_num_size)
- _mpz_realloc (&(dest->_mp_num), abs_num_size);
- MPN_COPY (dest->_mp_num._mp_d, src->_mp_d, abs_num_size);
- dest->_mp_num._mp_size = num_size;
+ dp = MPZ_NEWALLOC (NUM(dest), abs_num_size);
+ SIZ(NUM(dest)) = num_size;
+ MPN_COPY (dp, PTR(src), abs_num_size);
- dest->_mp_den._mp_d[0] = 1;
- dest->_mp_den._mp_size = 1;
+ PTR(DEN(dest))[0] = 1;
+ SIZ(DEN(dest)) = 1;
}
mp_size_t usize, vsize;
mp_size_t ualloc, valloc;
- ualloc = u->_mp_num._mp_alloc;
- valloc = v->_mp_num._mp_alloc;
- v->_mp_num._mp_alloc = ualloc;
- u->_mp_num._mp_alloc = valloc;
-
- usize = u->_mp_num._mp_size;
- vsize = v->_mp_num._mp_size;
- v->_mp_num._mp_size = usize;
- u->_mp_num._mp_size = vsize;
-
- up = u->_mp_num._mp_d;
- vp = v->_mp_num._mp_d;
- v->_mp_num._mp_d = up;
- u->_mp_num._mp_d = vp;
-
-
- ualloc = u->_mp_den._mp_alloc;
- valloc = v->_mp_den._mp_alloc;
- v->_mp_den._mp_alloc = ualloc;
- u->_mp_den._mp_alloc = valloc;
-
- usize = u->_mp_den._mp_size;
- vsize = v->_mp_den._mp_size;
- v->_mp_den._mp_size = usize;
- u->_mp_den._mp_size = vsize;
-
- up = u->_mp_den._mp_d;
- vp = v->_mp_den._mp_d;
- v->_mp_den._mp_d = up;
- u->_mp_den._mp_d = vp;
+ ualloc = ALLOC(NUM(u));
+ valloc = ALLOC(NUM(v));
+ ALLOC(NUM(v)) = ualloc;
+ ALLOC(NUM(u)) = valloc;
+
+ usize = SIZ(NUM(u));
+ vsize = SIZ(NUM(v));
+ SIZ(NUM(v)) = usize;
+ SIZ(NUM(u)) = vsize;
+
+ up = PTR(NUM(u));
+ vp = PTR(NUM(v));
+ PTR(NUM(v)) = up;
+ PTR(NUM(u)) = vp;
+
+
+ ualloc = ALLOC(DEN(u));
+ valloc = ALLOC(DEN(v));
+ ALLOC(DEN(v)) = ualloc;
+ ALLOC(DEN(u)) = valloc;
+
+ usize = SIZ(DEN(u));
+ vsize = SIZ(DEN(v));
+ SIZ(DEN(v)) = usize;
+ SIZ(DEN(u)) = vsize;
+
+ up = PTR(DEN(u));
+ vp = PTR(DEN(v));
+ PTR(DEN(v)) = up;
+ PTR(DEN(u)) = vp;
}
--- /dev/null
+/* mpz_2fac_ui(RESULT, N) -- Set RESULT to N!!.
+
+Contributed to the GNU project by Marco Bodrato.
+
+Copyright 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#define FACTOR_LIST_STORE(P, PR, MAX_PR, VEC, I) \
+ do { \
+ if ((PR) > (MAX_PR)) { \
+ (VEC)[(I)++] = (PR); \
+ (PR) = (P); \
+ } else \
+ (PR) *= (P); \
+ } while (0) /* If PR exceeds MAX_PR, store PR at VEC[I], advance I and restart PR from P; otherwise multiply PR by P. */
+
+#define FAC_2DSC_THRESHOLD ((FAC_DSC_THRESHOLD << 1) | (FAC_DSC_THRESHOLD & 1)) /* FAC_DSC_THRESHOLD doubled, with its low bit kept. */
+#define FACTORS_PER_LIMB (GMP_NUMB_BITS / (LOG2C(FAC_2DSC_THRESHOLD-1)+1)) /* Presumably how many factors below the threshold fit in one limb -- confirm. */
+
+/* Computes n!!, the 2-multi-factorial of n. (aka double-factorial or semi-factorial)
+ WARNING: it assumes that n fits in a limb!
+
+ The result is written to x.
+
+ Even n: calls mpz_oddfac_1 (x, n >> 1, 0) and then mpz_mul_2exp with a
+ bit count that is read from __gmp_fac2cnt_table for small non-zero n, or
+ computed as n - popcount(n) otherwise.
+
+ Odd n: up to ODD_DOUBLEFACTORIAL_TABLE_LIMIT the value is read from
+ __gmp_odd2fac_table; below FAC_2DSC_THRESHOLD the odd factors are grouped
+ into limbs and multiplied by mpz_prodlimbs; otherwise the work is handed
+ to mpz_oddfac_1 (x, n, 1).
+ */
+void
+mpz_2fac_ui (mpz_ptr x, unsigned long n)
+{
+ ASSERT (n <= GMP_NUMB_MAX);
+
+ if ((n & 1) == 0) { /* n is even, n = 2k, (2k)!! = k! 2^k */
+ mp_limb_t count;
+
+ if ((n <= TABLE_LIMIT_2N_MINUS_POPC_2N) & (n != 0)) /* n == 0 is excluded: the index n / 2 - 1 below would underflow */
+ count = __gmp_fac2cnt_table[n / 2 - 1];
+ else
+ {
+ popc_limb (count, n); /* popc(n) == popc(k) */
+ count = n - count; /* n - popc(n) == k + k - popc(k) */
+ }
+ mpz_oddfac_1 (x, n >> 1, 0);
+ mpz_mul_2exp (x, x, count);
+ } else { /* n is odd */
+ if (n <= ODD_DOUBLEFACTORIAL_TABLE_LIMIT) {
+ PTR (x)[0] = __gmp_odd2fac_table[n >> 1]; /* NOTE(review): limb 0 is written with no visible allocation check -- presumably x always owns at least one limb; confirm */
+ SIZ (x) = 1;
+ } else if (BELOW_THRESHOLD (n, FAC_2DSC_THRESHOLD)) { /* odd basecase, */
+ mp_limb_t *factors, prod, max_prod, j;
+ TMP_SDECL;
+
+ /* FIXME: we might alloc a fixed amount 1+FAC_2DSC_THRESHOLD/FACTORS_PER_LIMB */
+ TMP_SMARK;
+ factors = TMP_SALLOC_LIMBS (1 + n / (2 * FACTORS_PER_LIMB));
+
+ factors[0] = ODD_DOUBLEFACTORIAL_TABLE_MAX; /* presumably the largest tabulated odd double-factorial, covering the factors the loop below skips -- confirm */
+ j = 1;
+ prod = n; /* the running product starts from n itself */
+
+ max_prod = GMP_NUMB_MAX / FAC_2DSC_THRESHOLD;
+ while ((n -= 2) > ODD_DOUBLEFACTORIAL_TABLE_LIMIT) /* visit n-2, n-4, ... while still above the table limit */
+ FACTOR_LIST_STORE (n, prod, max_prod, factors, j);
+
+ factors[j++] = prod; /* store the last partial product */
+ mpz_prodlimbs (x, factors, j);
+
+ TMP_SFREE;
+ } else { /* for the asymptotically fast odd case, let oddfac do the job. */
+ mpz_oddfac_1 (x, n, 1);
+ }
+ }
+}
+
+#undef FACTORS_PER_LIMB
+#undef FACTOR_LIST_STORE
+#undef FAC_2DSC_THRESHOLD
## Process this file with automake to generate Makefile.in
-# Copyright 1996, 1998, 1999, 2000, 2001, 2002, 2003 Free Software
+# Copyright 1996, 1998, 1999, 2000, 2001, 2002, 2003, 2012 Free Software
# Foundation, Inc.
#
# This file is part of the GNU MP Library.
noinst_LTLIBRARIES = libmpz.la
libmpz_la_SOURCES = aors.h aors_ui.h fits_s.h mul_i.h \
+ 2fac_ui.c \
add.c add_ui.c abs.c aorsmul.c aorsmul_i.c and.c array_init.c \
bin_ui.c bin_uiui.c cdiv_q.c \
cdiv_q_ui.c cdiv_qr.c cdiv_qr_ui.c cdiv_r.c cdiv_r_ui.c cdiv_ui.c \
import.c init.c init2.c inits.c inp_raw.c inp_str.c \
invert.c ior.c iset.c iset_d.c iset_si.c iset_str.c iset_ui.c \
jacobi.c kronsz.c kronuz.c kronzs.c kronzu.c \
- lcm.c lcm_ui.c lucnum_ui.c lucnum2_ui.c millerrabin.c \
+ lcm.c lcm_ui.c lucnum_ui.c lucnum2_ui.c mfac_uiui.c millerrabin.c \
mod.c mul.c mul_2exp.c mul_si.c mul_ui.c n_pow_ui.c neg.c nextprime.c \
+ oddfac_1.c \
out_raw.c out_str.c perfpow.c perfsqr.c popcount.c pow_ui.c powm.c \
- powm_sec.c powm_ui.c pprime_p.c random.c random2.c \
+ powm_sec.c powm_ui.c pprime_p.c prodlimbs.c primorial_ui.c random.c random2.c \
realloc.c realloc2.c remove.c root.c rootrem.c rrandomb.c \
scan0.c scan1.c set.c set_d.c set_f.c set_q.c set_si.c set_str.c \
set_ui.c setbit.c size.c sizeinbase.c sqrt.c sqrtrem.c sub.c sub_ui.c \
swap.c tdiv_ui.c tdiv_q.c tdiv_q_2exp.c tdiv_q_ui.c tdiv_qr.c \
tdiv_qr_ui.c tdiv_r.c tdiv_r_2exp.c tdiv_r_ui.c tstbit.c ui_pow_ui.c \
ui_sub.c urandomb.c urandomm.c xor.c
-
-# These are BUILT_SOURCES at the top-level, so normally they're built before
-# recursing into this directory.
-#
-fac_ui.h:
- cd ..; $(MAKE) $(AM_MAKEFLAGS) mpz/fac_ui.h
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.6 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@SET_MAKE@
-# Copyright 1996, 1998, 1999, 2000, 2001, 2002, 2003 Free Software
+# Copyright 1996, 1998, 1999, 2000, 2001, 2002, 2003, 2012 Free Software
# Foundation, Inc.
#
# This file is part of the GNU MP Library.
# along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
VPATH = @srcdir@
+am__make_dryrun = \
+ { \
+ am__dry=no; \
+ case $$MAKEFLAGS in \
+ *\\[\ \ ]*) \
+ echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \
+ | grep '^AM OK$$' >/dev/null || am__dry=yes;; \
+ *) \
+ for am__flg in $$MAKEFLAGS; do \
+ case $$am__flg in \
+ *=*|--*) ;; \
+ *n*) am__dry=yes; break;; \
+ esac; \
+ done;; \
+ esac; \
+ test $$am__dry = yes; \
+ }
pkgdatadir = $(datadir)/@PACKAGE@
pkgincludedir = $(includedir)/@PACKAGE@
pkglibdir = $(libdir)/@PACKAGE@
POST_UNINSTALL = :
build_triplet = @build@
host_triplet = @host@
-ANSI2KNR = $(top_builddir)/ansi2knr
subdir = mpz
DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
- $(top_srcdir)/configure.in
+ $(top_srcdir)/configure.ac
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
$(ACLOCAL_M4)
mkinstalldirs = $(install_sh) -d
CONFIG_CLEAN_VPATH_FILES =
LTLIBRARIES = $(noinst_LTLIBRARIES)
libmpz_la_LIBADD =
-am_libmpz_la_OBJECTS = add$U.lo add_ui$U.lo abs$U.lo aorsmul$U.lo \
- aorsmul_i$U.lo and$U.lo array_init$U.lo bin_ui$U.lo \
- bin_uiui$U.lo cdiv_q$U.lo cdiv_q_ui$U.lo cdiv_qr$U.lo \
- cdiv_qr_ui$U.lo cdiv_r$U.lo cdiv_r_ui$U.lo cdiv_ui$U.lo \
- cfdiv_q_2exp$U.lo cfdiv_r_2exp$U.lo clear$U.lo clears$U.lo \
- clrbit$U.lo cmp$U.lo cmp_d$U.lo cmp_si$U.lo cmp_ui$U.lo \
- cmpabs$U.lo cmpabs_d$U.lo cmpabs_ui$U.lo com$U.lo combit$U.lo \
- cong$U.lo cong_2exp$U.lo cong_ui$U.lo divexact$U.lo \
- divegcd$U.lo dive_ui$U.lo divis$U.lo divis_ui$U.lo \
- divis_2exp$U.lo dump$U.lo export$U.lo fac_ui$U.lo fdiv_q$U.lo \
- fdiv_q_ui$U.lo fdiv_qr$U.lo fdiv_qr_ui$U.lo fdiv_r$U.lo \
- fdiv_r_ui$U.lo fdiv_ui$U.lo fib_ui$U.lo fib2_ui$U.lo \
- fits_sint$U.lo fits_slong$U.lo fits_sshort$U.lo fits_uint$U.lo \
- fits_ulong$U.lo fits_ushort$U.lo gcd$U.lo gcd_ui$U.lo \
- gcdext$U.lo get_d$U.lo get_d_2exp$U.lo get_si$U.lo \
- get_str$U.lo get_ui$U.lo getlimbn$U.lo hamdist$U.lo \
- import$U.lo init$U.lo init2$U.lo inits$U.lo inp_raw$U.lo \
- inp_str$U.lo invert$U.lo ior$U.lo iset$U.lo iset_d$U.lo \
- iset_si$U.lo iset_str$U.lo iset_ui$U.lo jacobi$U.lo \
- kronsz$U.lo kronuz$U.lo kronzs$U.lo kronzu$U.lo lcm$U.lo \
- lcm_ui$U.lo lucnum_ui$U.lo lucnum2_ui$U.lo millerrabin$U.lo \
- mod$U.lo mul$U.lo mul_2exp$U.lo mul_si$U.lo mul_ui$U.lo \
- n_pow_ui$U.lo neg$U.lo nextprime$U.lo out_raw$U.lo \
- out_str$U.lo perfpow$U.lo perfsqr$U.lo popcount$U.lo \
- pow_ui$U.lo powm$U.lo powm_sec$U.lo powm_ui$U.lo pprime_p$U.lo \
- random$U.lo random2$U.lo realloc$U.lo realloc2$U.lo \
- remove$U.lo root$U.lo rootrem$U.lo rrandomb$U.lo scan0$U.lo \
- scan1$U.lo set$U.lo set_d$U.lo set_f$U.lo set_q$U.lo \
- set_si$U.lo set_str$U.lo set_ui$U.lo setbit$U.lo size$U.lo \
- sizeinbase$U.lo sqrt$U.lo sqrtrem$U.lo sub$U.lo sub_ui$U.lo \
- swap$U.lo tdiv_ui$U.lo tdiv_q$U.lo tdiv_q_2exp$U.lo \
- tdiv_q_ui$U.lo tdiv_qr$U.lo tdiv_qr_ui$U.lo tdiv_r$U.lo \
- tdiv_r_2exp$U.lo tdiv_r_ui$U.lo tstbit$U.lo ui_pow_ui$U.lo \
- ui_sub$U.lo urandomb$U.lo urandomm$U.lo xor$U.lo
+am_libmpz_la_OBJECTS = 2fac_ui.lo add.lo add_ui.lo abs.lo aorsmul.lo \
+ aorsmul_i.lo and.lo array_init.lo bin_ui.lo bin_uiui.lo \
+ cdiv_q.lo cdiv_q_ui.lo cdiv_qr.lo cdiv_qr_ui.lo cdiv_r.lo \
+ cdiv_r_ui.lo cdiv_ui.lo cfdiv_q_2exp.lo cfdiv_r_2exp.lo \
+ clear.lo clears.lo clrbit.lo cmp.lo cmp_d.lo cmp_si.lo \
+ cmp_ui.lo cmpabs.lo cmpabs_d.lo cmpabs_ui.lo com.lo combit.lo \
+ cong.lo cong_2exp.lo cong_ui.lo divexact.lo divegcd.lo \
+ dive_ui.lo divis.lo divis_ui.lo divis_2exp.lo dump.lo \
+ export.lo fac_ui.lo fdiv_q.lo fdiv_q_ui.lo fdiv_qr.lo \
+ fdiv_qr_ui.lo fdiv_r.lo fdiv_r_ui.lo fdiv_ui.lo fib_ui.lo \
+ fib2_ui.lo fits_sint.lo fits_slong.lo fits_sshort.lo \
+ fits_uint.lo fits_ulong.lo fits_ushort.lo gcd.lo gcd_ui.lo \
+ gcdext.lo get_d.lo get_d_2exp.lo get_si.lo get_str.lo \
+ get_ui.lo getlimbn.lo hamdist.lo import.lo init.lo init2.lo \
+ inits.lo inp_raw.lo inp_str.lo invert.lo ior.lo iset.lo \
+ iset_d.lo iset_si.lo iset_str.lo iset_ui.lo jacobi.lo \
+ kronsz.lo kronuz.lo kronzs.lo kronzu.lo lcm.lo lcm_ui.lo \
+ lucnum_ui.lo lucnum2_ui.lo mfac_uiui.lo millerrabin.lo mod.lo \
+ mul.lo mul_2exp.lo mul_si.lo mul_ui.lo n_pow_ui.lo neg.lo \
+ nextprime.lo oddfac_1.lo out_raw.lo out_str.lo perfpow.lo \
+ perfsqr.lo popcount.lo pow_ui.lo powm.lo powm_sec.lo \
+ powm_ui.lo pprime_p.lo prodlimbs.lo primorial_ui.lo random.lo \
+ random2.lo realloc.lo realloc2.lo remove.lo root.lo rootrem.lo \
+ rrandomb.lo scan0.lo scan1.lo set.lo set_d.lo set_f.lo \
+ set_q.lo set_si.lo set_str.lo set_ui.lo setbit.lo size.lo \
+ sizeinbase.lo sqrt.lo sqrtrem.lo sub.lo sub_ui.lo swap.lo \
+ tdiv_ui.lo tdiv_q.lo tdiv_q_2exp.lo tdiv_q_ui.lo tdiv_qr.lo \
+ tdiv_qr_ui.lo tdiv_r.lo tdiv_r_2exp.lo tdiv_r_ui.lo tstbit.lo \
+ ui_pow_ui.lo ui_sub.lo urandomb.lo urandomm.lo xor.lo
libmpz_la_OBJECTS = $(am_libmpz_la_OBJECTS)
DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
depcomp =
$(LDFLAGS) -o $@
SOURCES = $(libmpz_la_SOURCES)
DIST_SOURCES = $(libmpz_la_SOURCES)
+am__can_run_installinfo = \
+ case $$AM_UPDATE_INFO_DIR in \
+ n|no|NO) false;; \
+ *) (install-info --version) >/dev/null 2>&1;; \
+ esac
ETAGS = etags
CTAGS = ctags
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
SPEED_CYCLECOUNTER_OBJ = @SPEED_CYCLECOUNTER_OBJ@
STRIP = @STRIP@
TAL_OBJECT = @TAL_OBJECT@
+TUNE_LIBS = @TUNE_LIBS@
TUNE_SQR_OBJ = @TUNE_SQR_OBJ@
-U = @U@
U_FOR_BUILD = @U_FOR_BUILD@
VERSION = @VERSION@
WITH_READLINE_01 = @WITH_READLINE_01@
mkdir_p = @mkdir_p@
mpn_objects = @mpn_objects@
mpn_objs_in_libgmp = @mpn_objs_in_libgmp@
-mpn_objs_in_libmp = @mpn_objs_in_libmp@
oldincludedir = @oldincludedir@
pdfdir = @pdfdir@
prefix = @prefix@
INCLUDES = -D__GMP_WITHIN_GMP -I$(top_srcdir)
noinst_LTLIBRARIES = libmpz.la
libmpz_la_SOURCES = aors.h aors_ui.h fits_s.h mul_i.h \
+ 2fac_ui.c \
add.c add_ui.c abs.c aorsmul.c aorsmul_i.c and.c array_init.c \
bin_ui.c bin_uiui.c cdiv_q.c \
cdiv_q_ui.c cdiv_qr.c cdiv_qr_ui.c cdiv_r.c cdiv_r_ui.c cdiv_ui.c \
import.c init.c init2.c inits.c inp_raw.c inp_str.c \
invert.c ior.c iset.c iset_d.c iset_si.c iset_str.c iset_ui.c \
jacobi.c kronsz.c kronuz.c kronzs.c kronzu.c \
- lcm.c lcm_ui.c lucnum_ui.c lucnum2_ui.c millerrabin.c \
+ lcm.c lcm_ui.c lucnum_ui.c lucnum2_ui.c mfac_uiui.c millerrabin.c \
mod.c mul.c mul_2exp.c mul_si.c mul_ui.c n_pow_ui.c neg.c nextprime.c \
+ oddfac_1.c \
out_raw.c out_str.c perfpow.c perfsqr.c popcount.c pow_ui.c powm.c \
- powm_sec.c powm_ui.c pprime_p.c random.c random2.c \
+ powm_sec.c powm_ui.c pprime_p.c prodlimbs.c primorial_ui.c random.c random2.c \
realloc.c realloc2.c remove.c root.c rootrem.c rrandomb.c \
scan0.c scan1.c set.c set_d.c set_f.c set_q.c set_si.c set_str.c \
set_ui.c setbit.c size.c sizeinbase.c sqrt.c sqrtrem.c sub.c sub_ui.c \
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-libmpz.la: $(libmpz_la_OBJECTS) $(libmpz_la_DEPENDENCIES)
+libmpz.la: $(libmpz_la_OBJECTS) $(libmpz_la_DEPENDENCIES) $(EXTRA_libmpz_la_DEPENDENCIES)
$(LINK) $(libmpz_la_OBJECTS) $(libmpz_la_LIBADD) $(LIBS)
mostlyclean-compile:
distclean-compile:
-rm -f *.tab.c
-$(top_builddir)/ansi2knr:
- $(am__cd) $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) ./ansi2knr
-
-mostlyclean-kr:
- -test "$U" = "" || rm -f *_.c
.c.o:
$(COMPILE) -c $<
.c.lo:
$(LTCOMPILE) -c -o $@ $<
-abs_.c: abs.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/abs.c; then echo $(srcdir)/abs.c; else echo abs.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-add_.c: add.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/add.c; then echo $(srcdir)/add.c; else echo add.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-add_ui_.c: add_ui.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/add_ui.c; then echo $(srcdir)/add_ui.c; else echo add_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-and_.c: and.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/and.c; then echo $(srcdir)/and.c; else echo and.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-aorsmul_.c: aorsmul.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/aorsmul.c; then echo $(srcdir)/aorsmul.c; else echo aorsmul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-aorsmul_i_.c: aorsmul_i.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/aorsmul_i.c; then echo $(srcdir)/aorsmul_i.c; else echo aorsmul_i.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-array_init_.c: array_init.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/array_init.c; then echo $(srcdir)/array_init.c; else echo array_init.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-bin_ui_.c: bin_ui.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/bin_ui.c; then echo $(srcdir)/bin_ui.c; else echo bin_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-bin_uiui_.c: bin_uiui.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/bin_uiui.c; then echo $(srcdir)/bin_uiui.c; else echo bin_uiui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-cdiv_q_.c: cdiv_q.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/cdiv_q.c; then echo $(srcdir)/cdiv_q.c; else echo cdiv_q.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-cdiv_q_ui_.c: cdiv_q_ui.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/cdiv_q_ui.c; then echo $(srcdir)/cdiv_q_ui.c; else echo cdiv_q_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-cdiv_qr_.c: cdiv_qr.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/cdiv_qr.c; then echo $(srcdir)/cdiv_qr.c; else echo cdiv_qr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-cdiv_qr_ui_.c: cdiv_qr_ui.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/cdiv_qr_ui.c; then echo $(srcdir)/cdiv_qr_ui.c; else echo cdiv_qr_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-cdiv_r_.c: cdiv_r.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/cdiv_r.c; then echo $(srcdir)/cdiv_r.c; else echo cdiv_r.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-cdiv_r_ui_.c: cdiv_r_ui.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/cdiv_r_ui.c; then echo $(srcdir)/cdiv_r_ui.c; else echo cdiv_r_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-cdiv_ui_.c: cdiv_ui.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/cdiv_ui.c; then echo $(srcdir)/cdiv_ui.c; else echo cdiv_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-cfdiv_q_2exp_.c: cfdiv_q_2exp.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/cfdiv_q_2exp.c; then echo $(srcdir)/cfdiv_q_2exp.c; else echo cfdiv_q_2exp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-cfdiv_r_2exp_.c: cfdiv_r_2exp.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/cfdiv_r_2exp.c; then echo $(srcdir)/cfdiv_r_2exp.c; else echo cfdiv_r_2exp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-clear_.c: clear.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/clear.c; then echo $(srcdir)/clear.c; else echo clear.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-clears_.c: clears.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/clears.c; then echo $(srcdir)/clears.c; else echo clears.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-clrbit_.c: clrbit.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/clrbit.c; then echo $(srcdir)/clrbit.c; else echo clrbit.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-cmp_.c: cmp.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/cmp.c; then echo $(srcdir)/cmp.c; else echo cmp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-cmp_d_.c: cmp_d.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/cmp_d.c; then echo $(srcdir)/cmp_d.c; else echo cmp_d.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-cmp_si_.c: cmp_si.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/cmp_si.c; then echo $(srcdir)/cmp_si.c; else echo cmp_si.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-cmp_ui_.c: cmp_ui.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/cmp_ui.c; then echo $(srcdir)/cmp_ui.c; else echo cmp_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-cmpabs_.c: cmpabs.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/cmpabs.c; then echo $(srcdir)/cmpabs.c; else echo cmpabs.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-cmpabs_d_.c: cmpabs_d.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/cmpabs_d.c; then echo $(srcdir)/cmpabs_d.c; else echo cmpabs_d.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-cmpabs_ui_.c: cmpabs_ui.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/cmpabs_ui.c; then echo $(srcdir)/cmpabs_ui.c; else echo cmpabs_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-com_.c: com.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/com.c; then echo $(srcdir)/com.c; else echo com.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-combit_.c: combit.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/combit.c; then echo $(srcdir)/combit.c; else echo combit.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-cong_.c: cong.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/cong.c; then echo $(srcdir)/cong.c; else echo cong.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-cong_2exp_.c: cong_2exp.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/cong_2exp.c; then echo $(srcdir)/cong_2exp.c; else echo cong_2exp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-cong_ui_.c: cong_ui.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/cong_ui.c; then echo $(srcdir)/cong_ui.c; else echo cong_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-dive_ui_.c: dive_ui.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/dive_ui.c; then echo $(srcdir)/dive_ui.c; else echo dive_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-divegcd_.c: divegcd.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/divegcd.c; then echo $(srcdir)/divegcd.c; else echo divegcd.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-divexact_.c: divexact.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/divexact.c; then echo $(srcdir)/divexact.c; else echo divexact.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-divis_.c: divis.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/divis.c; then echo $(srcdir)/divis.c; else echo divis.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-divis_2exp_.c: divis_2exp.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/divis_2exp.c; then echo $(srcdir)/divis_2exp.c; else echo divis_2exp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-divis_ui_.c: divis_ui.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/divis_ui.c; then echo $(srcdir)/divis_ui.c; else echo divis_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-dump_.c: dump.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/dump.c; then echo $(srcdir)/dump.c; else echo dump.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-export_.c: export.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/export.c; then echo $(srcdir)/export.c; else echo export.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-fac_ui_.c: fac_ui.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/fac_ui.c; then echo $(srcdir)/fac_ui.c; else echo fac_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-fdiv_q_.c: fdiv_q.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/fdiv_q.c; then echo $(srcdir)/fdiv_q.c; else echo fdiv_q.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-fdiv_q_ui_.c: fdiv_q_ui.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/fdiv_q_ui.c; then echo $(srcdir)/fdiv_q_ui.c; else echo fdiv_q_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-fdiv_qr_.c: fdiv_qr.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/fdiv_qr.c; then echo $(srcdir)/fdiv_qr.c; else echo fdiv_qr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-fdiv_qr_ui_.c: fdiv_qr_ui.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/fdiv_qr_ui.c; then echo $(srcdir)/fdiv_qr_ui.c; else echo fdiv_qr_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-fdiv_r_.c: fdiv_r.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/fdiv_r.c; then echo $(srcdir)/fdiv_r.c; else echo fdiv_r.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-fdiv_r_ui_.c: fdiv_r_ui.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/fdiv_r_ui.c; then echo $(srcdir)/fdiv_r_ui.c; else echo fdiv_r_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-fdiv_ui_.c: fdiv_ui.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/fdiv_ui.c; then echo $(srcdir)/fdiv_ui.c; else echo fdiv_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-fib2_ui_.c: fib2_ui.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/fib2_ui.c; then echo $(srcdir)/fib2_ui.c; else echo fib2_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-fib_ui_.c: fib_ui.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/fib_ui.c; then echo $(srcdir)/fib_ui.c; else echo fib_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-fits_sint_.c: fits_sint.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/fits_sint.c; then echo $(srcdir)/fits_sint.c; else echo fits_sint.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-fits_slong_.c: fits_slong.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/fits_slong.c; then echo $(srcdir)/fits_slong.c; else echo fits_slong.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-fits_sshort_.c: fits_sshort.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/fits_sshort.c; then echo $(srcdir)/fits_sshort.c; else echo fits_sshort.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-fits_uint_.c: fits_uint.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/fits_uint.c; then echo $(srcdir)/fits_uint.c; else echo fits_uint.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-fits_ulong_.c: fits_ulong.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/fits_ulong.c; then echo $(srcdir)/fits_ulong.c; else echo fits_ulong.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-fits_ushort_.c: fits_ushort.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/fits_ushort.c; then echo $(srcdir)/fits_ushort.c; else echo fits_ushort.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-gcd_.c: gcd.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/gcd.c; then echo $(srcdir)/gcd.c; else echo gcd.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-gcd_ui_.c: gcd_ui.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/gcd_ui.c; then echo $(srcdir)/gcd_ui.c; else echo gcd_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-gcdext_.c: gcdext.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/gcdext.c; then echo $(srcdir)/gcdext.c; else echo gcdext.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-get_d_.c: get_d.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/get_d.c; then echo $(srcdir)/get_d.c; else echo get_d.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-get_d_2exp_.c: get_d_2exp.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/get_d_2exp.c; then echo $(srcdir)/get_d_2exp.c; else echo get_d_2exp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-get_si_.c: get_si.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/get_si.c; then echo $(srcdir)/get_si.c; else echo get_si.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-get_str_.c: get_str.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/get_str.c; then echo $(srcdir)/get_str.c; else echo get_str.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-get_ui_.c: get_ui.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/get_ui.c; then echo $(srcdir)/get_ui.c; else echo get_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-getlimbn_.c: getlimbn.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/getlimbn.c; then echo $(srcdir)/getlimbn.c; else echo getlimbn.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-hamdist_.c: hamdist.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/hamdist.c; then echo $(srcdir)/hamdist.c; else echo hamdist.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-import_.c: import.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/import.c; then echo $(srcdir)/import.c; else echo import.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-init_.c: init.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/init.c; then echo $(srcdir)/init.c; else echo init.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-init2_.c: init2.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/init2.c; then echo $(srcdir)/init2.c; else echo init2.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-inits_.c: inits.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/inits.c; then echo $(srcdir)/inits.c; else echo inits.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-inp_raw_.c: inp_raw.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/inp_raw.c; then echo $(srcdir)/inp_raw.c; else echo inp_raw.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-inp_str_.c: inp_str.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/inp_str.c; then echo $(srcdir)/inp_str.c; else echo inp_str.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-invert_.c: invert.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/invert.c; then echo $(srcdir)/invert.c; else echo invert.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-ior_.c: ior.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/ior.c; then echo $(srcdir)/ior.c; else echo ior.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-iset_.c: iset.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/iset.c; then echo $(srcdir)/iset.c; else echo iset.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-iset_d_.c: iset_d.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/iset_d.c; then echo $(srcdir)/iset_d.c; else echo iset_d.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-iset_si_.c: iset_si.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/iset_si.c; then echo $(srcdir)/iset_si.c; else echo iset_si.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-iset_str_.c: iset_str.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/iset_str.c; then echo $(srcdir)/iset_str.c; else echo iset_str.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-iset_ui_.c: iset_ui.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/iset_ui.c; then echo $(srcdir)/iset_ui.c; else echo iset_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-jacobi_.c: jacobi.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/jacobi.c; then echo $(srcdir)/jacobi.c; else echo jacobi.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-kronsz_.c: kronsz.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/kronsz.c; then echo $(srcdir)/kronsz.c; else echo kronsz.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-kronuz_.c: kronuz.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/kronuz.c; then echo $(srcdir)/kronuz.c; else echo kronuz.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-kronzs_.c: kronzs.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/kronzs.c; then echo $(srcdir)/kronzs.c; else echo kronzs.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-kronzu_.c: kronzu.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/kronzu.c; then echo $(srcdir)/kronzu.c; else echo kronzu.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-lcm_.c: lcm.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/lcm.c; then echo $(srcdir)/lcm.c; else echo lcm.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-lcm_ui_.c: lcm_ui.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/lcm_ui.c; then echo $(srcdir)/lcm_ui.c; else echo lcm_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-lucnum2_ui_.c: lucnum2_ui.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/lucnum2_ui.c; then echo $(srcdir)/lucnum2_ui.c; else echo lucnum2_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-lucnum_ui_.c: lucnum_ui.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/lucnum_ui.c; then echo $(srcdir)/lucnum_ui.c; else echo lucnum_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-millerrabin_.c: millerrabin.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/millerrabin.c; then echo $(srcdir)/millerrabin.c; else echo millerrabin.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mod_.c: mod.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mod.c; then echo $(srcdir)/mod.c; else echo mod.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mul_.c: mul.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mul.c; then echo $(srcdir)/mul.c; else echo mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mul_2exp_.c: mul_2exp.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mul_2exp.c; then echo $(srcdir)/mul_2exp.c; else echo mul_2exp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mul_si_.c: mul_si.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mul_si.c; then echo $(srcdir)/mul_si.c; else echo mul_si.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mul_ui_.c: mul_ui.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mul_ui.c; then echo $(srcdir)/mul_ui.c; else echo mul_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-n_pow_ui_.c: n_pow_ui.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/n_pow_ui.c; then echo $(srcdir)/n_pow_ui.c; else echo n_pow_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-neg_.c: neg.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/neg.c; then echo $(srcdir)/neg.c; else echo neg.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-nextprime_.c: nextprime.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/nextprime.c; then echo $(srcdir)/nextprime.c; else echo nextprime.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-out_raw_.c: out_raw.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/out_raw.c; then echo $(srcdir)/out_raw.c; else echo out_raw.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-out_str_.c: out_str.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/out_str.c; then echo $(srcdir)/out_str.c; else echo out_str.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-perfpow_.c: perfpow.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/perfpow.c; then echo $(srcdir)/perfpow.c; else echo perfpow.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-perfsqr_.c: perfsqr.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/perfsqr.c; then echo $(srcdir)/perfsqr.c; else echo perfsqr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-popcount_.c: popcount.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/popcount.c; then echo $(srcdir)/popcount.c; else echo popcount.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-pow_ui_.c: pow_ui.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/pow_ui.c; then echo $(srcdir)/pow_ui.c; else echo pow_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-powm_.c: powm.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/powm.c; then echo $(srcdir)/powm.c; else echo powm.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-powm_sec_.c: powm_sec.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/powm_sec.c; then echo $(srcdir)/powm_sec.c; else echo powm_sec.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-powm_ui_.c: powm_ui.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/powm_ui.c; then echo $(srcdir)/powm_ui.c; else echo powm_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-pprime_p_.c: pprime_p.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/pprime_p.c; then echo $(srcdir)/pprime_p.c; else echo pprime_p.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-random_.c: random.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/random.c; then echo $(srcdir)/random.c; else echo random.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-random2_.c: random2.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/random2.c; then echo $(srcdir)/random2.c; else echo random2.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-realloc_.c: realloc.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/realloc.c; then echo $(srcdir)/realloc.c; else echo realloc.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-realloc2_.c: realloc2.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/realloc2.c; then echo $(srcdir)/realloc2.c; else echo realloc2.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-remove_.c: remove.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/remove.c; then echo $(srcdir)/remove.c; else echo remove.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-root_.c: root.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/root.c; then echo $(srcdir)/root.c; else echo root.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-rootrem_.c: rootrem.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/rootrem.c; then echo $(srcdir)/rootrem.c; else echo rootrem.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-rrandomb_.c: rrandomb.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/rrandomb.c; then echo $(srcdir)/rrandomb.c; else echo rrandomb.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-scan0_.c: scan0.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/scan0.c; then echo $(srcdir)/scan0.c; else echo scan0.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-scan1_.c: scan1.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/scan1.c; then echo $(srcdir)/scan1.c; else echo scan1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-set_.c: set.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set.c; then echo $(srcdir)/set.c; else echo set.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-set_d_.c: set_d.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_d.c; then echo $(srcdir)/set_d.c; else echo set_d.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-set_f_.c: set_f.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_f.c; then echo $(srcdir)/set_f.c; else echo set_f.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-set_q_.c: set_q.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_q.c; then echo $(srcdir)/set_q.c; else echo set_q.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-set_si_.c: set_si.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_si.c; then echo $(srcdir)/set_si.c; else echo set_si.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-set_str_.c: set_str.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_str.c; then echo $(srcdir)/set_str.c; else echo set_str.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-set_ui_.c: set_ui.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_ui.c; then echo $(srcdir)/set_ui.c; else echo set_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-setbit_.c: setbit.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/setbit.c; then echo $(srcdir)/setbit.c; else echo setbit.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-size_.c: size.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/size.c; then echo $(srcdir)/size.c; else echo size.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-sizeinbase_.c: sizeinbase.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sizeinbase.c; then echo $(srcdir)/sizeinbase.c; else echo sizeinbase.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-sqrt_.c: sqrt.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sqrt.c; then echo $(srcdir)/sqrt.c; else echo sqrt.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-sqrtrem_.c: sqrtrem.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sqrtrem.c; then echo $(srcdir)/sqrtrem.c; else echo sqrtrem.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-sub_.c: sub.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sub.c; then echo $(srcdir)/sub.c; else echo sub.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-sub_ui_.c: sub_ui.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sub_ui.c; then echo $(srcdir)/sub_ui.c; else echo sub_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-swap_.c: swap.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/swap.c; then echo $(srcdir)/swap.c; else echo swap.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-tdiv_q_.c: tdiv_q.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/tdiv_q.c; then echo $(srcdir)/tdiv_q.c; else echo tdiv_q.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-tdiv_q_2exp_.c: tdiv_q_2exp.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/tdiv_q_2exp.c; then echo $(srcdir)/tdiv_q_2exp.c; else echo tdiv_q_2exp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-tdiv_q_ui_.c: tdiv_q_ui.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/tdiv_q_ui.c; then echo $(srcdir)/tdiv_q_ui.c; else echo tdiv_q_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-tdiv_qr_.c: tdiv_qr.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/tdiv_qr.c; then echo $(srcdir)/tdiv_qr.c; else echo tdiv_qr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-tdiv_qr_ui_.c: tdiv_qr_ui.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/tdiv_qr_ui.c; then echo $(srcdir)/tdiv_qr_ui.c; else echo tdiv_qr_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-tdiv_r_.c: tdiv_r.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/tdiv_r.c; then echo $(srcdir)/tdiv_r.c; else echo tdiv_r.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-tdiv_r_2exp_.c: tdiv_r_2exp.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/tdiv_r_2exp.c; then echo $(srcdir)/tdiv_r_2exp.c; else echo tdiv_r_2exp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-tdiv_r_ui_.c: tdiv_r_ui.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/tdiv_r_ui.c; then echo $(srcdir)/tdiv_r_ui.c; else echo tdiv_r_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-tdiv_ui_.c: tdiv_ui.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/tdiv_ui.c; then echo $(srcdir)/tdiv_ui.c; else echo tdiv_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-tstbit_.c: tstbit.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/tstbit.c; then echo $(srcdir)/tstbit.c; else echo tstbit.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-ui_pow_ui_.c: ui_pow_ui.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/ui_pow_ui.c; then echo $(srcdir)/ui_pow_ui.c; else echo ui_pow_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-ui_sub_.c: ui_sub.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/ui_sub.c; then echo $(srcdir)/ui_sub.c; else echo ui_sub.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-urandomb_.c: urandomb.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/urandomb.c; then echo $(srcdir)/urandomb.c; else echo urandomb.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-urandomm_.c: urandomm.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/urandomm.c; then echo $(srcdir)/urandomm.c; else echo urandomm.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-xor_.c: xor.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/xor.c; then echo $(srcdir)/xor.c; else echo xor.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-abs_.$(OBJEXT) abs_.lo add_.$(OBJEXT) add_.lo add_ui_.$(OBJEXT) \
-add_ui_.lo and_.$(OBJEXT) and_.lo aorsmul_.$(OBJEXT) aorsmul_.lo \
-aorsmul_i_.$(OBJEXT) aorsmul_i_.lo array_init_.$(OBJEXT) \
-array_init_.lo bin_ui_.$(OBJEXT) bin_ui_.lo bin_uiui_.$(OBJEXT) \
-bin_uiui_.lo cdiv_q_.$(OBJEXT) cdiv_q_.lo cdiv_q_ui_.$(OBJEXT) \
-cdiv_q_ui_.lo cdiv_qr_.$(OBJEXT) cdiv_qr_.lo cdiv_qr_ui_.$(OBJEXT) \
-cdiv_qr_ui_.lo cdiv_r_.$(OBJEXT) cdiv_r_.lo cdiv_r_ui_.$(OBJEXT) \
-cdiv_r_ui_.lo cdiv_ui_.$(OBJEXT) cdiv_ui_.lo cfdiv_q_2exp_.$(OBJEXT) \
-cfdiv_q_2exp_.lo cfdiv_r_2exp_.$(OBJEXT) cfdiv_r_2exp_.lo \
-clear_.$(OBJEXT) clear_.lo clears_.$(OBJEXT) clears_.lo \
-clrbit_.$(OBJEXT) clrbit_.lo cmp_.$(OBJEXT) cmp_.lo cmp_d_.$(OBJEXT) \
-cmp_d_.lo cmp_si_.$(OBJEXT) cmp_si_.lo cmp_ui_.$(OBJEXT) cmp_ui_.lo \
-cmpabs_.$(OBJEXT) cmpabs_.lo cmpabs_d_.$(OBJEXT) cmpabs_d_.lo \
-cmpabs_ui_.$(OBJEXT) cmpabs_ui_.lo com_.$(OBJEXT) com_.lo \
-combit_.$(OBJEXT) combit_.lo cong_.$(OBJEXT) cong_.lo \
-cong_2exp_.$(OBJEXT) cong_2exp_.lo cong_ui_.$(OBJEXT) cong_ui_.lo \
-dive_ui_.$(OBJEXT) dive_ui_.lo divegcd_.$(OBJEXT) divegcd_.lo \
-divexact_.$(OBJEXT) divexact_.lo divis_.$(OBJEXT) divis_.lo \
-divis_2exp_.$(OBJEXT) divis_2exp_.lo divis_ui_.$(OBJEXT) divis_ui_.lo \
-dump_.$(OBJEXT) dump_.lo export_.$(OBJEXT) export_.lo \
-fac_ui_.$(OBJEXT) fac_ui_.lo fdiv_q_.$(OBJEXT) fdiv_q_.lo \
-fdiv_q_ui_.$(OBJEXT) fdiv_q_ui_.lo fdiv_qr_.$(OBJEXT) fdiv_qr_.lo \
-fdiv_qr_ui_.$(OBJEXT) fdiv_qr_ui_.lo fdiv_r_.$(OBJEXT) fdiv_r_.lo \
-fdiv_r_ui_.$(OBJEXT) fdiv_r_ui_.lo fdiv_ui_.$(OBJEXT) fdiv_ui_.lo \
-fib2_ui_.$(OBJEXT) fib2_ui_.lo fib_ui_.$(OBJEXT) fib_ui_.lo \
-fits_sint_.$(OBJEXT) fits_sint_.lo fits_slong_.$(OBJEXT) \
-fits_slong_.lo fits_sshort_.$(OBJEXT) fits_sshort_.lo \
-fits_uint_.$(OBJEXT) fits_uint_.lo fits_ulong_.$(OBJEXT) \
-fits_ulong_.lo fits_ushort_.$(OBJEXT) fits_ushort_.lo gcd_.$(OBJEXT) \
-gcd_.lo gcd_ui_.$(OBJEXT) gcd_ui_.lo gcdext_.$(OBJEXT) gcdext_.lo \
-get_d_.$(OBJEXT) get_d_.lo get_d_2exp_.$(OBJEXT) get_d_2exp_.lo \
-get_si_.$(OBJEXT) get_si_.lo get_str_.$(OBJEXT) get_str_.lo \
-get_ui_.$(OBJEXT) get_ui_.lo getlimbn_.$(OBJEXT) getlimbn_.lo \
-hamdist_.$(OBJEXT) hamdist_.lo import_.$(OBJEXT) import_.lo \
-init_.$(OBJEXT) init_.lo init2_.$(OBJEXT) init2_.lo inits_.$(OBJEXT) \
-inits_.lo inp_raw_.$(OBJEXT) inp_raw_.lo inp_str_.$(OBJEXT) \
-inp_str_.lo invert_.$(OBJEXT) invert_.lo ior_.$(OBJEXT) ior_.lo \
-iset_.$(OBJEXT) iset_.lo iset_d_.$(OBJEXT) iset_d_.lo \
-iset_si_.$(OBJEXT) iset_si_.lo iset_str_.$(OBJEXT) iset_str_.lo \
-iset_ui_.$(OBJEXT) iset_ui_.lo jacobi_.$(OBJEXT) jacobi_.lo \
-kronsz_.$(OBJEXT) kronsz_.lo kronuz_.$(OBJEXT) kronuz_.lo \
-kronzs_.$(OBJEXT) kronzs_.lo kronzu_.$(OBJEXT) kronzu_.lo \
-lcm_.$(OBJEXT) lcm_.lo lcm_ui_.$(OBJEXT) lcm_ui_.lo \
-lucnum2_ui_.$(OBJEXT) lucnum2_ui_.lo lucnum_ui_.$(OBJEXT) \
-lucnum_ui_.lo millerrabin_.$(OBJEXT) millerrabin_.lo mod_.$(OBJEXT) \
-mod_.lo mul_.$(OBJEXT) mul_.lo mul_2exp_.$(OBJEXT) mul_2exp_.lo \
-mul_si_.$(OBJEXT) mul_si_.lo mul_ui_.$(OBJEXT) mul_ui_.lo \
-n_pow_ui_.$(OBJEXT) n_pow_ui_.lo neg_.$(OBJEXT) neg_.lo \
-nextprime_.$(OBJEXT) nextprime_.lo out_raw_.$(OBJEXT) out_raw_.lo \
-out_str_.$(OBJEXT) out_str_.lo perfpow_.$(OBJEXT) perfpow_.lo \
-perfsqr_.$(OBJEXT) perfsqr_.lo popcount_.$(OBJEXT) popcount_.lo \
-pow_ui_.$(OBJEXT) pow_ui_.lo powm_.$(OBJEXT) powm_.lo \
-powm_sec_.$(OBJEXT) powm_sec_.lo powm_ui_.$(OBJEXT) powm_ui_.lo \
-pprime_p_.$(OBJEXT) pprime_p_.lo random_.$(OBJEXT) random_.lo \
-random2_.$(OBJEXT) random2_.lo realloc_.$(OBJEXT) realloc_.lo \
-realloc2_.$(OBJEXT) realloc2_.lo remove_.$(OBJEXT) remove_.lo \
-root_.$(OBJEXT) root_.lo rootrem_.$(OBJEXT) rootrem_.lo \
-rrandomb_.$(OBJEXT) rrandomb_.lo scan0_.$(OBJEXT) scan0_.lo \
-scan1_.$(OBJEXT) scan1_.lo set_.$(OBJEXT) set_.lo set_d_.$(OBJEXT) \
-set_d_.lo set_f_.$(OBJEXT) set_f_.lo set_q_.$(OBJEXT) set_q_.lo \
-set_si_.$(OBJEXT) set_si_.lo set_str_.$(OBJEXT) set_str_.lo \
-set_ui_.$(OBJEXT) set_ui_.lo setbit_.$(OBJEXT) setbit_.lo \
-size_.$(OBJEXT) size_.lo sizeinbase_.$(OBJEXT) sizeinbase_.lo \
-sqrt_.$(OBJEXT) sqrt_.lo sqrtrem_.$(OBJEXT) sqrtrem_.lo sub_.$(OBJEXT) \
-sub_.lo sub_ui_.$(OBJEXT) sub_ui_.lo swap_.$(OBJEXT) swap_.lo \
-tdiv_q_.$(OBJEXT) tdiv_q_.lo tdiv_q_2exp_.$(OBJEXT) tdiv_q_2exp_.lo \
-tdiv_q_ui_.$(OBJEXT) tdiv_q_ui_.lo tdiv_qr_.$(OBJEXT) tdiv_qr_.lo \
-tdiv_qr_ui_.$(OBJEXT) tdiv_qr_ui_.lo tdiv_r_.$(OBJEXT) tdiv_r_.lo \
-tdiv_r_2exp_.$(OBJEXT) tdiv_r_2exp_.lo tdiv_r_ui_.$(OBJEXT) \
-tdiv_r_ui_.lo tdiv_ui_.$(OBJEXT) tdiv_ui_.lo tstbit_.$(OBJEXT) \
-tstbit_.lo ui_pow_ui_.$(OBJEXT) ui_pow_ui_.lo ui_sub_.$(OBJEXT) \
-ui_sub_.lo urandomb_.$(OBJEXT) urandomb_.lo urandomm_.$(OBJEXT) \
-urandomm_.lo xor_.$(OBJEXT) xor_.lo : $(ANSI2KNR)
mostlyclean-libtool:
-rm -f *.lo
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
mostlyclean: mostlyclean-am
-mostlyclean-am: mostlyclean-compile mostlyclean-generic mostlyclean-kr \
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
mostlyclean-libtool
pdf: pdf-am
uninstall-am:
-.MAKE: $(top_builddir)/ansi2knr install-am install-strip
+.MAKE: install-am install-strip
.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
clean-libtool clean-noinstLTLIBRARIES ctags distclean \
install-pdf install-pdf-am install-ps install-ps-am \
install-strip installcheck installcheck-am installdirs \
maintainer-clean maintainer-clean-generic mostlyclean \
- mostlyclean-compile mostlyclean-generic mostlyclean-kr \
- mostlyclean-libtool pdf pdf-am ps ps-am tags uninstall \
- uninstall-am
+ mostlyclean-compile mostlyclean-generic mostlyclean-libtool \
+ pdf pdf-am ps ps-am tags uninstall uninstall-am
-# These are BUILT_SOURCES at the top-level, so normally they're built before
-# recursing into this directory.
-#
-fac_ui.h:
- cd ..; $(MAKE) $(AM_MAKEFLAGS) mpz/fac_ui.h
-
# Tell versions [3.59,3.63) of GNU make to not export all variables.
# Otherwise a system limit (for SysV at least) may be exceeded.
.NOEXPORT:
/* mpz_abs(dst, src) -- Assign the absolute value of SRC to DST.
-Copyright 1991, 1993, 1994, 1995, 2001 Free Software Foundation, Inc.
+Copyright 1991, 1993, 1994, 1995, 2001, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
void
mpz_abs (mpz_ptr w, mpz_srcptr u)
{
- mp_ptr wp, up;
+ mp_ptr wp;
+ mp_srcptr up;
mp_size_t size;
- size = ABS (u->_mp_size);
+ size = ABSIZ (u);
if (u != w)
{
- if (w->_mp_alloc < size)
- _mpz_realloc (w, size);
+ wp = MPZ_NEWALLOC (w, size);
- wp = w->_mp_d;
- up = u->_mp_d;
+ up = PTR (u);
MPN_COPY (wp, up, size);
}
- w->_mp_size = size;
+ SIZ (w) = size;
}
/* mpz_and -- Logical and.
-Copyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2003, 2005 Free Software
-Foundation, Inc.
+Copyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2003, 2005, 2012
+Free Software Foundation, Inc.
This file is part of the GNU MP Library.
op1_ptr = PTR(op1);
op2_ptr = PTR(op2);
- res_ptr = PTR(res);
if (op1_size >= 0)
{
/* Handle allocation, now then we know exactly how much space is
needed for the result. */
- if (UNLIKELY (ALLOC(res) < res_size))
- {
- _mpz_realloc (res, res_size);
- res_ptr = PTR(res);
- /* Don't re-read op1_ptr and op2_ptr. Since res_size <=
- MIN(op1_size, op2_size), we will not reach this code when op1
- is identical to res or op2 is identical to res. */
- }
+ res_ptr = MPZ_REALLOC (res, res_size);
+ /* Don't re-read op1_ptr and op2_ptr. Since res_size <=
+ MIN(op1_size, op2_size), res is not changed when op1
+ is identical to res or op2 is identical to res. */
SIZ(res) = res_size;
- if (LIKELY (res_size != 0))
- mpn_and_n (res_ptr, op1_ptr, op2_ptr, res_size);
+ if (LIKELY (res_size != 0))
+ mpn_and_n (res_ptr, op1_ptr, op2_ptr, res_size);
return;
}
else /* op2_size < 0 */
{
if (op2_size < 0)
{
- mp_ptr opx;
+ mp_ptr opx, opy;
mp_limb_t cy;
- mp_size_t res_alloc;
/* Both operands are negative, so will be the result.
-((-OP1) & (-OP2)) = -(~(OP1 - 1) & ~(OP2 - 1)) =
op1_size = -op1_size;
op2_size = -op2_size;
- res_alloc = 1 + MAX (op1_size, op2_size);
+ if (op1_size > op2_size)
+ MPN_SRCPTR_SWAP (op1_ptr, op1_size, op2_ptr, op2_size);
- opx = TMP_ALLOC_LIMBS (op1_size);
+ TMP_ALLOC_LIMBS_2 (opx, op1_size, opy, op2_size);
mpn_sub_1 (opx, op1_ptr, op1_size, (mp_limb_t) 1);
op1_ptr = opx;
- opx = TMP_ALLOC_LIMBS (op2_size);
- mpn_sub_1 (opx, op2_ptr, op2_size, (mp_limb_t) 1);
- op2_ptr = opx;
-
- if (ALLOC(res) < res_alloc)
- {
- _mpz_realloc (res, res_alloc);
- res_ptr = PTR(res);
- /* Don't re-read OP1_PTR and OP2_PTR. They point to temporary
- space--never to the space PTR(res) used to point to before
- reallocation. */
- }
-
- if (op1_size >= op2_size)
- {
- MPN_COPY (res_ptr + op2_size, op1_ptr + op2_size,
- op1_size - op2_size);
- for (i = op2_size - 1; i >= 0; i--)
- res_ptr[i] = op1_ptr[i] | op2_ptr[i];
- res_size = op1_size;
- }
- else
- {
- MPN_COPY (res_ptr + op1_size, op2_ptr + op1_size,
- op2_size - op1_size);
- for (i = op1_size - 1; i >= 0; i--)
- res_ptr[i] = op1_ptr[i] | op2_ptr[i];
- res_size = op2_size;
- }
+ mpn_sub_1 (opy, op2_ptr, op2_size, (mp_limb_t) 1);
+ op2_ptr = opy;
+
+ res_ptr = MPZ_REALLOC (res, 1 + op2_size);
+ /* Don't re-read OP1_PTR and OP2_PTR. They point to temporary
+ space--never to the space PTR(res) used to point to before
+ reallocation. */
+
+ MPN_COPY (res_ptr + op1_size, op2_ptr + op1_size,
+ op2_size - op1_size);
+ mpn_ior_n (res_ptr, op1_ptr, op2_ptr, op1_size);
+ res_size = op2_size;
cy = mpn_add_1 (res_ptr, res_ptr, res_size, (mp_limb_t) 1);
- if (cy)
- {
- res_ptr[res_size] = cy;
- res_size++;
- }
+ res_ptr[res_size] = cy;
+ res_size += (cy != 0);
SIZ(res) = -res_size;
TMP_FREE;
{
/* We should compute -OP1 & OP2. Swap OP1 and OP2 and fall
through to the code that handles OP1 & -OP2. */
- MPZ_SRCPTR_SWAP (op1, op2);
- MPN_SRCPTR_SWAP (op1_ptr,op1_size, op2_ptr,op2_size);
+ MPN_SRCPTR_SWAP (op1_ptr, op1_size, op2_ptr, op2_size);
}
}
/* Handle allocation, now then we know exactly how much space is
needed for the result. */
- if (ALLOC(res) < res_size)
- {
- _mpz_realloc (res, res_size);
- res_ptr = PTR(res);
- /* Don't re-read OP1_PTR or OP2_PTR. Since res_size = op1_size,
- we will not reach this code when op1 is identical to res.
- OP2_PTR points to temporary space. */
- }
+ res_ptr = MPZ_REALLOC (res, res_size);
+ /* Don't re-read OP1_PTR or OP2_PTR. Since res_size = op1_size,
+ op1 is not changed if it is identical to res.
+ OP2_PTR points to temporary space. */
MPN_COPY (res_ptr + op2_size, op1_ptr + op2_size, res_size - op2_size);
- for (i = op2_size - 1; i >= 0; i--)
- res_ptr[i] = op1_ptr[i] & ~op2_ptr[i];
+ mpn_andn_n (res_ptr, op1_ptr, op2_ptr, op2_size);
SIZ(res) = res_size;
}
/* Handle allocation, now then we know exactly how much space is
needed for the result. */
- if (ALLOC(res) < res_size)
- {
- _mpz_realloc (res, res_size);
- res_ptr = PTR(res);
- /* Don't re-read OP1_PTR. Since res_size <= op1_size, we will
- not reach this code when op1 is identical to res. */
- /* Don't re-read OP2_PTR. It points to temporary space--never
- to the space PTR(res) used to point to before reallocation. */
- }
-
- for (i = res_size - 1; i >= 0; i--)
- res_ptr[i] = op1_ptr[i] & ~op2_ptr[i];
+ res_ptr = MPZ_REALLOC (res, res_size);
+ /* Don't re-read OP1_PTR. Since res_size <= op1_size,
+ op1 is not changed if it is identical to res.
+ Don't re-read OP2_PTR. It points to temporary space--never
+ to the space PTR(res) used to point to before reallocation. */
+
+ if (LIKELY (res_size != 0))
+ mpn_andn_n (res_ptr, op1_ptr, op2_ptr, res_size);
SIZ(res) = res_size;
}
/* mpz_add, mpz_sub -- add or subtract integers.
-Copyright 1991, 1993, 1994, 1996, 2000, 2001 Free Software Foundation, Inc.
+Copyright 1991, 1993, 1994, 1996, 2000, 2001, 2011, 2012 Free Software
+Foundation, Inc.
This file is part of the GNU MP Library.
#include "gmp-impl.h"
-#ifdef BERKELEY_MP
-
-#include "mp.h"
-#ifdef OPERATION_add
-#define FUNCTION madd
-#define VARIATION
-#endif
-#ifdef OPERATION_sub
-#define FUNCTION msub
-#define VARIATION -
-#endif
-#define ARGUMENTS mpz_srcptr u, mpz_srcptr v, mpz_ptr w
-
-#else /* normal GMP */
-
#ifdef OPERATION_add
#define FUNCTION mpz_add
#define VARIATION
#ifdef OPERATION_sub
#define FUNCTION mpz_sub
#define VARIATION -
-#endif
-#define ARGUMENTS mpz_ptr w, mpz_srcptr u, mpz_srcptr v
-
#endif
#ifndef FUNCTION
void
-FUNCTION (ARGUMENTS)
+FUNCTION (mpz_ptr w, mpz_srcptr u, mpz_srcptr v)
{
mp_srcptr up, vp;
mp_ptr wp;
mp_size_t abs_usize;
mp_size_t abs_vsize;
- usize = u->_mp_size;
- vsize = VARIATION v->_mp_size;
+ usize = SIZ(u);
+ vsize = VARIATION SIZ(v);
abs_usize = ABS (usize);
abs_vsize = ABS (vsize);
/* If not space for w (and possible carry), increase space. */
wsize = abs_usize + 1;
- if (w->_mp_alloc < wsize)
- _mpz_realloc (w, wsize);
+ wp = MPZ_REALLOC (w, wsize);
/* These must be after realloc (u or v may be the same as w). */
- up = u->_mp_d;
- vp = v->_mp_d;
- wp = w->_mp_d;
+ up = PTR(u);
+ vp = PTR(v);
if ((usize ^ vsize) < 0)
{
wsize = -wsize;
}
- w->_mp_size = wsize;
+ SIZ(w) = wsize;
}
/* mpz_add_ui, mpz_sub_ui -- Add or subtract an mpz_t and an unsigned
one-word integer.
-Copyright 1991, 1993, 1994, 1996, 1999, 2000, 2001, 2002, 2004 Free Software
-Foundation, Inc.
+Copyright 1991, 1993, 1994, 1996, 1999, 2000, 2001, 2002, 2004, 2012 Free
+Software Foundation, Inc.
This file is part of the GNU MP Library.
}
#endif
- usize = u->_mp_size;
+ usize = SIZ (u);
abs_usize = ABS (usize);
/* If not space for W (and possible carry), increase space. */
wsize = abs_usize + 1;
- if (w->_mp_alloc < wsize)
- _mpz_realloc (w, wsize);
+ wp = MPZ_REALLOC (w, wsize);
/* These must be after realloc (U may be the same as W). */
- up = u->_mp_d;
- wp = w->_mp_d;
+ up = PTR (u);
if (abs_usize == 0)
{
wp[0] = vval;
- w->_mp_size = VARIATION_NEG (vval != 0);
+ SIZ (w) = VARIATION_NEG (vval != 0);
return;
}
}
}
- w->_mp_size = wsize;
+ SIZ (w) = wsize;
}
/* mpz_addmul, mpz_submul -- add or subtract multiple.
-Copyright 2001, 2004, 2005 Free Software Foundation, Inc.
+Copyright 2001, 2004, 2005, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The sign of w is retained for the result, unless the absolute value
submul underflows, in which case it flips. */
-static void __gmpz_aorsmul __GMP_PROTO ((REGPARM_3_1 (mpz_ptr w, mpz_srcptr x, mpz_srcptr y, mp_size_t sub))) REGPARM_ATTR (1);
+static void __gmpz_aorsmul (REGPARM_3_1 (mpz_ptr w, mpz_srcptr x, mpz_srcptr y, mp_size_t sub)) REGPARM_ATTR (1);
#define mpz_aorsmul(w,x,y,sub) __gmpz_aorsmul (REGPARM_3_1 (w, x, y, sub))
REGPARM_ATTR (1) static void
wsize = ABS(wsize_signed);
tsize = xsize + ysize;
- MPZ_REALLOC (w, MAX (wsize, tsize) + 1);
- wp = PTR(w);
+ wp = MPZ_REALLOC (w, MAX (wsize, tsize) + 1);
if (wsize_signed == 0)
{
/* Nothing to add to, just set w=x*y. No w==x or w==y overlap here,
- since we know x,y!=0 but w==0. */
+ since we know x,y!=0 but w==0. */
high = mpn_mul (wp, PTR(x),xsize, PTR(y),ysize);
tsize -= (high == 0);
SIZ(w) = (sub >= 0 ? tsize : -tsize);
mp_size_t usize = wsize;
if (usize < tsize)
- {
- up = tp;
- usize = tsize;
- tp = wp;
- tsize = wsize;
+ {
+ up = tp;
+ usize = tsize;
+ tp = wp;
+ tsize = wsize;
- wsize = usize;
- }
+ wsize = usize;
+ }
c = mpn_add (wp, up,usize, tp,tsize);
wp[wsize] = c;
mp_size_t usize = wsize;
if (mpn_cmp_twosizes_lt (up,usize, tp,tsize))
- {
- up = tp;
- usize = tsize;
- tp = wp;
- tsize = wsize;
-
- wsize = usize;
- wsize_signed = -wsize_signed;
- }
+ {
+ up = tp;
+ usize = tsize;
+ tp = wp;
+ tsize = wsize;
+
+ wsize = usize;
+ wsize_signed = -wsize_signed;
+ }
ASSERT_NOCARRY (mpn_sub (wp, up,usize, tp,tsize));
wsize = usize;
ALMOST CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR
COMPLETELY IN FUTURE GNU MP RELEASES.
-Copyright 2001, 2002, 2004, 2005 Free Software Foundation, Inc.
+Copyright 2001, 2002, 2004, 2005, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
if (wsize_signed == 0)
{
/* nothing to add to, just set x*y, "sub" gives the sign */
- MPZ_REALLOC (w, xsize+1);
- wp = PTR (w);
+ wp = MPZ_REALLOC (w, xsize+1);
cy = mpn_mul_1 (wp, PTR(x), xsize, y);
wp[xsize] = cy;
xsize += (cy != 0);
wsize = ABS (wsize_signed);
new_wsize = MAX (wsize, xsize);
- MPZ_REALLOC (w, new_wsize+1);
- wp = PTR (w);
+ wp = MPZ_REALLOC (w, new_wsize+1);
xp = PTR (x);
min_size = MIN (wsize, xsize);
dsize = xsize - wsize;
#if HAVE_NATIVE_mpn_mul_1c
if (dsize > 0)
- cy = mpn_mul_1c (wp, xp, dsize, y, cy);
+ cy = mpn_mul_1c (wp, xp, dsize, y, cy);
else if (dsize < 0)
- {
- dsize = -dsize;
- cy = mpn_add_1 (wp, wp, dsize, cy);
- }
+ {
+ dsize = -dsize;
+ cy = mpn_add_1 (wp, wp, dsize, cy);
+ }
#else
if (dsize != 0)
- {
- mp_limb_t cy2;
- if (dsize > 0)
- cy2 = mpn_mul_1 (wp, xp, dsize, y);
- else
- {
- dsize = -dsize;
- cy2 = 0;
- }
- cy = cy2 + mpn_add_1 (wp, wp, dsize, cy);
- }
+ {
+ mp_limb_t cy2;
+ if (dsize > 0)
+ cy2 = mpn_mul_1 (wp, xp, dsize, y);
+ else
+ {
+ dsize = -dsize;
+ cy2 = 0;
+ }
+ cy = cy2 + mpn_add_1 (wp, wp, dsize, cy);
+ }
#endif
wp[dsize] = cy;
cy = mpn_submul_1 (wp, xp, min_size, y);
if (wsize >= xsize)
- {
- /* if w bigger than x, then propagate borrow through it */
- if (wsize != xsize)
- cy = mpn_sub_1 (wp+xsize, wp+xsize, wsize-xsize, cy);
-
- if (cy != 0)
- {
- /* Borrow out of w, take twos complement negative to get
- absolute value, flip sign of w. */
- wp[new_wsize] = ~-cy; /* extra limb is 0-cy */
- mpn_com (wp, wp, new_wsize);
- new_wsize++;
- MPN_INCR_U (wp, new_wsize, CNST_LIMB(1));
- wsize_signed = -wsize_signed;
- }
- }
+ {
+ /* if w bigger than x, then propagate borrow through it */
+ if (wsize != xsize)
+ cy = mpn_sub_1 (wp+xsize, wp+xsize, wsize-xsize, cy);
+
+ if (cy != 0)
+ {
+ /* Borrow out of w, take twos complement negative to get
+ absolute value, flip sign of w. */
+ wp[new_wsize] = ~-cy; /* extra limb is 0-cy */
+ mpn_com (wp, wp, new_wsize);
+ new_wsize++;
+ MPN_INCR_U (wp, new_wsize, CNST_LIMB(1));
+ wsize_signed = -wsize_signed;
+ }
+ }
else /* wsize < xsize */
- {
- /* x bigger than w, so want x*y-w. Submul has given w-x*y, so
- take twos complement and use an mpn_mul_1 for the rest. */
+ {
+ /* x bigger than w, so want x*y-w. Submul has given w-x*y, so
+ take twos complement and use an mpn_mul_1 for the rest. */
- mp_limb_t cy2;
+ mp_limb_t cy2;
- /* -(-cy*b^n + w-x*y) = (cy-1)*b^n + ~(w-x*y) + 1 */
- mpn_com (wp, wp, wsize);
- cy += mpn_add_1 (wp, wp, wsize, CNST_LIMB(1));
- cy -= 1;
+ /* -(-cy*b^n + w-x*y) = (cy-1)*b^n + ~(w-x*y) + 1 */
+ mpn_com (wp, wp, wsize);
+ cy += mpn_add_1 (wp, wp, wsize, CNST_LIMB(1));
+ cy -= 1;
- /* If cy-1 == -1 then hold that -1 for latter. mpn_submul_1 never
- returns cy==MP_LIMB_T_MAX so that value always indicates a -1. */
- cy2 = (cy == MP_LIMB_T_MAX);
- cy += cy2;
- MPN_MUL_1C (cy, wp+wsize, xp+wsize, xsize-wsize, y, cy);
- wp[new_wsize] = cy;
- new_wsize += (cy != 0);
+ /* If cy-1 == -1 then hold that -1 for later. mpn_submul_1 never
+ returns cy==MP_LIMB_T_MAX so that value always indicates a -1. */
+ cy2 = (cy == MP_LIMB_T_MAX);
+ cy += cy2;
+ MPN_MUL_1C (cy, wp+wsize, xp+wsize, xsize-wsize, y, cy);
+ wp[new_wsize] = cy;
+ new_wsize += (cy != 0);
- /* Apply any -1 from above. The value at wp+wsize is non-zero
- because y!=0 and the high limb of x will be non-zero. */
- if (cy2)
- MPN_DECR_U (wp+wsize, new_wsize-wsize, CNST_LIMB(1));
+ /* Apply any -1 from above. The value at wp+wsize is non-zero
+ because y!=0 and the high limb of x will be non-zero. */
+ if (cy2)
+ MPN_DECR_U (wp+wsize, new_wsize-wsize, CNST_LIMB(1));
- wsize_signed = -wsize_signed;
- }
+ wsize_signed = -wsize_signed;
+ }
/* submul can produce high zero limbs due to cancellation, both when w
- has more limbs or x has more */
+ has more limbs or x has more */
MPN_NORMALIZE (wp, new_wsize);
}
/* mpz_array_init (array, array_size, size_per_elem) --
-Copyright 1991, 1993, 1994, 1995, 2000, 2001, 2002 Free Software Foundation,
-Inc.
+Copyright 1991, 1993, 1994, 1995, 2000, 2001, 2002, 2012 Free Software
+Foundation, Inc.
This file is part of the GNU MP Library.
for (i = 0; i < arr_size; i++)
{
- arr[i]._mp_alloc = nlimbs + 1; /* Yes, lie a little... */
- arr[i]._mp_size = 0;
- arr[i]._mp_d = p + i * nlimbs;
+ ALLOC (&arr[i]) = nlimbs + 1; /* Yes, lie a little... */
+ SIZ (&arr[i]) = 0;
+ PTR (&arr[i]) = p + i * nlimbs;
}
}
/* mpz_bin_ui - compute n over k.
-Copyright 1998, 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
+Copyright 1998, 1999, 2000, 2001, 2002, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
mp_limb_t kacc;
mp_size_t negate;
- if (mpz_sgn (n) < 0)
+ if (SIZ (n) < 0)
{
/* bin(n,k) = (-1)^k * bin(-n+k-1,k), and set ni = -n+k-1 - k = -n-1 */
mpz_init (ni);
else
{
/* bin(n,k) == 0 if k>n
- (no test for this under the n<0 case, since -n+k-1 >= k there) */
+ (no test for this under the n<0 case, since -n+k-1 >= k there) */
if (mpz_cmp_ui (n, k) < 0)
- {
- mpz_set_ui (r, 0L);
- return;
- }
+ {
+ SIZ (r) = 0;
+ return;
+ }
/* set ni = n-k */
mpz_init (ni);
/* Now wanting bin(ni+k,k), with ni positive, and "negate" is the sign (0
for positive, 1 for negative). */
- mpz_set_ui (r, 1L);
+ SIZ (r) = 1; PTR (r)[0] = 1;
/* Rewrite bin(n,k) as bin(n,n-k) if that is smaller. In this case it's
whether ni+k-k < k meaning ni<k, and if so change to denominator ni+k-k
mpz_add_ui (ni, ni, 1L);
mpz_mul (nacc, nacc, ni);
umul_ppmm (k1, k0, kacc, i << GMP_NAIL_BITS);
- k0 >>= GMP_NAIL_BITS;
if (k1 != 0)
{
/* Accumulator overflow. Perform bignum step. */
mpz_mul (r, r, nacc);
- mpz_set_ui (nacc, 1L);
- DIVIDE ();
+ SIZ (nacc) = 1; PTR (nacc)[0] = 1;
+ DIVIDE ();
kacc = i;
}
else
{
/* Save new products in accumulators to keep accumulating. */
- kacc = k0;
+ kacc = k0 >> GMP_NAIL_BITS;
}
}
/* mpz_bin_uiui - compute n over k.
-Copyright 1998, 1999, 2000, 2001, 2002, 2003, 2006 Free Software Foundation,
-Inc.
+Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.
+
+Copyright 2010, 2011, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
#include "gmp-impl.h"
#include "longlong.h"
+#ifndef BIN_GOETGHELUCK_THRESHOLD
+#define BIN_GOETGHELUCK_THRESHOLD 1000
+#endif
+#ifndef BIN_UIUI_ENABLE_SMALLDC
+#define BIN_UIUI_ENABLE_SMALLDC 1
+#endif
+#ifndef BIN_UIUI_RECURSIVE_SMALLDC
+#define BIN_UIUI_RECURSIVE_SMALLDC (GMP_NUMB_BITS > 32)
+#endif
-/* Enhancement: It ought to be possible to calculate the size of the final
- result in advance, to a rough approximation at least, and use it to do
- just one realloc. Stirling's approximation n! ~= sqrt(2*pi*n)*(n/e)^n
- (Knuth section 1.2.5) might be of use. */
-
-/* "inc" in the main loop allocates a chunk more space if not already
- enough, so as to avoid repeated reallocs. The final step on the other
- hand requires only one more limb. */
-#define MULDIV(inc) \
- do { \
- ASSERT (rsize <= ralloc); \
- \
- if (rsize == ralloc) \
- { \
- mp_size_t new_ralloc = ralloc + (inc); \
- rp = __GMP_REALLOCATE_FUNC_LIMBS (rp, ralloc, new_ralloc); \
- ralloc = new_ralloc; \
- } \
- \
- rp[rsize] = mpn_mul_1 (rp, rp, rsize, nacc); \
- MPN_DIVREM_OR_DIVEXACT_1 (rp, rp, rsize+1, kacc); \
- rsize += (rp[rsize] != 0); \
- \
-} while (0)
+/* Algorithm:
-void
-mpz_bin_uiui (mpz_ptr r, unsigned long int n, unsigned long int k)
+ Accumulate chunks of factors first limb-by-limb (using one of mul0-mul8)
+ which are then accumulated into mpn numbers. The first inner loop
+ accumulates divisor factors, the 2nd inner loop accumulates exactly the same
+ number of dividend factors. We avoid accumulating more for the divisor, even
+ with its smaller factors, since otherwise we cannot guarantee divisibility.
+
+ Since we know each division will yield an integer, we compute the quotient
+ using Hensel norm: If the quotient is limited by 2^t, we compute A / B mod
+ 2^t.
+
+ Improvements:
+
+ (1) An obvious improvement to this code would be to compute mod 2^t
+ everywhere. Unfortunately, we cannot determine t beforehand, unless we
+ invoke some approximation, such as Stirling's formula. Of course, we don't
+ need t to be tight. However, it is not clear that this would help much,
+ our numbers are kept reasonably small already.
+
+ (2) Compute nmax/kmax semi-accurately, without scalar division or a loop.
+ Extracting the 3 msb, then doing a table lookup using cnt*8+msb as index,
+ would make it both reasonably accurate and fast. (We could use a table
+ stored into a limb, perhaps.) The table should take the removed factors of
+ 2 into account (those done on-the-fly in mulN).
+
+ (3) The first time in the loop we compute the odd part of a
+ factorial in kp, we might use oddfac_1 for this task.
+ */
+
+/* This threshold determines how large a divisor to accumulate before we call
+ bdiv. Perhaps we should never call bdiv, and accumulate all we are told,
+ since we are just basecase code anyway? Presumably, this depends on the
+ relative speed of the asymptotically fast code and this code. */
+#define SOME_THRESHOLD 20
+
+/* Multiply-into-limb functions. These remove factors of 2 on-the-fly. FIXME:
+ All versions of MAXFACS don't take this 2 removal into account now, meaning
+ that then, shifting just adds some overhead. (We remove factors from the
+ completed limb anyway.) */
+
+static mp_limb_t
+mul1 (mp_limb_t m)
+{
+ return m;
+}
+
+static mp_limb_t
+mul2 (mp_limb_t m)
{
- unsigned long int i, j;
- mp_limb_t nacc, kacc;
- unsigned long int cnt;
- mp_size_t rsize, ralloc;
- mp_ptr rp;
-
- /* bin(n,k) = 0 if k>n. */
- if (n < k)
+ /* We need to shift before multiplying, to avoid an overflow. */
+ mp_limb_t m01 = (m | 1) * ((m + 1) >> 1);
+ return m01;
+}
+
+static mp_limb_t
+mul3 (mp_limb_t m)
+{
+ mp_limb_t m01 = (m + 0) * (m + 1) >> 1;
+ mp_limb_t m2 = (m + 2);
+ return m01 * m2;
+}
+
+static mp_limb_t
+mul4 (mp_limb_t m)
+{
+ mp_limb_t m01 = (m + 0) * (m + 1) >> 1;
+ mp_limb_t m23 = (m + 2) * (m + 3) >> 1;
+ return m01 * m23;
+}
+
+static mp_limb_t
+mul5 (mp_limb_t m)
+{
+ mp_limb_t m012 = (m + 0) * (m + 1) * (m + 2) >> 1;
+ mp_limb_t m34 = (m + 3) * (m + 4) >> 1;
+ return m012 * m34;
+}
+
+static mp_limb_t
+mul6 (mp_limb_t m)
+{
+ mp_limb_t m01 = (m + 0) * (m + 1);
+ mp_limb_t m23 = (m + 2) * (m + 3);
+ mp_limb_t m45 = (m + 4) * (m + 5) >> 1;
+ mp_limb_t m0123 = m01 * m23 >> 3;
+ return m0123 * m45;
+}
+
+static mp_limb_t
+mul7 (mp_limb_t m)
+{
+ mp_limb_t m01 = (m + 0) * (m + 1);
+ mp_limb_t m23 = (m + 2) * (m + 3);
+ mp_limb_t m456 = (m + 4) * (m + 5) * (m + 6) >> 1;
+ mp_limb_t m0123 = m01 * m23 >> 3;
+ return m0123 * m456;
+}
+
+static mp_limb_t
+mul8 (mp_limb_t m)
+{
+ mp_limb_t m01 = (m + 0) * (m + 1);
+ mp_limb_t m23 = (m + 2) * (m + 3);
+ mp_limb_t m45 = (m + 4) * (m + 5);
+ mp_limb_t m67 = (m + 6) * (m + 7);
+ mp_limb_t m0123 = m01 * m23 >> 3;
+ mp_limb_t m4567 = m45 * m67 >> 3;
+ return m0123 * m4567;
+}
+
+typedef mp_limb_t (* mulfunc_t) (mp_limb_t);
+
+static const mulfunc_t mulfunc[] = {mul1,mul2,mul3,mul4,mul5,mul6,mul7,mul8};
+#define M (numberof(mulfunc))
+
+/* Number of factors-of-2 removed by the corresponding mulN function. */
+static const unsigned char tcnttab[] = {0, 1, 1, 2, 2, 4, 4, 6};
+
+#if 1
+/* This variant is inaccurate but shares code with other functions. */
+#define MAXFACS(max,l) \
+ do { \
+ (max) = log_n_max (l); \
+ } while (0)
+#else
+
+/* This variant is exact(?) but uses a loop. It takes the 2 removal
+ of mulN into account. */
+static const unsigned long ftab[] =
+#if GMP_NUMB_BITS == 64
+ /* 1 to 8 factors per iteration */
+ {CNST_LIMB(0xffffffffffffffff),CNST_LIMB(0x100000000),0x32cbfe,0x16a0b,0x24c4,0xa16,0x34b,0x1b2 /*,0xdf,0x8d */};
+#endif
+#if GMP_NUMB_BITS == 32
+ /* 1 to 7 factors per iteration */
+ {0xffffffff,0x10000,0x801,0x16b,0x71,0x42,0x26 /* ,0x1e */};
+#endif
+
+#define MAXFACS(max,l) \
+ do { \
+ int __i; \
+ for (__i = numberof (ftab) - 1; l > ftab[__i]; __i--) \
+ ; \
+ (max) = __i + 1; \
+ } while (0)
+#endif
+
+/* Entry i contains (i!/2^t)^(-1) where t is chosen such that the parenthesis
+ is an odd integer. */
+static const mp_limb_t facinv[] = { ONE_LIMB_ODD_FACTORIAL_INVERSES_TABLE };
+
+static void
+mpz_bdiv_bin_uiui (mpz_ptr r, unsigned long int n, unsigned long int k)
+{
+ int nmax, kmax, nmaxnow, numfac;
+ mp_ptr np, kp;
+ mp_size_t nn, kn, alloc;
+ mp_limb_t i, j, t, iii, jjj, cy, dinv;
+ mp_bitcnt_t i2cnt, j2cnt;
+ int cnt;
+ mp_size_t maxn;
+ TMP_DECL;
+
+ ASSERT (k > ODD_FACTORIAL_TABLE_LIMIT);
+ TMP_MARK;
+
+ maxn = 1 + n / GMP_NUMB_BITS; /* absolutely largest result size (limbs) */
+
+ /* FIXME: This allocation might be insufficient, but is usually way too
+ large. */
+ alloc = SOME_THRESHOLD - 1 + MAX (3 * maxn / 2, SOME_THRESHOLD);
+ alloc = MIN (alloc, k) + 1;
+ np = TMP_ALLOC_LIMBS (alloc);
+ kp = TMP_ALLOC_LIMBS (SOME_THRESHOLD + 1);
+
+ MAXFACS (nmax, n);
+ ASSERT (nmax <= M);
+ MAXFACS (kmax, k);
+ ASSERT (kmax <= M);
+ ASSERT (k >= M);
+
+ i = n - k + 1;
+
+ np[0] = 1; nn = 1;
+
+ i2cnt = 0; /* total low zeros in dividend */
+ j2cnt = __gmp_fac2cnt_table[ODD_FACTORIAL_TABLE_LIMIT / 2 - 1];
+ /* total low zeros in divisor */
+
+ numfac = 1;
+ j = ODD_FACTORIAL_TABLE_LIMIT + 1;
+ jjj = ODD_FACTORIAL_TABLE_MAX;
+ ASSERT (__gmp_oddfac_table[ODD_FACTORIAL_TABLE_LIMIT] == ODD_FACTORIAL_TABLE_MAX);
+
+ while (1)
+ {
+ kp[0] = jjj; /* store new factors */
+ kn = 1;
+ t = k - j + 1;
+ kmax = MIN (kmax, t);
+
+ while (kmax != 0 && kn < SOME_THRESHOLD)
+ {
+ jjj = mulfunc[kmax - 1] (j);
+ j += kmax; /* number of factors used */
+ count_trailing_zeros (cnt, jjj); /* count low zeros */
+ jjj >>= cnt; /* remove remaining low zeros */
+ j2cnt += tcnttab[kmax - 1] + cnt; /* update low zeros count */
+ cy = mpn_mul_1 (kp, kp, kn, jjj); /* accumulate new factors */
+ kp[kn] = cy;
+ kn += cy != 0;
+ t = k - j + 1;
+ kmax = MIN (kmax, t);
+ }
+ numfac = j - numfac;
+
+ while (numfac != 0)
+ {
+ nmaxnow = MIN (nmax, numfac);
+ iii = mulfunc[nmaxnow - 1] (i);
+ i += nmaxnow; /* number of factors used */
+ count_trailing_zeros (cnt, iii); /* count low zeros */
+ iii >>= cnt; /* remove remaining low zeros */
+ i2cnt += tcnttab[nmaxnow - 1] + cnt; /* update low zeros count */
+ cy = mpn_mul_1 (np, np, nn, iii); /* accumulate new factors */
+ np[nn] = cy;
+ nn += cy != 0;
+ numfac -= nmaxnow;
+ }
+
+ ASSERT (nn < alloc);
+
+ binvert_limb (dinv, kp[0]);
+ nn += (np[nn - 1] >= kp[kn - 1]);
+ nn -= kn;
+ mpn_sbpi1_bdiv_q (np, np, nn, kp, MIN(kn,nn), -dinv);
+
+ if (kmax == 0)
+ break;
+ numfac = j;
+
+ jjj = mulfunc[kmax - 1] (j);
+ j += kmax; /* number of factors used */
+ count_trailing_zeros (cnt, jjj); /* count low zeros */
+ jjj >>= cnt; /* remove remaining low zeros */
+ j2cnt += tcnttab[kmax - 1] + cnt; /* update low zeros count */
+ }
+
+ /* Put back the right number of factors of 2. */
+ cnt = i2cnt - j2cnt;
+ if (cnt != 0)
{
- SIZ(r) = 0;
- return;
+ ASSERT (cnt < GMP_NUMB_BITS); /* can happen, but not for intended use */
+ cy = mpn_lshift (np, np, nn, cnt);
+ np[nn] = cy;
+ nn += cy != 0;
}
- rp = PTR(r);
+ nn -= np[nn - 1] == 0; /* normalisation */
- /* Rewrite bin(n,k) as bin(n,n-k) if that is smaller. */
- k = MIN (k, n-k);
+ kp = MPZ_NEWALLOC (r, nn);
+ SIZ(r) = nn;
+ MPN_COPY (kp, np, nn);
+ TMP_FREE;
+}
- /* bin(n,0) = 1 */
- if (k == 0)
+static void
+mpz_smallk_bin_uiui (mpz_ptr r, unsigned long int n, unsigned long int k)
+{
+ int nmax, numfac;
+ mp_ptr rp;
+ mp_size_t rn, alloc;
+ mp_limb_t i, iii, cy;
+ mp_bitcnt_t i2cnt, cnt;
+
+ count_leading_zeros (cnt, (mp_limb_t) n);
+ cnt = GMP_LIMB_BITS - cnt;
+ alloc = cnt * k / GMP_NUMB_BITS + 3; /* FIXME: ensure rounding is enough. */
+ rp = MPZ_NEWALLOC (r, alloc);
+
+ MAXFACS (nmax, n);
+ nmax = MIN (nmax, M);
+
+ i = n - k + 1;
+
+ nmax = MIN (nmax, k);
+ rp[0] = mulfunc[nmax - 1] (i);
+ rn = 1;
+ i += nmax; /* number of factors used */
+ i2cnt = tcnttab[nmax - 1]; /* low zeros count */
+ numfac = k - nmax;
+ while (numfac != 0)
{
- SIZ(r) = 1;
- rp[0] = 1;
- return;
+ nmax = MIN (nmax, numfac);
+ iii = mulfunc[nmax - 1] (i);
+ i += nmax; /* number of factors used */
+ i2cnt += tcnttab[nmax - 1]; /* update low zeros count */
+ cy = mpn_mul_1 (rp, rp, rn, iii); /* accumulate new factors */
+ rp[rn] = cy;
+ rn += cy != 0;
+ numfac -= nmax;
}
- j = n - k + 1;
- rp[0] = j;
- rsize = 1;
- ralloc = ALLOC(r);
+ ASSERT (rn < alloc);
+
+ mpn_pi1_bdiv_q_1 (rp, rp, rn, __gmp_oddfac_table[k], facinv[k - 2],
+ __gmp_fac2cnt_table[k / 2 - 1] - i2cnt);
+ /* A two-fold, branch-free normalisation is possible :*/
+ /* rn -= rp[rn - 1] == 0; */
+ /* rn -= rp[rn - 1] == 0; */
+ MPN_NORMALIZE_NOT_ZERO (rp, rn);
+
+ SIZ(r) = rn;
+}
+
+/* Algorithm:
+
+ Plain and simply multiply things together.
+
+ We tabulate factorials (k!/2^t)^(-1) mod B (where t is chosen such
+ that k!/2^t is odd).
+
+*/
+
+static mp_limb_t
+bc_bin_uiui (unsigned int n, unsigned int k)
+{
+ return ((__gmp_oddfac_table[n] * facinv[k - 2] * facinv[n - k - 2])
+ << (__gmp_fac2cnt_table[n / 2 - 1] - __gmp_fac2cnt_table[k / 2 - 1] - __gmp_fac2cnt_table[(n-k) / 2 - 1]))
+ & GMP_NUMB_MASK;
+}
+
+/* Algorithm:
+
+ Recursively exploit the relation
+ bin(n,k) = bin(n,k>>1)*bin(n-(k>>1),k-(k>>1))/bin(k,k>>1) .
+
+ Values for binomial(k,k>>1) that fit in a limb are precomputed
+ (with inverses).
+*/
+
+/* bin2kk[i - ODD_CENTRAL_BINOMIAL_OFFSET] =
+ binomial(i*2,i)/2^t (where t is chosen so that it is odd). */
+static const mp_limb_t bin2kk[] = { ONE_LIMB_ODD_CENTRAL_BINOMIAL_TABLE };
+
+/* bin2kkinv[i] = bin2kk[i]^-1 mod B */
+static const mp_limb_t bin2kkinv[] = { ONE_LIMB_ODD_CENTRAL_BINOMIAL_INVERSE_TABLE };
+
+/* bin2kk[i] = binomial((i+MIN_S)*2,i+MIN_S)/2^t. This table contains the t values. */
+static const unsigned char fac2bin[] = { CENTRAL_BINOMIAL_2FAC_TABLE };
+
+static void
+mpz_smallkdc_bin_uiui (mpz_ptr r, unsigned long int n, unsigned long int k)
+{
+ mp_ptr rp;
+ mp_size_t rn;
+ unsigned long int hk;
+
+ hk = k >> 1;
+
+ if ((! BIN_UIUI_RECURSIVE_SMALLDC) || hk <= ODD_FACTORIAL_TABLE_LIMIT)
+ mpz_smallk_bin_uiui (r, n, hk);
+ else
+ mpz_smallkdc_bin_uiui (r, n, hk);
+ k -= hk;
+ n -= hk;
+ if (n <= ODD_FACTORIAL_EXTTABLE_LIMIT) {
+ mp_limb_t cy;
+ rn = SIZ (r);
+ rp = MPZ_REALLOC (r, rn + 1);
+ cy = mpn_mul_1 (rp, rp, rn, bc_bin_uiui (n, k));
+ rp [rn] = cy;
+ rn += cy != 0;
+ } else {
+ mp_limb_t buffer[ODD_CENTRAL_BINOMIAL_TABLE_LIMIT + 3];
+ mpz_t t;
+
+ ALLOC (t) = ODD_CENTRAL_BINOMIAL_TABLE_LIMIT + 3;
+ PTR (t) = buffer;
+ if ((! BIN_UIUI_RECURSIVE_SMALLDC) || k <= ODD_FACTORIAL_TABLE_LIMIT)
+ mpz_smallk_bin_uiui (t, n, k);
+ else
+ mpz_smallkdc_bin_uiui (t, n, k);
+ mpz_mul (r, r, t);
+ rp = PTR (r);
+ rn = SIZ (r);
+ }
+
+ mpn_pi1_bdiv_q_1 (rp, rp, rn, bin2kk[k - ODD_CENTRAL_BINOMIAL_OFFSET],
+ bin2kkinv[k - ODD_CENTRAL_BINOMIAL_OFFSET],
+ fac2bin[k - ODD_CENTRAL_BINOMIAL_OFFSET] - (k != hk));
+ /* A two-fold, branch-free normalisation is possible :*/
+ /* rn -= rp[rn - 1] == 0; */
+ /* rn -= rp[rn - 1] == 0; */
+ MPN_NORMALIZE_NOT_ZERO (rp, rn);
+
+ SIZ(r) = rn;
+}
+
+/* mpz_goetgheluck_bin_uiui(RESULT, N, K) -- Set RESULT to binomial(N,K).
+ *
+ * Contributed to the GNU project by Marco Bodrato.
+ *
+ * Implementation of the algorithm by P. Goetgheluck, "Computing
+ * Binomial Coefficients", The American Mathematical Monthly, Vol. 94,
+ * No. 4 (April 1987), pp. 360-365.
+ *
+ * Acknowledgment: Peter Luschny did spot the slowness of the previous
+ * code and suggested the reference.
+ */
+
+/* TODO: Remove duplicated constants / macros / static functions...
+ */
+
+/*************************************************************/
+/* Section macros: common macros, for swing/fac/bin (&sieve) */
+/*************************************************************/
+
+#define FACTOR_LIST_APPEND(PR, MAX_PR, VEC, I) \
+ if ((PR) > (MAX_PR)) { \
+ (VEC)[(I)++] = (PR); \
+ (PR) = 1; \
+ }
+
+#define FACTOR_LIST_STORE(P, PR, MAX_PR, VEC, I) \
+ do { \
+ if ((PR) > (MAX_PR)) { \
+ (VEC)[(I)++] = (PR); \
+ (PR) = (P); \
+ } else \
+ (PR) *= (P); \
+ } while (0)
+
+#define LOOP_ON_SIEVE_CONTINUE(prime,end,sieve) \
+ __max_i = (end); \
+ \
+ do { \
+ ++__i; \
+ if (((sieve)[__index] & __mask) == 0) \
+ { \
+ (prime) = id_to_n(__i)
+
+#define LOOP_ON_SIEVE_BEGIN(prime,start,end,off,sieve) \
+ do { \
+ mp_limb_t __mask, __index, __max_i, __i; \
+ \
+ __i = (start)-(off); \
+ __index = __i / GMP_LIMB_BITS; \
+ __mask = CNST_LIMB(1) << (__i % GMP_LIMB_BITS); \
+ __i += (off); \
+ \
+ LOOP_ON_SIEVE_CONTINUE(prime,end,sieve)
+
+#define LOOP_ON_SIEVE_STOP \
+ } \
+ __mask = __mask << 1 | __mask >> (GMP_LIMB_BITS-1); \
+ __index += __mask & 1; \
+ } while (__i <= __max_i) \
+
+#define LOOP_ON_SIEVE_END \
+ LOOP_ON_SIEVE_STOP; \
+ } while (0)
+
+/*********************************************************/
+/* Section sieve: sieving functions and tools for primes */
+/*********************************************************/
+
+#if WANT_ASSERT
+static mp_limb_t
+bit_to_n (mp_limb_t bit) { return (bit*3+4)|1; }
+#endif
+
+/* id_to_n (x) = bit_to_n (x-1) = (id*3+1)|1*/
+static mp_limb_t
+id_to_n (mp_limb_t id) { return id*3+1+(id&1); }
+
+/* n_to_bit (n) = ((n-1)&(-CNST_LIMB(2)))/3U-1 */
+static mp_limb_t
+n_to_bit (mp_limb_t n) { return ((n-5)|1)/3U; }
- /* Initialize accumulators. */
- nacc = 1;
- kacc = 1;
+static mp_size_t
+primesieve_size (mp_limb_t n) { return n_to_bit(n) / GMP_LIMB_BITS + 1; }
- for (i = 2; i <= k; i++)
+/*********************************************************/
+/* Section binomial: fast binomial implementation */
+/*********************************************************/
+
+#define COUNT_A_PRIME(P, N, K, PR, MAX_PR, VEC, I) \
+ do { \
+ mp_limb_t __a, __b, __prime, __ma,__mb; \
+ __prime = (P); \
+ __a = (N); __b = (K); __mb = 0; \
+ FACTOR_LIST_APPEND(PR, MAX_PR, VEC, I); \
+ do { \
+ __mb += __b % __prime; __b /= __prime; \
+ __ma = __a % __prime; __a /= __prime; \
+ if (__ma < __mb) { \
+ __mb = 1; (PR) *= __prime; \
+ } else __mb = 0; \
+ } while (__a >= __prime); \
+ } while (0)
+
+#define SH_COUNT_A_PRIME(P, N, K, PR, MAX_PR, VEC, I) \
+ do { \
+ mp_limb_t __prime; \
+ __prime = (P); \
+ if (((N) % __prime) < ((K) % __prime)) { \
+ FACTOR_LIST_STORE (__prime, PR, MAX_PR, VEC, I); \
+ } \
+ } while (0)
+
+/* Returns an approximation of the square root of x. *
+ * It gives: x <= limb_apprsqrt (x) ^ 2 < x * 9/4 */
+static mp_limb_t
+limb_apprsqrt (mp_limb_t x)
+{
+ int s;
+
+ ASSERT (x > 2);
+ count_leading_zeros (s, x - 1);
+ s = GMP_LIMB_BITS - 1 - s;
+ return (CNST_LIMB(1) << (s >> 1)) + (CNST_LIMB(1) << ((s - 1) >> 1));
+}
+
+static void
+mpz_goetgheluck_bin_uiui (mpz_ptr r, unsigned long int n, unsigned long int k)
+{
+ mp_limb_t *sieve, *factors, count;
+ mp_limb_t prod, max_prod, j;
+ TMP_DECL;
+
+ ASSERT (BIN_GOETGHELUCK_THRESHOLD >= 13);
+ ASSERT (n >= 25);
+
+ TMP_MARK;
+ sieve = TMP_ALLOC_LIMBS (primesieve_size (n));
+
+ count = gmp_primesieve (sieve, n) + 1;
+ factors = TMP_ALLOC_LIMBS (count / log_n_max (n) + 1);
+
+ max_prod = GMP_NUMB_MAX / n;
+
+ /* Handle primes = 2, 3 separately. */
+ popc_limb (count, n - k);
+ popc_limb (j, k);
+ count += j;
+ popc_limb (j, n);
+ count -= j;
+ prod = CNST_LIMB(1) << count;
+
+ j = 0;
+ COUNT_A_PRIME (3, n, k, prod, max_prod, factors, j);
+
+ /* Accumulate prime factors from 5 to n/2 */
+ {
+ mp_limb_t s;
+
+ {
+ mp_limb_t prime;
+ s = limb_apprsqrt(n);
+ s = n_to_bit (s);
+ LOOP_ON_SIEVE_BEGIN (prime, n_to_bit (5), s, 0,sieve);
+ COUNT_A_PRIME (prime, n, k, prod, max_prod, factors, j);
+ LOOP_ON_SIEVE_END;
+ s++;
+ }
+
+ ASSERT (max_prod <= GMP_NUMB_MAX / 2);
+ max_prod <<= 1;
+ ASSERT (bit_to_n (s) * bit_to_n (s) > n);
+ ASSERT (s <= n_to_bit (n >> 1));
+ {
+ mp_limb_t prime;
+
+ LOOP_ON_SIEVE_BEGIN (prime, s, n_to_bit (n >> 1), 0,sieve);
+ SH_COUNT_A_PRIME (prime, n, k, prod, max_prod, factors, j);
+ LOOP_ON_SIEVE_END;
+ }
+ max_prod >>= 1;
+ }
+
+ /* Store primes from (n-k)+1 to n */
+ ASSERT (n_to_bit (n - k) < n_to_bit (n));
{
- mp_limb_t n1, n0;
-
- /* Remove common 2 factors. */
- cnt = ((nacc | kacc) & 1) ^ 1;
- nacc >>= cnt;
- kacc >>= cnt;
-
- j++;
- /* Accumulate next multiples. */
- umul_ppmm (n1, n0, nacc, (mp_limb_t) j << GMP_NAIL_BITS);
- n0 >>= GMP_NAIL_BITS;
- if (n1 == 0)
- {
- /* Save new products in accumulators to keep accumulating. */
- nacc = n0;
- kacc = kacc * i;
- }
- else
- {
- /* Accumulator overflow. Perform bignum step. */
- MULDIV (32);
- nacc = j;
- kacc = i;
- }
+ mp_limb_t prime;
+ LOOP_ON_SIEVE_BEGIN (prime, n_to_bit (n - k) + 1, n_to_bit (n), 0,sieve);
+ FACTOR_LIST_STORE (prime, prod, max_prod, factors, j);
+ LOOP_ON_SIEVE_END;
}
- /* Take care of whatever is left in accumulators. */
- MULDIV (1);
+ if (LIKELY (j != 0))
+ {
+ factors[j++] = prod;
+ mpz_prodlimbs (r, factors, j);
+ }
+ else
+ {
+ PTR (r)[0] = prod;
+ SIZ (r) = 1;
+ }
+ TMP_FREE;
+}
- ALLOC(r) = ralloc;
- SIZ(r) = rsize;
- PTR(r) = rp;
+#undef COUNT_A_PRIME
+#undef SH_COUNT_A_PRIME
+#undef LOOP_ON_SIEVE_END
+#undef LOOP_ON_SIEVE_STOP
+#undef LOOP_ON_SIEVE_BEGIN
+#undef LOOP_ON_SIEVE_CONTINUE
+
+/*********************************************************/
+/* End of implementation of Goetgheluck's algorithm */
+/*********************************************************/
+
+void
+mpz_bin_uiui (mpz_ptr r, unsigned long int n, unsigned long int k)
+{
+ if (UNLIKELY (n < k)) {
+ SIZ (r) = 0;
+#if BITS_PER_ULONG > GMP_NUMB_BITS
+ } else if (UNLIKELY (n > GMP_NUMB_MAX)) {
+ mpz_t tmp;
+
+ mpz_init_set_ui (tmp, n);
+ mpz_bin_ui (r, tmp, k);
+ mpz_clear (tmp);
+#endif
+ } else {
+ ASSERT (n <= GMP_NUMB_MAX);
+ /* Rewrite bin(n,k) as bin(n,n-k) if that is smaller. */
+ k = MIN (k, n - k);
+ if (k < 2) {
+ PTR(r)[0] = k ? n : 1; /* 1 + ((-k) & (n-1)); */
+ SIZ(r) = 1;
+ } else if (n <= ODD_FACTORIAL_EXTTABLE_LIMIT) { /* k >= 2, n >= 4 */
+ PTR(r)[0] = bc_bin_uiui (n, k);
+ SIZ(r) = 1;
+ } else if (k <= ODD_FACTORIAL_TABLE_LIMIT)
+ mpz_smallk_bin_uiui (r, n, k);
+ else if (BIN_UIUI_ENABLE_SMALLDC &&
+ k <= (BIN_UIUI_RECURSIVE_SMALLDC ? ODD_CENTRAL_BINOMIAL_TABLE_LIMIT : ODD_FACTORIAL_TABLE_LIMIT)* 2)
+ mpz_smallkdc_bin_uiui (r, n, k);
+ else if (ABOVE_THRESHOLD (k, BIN_GOETGHELUCK_THRESHOLD) &&
+ k > (n >> 4)) /* k > ODD_FACTORIAL_TABLE_LIMIT */
+ mpz_goetgheluck_bin_uiui (r, n, k);
+ else
+ mpz_bdiv_bin_uiui (r, n, k);
+ }
}
/* mpz_cdiv_q -- Division rounding the quotient towards +infinity. The
remainder gets the opposite sign as the denominator.
-Copyright 1994, 1995, 1996, 2000, 2001, 2005 Free Software Foundation, Inc.
+Copyright 1994, 1995, 1996, 2000, 2001, 2005, 2012 Free Software Foundation,
+Inc.
This file is part of the GNU MP Library.
void
mpz_cdiv_q (mpz_ptr quot, mpz_srcptr dividend, mpz_srcptr divisor)
{
- mp_size_t dividend_size = dividend->_mp_size;
- mp_size_t divisor_size = divisor->_mp_size;
+ mp_size_t dividend_size = SIZ (dividend);
+ mp_size_t divisor_size = SIZ (divisor);
mpz_t rem;
TMP_DECL;
mpz_tdiv_qr (quot, rem, dividend, divisor);
- if ((divisor_size ^ dividend_size) >= 0 && rem->_mp_size != 0)
+ if ((divisor_size ^ dividend_size) >= 0 && SIZ (rem) != 0)
mpz_add_ui (quot, quot, 1L);
TMP_FREE;
always fit into the return type, the negative of the true remainder is
returned.
-Copyright 1994, 1996, 1999, 2001, 2002, 2004 Free Software Foundation, Inc.
+Copyright 1994, 1996, 1999, 2001, 2002, 2004, 2012 Free Software Foundation,
+Inc.
This file is part of the GNU MP Library.
mp_ptr np, qp;
mp_limb_t rl;
- if (divisor == 0)
+ if (UNLIKELY (divisor == 0))
DIVIDE_BY_ZERO;
ns = SIZ(dividend);
}
nn = ABS(ns);
- MPZ_REALLOC (quot, nn);
- qp = PTR(quot);
+ qp = MPZ_REALLOC (quot, nn);
np = PTR(dividend);
#if BITS_PER_ULONG > GMP_NUMB_BITS /* avoid warnings about shift amount */
/* mpz_cdiv_qr -- Division rounding the quotient towards +infinity. The
remainder gets the opposite sign as the denominator.
-Copyright 1994, 1995, 1996, 2000, 2001, 2005 Free Software Foundation, Inc.
+Copyright 1994, 1995, 1996, 2000, 2001, 2005, 2012 Free Software Foundation,
+Inc.
This file is part of the GNU MP Library.
void
mpz_cdiv_qr (mpz_ptr quot, mpz_ptr rem, mpz_srcptr dividend, mpz_srcptr divisor)
{
- mp_size_t divisor_size = divisor->_mp_size;
+ mp_size_t divisor_size = SIZ (divisor);
mp_size_t xsize;
mpz_t temp_divisor; /* N.B.: lives until function returns! */
TMP_DECL;
divisor = temp_divisor;
}
- xsize = dividend->_mp_size ^ divisor_size;;
+ xsize = SIZ (dividend) ^ divisor_size;;
mpz_tdiv_qr (quot, rem, dividend, divisor);
- if (xsize >= 0 && rem->_mp_size != 0)
+ if (xsize >= 0 && SIZ (rem) != 0)
{
mpz_add_ui (quot, quot, 1L);
mpz_sub (rem, rem, divisor);
always fit into the return type, the negative of the true remainder is
returned.
-Copyright 1994, 1995, 1996, 1999, 2001, 2002, 2004 Free Software Foundation,
-Inc.
+Copyright 1994, 1995, 1996, 1999, 2001, 2002, 2004, 2012 Free Software
+Foundation, Inc.
This file is part of the GNU MP Library.
mp_ptr np, qp;
mp_limb_t rl;
- if (divisor == 0)
+ if (UNLIKELY (divisor == 0))
DIVIDE_BY_ZERO;
ns = SIZ(dividend);
}
nn = ABS(ns);
- MPZ_REALLOC (quot, nn);
- qp = PTR(quot);
+ qp = MPZ_REALLOC (quot, nn);
np = PTR(dividend);
#if BITS_PER_ULONG > GMP_NUMB_BITS /* avoid warnings about shift amount */
mp_ptr rp;
mp_size_t rn;
- MPZ_REALLOC (rem, 2);
- rp = PTR(rem);
+ rp = MPZ_REALLOC (rem, 2);
if (nn == 1) /* tdiv_qr requirements; tested above for 0 */
{
/* mpz_cdiv_r -- Division rounding the quotient towards +infinity. The
remainder gets the opposite sign as the denominator.
-Copyright 1994, 1995, 1996, 2001, 2005 Free Software Foundation, Inc.
+Copyright 1994, 1995, 1996, 2001, 2005, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
void
mpz_cdiv_r (mpz_ptr rem, mpz_srcptr dividend, mpz_srcptr divisor)
{
- mp_size_t divisor_size = divisor->_mp_size;
+ mp_size_t divisor_size = SIZ (divisor);
mpz_t temp_divisor; /* N.B.: lives until function returns! */
TMP_DECL;
mpz_tdiv_r (rem, dividend, divisor);
- if ((divisor_size ^ dividend->_mp_size) >= 0 && rem->_mp_size != 0)
+ if ((divisor_size ^ SIZ (dividend)) >= 0 && SIZ (rem) != 0)
mpz_sub (rem, rem, divisor);
TMP_FREE;
always fit into the return type, the negative of the true remainder is
returned.
-Copyright 1994, 1995, 1996, 2001, 2002, 2004, 2005 Free Software Foundation,
-Inc.
+Copyright 1994, 1995, 1996, 2001, 2002, 2004, 2005, 2012 Free Software
+Foundation, Inc.
This file is part of the GNU MP Library.
mp_ptr np;
mp_limb_t rl;
- if (divisor == 0)
+ if (UNLIKELY (divisor == 0))
DIVIDE_BY_ZERO;
ns = SIZ(dividend);
mp_size_t rn;
TMP_DECL;
- MPZ_REALLOC (rem, 2);
- rp = PTR(rem);
+ rp = MPZ_REALLOC (rem, 2);
if (nn == 1) /* tdiv_qr requirements; tested above for 0 */
{
always fit into the return type, the negative of the true remainder is
returned.
-Copyright 1994, 1995, 1996, 2001, 2002, 2004, 2005 Free Software Foundation,
-Inc.
+Copyright 1994, 1995, 1996, 2001, 2002, 2004, 2005, 2012 Free Software
+Foundation, Inc.
This file is part of the GNU MP Library.
mp_ptr np;
mp_limb_t rl;
- if (divisor == 0)
+ if (UNLIKELY (divisor == 0))
DIVIDE_BY_ZERO;
ns = SIZ(dividend);
/* mpz_cdiv_q_2exp, mpz_fdiv_q_2exp -- quotient from mpz divided by 2^n.
-Copyright 1991, 1993, 1994, 1996, 1998, 1999, 2001, 2002, 2004 Free Software
-Foundation, Inc.
+Copyright 1991, 1993, 1994, 1996, 1998, 1999, 2001, 2002, 2004, 2012 Free
+Software Foundation, Inc.
This file is part of the GNU MP Library.
/* dir==1 for ceil, dir==-1 for floor */
-static void __gmpz_cfdiv_q_2exp __GMP_PROTO ((REGPARM_3_1 (mpz_ptr, mpz_srcptr, mp_bitcnt_t, int))) REGPARM_ATTR (1);
+static void __gmpz_cfdiv_q_2exp (REGPARM_3_1 (mpz_ptr, mpz_srcptr, mp_bitcnt_t, int)) REGPARM_ATTR (1);
#define cfdiv_q_2exp(w,u,cnt,dir) __gmpz_cfdiv_q_2exp (REGPARM_3_1 (w,u,cnt,dir))
REGPARM_ATTR (1) static void
{
if (wsize != 0)
{
- mp_limb_t cy;
+ mp_limb_t cy;
cy = mpn_add_1 (wp, wp, wsize, CNST_LIMB(1));
wp[wsize] = cy;
wsize += cy;
/* mpz_cdiv_r_2exp, mpz_fdiv_r_2exp -- remainder from mpz divided by 2^n.
-Copyright 2001, 2002, 2004 Free Software Foundation, Inc.
+Copyright 2001, 2002, 2004, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
/* dir==1 for ceil, dir==-1 for floor */
-static void __gmpz_cfdiv_r_2exp __GMP_PROTO ((REGPARM_3_1 (mpz_ptr, mpz_srcptr, mp_bitcnt_t, int))) REGPARM_ATTR (1);
+static void __gmpz_cfdiv_r_2exp (REGPARM_3_1 (mpz_ptr, mpz_srcptr, mp_bitcnt_t, int)) REGPARM_ATTR (1);
#define cfdiv_r_2exp(w,u,cnt,dir) __gmpz_cfdiv_r_2exp (REGPARM_3_1 (w, u, cnt, dir))
REGPARM_ATTR (1) static void
/* Round towards zero, means just truncate */
if (w == u)
- {
- /* if already smaller than limb_cnt then do nothing */
- if (abs_usize <= limb_cnt)
- return;
- wp = PTR(w);
- }
+ {
+ /* if already smaller than limb_cnt then do nothing */
+ if (abs_usize <= limb_cnt)
+ return;
+ wp = PTR(w);
+ }
else
- {
- i = MIN (abs_usize, limb_cnt+1);
- MPZ_REALLOC (w, i);
- wp = PTR(w);
- MPN_COPY (wp, up, i);
-
- /* if smaller than limb_cnt then only the copy is needed */
- if (abs_usize <= limb_cnt)
- {
- SIZ(w) = usize;
- return;
- }
- }
+ {
+ i = MIN (abs_usize, limb_cnt+1);
+ wp = MPZ_REALLOC (w, i);
+ MPN_COPY (wp, up, i);
+
+ /* if smaller than limb_cnt then only the copy is needed */
+ if (abs_usize <= limb_cnt)
+ {
+ SIZ(w) = usize;
+ return;
+ }
+ }
}
else
{
/* if u!=0 and smaller than divisor, then must negate */
if (abs_usize <= limb_cnt)
- goto negate;
+ goto negate;
/* if non-zero low limb, then must negate */
for (i = 0; i < limb_cnt; i++)
- if (up[i] != 0)
- goto negate;
+ if (up[i] != 0)
+ goto negate;
/* if non-zero partial limb, then must negate */
if ((up[limb_cnt] & LOW_MASK (cnt)) != 0)
- goto negate;
+ goto negate;
/* otherwise low bits of u are zero, so that's the result */
SIZ(w) = 0;
negate:
/* twos complement negation to get 2**cnt-u */
- MPZ_REALLOC (w, limb_cnt+1);
+ wp = MPZ_REALLOC (w, limb_cnt+1);
up = PTR(u);
- wp = PTR(w);
/* Ones complement */
i = MIN (abs_usize, limb_cnt+1);
mpn_com (wp, up, i);
for ( ; i <= limb_cnt; i++)
- wp[i] = GMP_NUMB_MAX;
+ wp[i] = GMP_NUMB_MAX;
/* Twos complement. Since u!=0 in the relevant part, the twos
- complement never gives 0 and a carry, so can use MPN_INCR_U. */
+ complement never gives 0 and a carry, so can use MPN_INCR_U. */
MPN_INCR_U (wp, limb_cnt+1, CNST_LIMB(1));
usize = -usize;
{
limb_cnt--;
if (limb_cnt < 0)
- {
- SIZ(w) = 0;
- return;
- }
+ {
+ SIZ(w) = 0;
+ return;
+ }
high = wp[limb_cnt];
}
/* mpz_clear -- de-allocate the space occupied by the dynamic digit space of
an integer.
-Copyright 1991, 1993, 1994, 1995, 2000, 2001 Free Software Foundation, Inc.
+Copyright 1991, 1993, 1994, 1995, 2000, 2001, 2012 Free Software Foundation,
+Inc.
This file is part of the GNU MP Library.
void
mpz_clear (mpz_ptr m)
{
- (*__gmp_free_func) (m->_mp_d, m->_mp_alloc * BYTES_PER_MP_LIMB);
+ (*__gmp_free_func) (PTR (m), ALLOC (m) * BYTES_PER_MP_LIMB); /* release m's limb storage through the __gmp_free_func hook, passing its size in bytes (ALLOC (m) limbs); m itself is not modified */
}
/* mpz_clrbit -- clear a specified bit.
-Copyright 1991, 1993, 1994, 1995, 2001, 2002 Free Software Foundation, Inc.
+Copyright 1991, 1993, 1994, 1995, 2001, 2002, 2012 Free Software Foundation,
+Inc.
This file is part of the GNU MP Library.
#include "gmp-impl.h"
void
-mpz_clrbit (mpz_ptr d, mp_bitcnt_t bit_index)
+mpz_clrbit (mpz_ptr d, mp_bitcnt_t bit_idx)
{
- mp_size_t dsize = d->_mp_size;
- mp_ptr dp = d->_mp_d;
- mp_size_t limb_index;
+ mp_size_t dsize = SIZ (d);
+ mp_ptr dp = PTR (d);
+ mp_size_t limb_idx;
+ mp_limb_t mask;
- limb_index = bit_index / GMP_NUMB_BITS;
+ limb_idx = bit_idx / GMP_NUMB_BITS;
+ mask = CNST_LIMB(1) << (bit_idx % GMP_NUMB_BITS);
if (dsize >= 0)
{
- if (limb_index < dsize)
+ if (limb_idx < dsize)
{
- mp_limb_t dlimb;
- dlimb = dp[limb_index];
- dlimb &= ~((mp_limb_t) 1 << (bit_index % GMP_NUMB_BITS));
- dp[limb_index] = dlimb;
+ mp_limb_t dlimb;
+ dlimb = dp[limb_idx];
+ dlimb &= ~mask;
+ dp[limb_idx] = dlimb;
- if (UNLIKELY (dlimb == 0 && limb_index == dsize-1))
- {
- /* high limb became zero, must normalize */
- do {
- dsize--;
- } while (dsize > 0 && dp[dsize-1] == 0);
- d->_mp_size = dsize;
- }
+ if (UNLIKELY (dlimb == 0 && limb_idx == dsize-1))
+ {
+ /* high limb became zero, must normalize */
+ MPN_NORMALIZE (dp, limb_idx);
+ SIZ (d) = limb_idx;
+ }
}
else
;
dsize = -dsize;
- /* No upper bound on this loop, we're sure there's a non-zero limb
- sooner ot later. */
- for (zero_bound = 0; ; zero_bound++)
- if (dp[zero_bound] != 0)
- break;
+ /* No index upper bound on this loop, we're sure there's a non-zero limb
+ sooner or later. */
+ zero_bound = 0;
+ while (dp[zero_bound] == 0)
+ zero_bound++;
- if (limb_index > zero_bound)
+ if (limb_idx > zero_bound)
{
- if (limb_index < dsize)
- dp[limb_index] |= (mp_limb_t) 1 << (bit_index % GMP_NUMB_BITS);
+ if (limb_idx < dsize)
+ dp[limb_idx] |= mask;
else
{
/* Ugh. The bit should be cleared outside of the end of the
number. We have to increase the size of the number. */
- if (UNLIKELY (d->_mp_alloc < limb_index + 1))
- dp = _mpz_realloc (d, limb_index + 1);
-
- MPN_ZERO (dp + dsize, limb_index - dsize);
- dp[limb_index] = (mp_limb_t) 1 << (bit_index % GMP_NUMB_BITS);
- d->_mp_size = -(limb_index + 1);
+ dp = MPZ_REALLOC (d, limb_idx + 1);
+ SIZ (d) = -(limb_idx + 1);
+ MPN_ZERO (dp + dsize, limb_idx - dsize);
+ dp[limb_idx] = mask;
}
}
- else if (limb_index == zero_bound)
+ else if (limb_idx == zero_bound)
{
- dp[limb_index] = ((((dp[limb_index] - 1)
- | ((mp_limb_t) 1 << (bit_index % GMP_NUMB_BITS))) + 1)
- & GMP_NUMB_MASK);
- if (dp[limb_index] == 0)
+ dp[limb_idx] = ((((dp[limb_idx] - 1) | mask) + 1) & GMP_NUMB_MASK);
+ if (dp[limb_idx] == 0)
{
- mp_size_t i;
- for (i = limb_index + 1; i < dsize; i++)
- {
- dp[i] = (dp[i] + 1) & GMP_NUMB_MASK;
- if (dp[i] != 0)
- goto fin;
- }
- /* We got carry all way out beyond the end of D. Increase
- its size (and allocation if necessary). */
- dsize++;
- if (UNLIKELY (d->_mp_alloc < dsize))
- dp = _mpz_realloc (d, dsize);
+ /* Increment at limb_idx + 1. Extend the number with a zero limb
+ for simplicity. */
+ dp = MPZ_REALLOC (d, dsize + 1);
+ dp[dsize] = 0;
+ MPN_INCR_U (dp + limb_idx + 1, dsize - limb_idx, 1);
+ dsize += dp[dsize];
- dp[i] = 1;
- d->_mp_size = -dsize;
- fin:;
+ SIZ (d) = -dsize;
}
}
else
/* mpz_cmp(u,v) -- Compare U, V. Return positive, zero, or negative
based on if U > V, U == V, or U < V.
-Copyright 1991, 1993, 1994, 1996, 2001, 2002 Free Software Foundation, Inc.
+Copyright 1991, 1993, 1994, 1996, 2001, 2002, 2011 Free Software
+Foundation, Inc.
This file is part of the GNU MP Library.
You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
-#ifdef BERKELEY_MP
-#include "mp.h"
-#endif
#include "gmp.h"
#include "gmp-impl.h"
int
-#ifdef BERKELEY_MP
-mcmp (mpz_srcptr u, mpz_srcptr v)
-#else
mpz_cmp (mpz_srcptr u, mpz_srcptr v) __GMP_NOTHROW
-#endif
{
mp_size_t usize, vsize, dsize, asize;
mp_srcptr up, vp;
if (zsize >= 0)
{
if (d < 0.0)
- return 1; /* >=0 cmp <0 */
+ return 1; /* >=0 cmp <0 */
ret = 1;
}
else
{
if (d >= 0.0)
- return -1; /* <0 cmp >=0 */
+ return -1; /* <0 cmp >=0 */
ret = -1;
d = -d;
zsize = -zsize;
/* mpz_cmp_si(u,v) -- Compare an integer U with a single-word int V.
Return positive, zero, or negative based on if U > V, U == V, or U < V.
-Copyright 1991, 1993, 1994, 1995, 1996, 2000, 2001, 2002 Free Software
-Foundation, Inc.
+Copyright 1991, 1993, 1994, 1995, 1996, 2000, 2001, 2002, 2012, 2013 Free
+Software Foundation, Inc.
This file is part of the GNU MP Library.
int
_mpz_cmp_si (mpz_srcptr u, signed long int v_digit) __GMP_NOTHROW
{
- mp_size_t usize = u->_mp_size;
+ mp_size_t usize = SIZ (u);
mp_size_t vsize;
mp_limb_t u_digit;
- unsigned long int absv_digit = (unsigned long int) v_digit;
+ unsigned long int absv_digit;
#if GMP_NAIL_BITS != 0
/* FIXME. This isn't very pretty. */
else if (v_digit < 0)
{
vsize = -1;
- absv_digit = -absv_digit;
}
+ absv_digit = ABS_CAST (unsigned long int, v_digit);
if (usize != vsize)
return usize - vsize;
if (usize == 0)
return 0;
- u_digit = u->_mp_d[0];
+ u_digit = PTR (u)[0];
if (u_digit == (mp_limb_t) absv_digit)
return 0;
/* mpz_cmpabs_d -- compare absolute values of mpz and double.
-Copyright 2001, 2002, 2003 Free Software Foundation, Inc.
+Copyright 2001, 2002, 2003, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
if (d == 0.0)
return (zsize != 0);
if (zsize == 0)
- return (d != 0 ? -1 : 0);
+ return -1; /* d != 0 */
/* 2. Ignore signs. */
zsize = ABS(zsize);
/* mpz_com(mpz_ptr dst, mpz_ptr src) -- Assign the bit-complemented value of
SRC to DST.
-Copyright 1991, 1993, 1994, 1996, 2001, 2003 Free Software Foundation, Inc.
+Copyright 1991, 1993, 1994, 1996, 2001, 2003, 2012 Free Software Foundation,
+Inc.
This file is part of the GNU MP Library.
void
mpz_com (mpz_ptr dst, mpz_srcptr src)
{
- mp_size_t size = src->_mp_size;
+ mp_size_t size = SIZ (src);
mp_srcptr src_ptr;
mp_ptr dst_ptr;
But this can be simplified using the identity -x = ~x + 1.
So we're going to compute (~~x) + 1 = x + 1! */
- if (dst->_mp_alloc < size + 1)
- _mpz_realloc (dst, size + 1);
-
- src_ptr = src->_mp_d;
- dst_ptr = dst->_mp_d;
-
if (UNLIKELY (size == 0))
{
/* special case, as mpn_add_1 wants size!=0 */
- dst_ptr[0] = 1;
- dst->_mp_size = -1;
- return;
+ PTR (dst)[0] = 1;
+ SIZ (dst) = -1;
}
+ else
+ {
+ mp_limb_t cy;
- {
- mp_limb_t cy;
+ dst_ptr = MPZ_REALLOC (dst, size + 1);
- cy = mpn_add_1 (dst_ptr, src_ptr, size, (mp_limb_t) 1);
- if (cy)
- {
- dst_ptr[size] = cy;
- size++;
- }
- }
+ src_ptr = PTR (src);
+
+ cy = mpn_add_1 (dst_ptr, src_ptr, size, (mp_limb_t) 1);
+ dst_ptr[size] = cy;
+ size += (cy != 0);
- /* Store a negative size, to indicate ones-extension. */
- dst->_mp_size = -size;
+ /* Store a negative size, to indicate ones-extension. */
+ SIZ (dst) = -size;
+ }
}
else
{
So we're going to compute ~~(x - 1) = x - 1! */
size = -size;
- if (dst->_mp_alloc < size)
- _mpz_realloc (dst, size);
+ dst_ptr = MPZ_REALLOC (dst, size);
- src_ptr = src->_mp_d;
- dst_ptr = dst->_mp_d;
+ src_ptr = PTR (src);
mpn_sub_1 (dst_ptr, src_ptr, size, (mp_limb_t) 1);
size -= dst_ptr[size - 1] == 0;
/* Store a positive size, to indicate zero-extension. */
- dst->_mp_size = size;
+ SIZ (dst) = size;
}
}
/* mpz_combit -- complement a specified bit.
-Copyright 2002, 2003 Free Software Foundation, Inc.
+Copyright 2002, 2003, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
void
mpz_combit (mpz_ptr d, mp_bitcnt_t bit_index)
{
- mp_size_t dsize = ABSIZ(d);
- mp_ptr dp = LIMBS(d);
+ mp_size_t dsize = SIZ(d); /* signed size: the branches below treat dsize < 0 as d < 0 */
+ mp_ptr dp = PTR(d);
mp_size_t limb_index = bit_index / GMP_NUMB_BITS;
- mp_limb_t bit = ((mp_limb_t) 1 << (bit_index % GMP_NUMB_BITS));
+ mp_limb_t bit = (CNST_LIMB (1) << (bit_index % GMP_NUMB_BITS)); /* single-bit mask inside limb limb_index */
- if (limb_index >= dsize)
- {
- MPZ_REALLOC(d, limb_index + 1);
- dp = LIMBS(d);
-
- MPN_ZERO(dp + dsize, limb_index + 1 - dsize);
- dsize = limb_index + 1;
- }
+ /* Check for the most common case: Positive input, no realloc or
+ normalization needed. */
+ if (limb_index + 1 < dsize) /* target limb lies strictly below the top limb, so the size cannot change */
+ dp[limb_index] ^= bit;
- if (SIZ(d) >= 0)
+ /* Check for the hairy case. d < 0, and we have all zero bits to the
+ right of the bit to toggle. */
+ else if (limb_index < -dsize && mpn_zero_p (dp, limb_index)
+ && (dp[limb_index] & (bit - 1)) == 0)
{
- dp[limb_index] ^= bit;
- MPN_NORMALIZE (dp, dsize);
- SIZ(d) = dsize;
+ ASSERT (dsize < 0);
+ dsize = -dsize; /* from here on dsize is the limb count of |d|; SIZ(d) still holds the negative size */
+
+ if (dp[limb_index] & bit)
+ {
+ /* We toggle the least significant one bit. Corresponds to
+ an add, with potential carry propagation, on the absolute
+ value. */
+ dp = MPZ_REALLOC (d, 1 + dsize); /* make room for one extra limb */
+ dp[dsize] = 0; /* zeroed so that it ends up holding any carry out of the increment */
+ MPN_INCR_U (dp + limb_index, 1 + dsize - limb_index, bit);
+ SIZ(d) -= dp[dsize]; /* SIZ(d) is negative here: subtracting dp[dsize] extends the size by one limb when that limb became non-zero */
+ }
+ else
+ {
+ /* We toggle a zero bit, subtract from the absolute value. */
+ MPN_DECR_U (dp + limb_index, dsize - limb_index, bit);
+ MPN_NORMALIZE (dp, dsize); /* the subtraction may have zeroed high limbs */
+ ASSERT (dsize > 0);
+ SIZ(d) = -dsize;
+ }
}
else
{
- mp_limb_t x = -dp[limb_index];
- mp_size_t i;
-
- /* non-zero limb below us means ones-complement */
- for (i = limb_index-1; i >= 0; i--)
- if (dp[i] != 0)
- {
- x--; /* change twos comp to ones comp */
- break;
- }
-
- if (x & bit)
+ /* Simple case: Toggle the bit in the absolute value. */
+ dsize = ABS(dsize);
+ if (limb_index < dsize)
{
- mp_limb_t c;
-
- /* Clearing the bit increases the magitude. We might need a carry. */
- MPZ_REALLOC(d, dsize + 1);
- dp = LIMBS(d);
-
- __GMPN_ADD_1 (c, dp+limb_index, dp+limb_index,
- dsize - limb_index, bit);
- dp[dsize] = c;
- dsize += c;
+ dp[limb_index] ^= bit;
+
+ /* Can happen only when limb_index = dsize - 1. Avoid SIZ(d)
+ bookkeeping in the common case. */
+ if (dp[dsize-1] == 0)
+ {
+ dsize--;
+ MPN_NORMALIZE (dp, dsize);
+ SIZ (d) = SIZ (d) >= 0 ? dsize : -dsize; /* store the shrunken size, keeping d's original sign */
+ }
}
else
- /* Setting the bit decreases the magnitude */
- mpn_sub_1(dp+limb_index, dp+limb_index, dsize + limb_index, bit);
-
- MPN_NORMALIZE (dp, dsize);
- SIZ(d) = -dsize;
+ {
+ dp = MPZ_REALLOC (d, limb_index + 1); /* the bit lies beyond the current top limb: grow d */
+ MPN_ZERO(dp + dsize, limb_index - dsize); /* clear the limbs between the old top limb and the new one */
+ dp[limb_index++] = bit; /* after the post-increment limb_index is the new limb count */
+ SIZ(d) = SIZ(d) >= 0 ? limb_index : -limb_index;
+ }
}
}
if (csize == 1)
{
if (dsize == 1)
- {
- cong_1:
- if (sign < 0)
- NEG_MOD (clow, clow, dlow);
-
- if (ABOVE_THRESHOLD (asize, BMOD_1_TO_MOD_1_THRESHOLD))
- {
- r = mpn_mod_1 (ap, asize, dlow);
- if (clow < dlow)
- return r == clow;
- else
- return r == (clow % dlow);
- }
-
- if ((dlow & 1) == 0)
- {
- /* Strip low zero bits to get odd d required by modexact. If
- d==e*2^n then a==c mod d if and only if both a==c mod e and
- a==c mod 2^n, the latter having been done above. */
- unsigned twos;
- count_trailing_zeros (twos, dlow);
- dlow >>= twos;
- }
-
- r = mpn_modexact_1c_odd (ap, asize, dlow, clow);
- return r == 0 || r == dlow;
- }
+ {
+ cong_1:
+ if (sign < 0)
+ NEG_MOD (clow, clow, dlow);
+
+ if (ABOVE_THRESHOLD (asize, BMOD_1_TO_MOD_1_THRESHOLD))
+ {
+ r = mpn_mod_1 (ap, asize, dlow);
+ if (clow < dlow)
+ return r == clow;
+ else
+ return r == (clow % dlow);
+ }
+
+ if ((dlow & 1) == 0)
+ {
+ /* Strip low zero bits to get odd d required by modexact. If
+ d==e*2^n then a==c mod d if and only if both a==c mod e and
+ a==c mod 2^n, the latter having been done above. */
+ unsigned twos;
+ count_trailing_zeros (twos, dlow);
+ dlow >>= twos;
+ }
+
+ r = mpn_modexact_1c_odd (ap, asize, dlow, clow);
+ return r == 0 || r == dlow;
+ }
/* dlow==0 is avoided since we don't want to bother handling extra low
- zero bits if dsecond is even (would involve borrow if a,c differ in
- sign and alow,clow!=0). */
+ zero bits if dsecond is even (would involve borrow if a,c differ in
+ sign and alow,clow!=0). */
if (dsize == 2 && dlow != 0)
- {
- mp_limb_t dsecond = dp[1];
-
- if (dsecond <= dmask)
- {
- unsigned twos;
- count_trailing_zeros (twos, dlow);
- dlow = (dlow >> twos) | (dsecond << (GMP_NUMB_BITS-twos));
- ASSERT_LIMB (dlow);
-
- /* dlow will be odd here, so the test for it even under cong_1
- is unnecessary, but the rest of that code is wanted. */
- goto cong_1;
- }
- }
+ {
+ mp_limb_t dsecond = dp[1];
+
+ if (dsecond <= dmask)
+ {
+ unsigned twos;
+ count_trailing_zeros (twos, dlow);
+ dlow = (dlow >> twos) | (dsecond << (GMP_NUMB_BITS-twos));
+ ASSERT_LIMB (dlow);
+
+ /* dlow will be odd here, so the test for it even under cong_1
+ is unnecessary, but the rest of that code is wanted. */
+ goto cong_1;
+ }
+ }
}
TMP_MARK;
{
/* same signs, subtract */
if (asize > csize || mpn_cmp (ap, cp, asize) >= 0)
- ASSERT_NOCARRY (mpn_sub (xp, ap, asize, cp, csize));
+ ASSERT_NOCARRY (mpn_sub (xp, ap, asize, cp, csize));
else
- ASSERT_NOCARRY (mpn_sub_n (xp, cp, ap, asize));
+ ASSERT_NOCARRY (mpn_sub_n (xp, cp, ap, asize));
MPN_NORMALIZE (xp, asize);
}
else
/* a==c for limbs in common */
if (mpn_cmp (ap, cp, MIN (csize, dlimbs)) != 0)
- return 0;
+ return 0;
/* if that's all of dlimbs, then a==c for remaining bits */
if (csize > dlimbs)
- return ((ap[dlimbs]-cp[dlimbs]) & dmask) == 0;
+ return ((ap[dlimbs]-cp[dlimbs]) & dmask) == 0;
a_zeros:
/* a remains, need all zero bits */
/* if d covers all of a and c, then must be exactly equal */
if (asize <= dlimbs)
- return asize == csize;
+ return asize == csize;
/* whole limbs zero */
for (i = csize; i < dlimbs; i++)
- if (ap[i] != 0)
- return 0;
+ if (ap[i] != 0)
+ return 0;
/* partial limb zero */
return (ap[dlimbs] & dmask) == 0;
/* different signs, negated comparison */
/* common low zero limbs, stopping at first non-zeros, which must
- match twos complement */
+ match twos complement */
i = 0;
for (;;)
- {
- ASSERT (i < csize); /* always have a non-zero limb on c */
- alimb = ap[i];
- climb = cp[i];
- sum = (alimb + climb) & GMP_NUMB_MASK;
+ {
+ ASSERT (i < csize); /* always have a non-zero limb on c */
+ alimb = ap[i];
+ climb = cp[i];
+ sum = (alimb + climb) & GMP_NUMB_MASK;
- if (i >= dlimbs)
- return (sum & dmask) == 0;
- i++;
+ if (i >= dlimbs)
+ return (sum & dmask) == 0;
+ i++;
- /* require both zero, or first non-zeros as twos-complements */
- if (sum != 0)
- return 0;
+ /* require both zero, or first non-zeros as twos-complements */
+ if (sum != 0)
+ return 0;
- if (alimb != 0)
- break;
- }
+ if (alimb != 0)
+ break;
+ }
/* further limbs matching as ones-complement */
for (;;)
- {
- if (i >= csize)
- break;
+ {
+ if (i >= csize)
+ break;
- alimb = ap[i];
- climb = cp[i];
- sum = (alimb + climb + 1) & GMP_NUMB_MASK;
+ alimb = ap[i];
+ climb = cp[i];
+ sum = (alimb + climb + 1) & GMP_NUMB_MASK;
- if (i >= dlimbs)
- return (sum & dmask) == 0;
+ if (i >= dlimbs)
+ return (sum & dmask) == 0;
- if (sum != 0)
- return 0;
+ if (sum != 0)
+ return 0;
- i++;
- }
+ i++;
+ }
/* no more c, so require all 1 bits in a */
if (asize < dlimbs)
- return 0; /* not enough a */
+ return 0; /* not enough a */
/* whole limbs */
for ( ; i < dlimbs; i++)
- if (ap[i] != GMP_NUMB_MAX)
- return 0;
+ if (ap[i] != GMP_NUMB_MAX)
+ return 0;
/* if only whole limbs, no further fetches from a */
if (dbits == 0)
- return 1;
+ return 1;
/* need enough a */
if (asize == dlimbs)
- return 0;
+ return 0;
return ((ap[dlimbs]+1) & dmask) == 0;
}
/* mpz_congruent_ui_p -- test congruence of mpz and ulong.
-Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
+Copyright 2000, 2001, 2002, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
if (asize == 0)
{
if (cu < du)
- return cu == 0;
+ return cu == 0;
else
- return (cu % du) == 0;
+ return (cu % du) == 0;
}
/* For nails don't try to be clever if c or d is bigger than a limb, just
{
r = mpn_mod_1 (ap, asize, d);
if (c < d)
- return r == c;
+ return r == c;
else
- return r == (c % d);
+ return r == (c % d);
}
if ((d & 1) == 0)
{
/* Strip low zero bits to get odd d required by modexact. If
- d==e*2^n then a==c mod d if and only if both a==c mod 2^n
- and a==c mod e. */
+ d==e*2^n then a==c mod d if and only if both a==c mod 2^n
+ and a==c mod e. */
- unsigned twos;
+ unsigned twos;
if ((ap[0]-c) & LOW_ZEROS_MASK (d))
- return 0;
+ return 0;
count_trailing_zeros (twos, d);
d >>= twos;
/* mpz_divexact_ui -- exact division mpz by ulong.
-Copyright 2001, 2002 Free Software Foundation, Inc.
+Copyright 2001, 2002, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
mp_size_t size, abs_size;
mp_ptr dst_ptr;
- if (divisor == 0)
+ if (UNLIKELY (divisor == 0))
DIVIDE_BY_ZERO;
/* For nails don't try to be clever if d is bigger than a limb, just fake
}
abs_size = ABS (size);
- MPZ_REALLOC (dst, abs_size);
- dst_ptr = PTR(dst);
+ dst_ptr = MPZ_REALLOC (dst, abs_size);
MPN_DIVREM_OR_DIVEXACT_1 (dst_ptr, PTR(src), abs_size, (mp_limb_t) divisor);
abs_size -= (dst_ptr[abs_size-1] == 0);
THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE AND ARE ALMOST CERTAIN TO
BE SUBJECT TO INCOMPATIBLE CHANGES IN FUTURE GNU MP RELEASES.
-Copyright 2000, 2005 Free Software Foundation, Inc.
+Copyright 2000, 2005, 2011, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
implementation. */
+#if GMP_NUMB_BITS % 2 == 0 /* NOTE(review): presumably required so that GMP_NUMB_MASK / 3 below is an exact quotient -- confirm against mpn_bdiv_dbm1 */
static void
mpz_divexact_by3 (mpz_ptr q, mpz_srcptr a)
{
mp_size_t size = SIZ(a);
- if (size == 0)
- {
- SIZ(q) = 0;
- return;
- }
- else
- {
- mp_size_t abs_size = ABS(size);
- mp_ptr qp;
+ mp_size_t abs_size = ABS(size);
+ mp_ptr qp;
- MPZ_REALLOC (q, abs_size);
+ qp = MPZ_REALLOC (q, abs_size); /* q gets room for as many limbs as |a| */
- qp = PTR(q);
- mpn_divexact_by3 (qp, PTR(a), abs_size);
+ mpn_bdiv_dbm1 (qp, PTR(a), abs_size, GMP_NUMB_MASK / 3);
- abs_size -= (qp[abs_size-1] == 0);
- SIZ(q) = (size>0 ? abs_size : -abs_size);
- }
+ abs_size -= (qp[abs_size-1] == 0); /* drop a zero top limb; reads qp[abs_size-1], and the size == 0 early-out is removed by this hunk, so callers must not pass a == 0 */
+ SIZ(q) = (size>0 ? abs_size : -abs_size); /* q takes the sign of a */
+}
+#endif
+
+#if GMP_NUMB_BITS % 4 == 0 /* NOTE(review): presumably required so that GMP_NUMB_MASK / 5 below is an exact quotient -- confirm against mpn_bdiv_dbm1 */
+static void
+mpz_divexact_by5 (mpz_ptr q, mpz_srcptr a)
+{
+ mp_size_t size = SIZ(a);
+ mp_size_t abs_size = ABS(size);
+ mp_ptr qp;
+
+ qp = MPZ_REALLOC (q, abs_size); /* q gets room for as many limbs as |a| */
+
+ mpn_bdiv_dbm1 (qp, PTR(a), abs_size, GMP_NUMB_MASK / 5);
+
+ abs_size -= (qp[abs_size-1] == 0); /* drop a zero top limb; reads qp[abs_size-1], so a must be non-zero (no size == 0 check in this helper) */
+ SIZ(q) = (size>0 ? abs_size : -abs_size); /* q takes the sign of a */
+}
+#endif
+
+static void
+mpz_divexact_limb (mpz_ptr q, mpz_srcptr a, mp_limb_t d)
+{
+ mp_size_t size = SIZ(a);
+ mp_size_t abs_size = ABS(size);
+ mp_ptr qp;
+
+ qp = MPZ_REALLOC (q, abs_size); /* q gets room for as many limbs as |a| */
+
+ mpn_divexact_1 (qp, PTR(a), abs_size, d); /* single-limb divisor path: quotient limbs of |a| / d go to qp */
+
+ abs_size -= (qp[abs_size-1] == 0); /* drop a zero top limb; reads qp[abs_size-1], so a must be non-zero (no size == 0 check in this helper) */
+ SIZ(q) = (size>0 ? abs_size : -abs_size); /* q takes the sign of a */
}
void
{
ASSERT (mpz_sgn (d) > 0);
+ if (SIZ(a) == 0)
+ {
+ SIZ(q) = 0;
+ return;
+ }
+
if (SIZ(d) == 1)
{
mp_limb_t dl = PTR(d)[0];
int twos;
- if (dl == 1)
- {
- if (q != a)
- mpz_set (q, a);
- return;
- }
- if (dl == 3)
- {
- mpz_divexact_by3 (q, a);
- return;
- }
-
- count_trailing_zeros (twos, dl);
- dl >>= twos;
+ if ((dl & 1) == 0)
+ {
+ count_trailing_zeros (twos, dl);
+ dl >>= twos;
+ mpz_tdiv_q_2exp (q, a, twos);
+ a = q;
+ }
if (dl == 1)
- {
- mpz_tdiv_q_2exp (q, a, twos);
- return;
- }
+ {
+ if (q != a)
+ mpz_set (q, a);
+ return;
+ }
+#if GMP_NUMB_BITS % 2 == 0
if (dl == 3)
- {
- mpz_tdiv_q_2exp (q, a, twos);
- mpz_divexact_by3 (q, q);
- return;
- }
+ {
+ mpz_divexact_by3 (q, a);
+ return;
+ }
+#endif
+#if GMP_NUMB_BITS % 4 == 0
+ if (dl == 5)
+ {
+ mpz_divexact_by5 (q, a);
+ return;
+ }
+#endif
+
+ mpz_divexact_limb (q, a, dl);
+ return;
}
mpz_divexact (q, a, d);
Contributed to the GNU project by Niels Möller.
Copyright 1991, 1993, 1994, 1995, 1996, 1997, 1998, 2000, 2001, 2002, 2005,
-2006, 2007, 2009 Free Software Foundation, Inc.
+2006, 2007, 2009, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
nn = ABSIZ (num);
dn = ABSIZ (den);
- qn = nn - dn + 1;
- MPZ_REALLOC (quot, qn);
-
if (nn < dn)
{
/* This special case avoids segfaults below when the function is
return;
}
- TMP_MARK;
+ qn = nn - dn + 1;
- qp = PTR(quot);
+ TMP_MARK;
if (quot == num || quot == den)
qp = TMP_ALLOC_LIMBS (qn);
+ else
+ qp = MPZ_REALLOC (quot, qn);
np = PTR(num);
dp = PTR(den);
mpn_divexact (qp, np, nn, dp, dn);
MPN_NORMALIZE (qp, qn);
- SIZ(quot) = (SIZ(num) ^ SIZ(den)) >= 0 ? qn : -qn;
-
if (qp != PTR(quot))
- MPN_COPY (PTR(quot), qp, qn);
+ MPN_COPY (MPZ_REALLOC (quot, qn), qp, qn);
+
+ SIZ(quot) = (SIZ(num) ^ SIZ(den)) >= 0 ? qn : -qn;
TMP_FREE;
}
if (! (d & 1))
{
/* Strip low zero bits to get odd d required by modexact. If d==e*2^n
- and a is divisible by 2^n and by e, then it's divisible by d. */
+ and a is divisible by 2^n and by e, then it's divisible by d. */
if ((ap[0] & LOW_ZEROS_MASK (d)) != 0)
- return 0;
+ return 0;
count_trailing_zeros (twos, (mp_limb_t) d);
d >>= twos;
/* mpz_export -- create word data from mpz.
-Copyright 2002, 2003 Free Software Foundation, Inc.
+Copyright 2002, 2003, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
#define HOST_ENDIAN (* (signed char *) &endian_test)
#endif
-
-#define MPN_SIZEINBASE_2EXP(result, ptr, size, base2exp) \
- do { \
- int __cnt; \
- unsigned long __totbits; \
- ASSERT ((size) > 0); \
- ASSERT ((ptr)[(size)-1] != 0); \
- count_leading_zeros (__cnt, (ptr)[(size)-1]); \
- __totbits = (size) * GMP_NUMB_BITS - (__cnt - GMP_NAIL_BITS); \
- (result) = (__totbits + (base2exp)-1) / (base2exp); \
- } while (0)
-
-
void *
mpz_export (void *data, size_t *countp, int order,
- size_t size, int endian, size_t nail, mpz_srcptr z)
+ size_t size, int endian, size_t nail, mpz_srcptr z)
{
mp_size_t zsize;
mp_srcptr zp;
ASSERT (order == 1 || order == -1);
ASSERT (endian == 1 || endian == 0 || endian == -1);
ASSERT (nail <= 8*size);
- ASSERT (8*size-nail > 0);
+ ASSERT (nail < 8*size || SIZ(z) == 0); /* nail < 8*size+(SIZ(z)==0) */
if (countp == NULL)
countp = &dummy;
if (nail == GMP_NAIL_BITS)
{
if (size == sizeof (mp_limb_t) && align == 0)
- {
- if (order == -1 && endian == HOST_ENDIAN)
- {
- MPN_COPY ((mp_ptr) data, zp, (mp_size_t) count);
- return data;
- }
- if (order == 1 && endian == HOST_ENDIAN)
- {
- MPN_REVERSE ((mp_ptr) data, zp, (mp_size_t) count);
- return data;
- }
-
- if (order == -1 && endian == -HOST_ENDIAN)
- {
- MPN_BSWAP ((mp_ptr) data, zp, (mp_size_t) count);
- return data;
- }
- if (order == 1 && endian == -HOST_ENDIAN)
- {
- MPN_BSWAP_REVERSE ((mp_ptr) data, zp, (mp_size_t) count);
- return data;
- }
- }
+ {
+ if (order == -1 && endian == HOST_ENDIAN)
+ {
+ MPN_COPY ((mp_ptr) data, zp, (mp_size_t) count);
+ return data;
+ }
+ if (order == 1 && endian == HOST_ENDIAN)
+ {
+ MPN_REVERSE ((mp_ptr) data, zp, (mp_size_t) count);
+ return data;
+ }
+
+ if (order == -1 && endian == -HOST_ENDIAN)
+ {
+ MPN_BSWAP ((mp_ptr) data, zp, (mp_size_t) count);
+ return data;
+ }
+ if (order == 1 && endian == -HOST_ENDIAN)
+ {
+ MPN_BSWAP_REVERSE ((mp_ptr) data, zp, (mp_size_t) count);
+ return data;
+ }
+ }
}
{
limb = 0;
for (i = 0; i < count; i++)
{
- for (j = 0; j < wbytes; j++)
- {
- EXTRACT (8, + 0);
- dp -= endian;
- }
- if (wbits != 0)
- {
- EXTRACT (wbits, & wbitsmask);
- dp -= endian;
- j++;
- }
- for ( ; j < size; j++)
- {
- *dp = '\0';
- dp -= endian;
- }
- dp += woffset;
+ for (j = 0; j < wbytes; j++)
+ {
+ EXTRACT (8, + 0);
+ dp -= endian;
+ }
+ if (wbits != 0)
+ {
+ EXTRACT (wbits, & wbitsmask);
+ dp -= endian;
+ j++;
+ }
+ for ( ; j < size; j++)
+ {
+ *dp = '\0';
+ dp -= endian;
+ }
+ dp += woffset;
}
ASSERT (zp == PTR(z) + ABSIZ(z));
/* low byte of word after most significant */
ASSERT (dp == (unsigned char *) data
- + (order < 0 ? count*size : - (mp_size_t) size)
- + (endian >= 0 ? (mp_size_t) size - 1 : 0));
+ + (order < 0 ? count*size : - (mp_size_t) size)
+ + (endian >= 0 ? (mp_size_t) size - 1 : 0));
}
return data;
}
-/* mpz_fac_ui(result, n) -- Set RESULT to N!.
+/* mpz_fac_ui(RESULT, N) -- Set RESULT to N!.
-Copyright 1991, 1993, 1994, 1995, 2000, 2001, 2002, 2003 Free Software
-Foundation, Inc.
+Contributed to the GNU project by Marco Bodrato.
+
+Copyright 1991, 1993, 1994, 1995, 2000, 2001, 2002, 2003, 2011, 2012
+Free Software Foundation, Inc.
This file is part of the GNU MP Library.
#include "gmp.h"
#include "gmp-impl.h"
-#include "longlong.h"
-
-#include "fac_ui.h"
-
-
-static void odd_product __GMP_PROTO ((unsigned long, unsigned long, mpz_t *));
-static void ap_product_small __GMP_PROTO ((mpz_t, mp_limb_t, mp_limb_t, unsigned long, unsigned long));
-
-/* must be >=2 */
-#define APCONST 5
-
-/* for single non-zero limb */
-#define MPZ_SET_1_NZ(z,n) \
- do { \
- mpz_ptr __z = (z); \
- ASSERT ((n) != 0); \
- PTR(__z)[0] = (n); \
- SIZ(__z) = 1; \
- } while (0)
-
-/* for src>0 and n>0 */
-#define MPZ_MUL_1_POS(dst,src,n) \
- do { \
- mpz_ptr __dst = (dst); \
- mpz_srcptr __src = (src); \
- mp_size_t __size = SIZ(__src); \
- mp_ptr __dst_p; \
- mp_limb_t __c; \
- \
- ASSERT (__size > 0); \
- ASSERT ((n) != 0); \
- \
- MPZ_REALLOC (__dst, __size+1); \
- __dst_p = PTR(__dst); \
- \
- __c = mpn_mul_1 (__dst_p, PTR(__src), __size, n); \
- __dst_p[__size] = __c; \
- SIZ(__dst) = __size + (__c != 0); \
+#define FACTOR_LIST_STORE(P, PR, MAX_PR, VEC, I) /* fold factor P into the running product PR, first flushing PR to VEC when it is already above MAX_PR; PR and I are modified and PR is evaluated more than once, so pass plain lvalues */ \
+ do { \
+ if ((PR) > (MAX_PR)) { /* PR is above the MAX_PR bound */ \
+ (VEC)[(I)++] = (PR); /* store the finished product and advance I */ \
+ (PR) = (P); /* restart the running product at P */ \
+ } else \
+ (PR) *= (P); \
} while (0)
-
-#if BITS_PER_ULONG == GMP_LIMB_BITS
-#define BSWAP_ULONG(x,y) BSWAP_LIMB(x,y)
+#if TUNE_PROGRAM_BUILD
+#define FACTORS_PER_LIMB (GMP_NUMB_BITS / (LOG2C(FAC_DSC_THRESHOLD_LIMIT-1)+1))
+#else
+#define FACTORS_PER_LIMB (GMP_NUMB_BITS / (LOG2C(FAC_ODD_THRESHOLD)+1))
#endif
-/* We used to have a case here for limb==2*long, doing a BSWAP_LIMB followed
- by a shift down to get the high part. But it provoked incorrect code
- from "HP aC++/ANSI C B3910B A.05.52 [Sep 05 2003]" in ILP32 mode. This
- case would have been nice for gcc ia64 where BSWAP_LIMB is a mux1, but we
- can get that directly muxing a 4-byte ulong if it matters enough. */
-
-#if ! defined (BSWAP_ULONG)
-#define BSWAP_ULONG(dst, src) \
- do { \
- unsigned long __bswapl_src = (src); \
- unsigned long __bswapl_dst = 0; \
- int __i; \
- for (__i = 0; __i < sizeof(unsigned long); __i++) \
- { \
- __bswapl_dst = (__bswapl_dst << 8) | (__bswapl_src & 0xFF); \
- __bswapl_src >>= 8; \
- } \
- (dst) = __bswapl_dst; \
- } while (0)
-#endif
-
-/* x is bit reverse of y */
-/* Note the divides below are all exact */
-#define BITREV_ULONG(x,y) \
- do { \
- unsigned long __dst; \
- BSWAP_ULONG(__dst,y); \
- __dst = ((__dst>>4)&(ULONG_MAX/17)) | ((__dst<<4)&((ULONG_MAX/17)*16)); \
- __dst = ((__dst>>2)&(ULONG_MAX/5) ) | ((__dst<<2)&((ULONG_MAX/5)*4) ); \
- __dst = ((__dst>>1)&(ULONG_MAX/3) ) | ((__dst<<1)&((ULONG_MAX/3)*2) ); \
- (x) = __dst; \
- } while(0)
-/* above could be improved if cpu has a nibble/bit swap/muxing instruction */
-/* above code is serialized, possible to write as a big parallel expression */
-
-
-
+/* Computes n!, the factorial of n.
+ WARNING: it assumes that n fits in a limb!
+ */
void
mpz_fac_ui (mpz_ptr x, unsigned long n)
{
- unsigned long z, stt;
- int i, j;
- mpz_t t1, st[8 * sizeof (unsigned long) + 1 - APCONST];
- mp_limb_t d[4];
-
static const mp_limb_t table[] = { ONE_LIMB_FACTORIAL_TABLE };
- if (n < numberof (table))
- {
- MPZ_SET_1_NZ (x, table[n]);
- return;
- }
-
- /* NOTE : MUST have n>=3 here */
- ASSERT (n >= 3);
- /* for estimating the alloc sizes the calculation of these formula's is not
- exact and also the formulas are only approximations, also we ignore
- the few "side" calculations, correct allocation seems to speed up the
- small sizes better, having very little effect on the large sizes */
-
- /* estimate space for stack entries see below
- number of bits for n! is
- (1+log_2(2*pi)/2)-n*log_2(exp(1))+(n+1/2)*log_2(n)=
- 2.325748065-n*1.442695041+(n+0.5)*log_2(n) */
- umul_ppmm (d[1], d[0], (mp_limb_t) n, (mp_limb_t) FAC2OVERE);
- /* d[1] is 2n/e, d[0] ignored */
- count_leading_zeros (z, d[1]);
- z = GMP_LIMB_BITS - z - 1; /* z=floor(log_2(2n/e)) */
- umul_ppmm (d[1], d[0], (mp_limb_t) n, (mp_limb_t) z);
- /* d=n*floor(log_2(2n/e)) */
- d[0] = (d[0] >> 2) | (d[1] << (GMP_LIMB_BITS - 2));
- d[1] >>= 2;
- /* d=n*floor(log_2(2n/e))/4 */
- z = d[0] + 1; /* have to ignore any overflow */
- /* so z is the number of bits wanted for st[0] */
+ ASSERT (n <= GMP_NUMB_MAX);
-
- if (n <= ((unsigned long) 1) << (APCONST))
- {
- mpz_realloc2 (x, 4 * z);
- ap_product_small (x, CNST_LIMB(2), CNST_LIMB(1), n - 1, 4L);
- return;
- }
- if (n <= ((unsigned long) 1) << (APCONST + 1))
- { /* use n!=odd(1,n)*(n/2)!*2^(n/2) */
- mpz_init2 (t1, 2 * z);
- mpz_realloc2 (x, 4 * z);
- ap_product_small (x, CNST_LIMB(2), CNST_LIMB(1), n / 2 - 1, 4L);
- ap_product_small (t1, CNST_LIMB(3), CNST_LIMB(2), (n - 1) / 2, 4L);
- mpz_mul (x, x, t1);
- mpz_clear (t1);
- mpz_mul_2exp (x, x, n / 2);
- return;
- }
- if (n <= ((unsigned long) 1) << (APCONST + 2))
+ if (n < numberof (table))
{
- /* use n!=C_2(1,n/2)^2*C_2(n/2,n)*(n/4)!*2^(n/2+n/4) all int divs
- so need (BITS_IN_N-APCONST+1)=(APCONST+3-APCONST+1)=4 stack entries */
- mpz_init2 (t1, 2 * z);
- mpz_realloc2 (x, 4 * z);
- for (i = 0; i < 4; i++)
- {
- mpz_init2 (st[i], z);
- z >>= 1;
- }
- odd_product (1, n / 2, st);
- mpz_set (x, st[0]);
- odd_product (n / 2, n, st);
- mpz_mul (x, x, x);
- ASSERT (n / 4 <= FACMUL4 + 6);
- ap_product_small (t1, CNST_LIMB(2), CNST_LIMB(1), n / 4 - 1, 4L);
- /* must have 2^APCONST odd numbers max */
- mpz_mul (t1, t1, st[0]);
- for (i = 0; i < 4; i++)
- mpz_clear (st[i]);
- mpz_mul (x, x, t1);
- mpz_clear (t1);
- mpz_mul_2exp (x, x, n / 2 + n / 4);
- return;
+ PTR (x)[0] = table[n];
+ SIZ (x) = 1;
}
-
- count_leading_zeros (stt, (mp_limb_t) n);
- stt = GMP_LIMB_BITS - stt + 1 - APCONST;
-
- for (i = 0; i < (signed long) stt; i++)
+ else if (BELOW_THRESHOLD (n, FAC_ODD_THRESHOLD))
{
- mpz_init2 (st[i], z);
- z >>= 1;
- }
-
- count_leading_zeros (z, (mp_limb_t) (n / 3));
- /* find z st 2^z>n/3 range for z is 1 <= z <= 8 * sizeof(unsigned long)-1 */
- z = GMP_LIMB_BITS - z;
-
- /*
- n! = 2^e * PRODUCT_{i=0}^{i=z-1} C_2( n/2^{i+1}, n/2^i )^{i+1}
- where 2^e || n! 3.2^z>n C_2(a,b)=PRODUCT of odd z such that a<z<=b
- */
+ mp_limb_t prod, max_prod;
+ mp_size_t j;
+ mp_ptr factors;
+ TMP_SDECL;
+
+ TMP_SMARK;
+ factors = TMP_SALLOC_LIMBS (2 + (n - numberof (table)) / FACTORS_PER_LIMB);
+
+ factors[0] = table[numberof (table)-1];
+ j = 1;
+ prod = n;
+#if TUNE_PROGRAM_BUILD
+ max_prod = GMP_NUMB_MAX / FAC_DSC_THRESHOLD_LIMIT;
+#else
+ max_prod = GMP_NUMB_MAX / (FAC_ODD_THRESHOLD | 1);
+#endif
+ while (--n >= numberof (table))
+ FACTOR_LIST_STORE (n, prod, max_prod, factors, j);
+ factors[j++] = prod;
+ mpz_prodlimbs (x, factors, j);
- mpz_init_set_ui (t1, 1);
- for (j = 8 * sizeof (unsigned long) / 2; j != 0; j >>= 1)
- {
- MPZ_SET_1_NZ (x, 1);
- for (i = 8 * sizeof (unsigned long) - j; i >= j; i -= 2 * j)
- if ((signed long) z >= i)
- {
- odd_product (n >> i, n >> (i - 1), st);
- /* largest odd product when j=i=1 then we have
- odd_product(n/2,n,st) which is approx (2n/e)^(n/4)
- so log_base2(largest oddproduct)=n*log_base2(2n/e)/4
- number of bits is n*log_base2(2n/e)/4+1 */
- if (i != j)
- mpz_pow_ui (st[0], st[0], i / j);
- mpz_mul (x, x, st[0]);
- }
- if ((signed long) z >= j && j != 1)
- {
- mpz_mul (t1, t1, x);
- mpz_mul (t1, t1, t1);
- }
+ TMP_SFREE;
}
- for (i = 0; i < (signed long) stt; i++)
- mpz_clear (st[i]);
- mpz_mul (x, x, t1);
- mpz_clear (t1);
- popc_limb (i, (mp_limb_t) n);
- mpz_mul_2exp (x, x, n - i);
- return;
-}
-
-/* start,step are mp_limb_t although they will fit in unsigned long */
-static void
-ap_product_small (mpz_t ret, mp_limb_t start, mp_limb_t step,
- unsigned long count, unsigned long nm)
-{
- unsigned long a;
- mp_limb_t b;
-
- ASSERT (count <= (((unsigned long) 1) << APCONST));
-/* count can never be zero ? check this and remove test below */
- if (count == 0)
+ else
{
- MPZ_SET_1_NZ (ret, 1);
- return;
- }
- if (count == 1)
- {
- MPZ_SET_1_NZ (ret, start);
- return;
- }
- switch (nm)
- {
- case 1:
- MPZ_SET_1_NZ (ret, start);
- b = start + step;
- for (a = 0; a < count - 1; b += step, a++)
- MPZ_MUL_1_POS (ret, ret, b);
- return;
- case 2:
- MPZ_SET_1_NZ (ret, start * (start + step));
- if (count == 2)
- return;
- for (b = start + 2 * step, a = count / 2 - 1; a != 0;
- a--, b += 2 * step)
- MPZ_MUL_1_POS (ret, ret, b * (b + step));
- if (count % 2 == 1)
- MPZ_MUL_1_POS (ret, ret, b);
- return;
- case 3:
- if (count == 2)
- {
- MPZ_SET_1_NZ (ret, start * (start + step));
- return;
- }
- MPZ_SET_1_NZ (ret, start * (start + step) * (start + 2 * step));
- if (count == 3)
- return;
- for (b = start + 3 * step, a = count / 3 - 1; a != 0;
- a--, b += 3 * step)
- MPZ_MUL_1_POS (ret, ret, b * (b + step) * (b + 2 * step));
- if (count % 3 == 2)
- b = b * (b + step);
- if (count % 3 != 0)
- MPZ_MUL_1_POS (ret, ret, b);
- return;
- default: /* ie nm=4 */
- if (count == 2)
- {
- MPZ_SET_1_NZ (ret, start * (start + step));
- return;
- }
- if (count == 3)
+ mp_limb_t count;
+ mpz_oddfac_1 (x, n, 0);
+ if (n <= TABLE_LIMIT_2N_MINUS_POPC_2N)
+ count = __gmp_fac2cnt_table[n / 2 - 1];
+ else
{
- MPZ_SET_1_NZ (ret, start * (start + step) * (start + 2 * step));
- return;
+ popc_limb (count, n);
+ count = n - count;
}
- MPZ_SET_1_NZ (ret,
- start * (start + step) * (start + 2 * step) * (start +
- 3 * step));
- if (count == 4)
- return;
- for (b = start + 4 * step, a = count / 4 - 1; a != 0;
- a--, b += 4 * step)
- MPZ_MUL_1_POS (ret, ret,
- b * (b + step) * (b + 2 * step) * (b + 3 * step));
- if (count % 4 == 2)
- b = b * (b + step);
- if (count % 4 == 3)
- b = b * (b + step) * (b + 2 * step);
- if (count % 4 != 0)
- MPZ_MUL_1_POS (ret, ret, b);
- return;
+ mpz_mul_2exp (x, x, count);
}
}
-/* return value in st[0]
- odd_product(l,h)=sqrt((h/e)^h/(l/e)^l) using Stirling approx and e=exp(1)
- so st[0] needs enough bits for above, st[1] needs half these bits and
- st[2] needs 1/4 of these bits etc */
-static void
-odd_product (unsigned long low, unsigned long high, mpz_t * st)
-{
- unsigned long stc = 1, stn = 0, n, y, mask, a, nm = 1;
- signed long z;
-
- low++;
- if (low % 2 == 0)
- low++;
- if (high == 0)
- high = 1;
- if (high % 2 == 0)
- high--;
-/* must have high>=low ? check this and remove test below */
- if (high < low)
- {
- MPZ_SET_1_NZ (st[0], 1);
- return;
- }
- if (high == low)
- {
- MPZ_SET_1_NZ (st[0], low);
- return;
- }
- if (high <= FACMUL2 + 2)
- {
- nm = 2;
- if (high <= FACMUL3 + 4)
- {
- nm = 3;
- if (high <= FACMUL4 + 6)
- nm = 4;
- }
- }
- high = (high - low) / 2 + 1; /* high is now count,high<=2^(BITS_PER_ULONG-1) */
- if (high <= (((unsigned long) 1) << APCONST))
- {
- ap_product_small (st[0], (mp_limb_t) low, CNST_LIMB(2), high, nm);
- return;
- }
- count_leading_zeros (n, (mp_limb_t) high);
-/* assumes clz above is LIMB based not NUMB based */
- n = GMP_LIMB_BITS - n - APCONST;
- mask = (((unsigned long) 1) << n);
- a = mask << 1;
- mask--;
-/* have 2^(BITS_IN_N-APCONST) iterations so need
- (BITS_IN_N-APCONST+1) stack entries */
- for (z = mask; z >= 0; z--)
- {
- BITREV_ULONG (y, z);
- y >>= (BITS_PER_ULONG - n);
- ap_product_small (st[stn],
- (mp_limb_t) (low + 2 * ((~y) & mask)), (mp_limb_t) a,
- (high + y) >> n, nm);
- ASSERT (((high + y) >> n) <= (((unsigned long) 1) << APCONST));
- stn++;
- y = stc++;
- while ((y & 1) == 0)
- {
- mpz_mul (st[stn - 2], st[stn - 2], st[stn - 1]);
- stn--;
- y >>= 1;
- }
- }
- ASSERT (stn == 1);
- return;
-}
+#undef FACTORS_PER_LIMB
+#undef FACTOR_LIST_STORE
/* mpz_fdiv_q -- Division rounding the quotient towards -infinity.
The remainder gets the same sign as the denominator.
-Copyright 1994, 1995, 1996, 2000, 2001, 2005 Free Software Foundation, Inc.
+Copyright 1994, 1995, 1996, 2000, 2001, 2005, 2012 Free Software Foundation,
+Inc.
This file is part of the GNU MP Library.
void
mpz_fdiv_q (mpz_ptr quot, mpz_srcptr dividend, mpz_srcptr divisor)
{
- mp_size_t dividend_size = dividend->_mp_size;
- mp_size_t divisor_size = divisor->_mp_size;
+ mp_size_t dividend_size = SIZ (dividend);
+ mp_size_t divisor_size = SIZ (divisor);
mpz_t rem;
TMP_DECL;
mpz_tdiv_qr (quot, rem, dividend, divisor);
- if ((divisor_size ^ dividend_size) < 0 && rem->_mp_size != 0)
+ if ((divisor_size ^ dividend_size) < 0 && SIZ (rem) != 0)
mpz_sub_ui (quot, quot, 1L);
TMP_FREE;
/* mpz_fdiv_q_ui -- Division rounding the quotient towards -infinity.
The remainder gets the same sign as the denominator.
-Copyright 1994, 1995, 1996, 1999, 2001, 2002, 2004 Free Software Foundation,
-Inc.
+Copyright 1994, 1995, 1996, 1999, 2001, 2002, 2004, 2012 Free Software
+Foundation, Inc.
This file is part of the GNU MP Library.
mp_ptr np, qp;
mp_limb_t rl;
- if (divisor == 0)
+ if (UNLIKELY (divisor == 0))
DIVIDE_BY_ZERO;
ns = SIZ(dividend);
}
nn = ABS(ns);
- MPZ_REALLOC (quot, nn);
- qp = PTR(quot);
+ qp = MPZ_REALLOC (quot, nn);
np = PTR(dividend);
#if BITS_PER_ULONG > GMP_NUMB_BITS /* avoid warnings about shift amount */
/* mpz_fdiv_qr -- Division rounding the quotient towards -infinity.
The remainder gets the same sign as the denominator.
-Copyright 1994, 1995, 1996, 2000, 2001, 2005 Free Software Foundation, Inc.
+Copyright 1994, 1995, 1996, 2000, 2001, 2005, 2012 Free Software Foundation,
+Inc.
This file is part of the GNU MP Library.
void
mpz_fdiv_qr (mpz_ptr quot, mpz_ptr rem, mpz_srcptr dividend, mpz_srcptr divisor)
{
- mp_size_t divisor_size = divisor->_mp_size;
+ mp_size_t divisor_size = SIZ (divisor);
mp_size_t xsize;
mpz_t temp_divisor; /* N.B.: lives until function returns! */
TMP_DECL;
divisor = temp_divisor;
}
- xsize = dividend->_mp_size ^ divisor_size;;
+ xsize = SIZ (dividend) ^ divisor_size;;
mpz_tdiv_qr (quot, rem, dividend, divisor);
- if (xsize < 0 && rem->_mp_size != 0)
+ if (xsize < 0 && SIZ (rem) != 0)
{
mpz_sub_ui (quot, quot, 1L);
mpz_add (rem, rem, divisor);
/* mpz_fdiv_qr_ui -- Division rounding the quotient towards -infinity.
The remainder gets the same sign as the denominator.
-Copyright 1994, 1995, 1996, 1999, 2001, 2002, 2004 Free Software Foundation,
+Copyright 1994, 1995, 1996, 1999, 2001, 2002, 2004, 2012 Free Software Foundation,
Inc.
This file is part of the GNU MP Library.
mp_ptr np, qp;
mp_limb_t rl;
- if (divisor == 0)
+ if (UNLIKELY (divisor == 0))
DIVIDE_BY_ZERO;
ns = SIZ(dividend);
}
nn = ABS(ns);
- MPZ_REALLOC (quot, nn);
- qp = PTR(quot);
+ qp = MPZ_REALLOC (quot, nn);
np = PTR(dividend);
#if BITS_PER_ULONG > GMP_NUMB_BITS /* avoid warnings about shift amount */
/* mpz_fdiv_r -- Division rounding the quotient towards -infinity.
The remainder gets the same sign as the denominator.
-Copyright 1994, 1995, 1996, 2001, 2005 Free Software Foundation, Inc.
+Copyright 1994, 1995, 1996, 2001, 2005, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
void
mpz_fdiv_r (mpz_ptr rem, mpz_srcptr dividend, mpz_srcptr divisor)
{
- mp_size_t divisor_size = divisor->_mp_size;
+ mp_size_t divisor_size = SIZ (divisor);
mpz_t temp_divisor; /* N.B.: lives until function returns! */
TMP_DECL;
mpz_tdiv_r (rem, dividend, divisor);
- if ((divisor_size ^ dividend->_mp_size) < 0 && rem->_mp_size != 0)
+ if ((divisor_size ^ SIZ (dividend)) < 0 && SIZ (rem) != 0)
mpz_add (rem, rem, divisor);
TMP_FREE;
/* mpz_fdiv_r_ui -- Division rounding the quotient towards -infinity.
The remainder gets the same sign as the denominator.
-Copyright 1994, 1995, 1996, 2001, 2002, 2004, 2005 Free Software Foundation,
+Copyright 1994, 1995, 1996, 2001, 2002, 2004, 2005, 2012 Free Software Foundation,
Inc.
This file is part of the GNU MP Library.
mp_ptr np;
mp_limb_t rl;
- if (divisor == 0)
+ if (UNLIKELY (divisor == 0))
DIVIDE_BY_ZERO;
ns = SIZ(dividend);
mp_size_t rn;
TMP_DECL;
- MPZ_REALLOC (rem, 2);
- rp = PTR(rem);
+ rp = MPZ_REALLOC (rem, 2);
if (nn == 1) /* tdiv_qr requirements; tested above for 0 */
{
/* mpz_fdiv_ui -- Division rounding the quotient towards -infinity.
The remainder gets the same sign as the denominator.
-Copyright 1994, 1995, 1996, 2001, 2002, 2004, 2005 Free Software Foundation,
-Inc.
+Copyright 1994, 1995, 1996, 2001, 2002, 2004, 2005, 2012 Free Software
+Foundation, Inc.
This file is part of the GNU MP Library.
mp_ptr np;
mp_limb_t rl;
- if (divisor == 0)
+ if (UNLIKELY (divisor == 0))
DIVIDE_BY_ZERO;
ns = SIZ(dividend);
/* mpz_fib2_ui -- calculate Fibonacci numbers.
-Copyright 2001 Free Software Foundation, Inc.
+Copyright 2001, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
mp_size_t size;
size = MPN_FIB2_SIZE (n);
- MPZ_REALLOC (fn, size);
- MPZ_REALLOC (fnsub1, size);
- fp = PTR (fn);
- f1p = PTR (fnsub1);
+ fp = MPZ_REALLOC (fn, size);
+ f1p = MPZ_REALLOC (fnsub1, size);
size = mpn_fib2_ui (fp, f1p, n);
/* mpz_fib_ui -- calculate Fibonacci numbers.
-Copyright 2000, 2001, 2002, 2005 Free Software Foundation, Inc.
+Copyright 2000, 2001, 2002, 2005, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
n2 = n/2;
xalloc = MPN_FIB2_SIZE (n2) + 1;
- MPZ_REALLOC (fn, 2*xalloc+1);
- fp = PTR (fn);
+ fp = MPZ_REALLOC (fn, 2*xalloc+1);
TMP_MARK;
TMP_ALLOC_LIMBS_2 (xp,xalloc, yp,xalloc);
size = mpn_fib2_ui (xp, yp, n2);
TRACE (printf ("mpz_fib_ui last step n=%lu size=%ld bit=%lu\n",
- n >> 1, size, n&1);
- mpn_trace ("xp", xp, size);
- mpn_trace ("yp", yp, size));
+ n >> 1, size, n&1);
+ mpn_trace ("xp", xp, size);
+ mpn_trace ("yp", yp, size));
if (n & 1)
{
fp[0] += (n & 2 ? -CNST_LIMB(2) : CNST_LIMB(2));
#else
if (n & 2)
- {
- ASSERT (fp[0] >= 2);
- fp[0] -= 2;
- }
+ {
+ ASSERT (fp[0] >= 2);
+ fp[0] -= 2;
+ }
else
- {
- ASSERT (c != GMP_NUMB_MAX); /* because it's the high of a mul */
- c += mpn_add_1 (fp, fp, size-1, CNST_LIMB(2));
- fp[size-1] = c;
- }
+ {
+ ASSERT (c != GMP_NUMB_MAX); /* because it's the high of a mul */
+ c += mpn_add_1 (fp, fp, size-1, CNST_LIMB(2));
+ fp[size-1] = c;
+ }
#endif
}
else
SIZ(fn) = size;
TRACE (printf ("done special, size=%ld\n", size);
- mpn_trace ("fp ", fp, size));
+ mpn_trace ("fp ", fp, size));
TMP_FREE;
}
-/* int mpz_fits_sint_p (mpz_t z) -- test whether z fits a int.
+/* int mpz_fits_sint_p (mpz_t z) -- test whether z fits an int.
Copyright 1997, 2000, 2001 Free Software Foundation, Inc.
/* mpz/gcd.c: Calculate the greatest common divisor of two integers.
-Copyright 1991, 1993, 1994, 1996, 2000, 2001, 2002, 2005 Free Software
-Foundation, Inc.
+Copyright 1991, 1993, 1994, 1996, 2000, 2001, 2002, 2005, 2010 Free
+Software Foundation, Inc.
This file is part of the GNU MP Library.
#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"
-#ifdef BERKELEY_MP
-#include "mp.h"
-#endif
void
-#ifndef BERKELEY_MP
mpz_gcd (mpz_ptr g, mpz_srcptr u, mpz_srcptr v)
-#else /* BERKELEY_MP */
-gcd (mpz_srcptr u, mpz_srcptr v, mpz_ptr g)
-#endif /* BERKELEY_MP */
{
unsigned long int g_zero_bits, u_zero_bits, v_zero_bits;
mp_size_t g_zero_limbs, u_zero_limbs, v_zero_limbs;
mp_ptr tp;
- mp_ptr up = u->_mp_d;
- mp_size_t usize = ABS (u->_mp_size);
- mp_ptr vp = v->_mp_d;
- mp_size_t vsize = ABS (v->_mp_size);
+ mp_ptr up;
+ mp_size_t usize;
+ mp_ptr vp;
+ mp_size_t vsize;
mp_size_t gsize;
TMP_DECL;
+ up = PTR(u);
+ usize = ABSIZ (u);
+ vp = PTR(v);
+ vsize = ABSIZ (v);
/* GCD(0, V) == V. */
if (usize == 0)
{
- g->_mp_size = vsize;
+ SIZ (g) = vsize;
if (g == v)
return;
- if (g->_mp_alloc < vsize)
- _mpz_realloc (g, vsize);
- MPN_COPY (g->_mp_d, vp, vsize);
+ MPZ_REALLOC (g, vsize);
+ MPN_COPY (PTR (g), vp, vsize);
return;
}
/* GCD(U, 0) == U. */
if (vsize == 0)
{
- g->_mp_size = usize;
+ SIZ (g) = usize;
if (g == u)
return;
- if (g->_mp_alloc < usize)
- _mpz_realloc (g, usize);
- MPN_COPY (g->_mp_d, up, usize);
+ MPZ_REALLOC (g, usize);
+ MPN_COPY (PTR (g), up, usize);
return;
}
if (usize == 1)
{
- g->_mp_size = 1;
- g->_mp_d[0] = mpn_gcd_1 (vp, vsize, up[0]);
+ SIZ (g) = 1;
+ PTR (g)[0] = mpn_gcd_1 (vp, vsize, up[0]);
return;
}
if (vsize == 1)
{
- g->_mp_size = 1;
- g->_mp_d[0] = mpn_gcd_1 (up, usize, vp[0]);
+ SIZ(g) = 1;
+ PTR (g)[0] = mpn_gcd_1 (up, usize, vp[0]);
return;
}
/* Eliminate low zero bits from U and V and move to temporary storage. */
while (*up == 0)
up++;
- u_zero_limbs = up - u->_mp_d;
+ u_zero_limbs = up - PTR(u);
usize -= u_zero_limbs;
count_trailing_zeros (u_zero_bits, *up);
tp = up;
while (*vp == 0)
vp++;
- v_zero_limbs = vp - v->_mp_d;
+ v_zero_limbs = vp - PTR (v);
vsize -= v_zero_limbs;
count_trailing_zeros (v_zero_bits, *vp);
tp = vp;
{
mp_limb_t cy_limb;
gsize += (vp[vsize - 1] >> (GMP_NUMB_BITS - g_zero_bits)) != 0;
- if (g->_mp_alloc < gsize)
- _mpz_realloc (g, gsize);
- MPN_ZERO (g->_mp_d, g_zero_limbs);
+ MPZ_REALLOC (g, gsize);
+ MPN_ZERO (PTR (g), g_zero_limbs);
- tp = g->_mp_d + g_zero_limbs;
+ tp = PTR(g) + g_zero_limbs;
cy_limb = mpn_lshift (tp, vp, vsize, g_zero_bits);
if (cy_limb != 0)
tp[vsize] = cy_limb;
}
else
{
- if (g->_mp_alloc < gsize)
- _mpz_realloc (g, gsize);
- MPN_ZERO (g->_mp_d, g_zero_limbs);
- MPN_COPY (g->_mp_d + g_zero_limbs, vp, vsize);
+ MPZ_REALLOC (g, gsize);
+ MPN_ZERO (PTR (g), g_zero_limbs);
+ MPN_COPY (PTR (g) + g_zero_limbs, vp, vsize);
}
- g->_mp_size = gsize;
+ SIZ (g) = gsize;
TMP_FREE;
}
/* mpz_gcdext(g, s, t, a, b) -- Set G to gcd(a, b), and S and T such that
g = as + bt.
-Copyright 1991, 1993, 1994, 1995, 1996, 1997, 2000, 2001, 2005 Free Software
-Foundation, Inc.
+Copyright 1991, 1993, 1994, 1995, 1996, 1997, 2000, 2001, 2005, 2011,
+2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
void
mpz_gcdext (mpz_ptr g, mpz_ptr s, mpz_ptr t, mpz_srcptr a, mpz_srcptr b)
{
- mp_size_t asize, bsize, usize, vsize;
- mp_srcptr ap, bp;
- mp_ptr up, vp;
+ mp_size_t asize, bsize;
+ mp_ptr tmp_ap, tmp_bp;
mp_size_t gsize, ssize, tmp_ssize;
- mp_ptr gp, sp, tmp_gp, tmp_sp;
- mpz_srcptr u, v;
- mpz_ptr ss, tt;
- __mpz_struct stmp, gtmp;
+ mp_ptr gp, tmp_gp, tmp_sp;
TMP_DECL;
- TMP_MARK;
+ /* mpn_gcdext requires that Usize >= Vsize. Therefore, we often
+ have to swap U and V. The computed cofactor will be the
+ "smallest" one, which is faster to produce. The wanted one will
+ be computed here; this is needed anyway when both are requested. */
- /* mpn_gcdext requires that U >= V. Therefore, we often have to swap U and
- V. This in turn leads to a lot of complications. The computed cofactor
- will be the wrong one, so we have to fix that up at the end. */
+ asize = ABSIZ (a);
+ bsize = ABSIZ (b);
- asize = ABS (SIZ (a));
- bsize = ABS (SIZ (b));
- ap = PTR (a);
- bp = PTR (b);
- if (asize > bsize || (asize == bsize && mpn_cmp (ap, bp, asize) > 0))
+ if (asize < bsize)
{
- usize = asize;
- vsize = bsize;
- up = TMP_ALLOC_LIMBS (usize + 1);
- vp = TMP_ALLOC_LIMBS (vsize + 1);
- MPN_COPY (up, ap, usize);
- MPN_COPY (vp, bp, vsize);
- u = a;
- v = b;
- ss = s;
- tt = t;
+ MPZ_SRCPTR_SWAP (a, b);
+ MP_SIZE_T_SWAP (asize, bsize);
+ MPZ_PTR_SWAP (s, t);
}
- else
+
+ if (bsize == 0)
{
- usize = bsize;
- vsize = asize;
- up = TMP_ALLOC_LIMBS (usize + 1);
- vp = TMP_ALLOC_LIMBS (vsize + 1);
- MPN_COPY (up, bp, usize);
- MPN_COPY (vp, ap, vsize);
- u = b;
- v = a;
- ss = t;
- tt = s;
- }
+ /* g = |a|, s = sgn(a), t = 0. */
+ ssize = SIZ (a) >= 0 ? (asize != 0) : -1;
- tmp_gp = TMP_ALLOC_LIMBS (usize + 1);
- tmp_sp = TMP_ALLOC_LIMBS (usize + 1);
+ gp = MPZ_REALLOC (g, asize);
+ MPN_COPY (gp, PTR (a), asize);
+ SIZ (g) = asize;
- if (vsize == 0)
- {
- tmp_sp[0] = 1;
- tmp_ssize = 1;
- MPN_COPY (tmp_gp, up, usize);
- gsize = usize;
+ if (t != NULL)
+ SIZ (t) = 0;
+ if (s != NULL)
+ {
+ SIZ (s) = ssize;
+ PTR (s)[0] = 1;
+ }
+ return;
}
- else
- gsize = mpn_gcdext (tmp_gp, tmp_sp, &tmp_ssize, up, usize, vp, vsize);
- ssize = ABS (tmp_ssize);
- PTR (&gtmp) = tmp_gp;
- SIZ (&gtmp) = gsize;
+ TMP_MARK;
+
+ TMP_ALLOC_LIMBS_2 (tmp_ap, asize, tmp_bp, bsize);
+ MPN_COPY (tmp_ap, PTR (a), asize);
+ MPN_COPY (tmp_bp, PTR (b), bsize);
- PTR (&stmp) = tmp_sp;
- SIZ (&stmp) = (tmp_ssize ^ SIZ (u)) >= 0 ? ssize : -ssize;
+ TMP_ALLOC_LIMBS_2 (tmp_gp, bsize, tmp_sp, bsize + 1);
- if (tt != NULL)
+ gsize = mpn_gcdext (tmp_gp, tmp_sp, &tmp_ssize, tmp_ap, asize, tmp_bp, bsize);
+
+ ssize = ABS (tmp_ssize);
+ tmp_ssize = SIZ (a) >= 0 ? tmp_ssize : -tmp_ssize;
+
+ if (t != NULL)
{
- if (SIZ (v) == 0)
- SIZ (tt) = 0;
- else
- {
- mpz_t x;
- MPZ_TMP_INIT (x, ssize + usize + 1);
- mpz_mul (x, &stmp, u);
- mpz_sub (x, &gtmp, x);
- mpz_tdiv_q (tt, x, v);
- }
+ mpz_t x;
+ __mpz_struct gtmp, stmp;
+
+ PTR (&gtmp) = tmp_gp;
+ SIZ (&gtmp) = gsize;
+
+ PTR (&stmp) = tmp_sp;
+ SIZ (&stmp) = tmp_ssize;
+
+ MPZ_TMP_INIT (x, ssize + asize + 1);
+ mpz_mul (x, &stmp, a);
+ mpz_sub (x, &gtmp, x);
+ mpz_divexact (t, x, b);
}
- if (ss != NULL)
+ if (s != NULL)
{
- if (ALLOC (ss) < ssize)
- _mpz_realloc (ss, ssize);
- sp = PTR (ss);
+ mp_ptr sp;
+
+ sp = MPZ_REALLOC (s, ssize);
MPN_COPY (sp, tmp_sp, ssize);
- SIZ (ss) = SIZ (&stmp);
+ SIZ (s) = tmp_ssize;
}
- if (ALLOC (g) < gsize)
- _mpz_realloc (g, gsize);
- gp = PTR (g);
+ gp = MPZ_REALLOC (g, gsize);
MPN_COPY (gp, tmp_gp, gsize);
SIZ (g) = gsize;
/* double mpz_get_d_2exp (signed long int *exp, mpz_t src).
-Copyright 2001, 2003, 2004 Free Software Foundation, Inc.
+Copyright 2001, 2003, 2004, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
{
mp_size_t size, abs_size;
mp_srcptr ptr;
- int cnt;
long exp;
size = SIZ(src);
ptr = PTR(src);
abs_size = ABS(size);
- count_leading_zeros (cnt, ptr[abs_size - 1]);
- exp = abs_size * GMP_NUMB_BITS - (cnt - GMP_NAIL_BITS);
+ MPN_SIZEINBASE_2EXP(exp, ptr, abs_size, 1);
*exp2 = exp;
return mpn_get_d (ptr, abs_size, size, -exp);
}
/* mpz_get_si(integer) -- Return the least significant digit from INTEGER.
-Copyright 1991, 1993, 1994, 1995, 2000, 2001, 2002, 2006 Free Software
+Copyright 1991, 1993, 1994, 1995, 2000, 2001, 2002, 2006, 2012 Free Software
Foundation, Inc.
This file is part of the GNU MP Library.
signed long int
mpz_get_si (mpz_srcptr z) __GMP_NOTHROW
{
- mp_ptr zp = z->_mp_d;
- mp_size_t size = z->_mp_size;
+ mp_ptr zp = PTR (z);
+ mp_size_t size = SIZ (z);
mp_limb_t zl = zp[0];
#if GMP_NAIL_BITS != 0
result. If STRING is not NULL, the caller must ensure enough space is
available to store the result.
-Copyright 1991, 1993, 1994, 1996, 2000, 2001, 2002, 2005 Free Software
+Copyright 1991, 1993, 1994, 1996, 2000, 2001, 2002, 2005, 2012 Free Software
Foundation, Inc.
This file is part of the GNU MP Library.
mpz_get_str (char *res_str, int base, mpz_srcptr x)
{
mp_ptr xp;
- mp_size_t x_size = x->_mp_size;
- char *str;
+ mp_size_t x_size = SIZ (x);
char *return_str;
size_t str_size;
size_t alloc_size = 0;
- char *num_to_text;
+ const char *num_to_text;
int i;
TMP_DECL;
if (base >= 0)
{
num_to_text = "0123456789abcdefghijklmnopqrstuvwxyz";
- if (base == 0)
+ if (base <= 1)
base = 10;
else if (base > 36)
{
else
{
base = -base;
+ if (base <= 1)
+ base = 10;
+ else if (base > 36)
+ return NULL;
num_to_text = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
}
/* mpn_get_str clobbers its input on non power-of-2 bases */
TMP_MARK;
- xp = x->_mp_d;
+ xp = PTR (x);
if (! POW2_P (base))
{
- xp = TMP_ALLOC_LIMBS (x_size + 1); /* +1 in case x_size==0 */
- MPN_COPY (xp, x->_mp_d, x_size);
+ xp = TMP_ALLOC_LIMBS (x_size | 1); /* |1 in case x_size==0 */
+ MPN_COPY (xp, PTR (x), x_size);
}
str_size = mpn_get_str ((unsigned char *) res_str, base, xp, x_size);
ASSERT (alloc_size == 0 || str_size <= alloc_size - (SIZ(x) < 0));
- /* might have a leading zero, skip it */
- str = res_str;
- if (*res_str == 0 && str_size != 1)
- {
- str_size--;
- str++;
- ASSERT (*str != 0); /* at most one leading zero */
- }
-
- /* Convert result to printable chars, and move down if there was a leading
- zero. */
+ /* Convert result to printable chars. */
for (i = 0; i < str_size; i++)
- res_str[i] = num_to_text[(int) str[i]];
+ res_str[i] = num_to_text[(int) res_str[i]];
res_str[str_size] = 0;
TMP_FREE;
size_t actual_size = str_size + 1 + (res_str - return_str);
ASSERT (actual_size == strlen (return_str) + 1);
__GMP_REALLOCATE_FUNC_MAYBE_TYPE (return_str, alloc_size, actual_size,
- char);
+ char);
}
return return_str;
}
/* mpz_import -- set mpz from word data.
-Copyright 2002 Free Software Foundation, Inc.
+Copyright 2002, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
void
mpz_import (mpz_ptr z, size_t count, int order,
- size_t size, int endian, size_t nail, const void *data)
+ size_t size, int endian, size_t nail, const void *data)
{
mp_size_t zsize;
mp_ptr zp;
ASSERT (nail <= 8*size);
zsize = (count * (8*size - nail) + GMP_NUMB_BITS-1) / GMP_NUMB_BITS;
- MPZ_REALLOC (z, zsize);
- zp = PTR(z);
+ zp = MPZ_REALLOC (z, zsize);
if (endian == 0)
endian = HOST_ENDIAN;
unsigned align = ((char *) data - (char *) NULL) % sizeof (mp_limb_t);
if (order == -1
- && size == sizeof (mp_limb_t)
- && endian == HOST_ENDIAN
- && align == 0)
- {
- MPN_COPY (zp, (mp_srcptr) data, (mp_size_t) count);
- goto done;
- }
+ && size == sizeof (mp_limb_t)
+ && endian == HOST_ENDIAN
+ && align == 0)
+ {
+ MPN_COPY (zp, (mp_srcptr) data, (mp_size_t) count);
+ goto done;
+ }
if (order == -1
- && size == sizeof (mp_limb_t)
- && endian == - HOST_ENDIAN
- && align == 0)
- {
- MPN_BSWAP (zp, (mp_srcptr) data, (mp_size_t) count);
- goto done;
- }
+ && size == sizeof (mp_limb_t)
+ && endian == - HOST_ENDIAN
+ && align == 0)
+ {
+ MPN_BSWAP (zp, (mp_srcptr) data, (mp_size_t) count);
+ goto done;
+ }
if (order == 1
- && size == sizeof (mp_limb_t)
- && endian == HOST_ENDIAN
- && align == 0)
- {
- MPN_REVERSE (zp, (mp_srcptr) data, (mp_size_t) count);
- goto done;
- }
+ && size == sizeof (mp_limb_t)
+ && endian == HOST_ENDIAN
+ && align == 0)
+ {
+ MPN_REVERSE (zp, (mp_srcptr) data, (mp_size_t) count);
+ goto done;
+ }
}
{
lbits = 0;
for (i = 0; i < count; i++)
{
- for (j = 0; j < wbytes; j++)
- {
- byte = *dp;
- dp -= endian;
- ACCUMULATE (8);
- }
- if (wbits != 0)
- {
- byte = *dp & wbitsmask;
- dp -= endian;
- ACCUMULATE (wbits);
- }
- dp += woffset;
+ for (j = 0; j < wbytes; j++)
+ {
+ byte = *dp;
+ dp -= endian;
+ ACCUMULATE (8);
+ }
+ if (wbits != 0)
+ {
+ byte = *dp & wbitsmask;
+ dp -= endian;
+ ACCUMULATE (wbits);
+ }
+ dp += woffset;
}
if (lbits != 0)
{
- ASSERT (lbits <= GMP_NUMB_BITS);
- ASSERT_LIMB (limb);
- *zp++ = limb;
+ ASSERT (lbits <= GMP_NUMB_BITS);
+ ASSERT_LIMB (limb);
+ *zp++ = limb;
}
ASSERT (zp == PTR(z) + zsize);
/* low byte of word after most significant */
ASSERT (dp == (unsigned char *) data
- + (order < 0 ? count*size : - (mp_size_t) size)
- + (endian >= 0 ? (mp_size_t) size - 1 : 0));
+ + (order < 0 ? count*size : - (mp_size_t) size)
+ + (endian >= 0 ? (mp_size_t) size - 1 : 0));
}
/* mpz_init() -- Make a new multiple precision number with value 0.
-Copyright 1991, 1993, 1994, 1995, 2000, 2001, 2002 Free Software Foundation,
-Inc.
+Copyright 1991, 1993, 1994, 1995, 2000, 2001, 2002, 2012 Free Software
+Foundation, Inc.
This file is part of the GNU MP Library.
void
mpz_init (mpz_ptr x)
{
- x->_mp_alloc = 1;
- x->_mp_d = (mp_ptr) (*__gmp_allocate_func) (BYTES_PER_MP_LIMB);
- x->_mp_size = 0;
+ ALLOC (x) = 1;
+ PTR (x) = (mp_ptr) (*__gmp_allocate_func) (BYTES_PER_MP_LIMB);
+ SIZ (x) = 0;
#ifdef __CHECKER__
/* let the low limb look initialized, for the benefit of mpz_get_ui etc */
- x->_mp_d[0] = 0;
+ PTR (x)[0] = 0;
#endif
}
/* mpz_inp_raw -- read an mpz_t in raw format.
-Copyright 2001, 2002, 2005 Free Software Foundation, Inc.
+Copyright 2001, 2002, 2005, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
if (abs_xsize != 0)
{
- MPZ_REALLOC (x, abs_xsize);
- xp = PTR(x);
+ xp = MPZ_REALLOC (x, abs_xsize);
/* Get limb boundaries right in the read, for the benefit of the
- non-nails case. */
+ non-nails case. */
xp[0] = 0;
cp = (char *) (xp + abs_xsize) - abs_csize;
if (fread (cp, abs_csize, 1, fp) != 1)
- return 0;
+ return 0;
if (GMP_NAIL_BITS == 0)
- {
- /* Reverse limbs to least significant first, and byte swap. If
- abs_xsize is odd then on the last iteration elimb and slimb are
- the same. It doesn't seem extra code to handle that case
- separately, to save an NTOH. */
- sp = xp;
- ep = xp + abs_xsize-1;
- for (i = 0; i < (abs_xsize+1)/2; i++)
- {
- NTOH_LIMB_FETCH (elimb, ep);
- NTOH_LIMB_FETCH (slimb, sp);
- *sp++ = elimb;
- *ep-- = slimb;
- }
- }
+ {
+ /* Reverse limbs to least significant first, and byte swap. If
+ abs_xsize is odd then on the last iteration elimb and slimb are
+ the same. It doesn't seem extra code to handle that case
+ separately, to save an NTOH. */
+ sp = xp;
+ ep = xp + abs_xsize-1;
+ for (i = 0; i < (abs_xsize+1)/2; i++)
+ {
+ NTOH_LIMB_FETCH (elimb, ep);
+ NTOH_LIMB_FETCH (slimb, sp);
+ *sp++ = elimb;
+ *ep-- = slimb;
+ }
+ }
else
- {
- /* It ought to be possible to do the transformation in-place, but
- for now it's easier to use an extra temporary area. */
- mp_limb_t byte, limb;
- int bits;
- mp_size_t tpos;
- mp_ptr tp;
- TMP_DECL;
-
- TMP_MARK;
- tp = TMP_ALLOC_LIMBS (abs_xsize);
- limb = 0;
- bits = 0;
- tpos = 0;
- for (i = abs_csize-1; i >= 0; i--)
- {
- byte = (unsigned char) cp[i];
- limb |= (byte << bits);
- bits += 8;
- if (bits >= GMP_NUMB_BITS)
- {
- ASSERT (tpos < abs_xsize);
- tp[tpos++] = limb & GMP_NUMB_MASK;
- bits -= GMP_NUMB_BITS;
- ASSERT (bits < 8);
- limb = byte >> (8 - bits);
- }
- }
- if (bits != 0)
- {
- ASSERT (tpos < abs_xsize);
- tp[tpos++] = limb;
- }
- ASSERT (tpos == abs_xsize);
-
- MPN_COPY (xp, tp, abs_xsize);
- TMP_FREE;
- }
+ {
+ /* It ought to be possible to do the transformation in-place, but
+ for now it's easier to use an extra temporary area. */
+ mp_limb_t byte, limb;
+ int bits;
+ mp_size_t tpos;
+ mp_ptr tp;
+ TMP_DECL;
+
+ TMP_MARK;
+ tp = TMP_ALLOC_LIMBS (abs_xsize);
+ limb = 0;
+ bits = 0;
+ tpos = 0;
+ for (i = abs_csize-1; i >= 0; i--)
+ {
+ byte = (unsigned char) cp[i];
+ limb |= (byte << bits);
+ bits += 8;
+ if (bits >= GMP_NUMB_BITS)
+ {
+ ASSERT (tpos < abs_xsize);
+ tp[tpos++] = limb & GMP_NUMB_MASK;
+ bits -= GMP_NUMB_BITS;
+ ASSERT (bits < 8);
+ limb = byte >> (8 - bits);
+ }
+ }
+ if (bits != 0)
+ {
+ ASSERT (tpos < abs_xsize);
+ tp[tpos++] = limb;
+ }
+ ASSERT (tpos == abs_xsize);
+
+ MPN_COPY (xp, tp, abs_xsize);
+ TMP_FREE;
+ }
/* GMP 1.x mpz_out_raw wrote high zero bytes, strip any high zero
- limbs resulting from this. Should be a non-zero value here, but
- for safety don't assume that. */
+ limbs resulting from this. Should be a non-zero value here, but
+ for safety don't assume that. */
MPN_NORMALIZE (xp, abs_xsize);
}
REST ARE INTERNALS AND ARE ALMOST CERTAIN TO BE SUBJECT TO INCOMPATIBLE
CHANGES OR DISAPPEAR COMPLETELY IN FUTURE GNU MP RELEASES.
-Copyright 1991, 1993, 1994, 1996, 1998, 2000, 2001, 2002, 2003 Free Software
-Foundation, Inc.
+Copyright 1991, 1993, 1994, 1996, 1998, 2000, 2001, 2002, 2003, 2011, 2012
+Free Software Foundation, Inc.
This file is part of the GNU MP Library.
#include <ctype.h>
#include "gmp.h"
#include "gmp-impl.h"
+#include "longlong.h"
-extern const unsigned char __gmp_digit_value_tab[];
#define digit_value_tab __gmp_digit_value_tab
size_t
/* Make sure the string is not empty, mpn_set_str would fail. */
if (str_size == 0)
{
- x->_mp_size = 0;
+ SIZ (x) = 0;
}
else
{
- xsize = 2 + (mp_size_t)
- (str_size / (GMP_NUMB_BITS * mp_bases[base].chars_per_bit_exactly));
+ LIMBS_PER_DIGIT_IN_BASE (xsize, str_size, base);
MPZ_REALLOC (x, xsize);
/* Convert the byte array in base BASE to our bignum format. */
- xsize = mpn_set_str (x->_mp_d, (unsigned char *) str, str_size, base);
- x->_mp_size = negative ? -xsize : xsize;
+ xsize = mpn_set_str (PTR (x), (unsigned char *) str, str_size, base);
+ SIZ (x) = negative ? -xsize : xsize;
}
(*__gmp_free_func) (str, alloc_size);
return nread;
If X has an inverse, return non-zero and store inverse in INVERSE,
otherwise, return 0 and put garbage in INVERSE.
-Copyright 1996, 1997, 1998, 1999, 2000, 2001, 2005 Free Software Foundation,
-Inc.
+Copyright 1996, 1997, 1998, 1999, 2000, 2001, 2005, 2012 Free Software
+Foundation, Inc.
This file is part of the GNU MP Library.
mp_size_t xsize, nsize, size;
TMP_DECL;
- xsize = SIZ (x);
- nsize = SIZ (n);
- xsize = ABS (xsize);
- nsize = ABS (nsize);
- size = MAX (xsize, nsize) + 1;
+ xsize = ABSIZ (x);
+ nsize = ABSIZ (n);
/* No inverse exists if the leftside operand is 0. Likewise, no
inverse exists if the mod operand is 1. */
if (xsize == 0 || (nsize == 1 && (PTR (n))[0] == 1))
return 0;
+ size = MAX (xsize, nsize) + 1;
TMP_MARK;
MPZ_TMP_INIT (gcd, size);
mpz_gcdext (gcd, tmp, (mpz_ptr) 0, x, n);
/* If no inverse existed, return with an indication of that. */
- if (SIZ (gcd) != 1 || PTR(gcd)[0] != 1)
+ if (!MPZ_EQUAL_1_P (gcd))
{
TMP_FREE;
return 0;
/* mpz_ior -- Logical inclusive or.
-Copyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2005 Free Software
-Foundation, Inc.
+Copyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2005, 2012, 2013 Free
+Software Foundation, Inc.
This file is part of the GNU MP Library.
if (res_ptr != op1_ptr)
MPN_COPY (res_ptr + op2_size, op1_ptr + op2_size,
op1_size - op2_size);
- for (i = op2_size - 1; i >= 0; i--)
- res_ptr[i] = op1_ptr[i] | op2_ptr[i];
+ if (LIKELY (op2_size != 0))
+ mpn_ior_n (res_ptr, op1_ptr, op2_ptr, op2_size);
res_size = op1_size;
}
else
if (res_ptr != op2_ptr)
MPN_COPY (res_ptr + op1_size, op2_ptr + op1_size,
op2_size - op1_size);
- for (i = op1_size - 1; i >= 0; i--)
- res_ptr[i] = op1_ptr[i] | op2_ptr[i];
+ if (LIKELY (op1_size != 0))
+ mpn_ior_n (res_ptr, op1_ptr, op2_ptr, op1_size);
res_size = op2_size;
}
{
if (op2_size < 0)
{
- mp_ptr opx;
+ mp_ptr opx, opy;
mp_limb_t cy;
/* Both operands are negative, so will be the result.
/* Possible optimization: Decrease mpn_sub precision,
as we won't use the entire res of both. */
- opx = TMP_ALLOC_LIMBS (res_size);
+ TMP_ALLOC_LIMBS_2 (opx, res_size, opy, res_size);
mpn_sub_1 (opx, op1_ptr, res_size, (mp_limb_t) 1);
op1_ptr = opx;
- opx = TMP_ALLOC_LIMBS (res_size);
- mpn_sub_1 (opx, op2_ptr, res_size, (mp_limb_t) 1);
- op2_ptr = opx;
-
- if (ALLOC(res) < res_size)
- {
- _mpz_realloc (res, res_size);
- /* op1_ptr and op2_ptr point to temporary space. */
- res_ptr = PTR(res);
- }
+ mpn_sub_1 (opy, op2_ptr, res_size, (mp_limb_t) 1);
+ op2_ptr = opy;
/* First loop finds the size of the result. */
for (i = res_size - 1; i >= 0; i--)
if (res_size != 0)
{
+ res_ptr = MPZ_REALLOC (res, res_size + 1);
+
/* Second loop computes the real result. */
- for (i = res_size - 1; i >= 0; i--)
- res_ptr[i] = op1_ptr[i] & op2_ptr[i];
+ mpn_and_n (res_ptr, op1_ptr, op2_ptr, res_size);
cy = mpn_add_1 (res_ptr, res_ptr, res_size, (mp_limb_t) 1);
if (cy)
{
/* We should compute -OP1 | OP2. Swap OP1 and OP2 and fall
through to the code that handles OP1 | -OP2. */
- MPZ_SRCPTR_SWAP (op1, op2);
- MPN_SRCPTR_SWAP (op1_ptr,op1_size, op2_ptr,op2_size);
+ MPZ_SRCPTR_SWAP (op1, op2);
+ MPN_SRCPTR_SWAP (op1_ptr,op1_size, op2_ptr,op2_size);
}
}
if (res_size != 0)
{
/* Second loop computes the real result. */
- for (i = count - 1; i >= 0; i--)
- res_ptr[i] = ~op1_ptr[i] & op2_ptr[i];
+ if (LIKELY (count != 0))
+ mpn_andn_n (res_ptr, op2_ptr, op1_ptr, count);
cy = mpn_add_1 (res_ptr, res_ptr, res_size, (mp_limb_t) 1);
if (cy)
/* mpz_init_set (src_integer) -- Make a new multiple precision number with
a value copied from SRC_INTEGER.
-Copyright 1991, 1993, 1994, 1996, 2000, 2001, 2002 Free Software Foundation,
-Inc.
+Copyright 1991, 1993, 1994, 1996, 2000, 2001, 2002, 2012 Free Software
+Foundation, Inc.
This file is part of the GNU MP Library.
mp_ptr wp, up;
mp_size_t usize, size;
- usize = u->_mp_size;
+ usize = SIZ (u);
size = ABS (usize);
- w->_mp_alloc = MAX (size, 1);
- w->_mp_d = (mp_ptr) (*__gmp_allocate_func) (w->_mp_alloc * BYTES_PER_MP_LIMB);
+ ALLOC (w) = MAX (size, 1);
+ PTR (w) = (mp_ptr) (*__gmp_allocate_func) (ALLOC (w) * BYTES_PER_MP_LIMB);
- wp = w->_mp_d;
- up = u->_mp_d;
+ wp = PTR (w);
+ up = PTR (u);
MPN_COPY (wp, up, size);
- w->_mp_size = usize;
+ SIZ (w) = usize;
#ifdef __CHECKER__
/* let the low limb look initialized, for the benefit of mpz_get_ui etc */
/* mpz_init_set_d(integer, val) -- Initialize and assign INTEGER with a double
value VAL.
-Copyright 1996, 2000, 2001 Free Software Foundation, Inc.
+Copyright 1996, 2000, 2001, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
void
mpz_init_set_d (mpz_ptr dest, double val)
{
- dest->_mp_alloc = 1;
- dest->_mp_d = (mp_ptr) (*__gmp_allocate_func) (BYTES_PER_MP_LIMB);
- dest->_mp_size = 0;
+ ALLOC (dest) = 1;
+ PTR (dest) = (mp_ptr) (*__gmp_allocate_func) (BYTES_PER_MP_LIMB);
+ SIZ (dest) = 0;
mpz_set_d (dest, val);
}
/* mpz_init_set_si(dest,val) -- Make a new multiple precision in DEST and
assign VAL to the new number.
-Copyright 1991, 1993, 1994, 1995, 2000, 2001, 2002 Free Software Foundation,
-Inc.
+Copyright 1991, 1993, 1994, 1995, 2000, 2001, 2002, 2012 Free Software
+Foundation, Inc.
This file is part of the GNU MP Library.
mp_size_t size;
mp_limb_t vl;
- dest->_mp_alloc = 1;
- dest->_mp_d = (mp_ptr) (*__gmp_allocate_func) (BYTES_PER_MP_LIMB);
+ ALLOC (dest) = 1;
+ PTR (dest) = (mp_ptr) (*__gmp_allocate_func) (BYTES_PER_MP_LIMB);
vl = (mp_limb_t) ABS_CAST (unsigned long int, val);
- dest->_mp_d[0] = vl & GMP_NUMB_MASK;
+ PTR (dest)[0] = vl & GMP_NUMB_MASK;
size = vl != 0;
#if GMP_NAIL_BITS != 0
if (vl > GMP_NUMB_MAX)
{
MPZ_REALLOC (dest, 2);
- dest->_mp_d[1] = vl >> GMP_NUMB_BITS;
+ PTR (dest)[1] = vl >> GMP_NUMB_BITS;
size = 2;
}
#endif
- dest->_mp_size = val >= 0 ? size : -size;
+ SIZ (dest) = val >= 0 ? size : -size;
}
i.e. 0xhh...h means base 16, 0oo...o means base 8, otherwise
assume base 10.
-Copyright 1991, 1993, 1994, 1995, 2000, 2001, 2002 Free Software Foundation,
-Inc.
+Copyright 1991, 1993, 1994, 1995, 2000, 2001, 2002, 2012 Free Software
+Foundation, Inc.
This file is part of the GNU MP Library.
int
mpz_init_set_str (mpz_ptr x, const char *str, int base)
{
- x->_mp_alloc = 1;
- x->_mp_d = (mp_ptr) (*__gmp_allocate_func) (BYTES_PER_MP_LIMB);
+ ALLOC (x) = 1;
+ PTR (x) = (mp_ptr) (*__gmp_allocate_func) (BYTES_PER_MP_LIMB);
/* if str has no digits mpz_set_str leaves x->_mp_size unset */
- x->_mp_size = 0;
+ SIZ (x) = 0;
#ifdef __CHECKER__
/* let the low limb look initialized, for the benefit of mpz_get_ui etc */
- x->_mp_d[0] = 0;
+ PTR (x)[0] = 0;
#endif
return mpz_set_str (x, str, base);
/* mpz_init_set_ui(dest,val) -- Make a new multiple precision in DEST and
assign VAL to the new number.
-Copyright 1991, 1993, 1994, 1995, 2000, 2001, 2002, 2004 Free Software
+Copyright 1991, 1993, 1994, 1995, 2000, 2001, 2002, 2004, 2012 Free Software
Foundation, Inc.
This file is part of the GNU MP Library.
{
mp_size_t size;
- dest->_mp_alloc = 1;
- dest->_mp_d = (mp_ptr) (*__gmp_allocate_func) (BYTES_PER_MP_LIMB);
-
- dest->_mp_d[0] = val & GMP_NUMB_MASK;
- size = val != 0;
-
#if BITS_PER_ULONG > GMP_NUMB_BITS /* avoid warnings about shift amount */
if (val > GMP_NUMB_MAX)
{
- MPZ_REALLOC (dest, 2);
- dest->_mp_d[1] = val >> GMP_NUMB_BITS;
+ ALLOC (dest) = 2;
+ PTR (dest) = (mp_ptr) (*__gmp_allocate_func) (BYTES_PER_MP_LIMB*2);
+ PTR (dest)[1] = val >> GMP_NUMB_BITS;
size = 2;
}
+ else
#endif
+ {
+ ALLOC (dest) = 1;
+ PTR (dest) = (mp_ptr) (*__gmp_allocate_func) (BYTES_PER_MP_LIMB);
+
+ size = val != 0;
+ }
+ PTR (dest)[0] = val & GMP_NUMB_MASK;
- dest->_mp_size = size;
+ SIZ (dest) = size;
}
/* mpz_jacobi, mpz_legendre, mpz_kronecker -- mpz/mpz Jacobi symbols.
-Copyright 2000, 2001, 2002, 2005 Free Software Foundation, Inc.
+Copyright 2000, 2001, 2002, 2005, 2010, 2011, 2012 Free Software Foundation,
+Inc.
This file is part of the GNU MP Library.
#include "longlong.h"
-/* Change this to "#define TRACE(x) x" for some traces. */
-#define TRACE(x)
-
-
-#define MPN_RSHIFT_OR_COPY(dst,src,size,shift) \
- do { \
- if ((shift) != 0) \
- { \
- ASSERT_NOCARRY (mpn_rshift (dst, src, size, shift)); \
- (size) -= ((dst)[(size)-1] == 0); \
- } \
- else \
- MPN_COPY (dst, src, size); \
- } while (0)
-
-
-/* This code does triple duty as mpz_jacobi, mpz_legendre and mpz_kronecker.
+/* This code does triple duty as mpz_jacobi, mpz_legendre and
+ mpz_kronecker. For ABI compatibility, the link symbol is
+ __gmpz_jacobi, not __gmpz_kronecker, even though the latter would
+ be more logical.
mpz_jacobi could assume b is odd, but the improvements from that seem
small compared to other operations, and anything significant should be
multiple of b), but the checking for that takes little time compared to
other operations.
- The main loop is just a simple binary GCD with the jacobi symbol result
- tracked during the reduction.
-
- The special cases for a or b fitting in one limb let mod_1 or modexact_1
- get used, without any copying, and end up just as efficient as the mixed
- precision mpz_kronecker_ui etc.
-
- When tdiv_qr is called it's not necessary to make "a" odd or make a
- working copy of it, but tdiv_qr is going to be pretty slow so it's not
- worth bothering trying to save anything for that case.
-
Enhancements:
mpn_bdiv_qr should be used instead of mpn_tdiv_qr.
- Some sort of multi-step algorithm should be used. The current subtract
- and shift for every bit is very inefficient. Lehmer (per current gcdext)
- would need some low bits included in its calculation to apply the sign
- change for reciprocity. Binary Lehmer keeps low bits to strip twos
- anyway, so might be better suited. Maybe the accelerated GCD style k-ary
- reduction would work, if sign changes due to the extra factors it
- introduces can be accounted for (or maybe they can be ignored). */
-
+*/
int
mpz_jacobi (mpz_srcptr a, mpz_srcptr b)
{
mp_srcptr asrcp, bsrcp;
mp_size_t asize, bsize;
+ mp_limb_t alow, blow;
mp_ptr ap, bp;
- mp_limb_t alow, blow, ahigh, bhigh, asecond, bsecond;
- unsigned atwos, btwos;
+ unsigned btwos;
int result_bit1;
+ int res;
TMP_DECL;
- TRACE (printf ("start asize=%d bsize=%d\n", SIZ(a), SIZ(b));
- mpz_trace (" a", a);
- mpz_trace (" b", b));
-
asize = SIZ(a);
asrcp = PTR(a);
alow = asrcp[0];
bsize = SIZ(b);
- if (bsize == 0)
- return JACOBI_LS0 (alow, asize); /* (a/0) */
-
bsrcp = PTR(b);
blow = bsrcp[0];
+ /* The MPN jacobi functions require positive a and b, and b odd. So
+ we must handle the cases of a or b zero, then signs, and then
+ the case of even b.
+ */
+
+ if (bsize == 0)
+ /* (a/0) = [ a = 1 or a = -1 ] */
+ return JACOBI_LS0 (alow, asize);
+
if (asize == 0)
- return JACOBI_0LS (blow, bsize); /* (0/b) */
+ /* (0/b) = [ b = 1 or b = -1 ] */
+ return JACOBI_0LS (blow, bsize);
- /* (even/even)=0 */
- if (((alow | blow) & 1) == 0)
+ if ( (((alow | blow) & 1) == 0))
+ /* Common factor of 2 ==> (a/b) = 0 */
return 0;
- /* account for effect of sign of b, then ignore it */
- result_bit1 = JACOBI_BSGN_SS_BIT1 (asize, bsize);
- bsize = ABS (bsize);
+ if (bsize < 0)
+ {
+ /* (a/-1) = -1 if a < 0, +1 if a >= 0 */
+ result_bit1 = (asize < 0) << 1;
+ bsize = -bsize;
+ }
+ else
+ result_bit1 = 0;
- /* low zero limbs on b can be discarded */
JACOBI_STRIP_LOW_ZEROS (result_bit1, alow, bsrcp, bsize, blow);
count_trailing_zeros (btwos, blow);
- TRACE (printf ("b twos %u\n", btwos));
-
- /* establish shifted blow */
blow >>= btwos;
- if (bsize > 1)
+
+ if (bsize > 1 && btwos > 0)
{
- bsecond = bsrcp[1];
- if (btwos != 0)
- blow |= (bsecond << (GMP_NUMB_BITS - btwos)) & GMP_NUMB_MASK;
+ mp_limb_t b1 = bsrcp[1];
+ blow |= b1 << (GMP_NUMB_BITS - btwos);
+ if (bsize == 2 && (b1 >> btwos) == 0)
+ bsize = 1;
}
- /* account for effect of sign of a, then ignore it */
- result_bit1 ^= JACOBI_ASGN_SU_BIT1 (asize, blow);
- asize = ABS (asize);
-
- if (bsize == 1 || (bsize == 2 && (bsecond >> btwos) == 0))
+ if (asize < 0)
{
- /* special case one limb b, use modexact and no copying */
-
- /* (a/2)=(2/a) with a odd, and if b is even then a is odd here */
- result_bit1 ^= JACOBI_TWOS_U_BIT1 (btwos, alow);
-
- if (blow == 1) /* (a/1)=1 always */
- return JACOBI_BIT1_TO_PN (result_bit1);
-
- JACOBI_MOD_OR_MODEXACT_1_ODD (result_bit1, alow, asrcp, asize, blow);
- TRACE (printf ("base (%lu/%lu) with %d\n",
- alow, blow, JACOBI_BIT1_TO_PN (result_bit1)));
- return mpn_jacobi_base (alow, blow, result_bit1);
+ /* (-1/b) = -1 iff b = 3 (mod 4) */
+ result_bit1 ^= JACOBI_N1B_BIT1(blow);
+ asize = -asize;
}
- /* Discard low zero limbs of a. Usually there won't be anything to
- strip, hence not bothering with it for the bsize==1 case. */
JACOBI_STRIP_LOW_ZEROS (result_bit1, blow, asrcp, asize, alow);
- count_trailing_zeros (atwos, alow);
- TRACE (printf ("a twos %u\n", atwos));
- result_bit1 ^= JACOBI_TWOS_U_BIT1 (atwos, blow);
+ /* Ensure asize >= bsize. Take advantage of the generalized
+ reciprocity law (a/b*2^n) = (b*2^n / a) * RECIP(a,b) */
- /* establish shifted alow */
- alow >>= atwos;
- if (asize > 1)
- {
- asecond = asrcp[1];
- if (atwos != 0)
- alow |= (asecond << (GMP_NUMB_BITS - atwos)) & GMP_NUMB_MASK;
- }
-
- /* (a/2)=(2/a) with a odd */
- result_bit1 ^= JACOBI_TWOS_U_BIT1 (btwos, alow);
-
- if (asize == 1 || (asize == 2 && (asecond >> atwos) == 0))
+ if (asize < bsize)
{
- /* another special case with modexact and no copying */
-
- if (alow == 1) /* (1/b)=1 always */
- return JACOBI_BIT1_TO_PN (result_bit1);
+ MPN_SRCPTR_SWAP (asrcp, asize, bsrcp, bsize);
+ MP_LIMB_T_SWAP (alow, blow);
- /* b still has its twos, so cancel out their effect */
- result_bit1 ^= JACOBI_TWOS_U_BIT1 (btwos, alow);
+ /* NOTE: The value of alow (old blow) is a bit subtle. For this code
+ path, we get alow as the low, always odd, limb of shifted A. Which is
+ what we need for the reciprocity update below.
- result_bit1 ^= JACOBI_RECIP_UU_BIT1 (alow, blow); /* now (b/a) */
- JACOBI_MOD_OR_MODEXACT_1_ODD (result_bit1, blow, bsrcp, bsize, alow);
- TRACE (printf ("base (%lu/%lu) with %d\n",
- blow, alow, JACOBI_BIT1_TO_PN (result_bit1)));
- return mpn_jacobi_base (blow, alow, result_bit1);
- }
+ However, all other uses of alow assume that it is *not*
+ shifted. Luckily, alow matters only when either
+ + btwos > 0, in which case A is always odd
- TMP_MARK;
- TMP_ALLOC_LIMBS_2 (ap, asize, bp, bsize);
+ + asize == bsize == 1, in which case this code path is never
+ taken. */
- MPN_RSHIFT_OR_COPY (ap, asrcp, asize, atwos);
- ASSERT (alow == ap[0]);
- TRACE (mpn_trace ("stripped a", ap, asize));
+ count_trailing_zeros (btwos, blow);
+ blow >>= btwos;
- MPN_RSHIFT_OR_COPY (bp, bsrcp, bsize, btwos);
- ASSERT (blow == bp[0]);
- TRACE (mpn_trace ("stripped b", bp, bsize));
+ if (bsize > 1 && btwos > 0)
+ {
+ mp_limb_t b1 = bsrcp[1];
+ blow |= b1 << (GMP_NUMB_BITS - btwos);
+ if (bsize == 2 && (b1 >> btwos) == 0)
+ bsize = 1;
+ }
- /* swap if necessary to make a longer than b */
- if (asize < bsize)
- {
- TRACE (printf ("swap\n"));
- MPN_PTR_SWAP (ap,asize, bp,bsize);
- MP_LIMB_T_SWAP (alow, blow);
result_bit1 ^= JACOBI_RECIP_UU_BIT1 (alow, blow);
}
- /* If a is bigger than b then reduce to a mod b.
- Division is much faster than chipping away at "a" bit-by-bit. */
- if (asize > bsize)
+ if (bsize == 1)
{
- mp_ptr rp, qp;
+ result_bit1 ^= JACOBI_TWOS_U_BIT1(btwos, alow);
- TRACE (printf ("tdiv_qr asize=%ld bsize=%ld\n", asize, bsize));
+ if (blow == 1)
+ return JACOBI_BIT1_TO_PN (result_bit1);
- TMP_ALLOC_LIMBS_2 (rp, bsize, qp, asize-bsize+1);
- mpn_tdiv_qr (qp, rp, (mp_size_t) 0, ap, asize, bp, bsize);
- ap = rp;
- asize = bsize;
- MPN_NORMALIZE (ap, asize);
+ if (asize > 1)
+ JACOBI_MOD_OR_MODEXACT_1_ODD (result_bit1, alow, asrcp, asize, blow);
- TRACE (printf ("tdiv_qr asize=%ld bsize=%ld\n", asize, bsize);
- mpn_trace (" a", ap, asize);
- mpn_trace (" b", bp, bsize));
+ return mpn_jacobi_base (alow, blow, result_bit1);
+ }
- if (asize == 0) /* (0/b)=0 for b!=1 */
- goto zero;
+ /* Allocation strategy: For A, we allocate a working copy only for A % B, but
+ when A is much larger than B, we have to allocate space for the large
+ quotient. We use the same area, pointed to by bp, for both the quotient
+ A/B and the working copy of B. */
- alow = ap[0];
- goto strip_a;
- }
+ TMP_MARK;
- for (;;)
- {
- ASSERT (asize >= 1); /* a,b non-empty */
- ASSERT (bsize >= 1);
- ASSERT (ap[asize-1] != 0); /* a,b normalized (and hence non-zero) */
- ASSERT (bp[bsize-1] != 0);
- ASSERT (alow == ap[0]); /* low limb copies should be correct */
- ASSERT (blow == bp[0]);
- ASSERT (alow & 1); /* a,b odd */
- ASSERT (blow & 1);
-
- TRACE (printf ("top asize=%ld bsize=%ld\n", asize, bsize);
- mpn_trace (" a", ap, asize);
- mpn_trace (" b", bp, bsize));
-
- /* swap if necessary to make a>=b, applying reciprocity
- high limbs are almost always enough to tell which is bigger */
- if (asize < bsize
- || (asize == bsize
- && ((ahigh=ap[asize-1]) < (bhigh=bp[asize-1])
- || (ahigh == bhigh
- && mpn_cmp (ap, bp, asize-1) < 0))))
- {
- TRACE (printf ("swap\n"));
- MPN_PTR_SWAP (ap,asize, bp,bsize);
- MP_LIMB_T_SWAP (alow, blow);
- result_bit1 ^= JACOBI_RECIP_UU_BIT1 (alow, blow);
- }
-
- if (asize == 1)
- break;
-
- /* a = a-b */
- ASSERT (asize >= bsize);
- ASSERT_NOCARRY (mpn_sub (ap, ap, asize, bp, bsize));
- MPN_NORMALIZE (ap, asize);
- alow = ap[0];
-
- /* (0/b)=0 for b!=1. b!=1 when a==0 because otherwise would have had
- a==1 which is asize==1 and would have exited above. */
- if (asize == 0)
- goto zero;
-
- strip_a:
- /* low zero limbs on a can be discarded */
- JACOBI_STRIP_LOW_ZEROS (result_bit1, blow, ap, asize, alow);
-
- if ((alow & 1) == 0)
- {
- /* factors of 2 from a */
- unsigned twos;
- count_trailing_zeros (twos, alow);
- TRACE (printf ("twos %u\n", twos));
- result_bit1 ^= JACOBI_TWOS_U_BIT1 (twos, blow);
- ASSERT_NOCARRY (mpn_rshift (ap, ap, asize, twos));
- asize -= (ap[asize-1] == 0);
- alow = ap[0];
- }
- }
+ if (asize >= 2*bsize)
+ TMP_ALLOC_LIMBS_2 (ap, bsize, bp, asize - bsize + 1);
+ else
+ TMP_ALLOC_LIMBS_2 (ap, bsize, bp, bsize);
- ASSERT (asize == 1 && bsize == 1); /* just alow and blow left */
- TMP_FREE;
+ /* In the case of even B, we conceptually shift out the powers of two first,
+ and then divide A mod B. Hence, when taking those powers of two into
+ account, we must use alow *before* the division. Doing the actual division
+ first is ok, because the point is to remove multiples of B from A, and
+ multiples of 2^k B are good enough. */
+ if (asize > bsize)
+ mpn_tdiv_qr (bp, ap, 0, asrcp, asize, bsrcp, bsize);
+ else
+ MPN_COPY (ap, asrcp, bsize);
+
+ if (btwos > 0)
+ {
+ result_bit1 ^= JACOBI_TWOS_U_BIT1(btwos, alow);
- /* (1/b)=1 always (in this case have b==1 because a>=b) */
- if (alow == 1)
- return JACOBI_BIT1_TO_PN (result_bit1);
+ ASSERT_NOCARRY (mpn_rshift (bp, bsrcp, bsize, btwos));
+ bsize -= (ap[bsize-1] | bp[bsize-1]) == 0;
+ }
+ else
+ MPN_COPY (bp, bsrcp, bsize);
- /* swap with reciprocity and do (b/a) */
- result_bit1 ^= JACOBI_RECIP_UU_BIT1 (alow, blow);
- TRACE (printf ("base (%lu/%lu) with %d\n",
- blow, alow, JACOBI_BIT1_TO_PN (result_bit1)));
- return mpn_jacobi_base (blow, alow, result_bit1);
+ ASSERT (blow == bp[0]);
+ res = mpn_jacobi_n (ap, bp, bsize,
+ mpn_jacobi_init (ap[0], blow, (result_bit1>>1) & 1));
- zero:
TMP_FREE;
- return 0;
+ return res;
}
a_limb = (unsigned long) ABS(a);
if ((a_limb & 1) == 0)
- {
- /* (0/b)=1 for b=+/-1, 0 otherwise */
- if (a_limb == 0)
- return (b_abs_size == 1 && b_low == 1);
-
- /* a even, b odd */
- count_trailing_zeros (twos, a_limb);
- a_limb >>= twos;
- /* (a*2^n/b) = (a/b) * twos(n,a) */
- result_bit1 ^= JACOBI_TWOS_U_BIT1 (twos, b_low);
- }
+ {
+ /* (0/b)=1 for b=+/-1, 0 otherwise */
+ if (a_limb == 0)
+ return (b_abs_size == 1 && b_low == 1);
+
+ /* a even, b odd */
+ count_trailing_zeros (twos, a_limb);
+ a_limb >>= twos;
+ /* (a*2^n/b) = (a/b) * twos(n,a) */
+ result_bit1 ^= JACOBI_TWOS_U_BIT1 (twos, b_low);
+ }
}
else
{
/* (even/even)=0, and (0/b)=0 for b!=+/-1 */
if ((a & 1) == 0)
- return 0;
+ return 0;
/* a odd, b even
- Establish shifted b_low with valid bit1 for ASGN and RECIP below.
- Zero limbs stripped are accounted for, but zero bits on b_low are
- not because they remain in {b_ptr,b_abs_size} for the
- JACOBI_MOD_OR_MODEXACT_1_ODD. */
+ Establish shifted b_low with valid bit1 for ASGN and RECIP below.
+ Zero limbs stripped are accounted for, but zero bits on b_low are
+ not because they remain in {b_ptr,b_abs_size} for the
+ JACOBI_MOD_OR_MODEXACT_1_ODD. */
JACOBI_STRIP_LOW_ZEROS (result_bit1, a, b_ptr, b_abs_size, b_low);
if ((b_low & 1) == 0)
- {
- if (UNLIKELY (b_low == GMP_NUMB_HIGHBIT))
- {
- /* need b_ptr[1] to get bit1 in b_low */
- if (b_abs_size == 1)
- {
- /* (a/0x80000000) = (a/2)^(BPML-1) */
- if ((GMP_NUMB_BITS % 2) == 0)
- result_bit1 ^= JACOBI_TWO_U_BIT1 (a);
- return JACOBI_BIT1_TO_PN (result_bit1);
- }
-
- /* b_abs_size > 1 */
- b_low = b_ptr[1] << 1;
- }
- else
- {
- count_trailing_zeros (twos, b_low);
- b_low >>= twos;
- }
- }
+ {
+ if (UNLIKELY (b_low == GMP_NUMB_HIGHBIT))
+ {
+ /* need b_ptr[1] to get bit1 in b_low */
+ if (b_abs_size == 1)
+ {
+ /* (a/0x80000000) = (a/2)^(BPML-1) */
+ if ((GMP_NUMB_BITS % 2) == 0)
+ result_bit1 ^= JACOBI_TWO_U_BIT1 (a);
+ return JACOBI_BIT1_TO_PN (result_bit1);
+ }
+
+ /* b_abs_size > 1 */
+ b_low = b_ptr[1] << 1;
+ }
+ else
+ {
+ count_trailing_zeros (twos, b_low);
+ b_low >>= twos;
+ }
+ }
result_bit1 ^= JACOBI_ASGN_SU_BIT1 (a, b_low);
a_limb = (unsigned long) ABS(a);
{
/* (0/b)=0 for b!=+/-1; and (even/even)=0 */
if (! (a & 1))
- return 0;
+ return 0;
/* a odd, b even
- Establish shifted b_low with valid bit1 for the RECIP below. Zero
- limbs stripped are accounted for, but zero bits on b_low are not
- because they remain in {b_ptr,b_abs_size} for
- JACOBI_MOD_OR_MODEXACT_1_ODD. */
+ Establish shifted b_low with valid bit1 for the RECIP below. Zero
+ limbs stripped are accounted for, but zero bits on b_low are not
+ because they remain in {b_ptr,b_abs_size} for
+ JACOBI_MOD_OR_MODEXACT_1_ODD. */
JACOBI_STRIP_LOW_ZEROS (result_bit1, a, b_ptr, b_abs_size, b_low);
if (! (b_low & 1))
- {
- if (UNLIKELY (b_low == GMP_NUMB_HIGHBIT))
- {
- /* need b_ptr[1] to get bit1 in b_low */
- if (b_abs_size == 1)
- {
- /* (a/0x80...00) == (a/2)^(NUMB-1) */
- if ((GMP_NUMB_BITS % 2) == 0)
- {
- /* JACOBI_STRIP_LOW_ZEROS does nothing to result_bit1
- when GMP_NUMB_BITS is even, so it's still 0. */
- ASSERT (result_bit1 == 0);
- result_bit1 = JACOBI_TWO_U_BIT1 (a);
- }
- return JACOBI_BIT1_TO_PN (result_bit1);
- }
-
- /* b_abs_size > 1 */
- b_low = b_ptr[1] << 1;
- }
- else
- {
- count_trailing_zeros (twos, b_low);
- b_low >>= twos;
- }
- }
+ {
+ if (UNLIKELY (b_low == GMP_NUMB_HIGHBIT))
+ {
+ /* need b_ptr[1] to get bit1 in b_low */
+ if (b_abs_size == 1)
+ {
+ /* (a/0x80...00) == (a/2)^(NUMB-1) */
+ if ((GMP_NUMB_BITS % 2) == 0)
+ {
+ /* JACOBI_STRIP_LOW_ZEROS does nothing to result_bit1
+ when GMP_NUMB_BITS is even, so it's still 0. */
+ ASSERT (result_bit1 == 0);
+ result_bit1 = JACOBI_TWO_U_BIT1 (a);
+ }
+ return JACOBI_BIT1_TO_PN (result_bit1);
+ }
+
+ /* b_abs_size > 1 */
+ b_low = b_ptr[1] << 1;
+ }
+ else
+ {
+ count_trailing_zeros (twos, b_low);
+ b_low >>= twos;
+ }
+ }
}
else
{
if (a == 0) /* (0/b)=1 for b=+/-1, 0 otherwise */
- return (b_abs_size == 1 && b_low == 1);
+ return (b_abs_size == 1 && b_low == 1);
if (! (a & 1))
- {
- /* a even, b odd */
- count_trailing_zeros (twos, a);
- a >>= twos;
- /* (a*2^n/b) = (a/b) * (2/a)^n */
- result_bit1 = JACOBI_TWOS_U_BIT1 (twos, b_low);
- }
+ {
+ /* a even, b odd */
+ count_trailing_zeros (twos, a);
+ a >>= twos;
+ /* (a*2^n/b) = (a/b) * (2/a)^n */
+ result_bit1 = JACOBI_TWOS_U_BIT1 (twos, b_low);
+ }
}
if (a == 1)
#endif
result_bit1 = JACOBI_BSGN_SS_BIT1 (a_size, b);
- b_limb = (unsigned long) ABS (b);
+ b_limb = ABS_CAST (unsigned long, b);
a_ptr = PTR(a);
if ((b_limb & 1) == 0)
int twos;
if (b_limb == 0)
- return JACOBI_LS0 (a_low, a_size); /* (a/0) */
+ return JACOBI_LS0 (a_low, a_size); /* (a/0) */
if (! (a_low & 1))
- return 0; /* (even/even)=0 */
+ return 0; /* (even/even)=0 */
/* (a/2)=(2/a) for a odd */
count_trailing_zeros (twos, b_limb);
int twos;
if (b == 0)
- return JACOBI_LS0 (a_low, a_size); /* (a/0) */
+ return JACOBI_LS0 (a_low, a_size); /* (a/0) */
if (! (a_low & 1))
- return 0; /* (even/even)=0 */
+ return 0; /* (even/even)=0 */
/* (a/2)=(2/a) for a odd */
count_trailing_zeros (twos, b);
b >>= twos;
result_bit1 = (JACOBI_TWOS_U_BIT1 (twos, a_low)
- ^ JACOBI_ASGN_SU_BIT1 (a_size, b));
+ ^ JACOBI_ASGN_SU_BIT1 (a_size, b));
}
if (b == 1)
/* mpz_lcm -- mpz/mpz least common multiple.
-Copyright 1996, 2000, 2001, 2005 Free Software Foundation, Inc.
+Copyright 1996, 2000, 2001, 2005, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
#include "gmp.h"
#include "gmp-impl.h"
-#include "longlong.h"
-
void
mpz_lcm (mpz_ptr r, mpz_srcptr u, mpz_srcptr v)
{
mpz_t g;
- mp_size_t usize, vsize, size;
+ mp_size_t usize, vsize;
TMP_DECL;
usize = SIZ (u);
usize = ABS (usize);
vsize = ABS (vsize);
- if (vsize == 1)
+ if (vsize == 1 || usize == 1)
{
mp_limb_t vl, gl, c;
mp_srcptr up;
mp_ptr rp;
- one:
+ if (usize == 1)
+ {
+ usize = vsize;
+ MPZ_SRCPTR_SWAP (u, v);
+ }
+
MPZ_REALLOC (r, usize+1);
up = PTR(u);
return;
}
- if (usize == 1)
- {
- usize = vsize;
- MPZ_SRCPTR_SWAP (u, v);
- goto one;
- }
-
TMP_MARK;
- size = MAX (usize, vsize);
- MPZ_TMP_INIT (g, size);
+ MPZ_TMP_INIT (g, usize); /* v != 0 implies |gcd(u,v)| <= |u| */
mpz_gcd (g, u, v);
mpz_divexact (g, u, g);
/* mpz_lucnum2_ui -- calculate Lucas numbers.
-Copyright 2001, 2003, 2005 Free Software Foundation, Inc.
+Copyright 2001, 2003, 2005, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
size = MPN_FIB2_SIZE (n);
f1p = TMP_ALLOC_LIMBS (size);
- MPZ_REALLOC (ln, size+1);
- MPZ_REALLOC (lnsub1, size+1);
- lp = PTR(ln);
- l1p = PTR(lnsub1);
+ lp = MPZ_REALLOC (ln, size+1);
+ l1p = MPZ_REALLOC (lnsub1, size+1);
size = mpn_fib2_ui (l1p, f1p, n);
/* mpz_lucnum_ui -- calculate Lucas number.
-Copyright 2001, 2003, 2005 Free Software Foundation, Inc.
+Copyright 2001, 2003, 2005, 2011, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
since square or mul used below might need an extra limb over the true
size */
lalloc = MPN_FIB2_SIZE (n) + 2;
- MPZ_REALLOC (ln, lalloc);
- lp = PTR (ln);
+ lp = MPZ_REALLOC (ln, lalloc);
TMP_MARK;
xalloc = lalloc;
for (;;)
{
if (n & 1)
- {
- /* L[2k+1] = 5*F[k-1]*(2*F[k]+F[k-1]) - 4*(-1)^k */
+ {
+ /* L[2k+1] = 5*F[k-1]*(2*F[k]+F[k-1]) - 4*(-1)^k */
- mp_size_t yalloc, ysize;
- mp_ptr yp;
+ mp_size_t yalloc, ysize;
+ mp_ptr yp;
- TRACE (printf (" initial odd n=%lu\n", n));
+ TRACE (printf (" initial odd n=%lu\n", n));
- yalloc = MPN_FIB2_SIZE (n/2);
- yp = TMP_ALLOC_LIMBS (yalloc);
- ASSERT (xalloc >= yalloc);
+ yalloc = MPN_FIB2_SIZE (n/2);
+ yp = TMP_ALLOC_LIMBS (yalloc);
+ ASSERT (xalloc >= yalloc);
- xsize = mpn_fib2_ui (xp, yp, n/2);
+ xsize = mpn_fib2_ui (xp, yp, n/2);
- /* possible high zero on F[k-1] */
- ysize = xsize;
- ysize -= (yp[ysize-1] == 0);
- ASSERT (yp[ysize-1] != 0);
+ /* possible high zero on F[k-1] */
+ ysize = xsize;
+ ysize -= (yp[ysize-1] == 0);
+ ASSERT (yp[ysize-1] != 0);
- /* xp = 2*F[k] + F[k-1] */
+ /* xp = 2*F[k] + F[k-1] */
#if HAVE_NATIVE_mpn_addlsh1_n
- c = mpn_addlsh1_n (xp, yp, xp, xsize);
+ c = mpn_addlsh1_n (xp, yp, xp, xsize);
#else
- c = mpn_lshift (xp, xp, xsize, 1);
- c += mpn_add_n (xp, xp, yp, xsize);
+ c = mpn_lshift (xp, xp, xsize, 1);
+ c += mpn_add_n (xp, xp, yp, xsize);
#endif
- ASSERT (xalloc >= xsize+1);
- xp[xsize] = c;
- xsize += (c != 0);
- ASSERT (xp[xsize-1] != 0);
-
- ASSERT (lalloc >= xsize + ysize);
- c = mpn_mul (lp, xp, xsize, yp, ysize);
- lsize = xsize + ysize;
- lsize -= (c == 0);
-
- /* lp = 5*lp */
-#if HAVE_NATIVE_mpn_addlshift
- c = mpn_addlshift (lp, lp, lsize, 2);
+ ASSERT (xalloc >= xsize+1);
+ xp[xsize] = c;
+ xsize += (c != 0);
+ ASSERT (xp[xsize-1] != 0);
+
+ ASSERT (lalloc >= xsize + ysize);
+ c = mpn_mul (lp, xp, xsize, yp, ysize);
+ lsize = xsize + ysize;
+ lsize -= (c == 0);
+
+ /* lp = 5*lp */
+#if HAVE_NATIVE_mpn_addlsh2_n
+ c = mpn_addlsh2_n (lp, lp, lp, lsize);
#else
- c = mpn_lshift (xp, lp, lsize, 2);
- c += mpn_add_n (lp, lp, xp, lsize);
+ /* FIXME: Is this faster than mpn_mul_1 ? */
+ c = mpn_lshift (xp, lp, lsize, 2);
+ c += mpn_add_n (lp, lp, xp, lsize);
#endif
- ASSERT (lalloc >= lsize+1);
- lp[lsize] = c;
- lsize += (c != 0);
-
- /* lp = lp - 4*(-1)^k */
- if (n & 2)
- {
- /* no overflow, see comments above */
- ASSERT (lp[0] <= MP_LIMB_T_MAX-4);
- lp[0] += 4;
- }
- else
- {
- /* won't go negative */
- MPN_DECR_U (lp, lsize, CNST_LIMB(4));
- }
-
- TRACE (mpn_trace (" l",lp, lsize));
- break;
- }
+ ASSERT (lalloc >= lsize+1);
+ lp[lsize] = c;
+ lsize += (c != 0);
+
+ /* lp = lp - 4*(-1)^k */
+ if (n & 2)
+ {
+ /* no overflow, see comments above */
+ ASSERT (lp[0] <= MP_LIMB_T_MAX-4);
+ lp[0] += 4;
+ }
+ else
+ {
+ /* won't go negative */
+ MPN_DECR_U (lp, lsize, CNST_LIMB(4));
+ }
+
+ TRACE (mpn_trace (" l",lp, lsize));
+ break;
+ }
MP_PTR_SWAP (xp, lp); /* balance the swaps wanted in the L[2k] below */
zeros++;
n /= 2;
if (n <= FIB_TABLE_LUCNUM_LIMIT)
- {
- /* L[n] = F[n] + 2F[n-1] */
- lp[0] = FIB_TABLE (n) + 2 * FIB_TABLE ((int) n - 1);
- lsize = 1;
-
- TRACE (printf (" initial small n=%lu\n", n);
- mpn_trace (" l",lp, lsize));
- break;
- }
+ {
+ /* L[n] = F[n] + 2F[n-1] */
+ lp[0] = FIB_TABLE (n) + 2 * FIB_TABLE ((int) n - 1);
+ lsize = 1;
+
+ TRACE (printf (" initial small n=%lu\n", n);
+ mpn_trace (" l",lp, lsize));
+ break;
+ }
}
for ( ; zeros != 0; zeros--)
lsize -= (xp[lsize-1] == 0);
/* First time around the loop k==n determines (-1)^k, after that k is
- always even and we set n=0 to indicate that. */
+ always even and we set n=0 to indicate that. */
if (n & 1)
- {
- /* L[n]^2 == 0 or 1 mod 4, like all squares, so +2 gives no carry */
- ASSERT (xp[0] <= MP_LIMB_T_MAX-2);
- xp[0] += 2;
- n = 0;
- }
+ {
+ /* L[n]^2 == 0 or 1 mod 4, like all squares, so +2 gives no carry */
+ ASSERT (xp[0] <= MP_LIMB_T_MAX-2);
+ xp[0] += 2;
+ n = 0;
+ }
else
- {
- /* won't go negative */
- MPN_DECR_U (xp, lsize, CNST_LIMB(2));
- }
+ {
+ /* won't go negative */
+ MPN_DECR_U (xp, lsize, CNST_LIMB(2));
+ }
MP_PTR_SWAP (xp, lp);
ASSERT (lp[lsize-1] != 0);
--- /dev/null
+/* mpz_mfac_uiui(RESULT, N, M) -- Set RESULT to N!^(M) = N(N-M)(N-2M)...
+
+Contributed to the GNU project by Marco Bodrato.
+
+Copyright 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/*************************************************************/
+/* Section macros: common macros, for swing/fac/bin (&sieve) */
+/*************************************************************/
+
+#define FACTOR_LIST_STORE(P, PR, MAX_PR, VEC, I) \
+ do { \
+ if ((PR) > (MAX_PR)) { \
+ (VEC)[(I)++] = (PR); \
+ (PR) = (P); \
+ } else \
+ (PR) *= (P); \
+ } while (0)
+
+/*********************************************************/
+/* Section oder factorials: */
+/*********************************************************/
+
+/* mpz_mfac_uiui (x, n, m) computes x = n!^(m) = n*(n-m)*(n-2m)*...
+
+   When n < 3 or n - 1 <= m the product is the single factor n (taken as
+   1 when n == 0) and is written straight into the low limb of x.
+
+   Otherwise g = gcd (n, m) is divided out of both n and m, the reduced
+   multi-factorial is computed (through mpz_fac_ui, mpz_2fac_ui, or a
+   list of limb-sized partial products handed to mpz_prodlimbs), and,
+   when g > 1, the result is multiplied by g^sn.  sn is set to the
+   number of factors of the reduced product: n for m == 1, n/2 + 1 for
+   m == 2, and n/m + 1 for m >= 3.
+
+   m must be non-zero and n must fit in a limb (both are ASSERTed).  */
+
+void
+mpz_mfac_uiui (mpz_ptr x, unsigned long n, unsigned long m)
+{
+ ASSERT (n <= GMP_NUMB_MAX);
+ ASSERT (m != 0);
+
+ if (n < 3 || n - 3 < m - 1) { /* (n < 3 || n - 1 <= m || m == 0) */
+ PTR (x)[0] = n + (n == 0); /* NOTE(review): no realloc here; assumes x already has at least one limb allocated */
+ SIZ (x) = 1;
+ } else { /* m < n - 1 < GMP_NUMB_MAX */
+ mp_limb_t g, sn;
+ mpz_t t;
+
+ sn = n;
+ g = mpn_gcd_1 (&sn, 1, m);
+ if (g != 1) { n/=g; m/=g; } /* work with the reduced n and m; g^sn is multiplied back at the end */
+
+ if (m <= 2) { /* fac or 2fac */
+ if (m == 1) {
+ if (g > 2) {
+ mpz_init (t);
+ mpz_fac_ui (t, n);
+ sn = n;
+ } else {
+ if (g == 2)
+ mpz_2fac_ui (x, n << 1); /* g == 2 and reduced m == 1: the caller's m was 2 and n << 1 restores the caller's n */
+ else
+ mpz_fac_ui (x, n);
+ return;
+ }
+ } else { /* m == 2 */
+ if (g != 1) {
+ mpz_init (t);
+ mpz_2fac_ui (t, n);
+ sn = n / 2 + 1;
+ } else {
+ mpz_2fac_ui (x, n);
+ return;
+ }
+ }
+ } else { /* m >= 3, gcd(n,m) = 1 */
+ mp_limb_t *factors;
+ mp_limb_t prod, max_prod, j;
+ TMP_DECL;
+
+ sn = n / m + 1;
+
+ j = 0;
+ prod = n;
+ n -= m;
+ max_prod = GMP_NUMB_MAX / n; /* n is now the largest factor still to be multiplied in; FACTOR_LIST_STORE only multiplies while prod <= max_prod */
+
+ TMP_MARK;
+ factors = TMP_ALLOC_LIMBS (sn / log_n_max (n) + 2);
+
+ for (; n > m; n -= m)
+ FACTOR_LIST_STORE (n, prod, max_prod, factors, j);
+
+ factors[j++] = n; /* the loop stops with n <= m: this is the last factor */
+ factors[j++] = prod; /* partial product still pending from the loop */
+
+ if (g > 1) {
+ mpz_init (t);
+ mpz_prodlimbs (t, factors, j);
+ } else
+ mpz_prodlimbs (x, factors, j);
+
+ TMP_FREE;
+ }
+
+ if (g > 1) {
+ mpz_t p;
+
+ mpz_init (p);
+ mpz_ui_pow_ui (p, g, sn); /* g^sn */
+ mpz_mul (x, p, t);
+ mpz_clear (p);
+ mpz_clear (t);
+ }
+ }
+}
#include "gmp.h"
#include "gmp-impl.h"
-static int millerrabin __GMP_PROTO ((mpz_srcptr, mpz_srcptr,
- mpz_ptr, mpz_ptr,
- mpz_srcptr, unsigned long int));
+static int millerrabin (mpz_srcptr, mpz_srcptr,
+ mpz_ptr, mpz_ptr,
+ mpz_srcptr, unsigned long int);
int
mpz_millerrabin (mpz_srcptr n, int reps)
static int
millerrabin (mpz_srcptr n, mpz_srcptr nm1, mpz_ptr x, mpz_ptr y,
- mpz_srcptr q, unsigned long int k)
+ mpz_srcptr q, unsigned long int k)
{
unsigned long int i;
/* mpz_mod -- The mathematical mod function.
-Copyright 1991, 1993, 1994, 1995, 1996, 2001, 2002, 2005 Free Software
-Foundation, Inc.
+Copyright 1991, 1993, 1994, 1995, 1996, 2001, 2002, 2005, 2010, 2012
+Free Software Foundation, Inc.
This file is part of the GNU MP Library.
+/* Set REM to DIVIDEND mod DIVISOR.  Only the absolute value of DIVISOR is
+   used: a negative remainder left by mpz_tdiv_r is corrected by adding
+   |DIVISOR| to it. */
void
mpz_mod (mpz_ptr rem, mpz_srcptr dividend, mpz_srcptr divisor)
{
- mp_size_t divisor_size = divisor->_mp_size;
- mpz_t temp_divisor; /* N.B.: lives until function returns! */
+ mp_size_t rn, bn;
+ mpz_t temp_divisor;
TMP_DECL;
TMP_MARK;
+ bn = ABSIZ(divisor);
+
/* We need the original value of the divisor after the remainder has been
preliminary calculated. We have to copy it to temporary space if it's
the same variable as REM. */
if (rem == divisor)
{
- MPZ_TMP_INIT (temp_divisor, ABS (divisor_size));
- mpz_set (temp_divisor, divisor);
- divisor = temp_divisor;
+ PTR(temp_divisor) = TMP_ALLOC_LIMBS (bn);
+ MPN_COPY (PTR(temp_divisor), PTR(divisor), bn);
+ }
+ else
+ {
+ /* No aliasing: temp_divisor just borrows the limbs of DIVISOR. */
+ PTR(temp_divisor) = PTR(divisor);
}
+ /* Give temp_divisor a non-negative size, i.e. make it |DIVISOR|, and use
+    it in place of DIVISOR from here on. */
+ SIZ(temp_divisor) = bn;
+ divisor = temp_divisor;
mpz_tdiv_r (rem, dividend, divisor);
- if (rem->_mp_size != 0)
- {
- if (dividend->_mp_size < 0)
- {
- if (divisor->_mp_size < 0)
- mpz_sub (rem, rem, divisor);
- else
- mpz_add (rem, rem, divisor);
- }
- }
+ /* divisor is positive here, so a single addition fixes a negative
+    remainder. */
+ rn = SIZ (rem);
+ if (rn < 0)
+ mpz_add (rem, rem, divisor);
TMP_FREE;
}
/* mpz_mul -- Multiply two integers.
-Copyright 1991, 1993, 1994, 1996, 2000, 2001, 2005, 2009, 2011 Free
+Copyright 1991, 1993, 1994, 1996, 2000, 2001, 2005, 2009, 2011, 2012 Free
Software Foundation, Inc.
This file is part of the GNU MP Library.
#include <stdio.h> /* for NULL */
#include "gmp.h"
#include "gmp-impl.h"
-#ifdef BERKELEY_MP
-#include "mp.h"
-#endif
void
-#ifndef BERKELEY_MP
mpz_mul (mpz_ptr w, mpz_srcptr u, mpz_srcptr v)
-#else /* BERKELEY_MP */
-mult (mpz_srcptr u, mpz_srcptr v, mpz_ptr w)
-#endif /* BERKELEY_MP */
{
mp_size_t usize;
mp_size_t vsize;
if (vsize == 0)
{
- SIZ(w) = 0;
+ SIZ (w) = 0;
return;
}
#if HAVE_NATIVE_mpn_mul_2
if (vsize <= 2)
{
- MPZ_REALLOC (w, usize+vsize);
- wp = PTR(w);
+ wp = MPZ_REALLOC (w, usize+vsize);
if (vsize == 1)
- cy_limb = mpn_mul_1 (wp, PTR(u), usize, PTR(v)[0]);
+ cy_limb = mpn_mul_1 (wp, PTR (u), usize, PTR (v)[0]);
else
- {
- cy_limb = mpn_mul_2 (wp, PTR(u), usize, PTR(v));
- usize++;
- }
+ {
+ cy_limb = mpn_mul_2 (wp, PTR (u), usize, PTR (v));
+ usize++;
+ }
wp[usize] = cy_limb;
usize += (cy_limb != 0);
- SIZ(w) = (sign_product >= 0 ? usize : -usize);
+ SIZ (w) = (sign_product >= 0 ? usize : -usize);
return;
}
#else
if (vsize == 1)
{
- MPZ_REALLOC (w, usize+1);
- wp = PTR(w);
- cy_limb = mpn_mul_1 (wp, PTR(u), usize, PTR(v)[0]);
+ wp = MPZ_REALLOC (w, usize+1);
+ cy_limb = mpn_mul_1 (wp, PTR (u), usize, PTR (v)[0]);
wp[usize] = cy_limb;
usize += (cy_limb != 0);
- SIZ(w) = (sign_product >= 0 ? usize : -usize);
+ SIZ (w) = (sign_product >= 0 ? usize : -usize);
return;
}
#endif
TMP_MARK;
free_me = NULL;
- up = PTR(u);
- vp = PTR(v);
- wp = PTR(w);
+ up = PTR (u);
+ vp = PTR (v);
+ wp = PTR (w);
/* Ensure W has space enough to store the result. */
wsize = usize + vsize;
- if (ALLOC(w) < wsize)
+ if (ALLOC (w) < wsize)
{
if (wp == up || wp == vp)
{
free_me = wp;
- free_me_size = ALLOC(w);
+ free_me_size = ALLOC (w);
}
else
- (*__gmp_free_func) (wp, ALLOC(w) * BYTES_PER_MP_LIMB);
+ (*__gmp_free_func) (wp, ALLOC (w) * BYTES_PER_MP_LIMB);
- ALLOC(w) = wsize;
+ ALLOC (w) = wsize;
wp = (mp_ptr) (*__gmp_allocate_func) (wsize * BYTES_PER_MP_LIMB);
- PTR(w) = wp;
+ PTR (w) = wp;
}
else
{
wsize -= cy_limb == 0;
- SIZ(w) = sign_product < 0 ? -wsize : wsize;
+ SIZ (w) = sign_product < 0 ? -wsize : wsize;
if (free_me != NULL)
(*__gmp_free_func) (free_me, free_me_size * BYTES_PER_MP_LIMB);
TMP_FREE;
/* mpz_mul_2exp -- Multiply a bignum by 2**CNT
-Copyright 1991, 1993, 1994, 1996, 2001, 2002 Free Software Foundation, Inc.
+Copyright 1991, 1993, 1994, 1996, 2001, 2002, 2012 Free Software Foundation,
+Inc.
This file is part of the GNU MP Library.
#include "gmp-impl.h"
+/* Set R to U * 2^CNT.  The shift is split into limb_cnt whole limbs plus
+   cnt % GMP_NUMB_BITS remaining bits; R takes the sign of U. */
void
-mpz_mul_2exp (mpz_ptr w, mpz_srcptr u, mp_bitcnt_t cnt)
+mpz_mul_2exp (mpz_ptr r, mpz_srcptr u, mp_bitcnt_t cnt)
{
- mp_size_t usize = u->_mp_size;
- mp_size_t abs_usize = ABS (usize);
- mp_size_t wsize;
+ mp_size_t un, rn;
mp_size_t limb_cnt;
- mp_ptr wp;
- mp_limb_t wlimb;
-
- if (usize == 0)
- {
- w->_mp_size = 0;
- return;
- }
+ mp_ptr rp;
+ mp_srcptr up;
+ mp_limb_t rlimb;
+ un = ABSIZ (u);
limb_cnt = cnt / GMP_NUMB_BITS;
- wsize = abs_usize + limb_cnt + 1;
- if (w->_mp_alloc < wsize)
- _mpz_realloc (w, wsize);
-
- wp = w->_mp_d;
- wsize = abs_usize + limb_cnt;
+ rn = un + limb_cnt;
- cnt %= GMP_NUMB_BITS;
- if (cnt != 0)
+ /* U == 0: the result is zero, nothing is allocated or copied. */
+ if (un == 0)
+ rn = 0;
+ else
{
- wlimb = mpn_lshift (wp + limb_cnt, u->_mp_d, abs_usize, cnt);
- if (wlimb != 0)
+ /* One limb beyond rn is reserved for the bits shifted out of the top
+    limb.  PTR(u) is read only after the reallocation, which matters
+    when R and U are the same variable. */
+ rp = MPZ_REALLOC (r, rn + 1);
+ up = PTR(u);
+
+ cnt %= GMP_NUMB_BITS;
+ if (cnt != 0)
{
- wp[wsize] = wlimb;
- wsize++;
+ rlimb = mpn_lshift (rp + limb_cnt, up, un, cnt);
+ /* The out-shifted limb is always stored; it only counts towards
+    the size when it is non-zero. */
+ rp[rn] = rlimb;
+ rn += (rlimb != 0);
+ }
+ else
+ {
+ MPN_COPY_DECR (rp + limb_cnt, up, un);
}
- }
- else
- {
- MPN_COPY_DECR (wp + limb_cnt, u->_mp_d, abs_usize);
- }
- /* Zero all whole limbs at low end. Do it here and not before calling
- mpn_lshift, not to lose for U == W. */
- MPN_ZERO (wp, limb_cnt);
+ /* Zero all whole limbs at low end. Do it here and not before calling
+ mpn_lshift, not to lose for U == R. */
+ MPN_ZERO (rp, limb_cnt);
+ }
- w->_mp_size = usize >= 0 ? wsize : -wsize;
+ SIZ(r) = SIZ(u) >= 0 ? rn : -rn;
}
/* mpz_mul_ui/si (product, multiplier, small_multiplicand) -- Set PRODUCT to
MULTIPLICATOR times SMALL_MULTIPLICAND.
-Copyright 1991, 1993, 1994, 1996, 2000, 2001, 2002, 2005, 2008 Free Software
-Foundation, Inc.
+Copyright 1991, 1993, 1994, 1996, 2000, 2001, 2002, 2005, 2008, 2012
+Free Software Foundation, Inc.
This file is part of the GNU MP Library.
FUNCTION (mpz_ptr prod, mpz_srcptr mult,
MULTIPLICAND_UNSIGNED long int small_mult)
{
- mp_size_t size = SIZ(mult);
- mp_size_t sign_product = size;
+ mp_size_t size;
+ mp_size_t sign_product;
mp_limb_t sml;
mp_limb_t cy;
mp_ptr pp;
- if (size == 0 || small_mult == 0)
+ sign_product = SIZ(mult);
+ if (sign_product == 0 || small_mult == 0)
{
SIZ(prod) = 0;
return;
}
- size = ABS (size);
+ size = ABS (sign_product);
sml = MULTIPLICAND_ABS (small_mult);
if (sml <= GMP_NUMB_MAX)
{
- MPZ_REALLOC (prod, size + 1);
- pp = PTR(prod);
- cy = mpn_mul_1 (pp, PTR(mult), size, sml & GMP_NUMB_MASK);
+ pp = MPZ_REALLOC (prod, size + 1);
+ cy = mpn_mul_1 (pp, PTR(mult), size, sml);
pp[size] = cy;
size += cy != 0;
}
tp = TMP_ALLOC_LIMBS (size + 2);
+ /* Use, maybe, mpn_mul_2? */
cy = mpn_mul_1 (tp, PTR(mult), size, sml & GMP_NUMB_MASK);
tp[size] = cy;
cy = mpn_addmul_1 (tp + 1, PTR(mult), size, sml >> GMP_NUMB_BITS);
tp[size + 1] = cy;
size += 2;
MPN_NORMALIZE_NOT_ZERO (tp, size); /* too general, need to trim one or two limb */
- MPZ_REALLOC (prod, size);
- pp = PTR(prod);
+ pp = MPZ_REALLOC (prod, size);
MPN_COPY (pp, tp, size);
TMP_FREE;
}
CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
FUTURE GNU MP RELEASES.
-Copyright 2001, 2002, 2005 Free Software Foundation, Inc.
+Copyright 2001, 2002, 2005, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
TMP_DECL;
TRACE (printf ("mpz_n_pow_ui rp=0x%lX bp=0x%lX bsize=%ld e=%lu (0x%lX)\n",
- PTR(r), bp, bsize, e, e);
- mpn_trace ("b", bp, bsize));
+ PTR(r), bp, bsize, e, e);
+ mpn_trace ("b", bp, bsize));
ASSERT (bsize == 0 || bp[ABS(bsize)-1] != 0);
- ASSERT (MPN_SAME_OR_SEPARATE2_P (PTR(r), ABSIZ(r), bp, bsize));
+ ASSERT (MPN_SAME_OR_SEPARATE2_P (PTR(r), ALLOC(r), bp, ABS(bsize)));
/* b^0 == 1, including 0^0 == 1 */
if (e == 0)
rtwos_limbs += rtwos_bits / GMP_NUMB_BITS;
rtwos_bits %= GMP_NUMB_BITS;
TRACE (printf ("trailing zero btwos=%d rtwos_limbs=%ld rtwos_bits=%lu\n",
- btwos, rtwos_limbs, rtwos_bits));
+ btwos, rtwos_limbs, rtwos_bits));
TMP_MARK;
{
bsize_1:
/* Power up as far as possible within blimb. We start here with e!=0,
- but if e is small then we might reach e==0 and the whole b^e in rl.
- Notice this code works when blimb==1 too, reaching e==0. */
+ but if e is small then we might reach e==0 and the whole b^e in rl.
+ Notice this code works when blimb==1 too, reaching e==0. */
while (blimb <= GMP_NUMB_HALFMAX)
- {
- TRACE (printf ("small e=0x%lX blimb=0x%lX rl=0x%lX\n",
- e, blimb, rl));
- ASSERT (e != 0);
- if ((e & 1) != 0)
- rl *= blimb;
- e >>= 1;
- if (e == 0)
- goto got_rl;
- blimb *= blimb;
- }
+ {
+ TRACE (printf ("small e=0x%lX blimb=0x%lX rl=0x%lX\n",
+ e, blimb, rl));
+ ASSERT (e != 0);
+ if ((e & 1) != 0)
+ rl *= blimb;
+ e >>= 1;
+ if (e == 0)
+ goto got_rl;
+ blimb *= blimb;
+ }
#if HAVE_NATIVE_mpn_mul_2
TRACE (printf ("single power, e=0x%lX b=0x%lX rl=0x%lX\n",
- e, blimb, rl));
+ e, blimb, rl));
/* Can power b once more into blimb:blimb_low */
bsize = 2;
got_rl:
TRACE (printf ("double power e=0x%lX blimb=0x%lX:0x%lX rl=0x%lX:%lX\n",
- e, blimb, blimb_low, rl_high, rl));
+ e, blimb, blimb_low, rl_high, rl));
/* Combine left-over rtwos_bits into rl_high:rl to be handled by the
- final mul_1 or mul_2 rather than a separate lshift.
- - rl_high:rl mustn't be 1 (since then there's no final mul)
- - rl_high mustn't overflow
- - rl_high mustn't change to non-zero, since mul_1+lshift is
- probably faster than mul_2 (FIXME: is this true?) */
+ final mul_1 or mul_2 rather than a separate lshift.
+ - rl_high:rl mustn't be 1 (since then there's no final mul)
+ - rl_high mustn't overflow
+ - rl_high mustn't change to non-zero, since mul_1+lshift is
+ probably faster than mul_2 (FIXME: is this true?) */
if (rtwos_bits != 0
- && ! (rl_high == 0 && rl == 1)
- && (rl_high >> (GMP_NUMB_BITS-rtwos_bits)) == 0)
- {
- mp_limb_t new_rl_high = (rl_high << rtwos_bits)
- | (rl >> (GMP_NUMB_BITS-rtwos_bits));
- if (! (rl_high == 0 && new_rl_high != 0))
- {
- rl_high = new_rl_high;
- rl <<= rtwos_bits;
- rtwos_bits = 0;
- TRACE (printf ("merged rtwos_bits, rl=0x%lX:%lX\n",
- rl_high, rl));
- }
- }
+ && ! (rl_high == 0 && rl == 1)
+ && (rl_high >> (GMP_NUMB_BITS-rtwos_bits)) == 0)
+ {
+ mp_limb_t new_rl_high = (rl_high << rtwos_bits)
+ | (rl >> (GMP_NUMB_BITS-rtwos_bits));
+ if (! (rl_high == 0 && new_rl_high != 0))
+ {
+ rl_high = new_rl_high;
+ rl <<= rtwos_bits;
+ rtwos_bits = 0;
+ TRACE (printf ("merged rtwos_bits, rl=0x%lX:%lX\n",
+ rl_high, rl));
+ }
+ }
#else
got_rl:
TRACE (printf ("small power e=0x%lX blimb=0x%lX rl=0x%lX\n",
- e, blimb, rl));
+ e, blimb, rl));
/* Combine left-over rtwos_bits into rl to be handled by the final
- mul_1 rather than a separate lshift.
- - rl mustn't be 1 (since then there's no final mul)
- - rl mustn't overflow */
+ mul_1 rather than a separate lshift.
+ - rl mustn't be 1 (since then there's no final mul)
+ - rl mustn't overflow */
if (rtwos_bits != 0
- && rl != 1
- && (rl >> (GMP_NUMB_BITS-rtwos_bits)) == 0)
- {
- rl <<= rtwos_bits;
- rtwos_bits = 0;
- TRACE (printf ("merged rtwos_bits, rl=0x%lX\n", rl));
- }
+ && rl != 1
+ && (rl >> (GMP_NUMB_BITS-rtwos_bits)) == 0)
+ {
+ rl <<= rtwos_bits;
+ rtwos_bits = 0;
+ TRACE (printf ("merged rtwos_bits, rl=0x%lX\n", rl));
+ }
#endif
}
else if (bsize == 2)
{
mp_limb_t bsecond = bp[1];
if (btwos != 0)
- blimb |= (bsecond << (GMP_NUMB_BITS - btwos)) & GMP_NUMB_MASK;
+ blimb |= (bsecond << (GMP_NUMB_BITS - btwos)) & GMP_NUMB_MASK;
bsecond >>= btwos;
if (bsecond == 0)
- {
- /* Two limbs became one after rshift. */
- bsize = 1;
- goto bsize_1;
- }
+ {
+ /* Two limbs became one after rshift. */
+ bsize = 1;
+ goto bsize_1;
+ }
TRACE (printf ("bsize==2 using b=0x%lX:%lX", bsecond, blimb));
#if HAVE_NATIVE_mpn_mul_2
else
{
if (r_bp_overlap || btwos != 0)
- {
- mp_ptr tp = TMP_ALLOC_LIMBS (bsize);
- MPN_RSHIFT_OR_COPY (tp, bp, bsize, btwos);
- bp = tp;
- TRACE (printf ("rshift or copy bp,bsize, new bsize=%ld\n", bsize));
- }
+ {
+ mp_ptr tp = TMP_ALLOC_LIMBS (bsize);
+ MPN_RSHIFT_OR_COPY (tp, bp, bsize, btwos);
+ bp = tp;
+ TRACE (printf ("rshift or copy bp,bsize, new bsize=%ld\n", bsize));
+ }
#if HAVE_NATIVE_mpn_mul_2
/* in case 3 limbs rshift to 2 and hence use the mul_2 loop below */
blimb_low = bp[0];
blimb = bp[bsize-1];
TRACE (printf ("big bsize=%ld ", bsize);
- mpn_trace ("b", bp, bsize));
+ mpn_trace ("b", bp, bsize));
}
/* At this point blimb is the most significant limb of the base to use.
count_leading_zeros (cnt, blimb);
ralloc = (bsize*GMP_NUMB_BITS - cnt + GMP_NAIL_BITS) * e / GMP_NUMB_BITS + 5;
TRACE (printf ("ralloc %ld, from bsize=%ld blimb=0x%lX cnt=%d\n",
- ralloc, bsize, blimb, cnt));
- MPZ_REALLOC (r, ralloc + rtwos_limbs);
- rp = PTR(r);
+ ralloc, bsize, blimb, cnt));
+ rp = MPZ_REALLOC (r, ralloc + rtwos_limbs);
/* Low zero limbs resulting from powers of 2. */
MPN_ZERO (rp, rtwos_limbs);
if (e == 0)
{
/* Any e==0 other than via bsize==1 or bsize==2 is covered at the
- start. */
+ start. */
rp[0] = rl;
rsize = 1;
#if HAVE_NATIVE_mpn_mul_2
mp_size_t talloc;
/* In the mpn_mul_1 or mpn_mul_2 loops or in the mpn_mul loop when the
- low bit of e is zero, tp only has to hold the second last power
- step, which is half the size of the final result. There's no need
- to round up the divide by 2, since ralloc includes a +2 for rl
- which not needed by tp. In the mpn_mul loop when the low bit of e
- is 1, tp must hold nearly the full result, so just size it the same
- as rp. */
+ low bit of e is zero, tp only has to hold the second last power
+ step, which is half the size of the final result. There's no need
+ to round up the divide by 2, since ralloc includes a +2 for rl
+ which is not needed by tp. In the mpn_mul loop when the low bit of e
+ is 1, tp must hold nearly the full result, so just size it the same
+ as rp. */
talloc = ralloc;
#if HAVE_NATIVE_mpn_mul_2
if (bsize <= 2 || (e & 1) == 0)
- talloc /= 2;
+ talloc /= 2;
#else
if (bsize <= 1 || (e & 1) == 0)
- talloc /= 2;
+ talloc /= 2;
#endif
TRACE (printf ("talloc %ld\n", talloc));
tp = TMP_ALLOC_LIMBS (talloc);
/* Go from high to low over the bits of e, starting with i pointing at
- the bit below the highest 1 (which will mean i==-1 if e==1). */
- count_leading_zeros (cnt, e);
+ the bit below the highest 1 (which will mean i==-1 if e==1). */
+ count_leading_zeros (cnt, (mp_limb_t) e);
i = GMP_LIMB_BITS - cnt - 2;
#if HAVE_NATIVE_mpn_mul_2
if (bsize <= 2)
- {
- mp_limb_t mult[2];
-
- /* Any bsize==1 will have been powered above to be two limbs. */
- ASSERT (bsize == 2);
- ASSERT (blimb != 0);
-
- /* Arrange the final result ends up in r, not in the temp space */
- if ((i & 1) == 0)
- SWAP_RP_TP;
-
- rp[0] = blimb_low;
- rp[1] = blimb;
- rsize = 2;
-
- mult[0] = blimb_low;
- mult[1] = blimb;
-
- for ( ; i >= 0; i--)
- {
- TRACE (printf ("mul_2 loop i=%d e=0x%lX, rsize=%ld ralloc=%ld talloc=%ld\n",
- i, e, rsize, ralloc, talloc);
- mpn_trace ("r", rp, rsize));
-
- MPN_SQR (tp, talloc, rp, rsize);
- SWAP_RP_TP;
- if ((e & (1L << i)) != 0)
- MPN_MUL_2 (rp, rsize, ralloc, mult);
- }
-
- TRACE (mpn_trace ("mul_2 before rl, r", rp, rsize));
- if (rl_high != 0)
- {
- mult[0] = rl;
- mult[1] = rl_high;
- MPN_MUL_2 (rp, rsize, ralloc, mult);
- }
- else if (rl != 1)
- MPN_MUL_1 (rp, rsize, ralloc, rl);
- }
+ {
+ mp_limb_t mult[2];
+
+ /* Any bsize==1 will have been powered above to be two limbs. */
+ ASSERT (bsize == 2);
+ ASSERT (blimb != 0);
+
+ /* Arrange the final result ends up in r, not in the temp space */
+ if ((i & 1) == 0)
+ SWAP_RP_TP;
+
+ rp[0] = blimb_low;
+ rp[1] = blimb;
+ rsize = 2;
+
+ mult[0] = blimb_low;
+ mult[1] = blimb;
+
+ for ( ; i >= 0; i--)
+ {
+ TRACE (printf ("mul_2 loop i=%d e=0x%lX, rsize=%ld ralloc=%ld talloc=%ld\n",
+ i, e, rsize, ralloc, talloc);
+ mpn_trace ("r", rp, rsize));
+
+ MPN_SQR (tp, talloc, rp, rsize);
+ SWAP_RP_TP;
+ if ((e & (1L << i)) != 0)
+ MPN_MUL_2 (rp, rsize, ralloc, mult);
+ }
+
+ TRACE (mpn_trace ("mul_2 before rl, r", rp, rsize));
+ if (rl_high != 0)
+ {
+ mult[0] = rl;
+ mult[1] = rl_high;
+ MPN_MUL_2 (rp, rsize, ralloc, mult);
+ }
+ else if (rl != 1)
+ MPN_MUL_1 (rp, rsize, ralloc, rl);
+ }
#else
if (bsize == 1)
- {
- /* Arrange the final result ends up in r, not in the temp space */
- if ((i & 1) == 0)
- SWAP_RP_TP;
-
- rp[0] = blimb;
- rsize = 1;
-
- for ( ; i >= 0; i--)
- {
- TRACE (printf ("mul_1 loop i=%d e=0x%lX, rsize=%ld ralloc=%ld talloc=%ld\n",
- i, e, rsize, ralloc, talloc);
- mpn_trace ("r", rp, rsize));
-
- MPN_SQR (tp, talloc, rp, rsize);
- SWAP_RP_TP;
- if ((e & (1L << i)) != 0)
- MPN_MUL_1 (rp, rsize, ralloc, blimb);
- }
-
- TRACE (mpn_trace ("mul_1 before rl, r", rp, rsize));
- if (rl != 1)
- MPN_MUL_1 (rp, rsize, ralloc, rl);
- }
+ {
+ /* Arrange the final result ends up in r, not in the temp space */
+ if ((i & 1) == 0)
+ SWAP_RP_TP;
+
+ rp[0] = blimb;
+ rsize = 1;
+
+ for ( ; i >= 0; i--)
+ {
+ TRACE (printf ("mul_1 loop i=%d e=0x%lX, rsize=%ld ralloc=%ld talloc=%ld\n",
+ i, e, rsize, ralloc, talloc);
+ mpn_trace ("r", rp, rsize));
+
+ MPN_SQR (tp, talloc, rp, rsize);
+ SWAP_RP_TP;
+ if ((e & (1L << i)) != 0)
+ MPN_MUL_1 (rp, rsize, ralloc, blimb);
+ }
+
+ TRACE (mpn_trace ("mul_1 before rl, r", rp, rsize));
+ if (rl != 1)
+ MPN_MUL_1 (rp, rsize, ralloc, rl);
+ }
#endif
else
- {
- int parity;
-
- /* Arrange the final result ends up in r, not in the temp space */
- ULONG_PARITY (parity, e);
- if (((parity ^ i) & 1) != 0)
- SWAP_RP_TP;
-
- MPN_COPY (rp, bp, bsize);
- rsize = bsize;
-
- for ( ; i >= 0; i--)
- {
- TRACE (printf ("mul loop i=%d e=0x%lX, rsize=%ld ralloc=%ld talloc=%ld\n",
- i, e, rsize, ralloc, talloc);
- mpn_trace ("r", rp, rsize));
-
- MPN_SQR (tp, talloc, rp, rsize);
- SWAP_RP_TP;
- if ((e & (1L << i)) != 0)
- {
- MPN_MUL (tp, talloc, rp, rsize, bp, bsize);
- SWAP_RP_TP;
- }
- }
- }
+ {
+ int parity;
+
+ /* Arrange the final result ends up in r, not in the temp space */
+ ULONG_PARITY (parity, e);
+ if (((parity ^ i) & 1) != 0)
+ SWAP_RP_TP;
+
+ MPN_COPY (rp, bp, bsize);
+ rsize = bsize;
+
+ for ( ; i >= 0; i--)
+ {
+ TRACE (printf ("mul loop i=%d e=0x%lX, rsize=%ld ralloc=%ld talloc=%ld\n",
+ i, e, rsize, ralloc, talloc);
+ mpn_trace ("r", rp, rsize));
+
+ MPN_SQR (tp, talloc, rp, rsize);
+ SWAP_RP_TP;
+ if ((e & (1L << i)) != 0)
+ {
+ MPN_MUL (tp, talloc, rp, rsize, bp, bsize);
+ SWAP_RP_TP;
+ }
+ }
+ }
}
ASSERT (rp == PTR(r) + rtwos_limbs);
/* mpz_neg(mpz_ptr dst, mpz_ptr src) -- Assign the negated value of SRC to DST.
-Copyright 1991, 1993, 1994, 1995, 2001 Free Software Foundation, Inc.
+Copyright 1991, 1993, 1994, 1995, 2001, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
+/* Set W to -U.  When W and U are distinct the limbs of U are copied into
+   W first; when they are the same variable only the size field changes
+   sign. */
void
mpz_neg (mpz_ptr w, mpz_srcptr u)
{
- mp_ptr wp, up;
+ mp_ptr wp;
+ mp_srcptr up;
mp_size_t usize, size;
- usize = u->_mp_size;
+ usize = SIZ (u);
if (u != w)
{
size = ABS (usize);
- if (w->_mp_alloc < size)
- _mpz_realloc (w, size);
+ wp = MPZ_NEWALLOC (w, size);
- wp = w->_mp_d;
- up = u->_mp_d;
+ up = PTR (u);
MPN_COPY (wp, up, size);
}
- w->_mp_size = -usize;
+ /* The sign lives in the size field: negating it negates the number. */
+ SIZ (w) = -usize;
}
/* mpz_nextprime(p,t) - compute the next prime > t and store that in p.
-Copyright 1999, 2000, 2001, 2008, 2009 Free Software Foundation, Inc.
+Copyright 1999, 2000, 2001, 2008, 2009, 2012 Free Software Foundation, Inc.
Contributed to the GNU project by Niels Möller and Torbjorn Granlund.
int i;
unsigned prime_limit;
unsigned long prime;
- int cnt;
mp_size_t pn;
mp_bitcnt_t nbits;
unsigned incr;
return;
pn = SIZ(p);
- count_leading_zeros (cnt, PTR(p)[pn - 1]);
- nbits = pn * GMP_NUMB_BITS - (cnt - GMP_NAIL_BITS);
+ MPN_SIZEINBASE_2EXP(nbits, PTR(p), pn, 1);
if (nbits / 2 >= NUMBER_OF_PRIMES)
prime_limit = NUMBER_OF_PRIMES - 1;
else
--- /dev/null
+/* mpz_oddfac_1(RESULT, N) -- Set RESULT to the odd factor of N!.
+
+Contributed to the GNU project by Marco Bodrato.
+
+THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.
+IT IS ONLY SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.
+IN FACT, IT IS ALMOST GUARANTEED THAT IT WILL CHANGE OR
+DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2010, 2011, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+/* TODO:
+ - split this file in smaller parts with functions that can be recycled for different computations.
+ */
+
+/**************************************************************/
+/* Section macros: common macros, for mswing/fac/bin (&sieve) */
+/**************************************************************/
+
+/* FACTOR_LIST_APPEND (PR, MAX_PR, VEC, I): if the running product PR has
+   grown past MAX_PR, store it in VEC[I] (incrementing I) and restart PR
+   from 1; otherwise do nothing.
+   Wrapped in do { } while (0) so that the macro expands to exactly one
+   statement and stays safe inside unbraced if/else bodies.  The arguments
+   may be evaluated more than once and must be free of side effects. */
+#define FACTOR_LIST_APPEND(PR, MAX_PR, VEC, I) \
+ do { \
+ if ((PR) > (MAX_PR)) { \
+ (VEC)[(I)++] = (PR); \
+ (PR) = 1; \
+ } \
+ } while (0)
+
+/* FACTOR_LIST_STORE (P, PR, MAX_PR, VEC, I): fold the factor P into the
+   running product PR.  While PR <= MAX_PR, P is simply multiplied in.
+   Once PR has grown past MAX_PR, PR is first stored in VEC[I] (and I is
+   incremented), then PR restarts from P.
+   PR and I are modified; every argument may be evaluated more than once,
+   so arguments must be free of side effects. */
+#define FACTOR_LIST_STORE(P, PR, MAX_PR, VEC, I) \
+ do { \
+ if ((PR) > (MAX_PR)) { \
+ (VEC)[(I)++] = (PR); \
+ (PR) = (P); \
+ } else \
+ (PR) *= (P); \
+ } while (0)
+
+/* Sieve iteration macros.  They are used as a bracket:
+
+     LOOP_ON_SIEVE_BEGIN (prime, start, end, off, sieve);
+       ...statements using prime...
+     LOOP_ON_SIEVE_END;
+
+   BEGIN opens a scope declaring the cursor variables (__i, __index,
+   __mask, __max_i) and starts a do-loop over the sieve bits from start to
+   end inclusive.  The user statements run only for bits that are 0 in
+   sieve, with prime set to id_to_n of the current 1-based index.  Being a
+   do-while, the loop always examines at least one bit.
+   off is subtracted only while locating the limb and bit of the first
+   position (presumably the index of the first bit held in sieve --
+   confirm against callers; every use in this file passes 0).
+
+   LOOP_ON_SIEVE_CONTINUE re-enters the loop with a new end, reusing the
+   cursor variables of an enclosing BEGIN; it leaves an `if' body and a
+   `do' open, which LOOP_ON_SIEVE_STOP closes. */
+#define LOOP_ON_SIEVE_CONTINUE(prime,end,sieve) \
+ __max_i = (end); \
+ \
+ do { \
+ ++__i; \
+ if (((sieve)[__index] & __mask) == 0) \
+ { \
+ (prime) = id_to_n(__i)
+
+#define LOOP_ON_SIEVE_BEGIN(prime,start,end,off,sieve) \
+ do { \
+ mp_limb_t __mask, __index, __max_i, __i; \
+ \
+ __i = (start)-(off); \
+ __index = __i / GMP_LIMB_BITS; \
+ __mask = CNST_LIMB(1) << (__i % GMP_LIMB_BITS); \
+ __i += (off); \
+ \
+ LOOP_ON_SIEVE_CONTINUE(prime,end,sieve)
+
+/* Closes the `if' body and the inner loop.  The mask is rotated left by
+   one bit; when it wraps around to bit 0 the limb index advances. */
+#define LOOP_ON_SIEVE_STOP \
+ } \
+ __mask = __mask << 1 | __mask >> (GMP_LIMB_BITS-1); \
+ __index += __mask & 1; \
+ } while (__i <= __max_i) \
+
+/* Closes both the inner loop and the scope opened by LOOP_ON_SIEVE_BEGIN. */
+#define LOOP_ON_SIEVE_END \
+ LOOP_ON_SIEVE_STOP; \
+ } while (0)
+
+/*********************************************************/
+/* Section sieve: sieving functions and tools for primes */
+/*********************************************************/
+
+#if WANT_ASSERT
+/* Map a sieve bit index to the number it stands for: (3*bit + 4) | 1.
+   Compiled only when assertions are enabled. */
+static mp_limb_t
+bit_to_n (mp_limb_t bit)
+{
+ mp_limb_t n = 3 * bit + 4;
+
+ return n | 1;
+}
+#endif
+
+/* id_to_n (id) = bit_to_n (id-1) = (id*3+1)|1 */
+static mp_limb_t
+id_to_n (mp_limb_t id)
+{
+ mp_limb_t odd = id & 1;
+
+ return id * 3 + 1 + odd;
+}
+
+/* n_to_bit (n) = ((n-1)&(-CNST_LIMB(2)))/3U-1
+   Inverse direction of bit_to_n: the sieve bit index for the number n. */
+static mp_limb_t
+n_to_bit (mp_limb_t n)
+{
+ mp_limb_t t = (n - 5) | 1;
+
+ return t / 3U;
+}
+
+#if WANT_ASSERT
+/* Number of limbs of a sieve whose highest bit index is n_to_bit (n).
+   Compiled only when assertions are enabled. */
+static mp_size_t
+primesieve_size (mp_limb_t n)
+{
+ mp_limb_t last_bit = n_to_bit (n);
+
+ return last_bit / GMP_LIMB_BITS + 1;
+}
+#endif
+
+/*********************************************************/
+/* Section mswing: 2-multiswing factorial */
+/*********************************************************/
+
+/* Returns an approximation of the square root of x.
+   The returned value r satisfies: x <= r^2 < x * 9/4.
+   Requires x > 2. */
+static mp_limb_t
+limb_apprsqrt (mp_limb_t x)
+{
+ int lz, bits;
+ mp_limb_t hi_part, lo_part;
+
+ ASSERT (x > 2);
+ count_leading_zeros (lz, x - 1);
+ /* Index of the highest set bit of x - 1. */
+ bits = GMP_LIMB_BITS - 1 - lz;
+ hi_part = CNST_LIMB(1) << (bits >> 1);
+ lo_part = CNST_LIMB(1) << ((bits - 1) >> 1);
+ return hi_part + lo_part;
+}
+
+/* SWING_A_PRIME (P, N, PR, MAX_PR, VEC, I): multiply into the running
+   product PR one copy of the prime P for every k >= 1 such that
+   floor (N / P^k) is odd.
+   Two variants are kept.  The disabled one counts the exponent first and
+   stores P^exp with FACTOR_LIST_STORE.  The active one flushes PR to VEC
+   (via FACTOR_LIST_APPEND) if it is already above MAX_PR, then multiplies
+   P into PR directly while repeatedly dividing N by P. */
+#if 0
+/* A count-then-exponentiate variant for SWING_A_PRIME */
+#define SWING_A_PRIME(P, N, PR, MAX_PR, VEC, I) \
+ do { \
+ mp_limb_t __q, __prime; \
+ int __exp; \
+ __prime = (P); \
+ __exp = 0; \
+ __q = (N); \
+ do { \
+ __q /= __prime; \
+ __exp += __q & 1; \
+ } while (__q >= __prime); \
+ if (__exp) { /* Store $prime^{exp}$ */ \
+ for (__q = __prime; --__exp; __q *= __prime); \
+ FACTOR_LIST_STORE(__q, PR, MAX_PR, VEC, I); \
+ }; \
+ } while (0)
+#else
+#define SWING_A_PRIME(P, N, PR, MAX_PR, VEC, I) \
+ do { \
+ mp_limb_t __q, __prime; \
+ __prime = (P); \
+ FACTOR_LIST_APPEND(PR, MAX_PR, VEC, I); \
+ __q = (N); \
+ do { \
+ __q /= __prime; \
+ if ((__q & 1) != 0) (PR) *= __prime; \
+ } while (__q >= __prime); \
+ } while (0)
+#endif
+
+/* SH_SWING_A_PRIME (P, N, PR, MAX_PR, VEC, I): single-step variant of
+   SWING_A_PRIME.  Only floor (N / P) is tested: when it is odd, P is
+   folded into the running product with FACTOR_LIST_STORE.  No higher
+   powers of P are considered. */
+#define SH_SWING_A_PRIME(P, N, PR, MAX_PR, VEC, I) \
+ do { \
+ mp_limb_t __prime; \
+ __prime = (P); \
+ if ((((N) / __prime) & 1) != 0) \
+ FACTOR_LIST_STORE(__prime, PR, MAX_PR, VEC, I); \
+ } while (0)
+
+/* mpz_2multiswing_1 computes the odd part of the 2-multiswing
+ factorial of the parameter n. The result x is an odd positive
+ integer so that multiswing(n,2) = x 2^a.
+
+ Uses the algorithm described by Peter Luschny in "Divide, Swing and
+ Conquer the Factorial!".
+
+ The pointer sieve points to primesieve_size(n) limbs containing a
+ bit-array where primes are marked as 0.
+ Enough (FIXME: explain :-) limbs must be pointed by factors; the
+ caller in this file, mpz_oddfac_1, allocates
+ (gmp_primesieve (sieve, n - 1) + 1) / log_n_max (n) + 1 limbs.
+
+ The primes are handled in three ranges:
+ - 3 and the primes up to about sqrt(n): SWING_A_PRIME, which also
+ looks at higher powers of the prime;
+ - the remaining primes up to n/3: SH_SWING_A_PRIME, a single test;
+ - the primes above n/2 up to n: always multiplied in.
+ Primes in (n/3, n/2] are visited by none of the loops.
+ */
+
+static void
+mpz_2multiswing_1 (mpz_ptr x, mp_limb_t n, mp_ptr sieve, mp_ptr factors)
+{
+ mp_limb_t prod, max_prod;
+ mp_size_t j;
+
+ ASSERT (n >= 26);
+
+ j = 0;
+ /* All ones when n is odd, zero otherwise: used as a mask below. */
+ prod = -(n & 1);
+ n &= ~ CNST_LIMB(1); /* n-1, if n is odd */
+
+ prod = (prod & n) + 1; /* the original n, if it was odd, 1 otherwise */
+ max_prod = GMP_NUMB_MAX / (n-1);
+
+ /* Handle prime = 3 separately. */
+ SWING_A_PRIME (3, n, prod, max_prod, factors, j);
+
+ /* Swing primes from 5 to n/3 */
+ {
+ mp_limb_t s;
+
+ {
+ mp_limb_t prime;
+
+ /* First sub-range: primes from 5 up to the approximate square root,
+    whose higher powers may still be <= n. */
+ s = limb_apprsqrt(n);
+ ASSERT (s >= 5);
+ s = n_to_bit (s);
+ LOOP_ON_SIEVE_BEGIN (prime, n_to_bit (5), s, 0,sieve);
+ SWING_A_PRIME (prime, n, prod, max_prod, factors, j);
+ LOOP_ON_SIEVE_END;
+ s++;
+ }
+
+ ASSERT (max_prod <= GMP_NUMB_MAX / 3);
+ ASSERT (bit_to_n (s) * bit_to_n (s) > n);
+ ASSERT (s <= n_to_bit (n / 3));
+ {
+ mp_limb_t prime;
+ /* Primes in this sub-range are at most n/3, so the threshold on the
+    running product can be three times larger. */
+ mp_limb_t l_max_prod = max_prod * 3;
+
+ LOOP_ON_SIEVE_BEGIN (prime, s, n_to_bit (n/3), 0, sieve);
+ SH_SWING_A_PRIME (prime, n, prod, l_max_prod, factors, j);
+ LOOP_ON_SIEVE_END;
+ }
+ }
+
+ /* Store primes from (n+1)/2 to n */
+ {
+ mp_limb_t prime;
+ LOOP_ON_SIEVE_BEGIN (prime, n_to_bit (n >> 1) + 1, n_to_bit (n), 0,sieve);
+ FACTOR_LIST_STORE (prime, prod, max_prod, factors, j);
+ LOOP_ON_SIEVE_END;
+ }
+
+ /* j != 0 means at least one partial product was flushed to factors:
+    append the pending one and multiply them all.  Otherwise the whole
+    result is the single limb prod. */
+ if (LIKELY (j != 0))
+ {
+ factors[j++] = prod;
+ mpz_prodlimbs (x, factors, j);
+ }
+ else
+ {
+ PTR (x)[0] = prod;
+ SIZ (x) = 1;
+ }
+}
+
+#undef SWING_A_PRIME
+#undef SH_SWING_A_PRIME
+#undef LOOP_ON_SIEVE_END
+#undef LOOP_ON_SIEVE_STOP
+#undef LOOP_ON_SIEVE_BEGIN
+#undef LOOP_ON_SIEVE_CONTINUE
+#undef FACTOR_LIST_APPEND
+
+/*********************************************************/
+/* Section oddfac: odd factorial, needed also by binomial*/
+/*********************************************************/
+
+/* FACTORS_PER_LIMB: GMP_NUMB_BITS divided by LOG2C (threshold - 1) + 1,
+   used below to size the factors array in mpz_oddfac_1.  The tuning build
+   uses the compile-time bound FAC_DSC_THRESHOLD_LIMIT in place of
+   FAC_DSC_THRESHOLD.
+   NOTE(review): LOG2C is presumably a compile-time log2, making this the
+   number of factors below the threshold that fit in one limb -- confirm in
+   gmp-impl.h. */
+#if TUNE_PROGRAM_BUILD
+#define FACTORS_PER_LIMB (GMP_NUMB_BITS / (LOG2C(FAC_DSC_THRESHOLD_LIMIT-1)+1))
+#else
+#define FACTORS_PER_LIMB (GMP_NUMB_BITS / (LOG2C(FAC_DSC_THRESHOLD-1)+1))
+#endif
+
+/* mpz_oddfac_1 computes the odd part of the factorial of the
+ parameter n. I.e. n! = x 2^a, where x is the returned value: an
+ odd positive integer.
+
+ If flag != 0 a square is skipped in the DSC part, e.g.
+ if n is odd, n > FAC_DSC_THRESHOLD and flag = 1, x is set to n!!.
+
+ If n is too small, flag is ignored, and an ASSERT can be triggered.
+
+ Structure of the computation:
+ - n within the tables: one or two limbs straight from
+ __gmp_oddfac_table / __gmp_odd2fac_table;
+ - otherwise n is halved s times until it is not above
+ FAC_DSC_THRESHOLD, the odd factorial of that reduced value tn is
+ built by multiplying odd numbers, and then s "swing" steps rebuild
+ the result for n >> (s-1), ..., n >> 1, n.
+
+ TODO: FAC_DSC_THRESHOLD is used here with two different roles:
+ - to decide when prime factorisation is needed,
+ - to stop the recursion, once sieving is done.
+ Maybe two thresholds can do a better job.
+ */
+void
+mpz_oddfac_1 (mpz_ptr x, mp_limb_t n, unsigned flag)
+{
+ ASSERT (n <= GMP_NUMB_MAX);
+ ASSERT (flag == 0 || (flag == 1 && n > ODD_FACTORIAL_TABLE_LIMIT && ABOVE_THRESHOLD (n, FAC_DSC_THRESHOLD)));
+
+ if (n <= ODD_FACTORIAL_TABLE_LIMIT)
+ {
+ /* Single-limb table lookup. */
+ PTR (x)[0] = __gmp_oddfac_table[n];
+ SIZ (x) = 1;
+ }
+ else if (n <= ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 1)
+ {
+ mp_ptr px;
+
+ /* Two-limb product of two table entries.  The size is set to 2
+    without checking the high limb. */
+ px = MPZ_NEWALLOC (x, 2);
+ umul_ppmm (px[1], px[0], __gmp_odd2fac_table[(n - 1) >> 1], __gmp_oddfac_table[n >> 1]);
+ SIZ (x) = 2;
+ }
+ else
+ {
+ unsigned s;
+ mp_ptr factors;
+
+ s = 0;
+ {
+ mp_limb_t tn;
+ mp_limb_t prod, max_prod, i;
+ mp_size_t j;
+ TMP_SDECL;
+
+#if TUNE_PROGRAM_BUILD
+ ASSERT (FAC_DSC_THRESHOLD_LIMIT >= FAC_DSC_THRESHOLD);
+ ASSERT (FAC_DSC_THRESHOLD >= 2 * (ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 2));
+#endif
+
+ /* Compute the number of recursive steps for the DSC algorithm. */
+ for (tn = n; ABOVE_THRESHOLD (tn, FAC_DSC_THRESHOLD); s++)
+ tn >>= 1;
+
+ j = 0;
+
+ TMP_SMARK;
+ factors = TMP_SALLOC_LIMBS (1 + tn / FACTORS_PER_LIMB);
+ ASSERT (tn >= FACTORS_PER_LIMB);
+
+ prod = 1;
+#if TUNE_PROGRAM_BUILD
+ max_prod = GMP_NUMB_MAX / FAC_DSC_THRESHOLD_LIMIT;
+#else
+ max_prod = GMP_NUMB_MAX / FAC_DSC_THRESHOLD;
+#endif
+
+ /* Each pass of the outer loop contributes the odd numbers up to the
+    current tn: one table limb, then every second integer from
+    ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 2 up to tn.  tn is then halved.
+    NOTE(review): this assumes ODD_DOUBLEFACTORIAL_TABLE_LIMIT is odd
+    and ODD_DOUBLEFACTORIAL_TABLE_MAX is the product of the odd numbers
+    up to it -- confirm in the generated tables. */
+ ASSERT (tn > ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 1);
+ do {
+ i = ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 2;
+ factors[j++] = ODD_DOUBLEFACTORIAL_TABLE_MAX;
+ do {
+ FACTOR_LIST_STORE (i, prod, max_prod, factors, j);
+ i += 2;
+ } while (i <= tn);
+ /* The factors of the next pass are at most half as large, so the
+    threshold on the running product is doubled. */
+ max_prod <<= 1;
+ tn >>= 1;
+ } while (tn > ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 1);
+
+ /* Pending product, then the table entries for the final small tn. */
+ factors[j++] = prod;
+ factors[j++] = __gmp_odd2fac_table[(tn - 1) >> 1];
+ factors[j++] = __gmp_oddfac_table[tn >> 1];
+ mpz_prodlimbs (x, factors, j);
+
+ TMP_SFREE;
+ }
+
+ if (s != 0)
+ /* Use the algorithm described by Peter Luschny in "Divide,
+ Swing and Conquer the Factorial!".
+
+ Improvement: there are two temporary buffers, factors and
+ square, that are never used together; with a good estimate
+ of the maximal needed size, they could share a single
+ allocation.
+ */
+ {
+ mpz_t mswing;
+ mp_ptr sieve;
+ mp_size_t size;
+ TMP_DECL;
+
+ TMP_MARK;
+
+ /* flag is unsigned: 0 wraps to UINT_MAX, which s never reaches, so
+    no square is skipped; 1 becomes 0, so the last step (s == 0)
+    skips its square. */
+ flag--;
+ size = n / GMP_NUMB_BITS + 4;
+ ASSERT (primesieve_size (n - 1) <= size - (size / 2 + 1));
+ /* 2-multiswing(n) < 2^(n-1)*sqrt(n/pi) < 2^(n+GMP_NUMB_BITS);
+ one more can be overwritten by mul, another for the sieve */
+ MPZ_TMP_INIT (mswing, size);
+ /* Initialize size, so that ASSERT can check it correctly. */
+ ASSERT_CODE (SIZ (mswing) = 0);
+
+ /* Put the sieve on the second half, it will be overwritten by the last mswing. */
+ sieve = PTR (mswing) + size / 2 + 1;
+
+ /* size is reused: from here it is the limb count of factors. */
+ size = (gmp_primesieve (sieve, n - 1) + 1) / log_n_max (n) + 1;
+
+ factors = TMP_ALLOC_LIMBS (size);
+ do {
+ mp_ptr square, px;
+ mp_size_t nx, ns;
+ mp_limb_t cy;
+ TMP_DECL;
+
+ s--;
+ ASSERT (ABSIZ (mswing) < ALLOC (mswing) / 2); /* Check: sieve has not been overwritten */
+ mpz_2multiswing_1 (mswing, n >> s, sieve, factors);
+
+ /* Inner TMP scope: square is released at the end of each step. */
+ TMP_MARK;
+ nx = SIZ (x);
+ if (s == flag) {
+ /* Skipped square: use x itself in place of x^2. */
+ size = nx;
+ square = TMP_ALLOC_LIMBS (size);
+ MPN_COPY (square, PTR (x), nx);
+ } else {
+ size = nx << 1;
+ square = TMP_ALLOC_LIMBS (size);
+ mpn_sqr (square, PTR (x), nx);
+ /* Drop the top limb of the square when it is zero. */
+ size -= (square[size - 1] == 0);
+ }
+ ns = SIZ (mswing);
+ nx = size + ns;
+ px = MPZ_NEWALLOC (x, nx);
+ ASSERT (ns <= size);
+ cy = mpn_mul (px, square, size, PTR(mswing), ns); /* n!= n$ * floor(n/2)!^2 */
+
+ TMP_FREE;
+ /* cy is the most significant limb of the product: do not count it
+    when it is zero. */
+ SIZ(x) = nx - (cy == 0);
+ } while (s != 0);
+ TMP_FREE;
+ }
+ }
+}
+
+#undef FACTORS_PER_LIMB
+#undef FACTOR_LIST_STORE
i = abs_xsize;
if (GMP_NAIL_BITS == 0)
- {
- /* reverse limb order, and byte swap if necessary */
+ {
+ /* reverse limb order, and byte swap if necessary */
#ifdef _CRAY
- _Pragma ("_CRI ivdep");
+ _Pragma ("_CRI ivdep");
#endif
- do
- {
- bp -= BYTES_PER_MP_LIMB;
- xlimb = *xp;
- HTON_LIMB_STORE ((mp_ptr) bp, xlimb);
- xp++;
- }
- while (--i > 0);
-
- /* strip high zero bytes (without fetching from bp) */
- count_leading_zeros (zeros, xlimb);
- zeros /= 8;
- bp += zeros;
- bytes -= zeros;
- }
+ do
+ {
+ bp -= BYTES_PER_MP_LIMB;
+ xlimb = *xp;
+ HTON_LIMB_STORE ((mp_ptr) bp, xlimb);
+ xp++;
+ }
+ while (--i > 0);
+
+ /* strip high zero bytes (without fetching from bp) */
+ count_leading_zeros (zeros, xlimb);
+ zeros /= 8;
+ bp += zeros;
+ bytes -= zeros;
+ }
else
- {
- mp_limb_t new_xlimb;
- int bits;
- ASSERT_CODE (char *bp_orig = bp - bytes);
-
- ASSERT_ALWAYS (GMP_NUMB_BITS >= 8);
-
- bits = 0;
- xlimb = 0;
- for (;;)
- {
- while (bits >= 8)
- {
- ASSERT (bp > bp_orig);
- *--bp = xlimb & 0xFF;
- xlimb >>= 8;
- bits -= 8;
- }
-
- if (i == 0)
- break;
-
- new_xlimb = *xp++;
- i--;
- ASSERT (bp > bp_orig);
- *--bp = (xlimb | (new_xlimb << bits)) & 0xFF;
- xlimb = new_xlimb >> (8 - bits);
- bits += GMP_NUMB_BITS - 8;
- }
-
- if (bits != 0)
- {
- ASSERT (bp > bp_orig);
- *--bp = xlimb;
- }
-
- ASSERT (bp == bp_orig);
- while (*bp == 0)
- {
- bp++;
- bytes--;
- }
- }
+ {
+ mp_limb_t new_xlimb;
+ int bits;
+ ASSERT_CODE (char *bp_orig = bp - bytes);
+
+ ASSERT_ALWAYS (GMP_NUMB_BITS >= 8);
+
+ bits = 0;
+ xlimb = 0;
+ for (;;)
+ {
+ while (bits >= 8)
+ {
+ ASSERT (bp > bp_orig);
+ *--bp = xlimb & 0xFF;
+ xlimb >>= 8;
+ bits -= 8;
+ }
+
+ if (i == 0)
+ break;
+
+ new_xlimb = *xp++;
+ i--;
+ ASSERT (bp > bp_orig);
+ *--bp = (xlimb | (new_xlimb << bits)) & 0xFF;
+ xlimb = new_xlimb >> (8 - bits);
+ bits += GMP_NUMB_BITS - 8;
+ }
+
+ if (bits != 0)
+ {
+ ASSERT (bp > bp_orig);
+ *--bp = xlimb;
+ }
+
+ ASSERT (bp == bp_orig);
+ while (*bp == 0)
+ {
+ bp++;
+ bytes--;
+ }
+ }
}
/* total bytes to be written */
/* mpz_out_str(stream, base, integer) -- Output to STREAM the multi prec.
integer INTEGER in base BASE.
-Copyright 1991, 1993, 1994, 1996, 2001, 2005 Free Software Foundation, Inc.
+Copyright 1991, 1993, 1994, 1996, 2001, 2005, 2011, 2012 Free Software
+Foundation, Inc.
This file is part of the GNU MP Library.
#include <stdio.h>
#include "gmp.h"
#include "gmp-impl.h"
+#include "longlong.h"
size_t
mpz_out_str (FILE *stream, int base, mpz_srcptr x)
{
mp_ptr xp;
- mp_size_t x_size = x->_mp_size;
+ mp_size_t x_size = SIZ (x);
unsigned char *str;
size_t str_size;
size_t i;
size_t written;
- char *num_to_text;
+ const char *num_to_text;
TMP_DECL;
if (stream == 0)
if (base >= 0)
{
num_to_text = "0123456789abcdefghijklmnopqrstuvwxyz";
- if (base == 0)
+ if (base <= 1)
base = 10;
else if (base > 36)
{
else
{
base = -base;
+ if (base <= 1)
+ base = 10;
+ else if (base > 36)
+ return 0;
num_to_text = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
}
- if (x_size == 0)
- {
- fputc ('0', stream);
- return ferror (stream) ? 0 : 1;
- }
-
written = 0;
if (x_size < 0)
}
TMP_MARK;
- str_size = ((size_t) (x_size * GMP_LIMB_BITS
- * mp_bases[base].chars_per_bit_exactly)) + 3;
- str = (unsigned char *) TMP_ALLOC (str_size);
- /* Move the number to convert into temporary space, since mpn_get_str
- clobbers its argument + needs one extra high limb.... */
- xp = TMP_ALLOC_LIMBS (x_size + 1);
- MPN_COPY (xp, x->_mp_d, x_size);
-
- str_size = mpn_get_str (str, base, xp, x_size);
+ DIGITS_IN_BASE_PER_LIMB (str_size, x_size, base);
+ str_size += 3;
+ str = (unsigned char *) TMP_ALLOC (str_size);
- /* mpn_get_str might make some leading zeros. Skip them. */
- while (*str == 0)
+ xp = PTR (x);
+ if (! POW2_P (base))
{
- str_size--;
- str++;
+ xp = TMP_ALLOC_LIMBS (x_size | 1); /* |1 in case x_size==0 */
+ MPN_COPY (xp, PTR (x), x_size);
}
- /* Translate to printable chars. */
+ str_size = mpn_get_str (str, base, xp, x_size);
+
+ /* Convert result to printable chars. */
for (i = 0; i < str_size; i++)
str[i] = num_to_text[str[i]];
str[str_size] = 0;
Contributed to the GNU project by Torbjorn Granlund.
-Copyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2002, 2005, 2008, 2009
-Free Software Foundation, Inc.
+Copyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2002, 2005, 2008,
+2009, 2011, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"
-#ifdef BERKELEY_MP
-#include "mp.h"
-#endif
/* TODO
#define HANDLE_NEGATIVE_EXPONENT 1
void
-#ifndef BERKELEY_MP
mpz_powm (mpz_ptr r, mpz_srcptr b, mpz_srcptr e, mpz_srcptr m)
-#else /* BERKELEY_MP */
-pow (mpz_srcptr b, mpz_srcptr e, mpz_srcptr m, mpz_ptr r)
-#endif /* BERKELEY_MP */
{
mp_size_t n, nodd, ncnt;
int cnt;
mp_ptr rp, tp;
mp_srcptr bp, ep, mp;
mp_size_t rn, bn, es, en, itch;
+ mpz_t new_b; /* note: value lives long via 'b' */
TMP_DECL;
n = ABSIZ(m);
- if (n == 0)
+ if (UNLIKELY (n == 0))
DIVIDE_BY_ZERO;
mp = PTR(m);
es = SIZ(e);
if (UNLIKELY (es <= 0))
{
- mpz_t new_b;
if (es == 0)
{
/* b^0 mod m, b is anything and m is non-zero.
#if HANDLE_NEGATIVE_EXPONENT
MPZ_TMP_INIT (new_b, n + 1);
- if (! mpz_invert (new_b, b, m))
+ if (UNLIKELY (! mpz_invert (new_b, b, m)))
DIVIDE_BY_ZERO;
b = new_b;
es = -es;
cnt = 0;
if (mp[0] % 2 == 0)
{
- mp_ptr new = TMP_ALLOC_LIMBS (nodd);
+ mp_ptr newmp = TMP_ALLOC_LIMBS (nodd);
count_trailing_zeros (cnt, mp[0]);
- mpn_rshift (new, mp, nodd, cnt);
- nodd -= new[nodd - 1] == 0;
- mp = new;
+ mpn_rshift (newmp, mp, nodd, cnt);
+ nodd -= newmp[nodd - 1] == 0;
+ mp = newmp;
ncnt++;
}
if (bn < ncnt)
{
- mp_ptr new = TMP_ALLOC_LIMBS (ncnt);
- MPN_COPY (new, bp, bn);
- MPN_ZERO (new + bn, ncnt - bn);
- bp = new;
+ mp_ptr newbp = TMP_ALLOC_LIMBS (ncnt);
+ MPN_COPY (newbp, bp, bn);
+ MPN_ZERO (newbp + bn, ncnt - bn);
+ bp = newbp;
}
r2 = tp;
zero:
if (nodd < ncnt)
{
- mp_ptr new = TMP_ALLOC_LIMBS (ncnt);
- MPN_COPY (new, mp, nodd);
- MPN_ZERO (new + nodd, ncnt - nodd);
- mp = new;
+ mp_ptr newmp = TMP_ALLOC_LIMBS (ncnt);
+ MPN_COPY (newmp, mp, nodd);
+ MPN_ZERO (newmp + nodd, ncnt - nodd);
+ mp = newmp;
}
odd_inv_2exp = tp + n;
Contributed to the GNU project by Torbjorn Granlund.
-Copyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2002, 2005, 2008, 2009
-Free Software Foundation, Inc.
+Copyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2002, 2005, 2008, 2009,
+2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
TMP_DECL;
n = ABSIZ(m);
- if (n == 0)
- DIVIDE_BY_ZERO;
mp = PTR(m);
- if (mp[0] % 2 == 0)
+ if (UNLIKELY ((n == 0) || (mp[0] % 2 == 0)))
DIVIDE_BY_ZERO;
es = SIZ(e);
if (UNLIKELY (es <= 0))
{
- mpz_t new_b;
if (es == 0)
{
/* b^0 mod m, b is anything and m is non-zero.
-/* mpz_powm_ui(res,base,exp,mod) -- Set RES to (base**exp) mod MOD.
+/* mpz_powm_ui(r,b,e,m) -- Set R to (B^E) mod M.
-Copyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2002, 2005 Free Software
-Foundation, Inc.
+ Contributed to the GNU project by Torbjorn Granlund.
+
+Copyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2002, 2005, 2008, 2009,
+2011, 2012, 2013 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
#include "gmp-impl.h"
#include "longlong.h"
-/* Compute t = a mod m, a is defined by (ap,an), m is defined by (mp,mn), and
- t is defined by (tp,mn). */
+
+/* This code is very old, and should be rewritten to current GMP standard. It
+ is slower than mpz_powm for large exponents, but also for small exponents
+ when the mod argument is small.
+
+ As an intermediate solution, we now deflect to mpz_powm for exponents >= 20.
+*/
+
+/*
+ b ^ e mod m res
+ 0 0 0 ?
+ 0 e 0 ?
+ 0 0 m ?
+ 0 e m 0
+ b 0 0 ?
+ b e 0 ?
+ b 0 m 1 mod m
+ b e m b^e mod m
+*/
+
static void
-reduce (mp_ptr tp, mp_srcptr ap, mp_size_t an, mp_srcptr mp, mp_size_t mn)
+mod (mp_ptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn, gmp_pi1_t *dinv, mp_ptr tp)
{
mp_ptr qp;
TMP_DECL;
-
TMP_MARK;
- qp = TMP_ALLOC_LIMBS (an - mn + 1);
- mpn_tdiv_qr (qp, tp, 0L, ap, an, mp, mn);
+ qp = tp;
+
+ if (dn == 1)
+ np[0] = mpn_divrem_1 (qp, (mp_size_t) 0, np, nn, dp[0]);
+ else if (dn == 2)
+ mpn_div_qr_2n_pi1 (qp, np, np, nn, dp[1], dp[0], dinv->inv32);
+ else if (BELOW_THRESHOLD (dn, DC_DIV_QR_THRESHOLD) ||
+ BELOW_THRESHOLD (nn - dn, DC_DIV_QR_THRESHOLD))
+ mpn_sbpi1_div_qr (qp, np, nn, dp, dn, dinv->inv32);
+ else if (BELOW_THRESHOLD (dn, MUPI_DIV_QR_THRESHOLD) || /* fast condition */
+ BELOW_THRESHOLD (nn, 2 * MU_DIV_QR_THRESHOLD) || /* fast condition */
+ (double) (2 * (MU_DIV_QR_THRESHOLD - MUPI_DIV_QR_THRESHOLD)) * dn /* slow... */
+ + (double) MUPI_DIV_QR_THRESHOLD * nn > (double) dn * nn) /* ...condition */
+ {
+ mpn_dcpi1_div_qr (qp, np, nn, dp, dn, dinv);
+ }
+ else
+ {
+ /* We need to allocate separate remainder area, since mpn_mu_div_qr does
+ not handle overlap between the numerator and remainder areas.
+ FIXME: Make it handle such overlap. */
+ mp_ptr rp = TMP_ALLOC_LIMBS (dn);
+ mp_size_t itch = mpn_mu_div_qr_itch (nn, dn, 0);
+ mp_ptr scratch = TMP_ALLOC_LIMBS (itch);
+ mpn_mu_div_qr (qp, rp, np, nn, dp, dn, scratch);
+ MPN_COPY (np, rp, dn);
+ }
TMP_FREE;
}
-void
-mpz_powm_ui (mpz_ptr r, mpz_srcptr b, unsigned long int el, mpz_srcptr m)
+/* Compute t = a mod m, a is defined by (ap,an), m is defined by (mp,mn), and
+ t is defined by (tp,mn). */
+static void
+reduce (mp_ptr tp, mp_srcptr ap, mp_size_t an, mp_srcptr mp, mp_size_t mn, gmp_pi1_t *dinv)
{
- mp_ptr xp, tp, qp, mp, bp;
- mp_size_t xn, tn, mn, bn;
- int m_zero_cnt;
- int c;
- mp_limb_t e;
+ mp_ptr rp, scratch;
TMP_DECL;
+ TMP_MARK;
+
+ rp = TMP_ALLOC_LIMBS (an);
+ scratch = TMP_ALLOC_LIMBS (an - mn + 1);
+ MPN_COPY (rp, ap, an);
+ mod (rp, an, mp, mn, dinv, scratch);
+ MPN_COPY (tp, rp, mn);
- mp = PTR(m);
- mn = ABSIZ(m);
- if (mn == 0)
- DIVIDE_BY_ZERO;
+ TMP_FREE;
+}
- if (el == 0)
+void
+mpz_powm_ui (mpz_ptr r, mpz_srcptr b, unsigned long int el, mpz_srcptr m)
+{
+ if (el < 20)
{
- /* Exponent is zero, result is 1 mod MOD, i.e., 1 or 0
- depending on if MOD equals 1. */
- SIZ(r) = (mn == 1 && mp[0] == 1) ? 0 : 1;
- PTR(r)[0] = 1;
- return;
- }
+ mp_ptr xp, tp, mp, bp, scratch;
+ mp_size_t xn, tn, mn, bn;
+ int m_zero_cnt;
+ int c;
+ mp_limb_t e, m2;
+ gmp_pi1_t dinv;
+ TMP_DECL;
- TMP_MARK;
+ mp = PTR(m);
+ mn = ABSIZ(m);
+ if (UNLIKELY (mn == 0))
+ DIVIDE_BY_ZERO;
- /* Normalize m (i.e. make its most significant bit set) as required by
- division functions below. */
- count_leading_zeros (m_zero_cnt, mp[mn - 1]);
- m_zero_cnt -= GMP_NAIL_BITS;
- if (m_zero_cnt != 0)
- {
- mp_ptr new_mp = TMP_ALLOC_LIMBS (mn);
- mpn_lshift (new_mp, mp, mn, m_zero_cnt);
- mp = new_mp;
- }
+ if (el == 0)
+ {
+ /* Exponent is zero, result is 1 mod M, i.e., 1 or 0 depending on if
+ M equals 1. */
+ SIZ(r) = (mn == 1 && mp[0] == 1) ? 0 : 1;
+ PTR(r)[0] = 1;
+ return;
+ }
- bn = ABSIZ(b);
- bp = PTR(b);
- if (bn > mn)
- {
- /* Reduce possibly huge base. Use a function call to reduce, since we
- don't want the quotient allocation to live until function return. */
- mp_ptr new_bp = TMP_ALLOC_LIMBS (mn);
- reduce (new_bp, bp, bn, mp, mn);
- bp = new_bp;
- bn = mn;
- /* Canonicalize the base, since we are potentially going to multiply with
- it quite a few times. */
- MPN_NORMALIZE (bp, bn);
- }
+ TMP_MARK;
- if (bn == 0)
- {
- SIZ(r) = 0;
- TMP_FREE;
- return;
- }
+ /* Normalize m (i.e. make its most significant bit set) as required by
+ division functions below. */
+ count_leading_zeros (m_zero_cnt, mp[mn - 1]);
+ m_zero_cnt -= GMP_NAIL_BITS;
+ if (m_zero_cnt != 0)
+ {
+ mp_ptr new_mp = TMP_ALLOC_LIMBS (mn);
+ mpn_lshift (new_mp, mp, mn, m_zero_cnt);
+ mp = new_mp;
+ }
- tp = TMP_ALLOC_LIMBS (2 * mn + 1);
- xp = TMP_ALLOC_LIMBS (mn);
+ m2 = mn == 1 ? 0 : mp[mn - 2];
+ invert_pi1 (dinv, mp[mn - 1], m2);
- qp = TMP_ALLOC_LIMBS (mn + 1);
+ bn = ABSIZ(b);
+ bp = PTR(b);
+ if (bn > mn)
+ {
+ /* Reduce possibly huge base. Use a function call to reduce, since we
+ don't want the quotient allocation to live until function return. */
+ mp_ptr new_bp = TMP_ALLOC_LIMBS (mn);
+ reduce (new_bp, bp, bn, mp, mn, &dinv);
+ bp = new_bp;
+ bn = mn;
+ /* Canonicalize the base, since we are potentially going to multiply with
+ it quite a few times. */
+ MPN_NORMALIZE (bp, bn);
+ }
- MPN_COPY (xp, bp, bn);
- xn = bn;
+ if (bn == 0)
+ {
+ SIZ(r) = 0;
+ TMP_FREE;
+ return;
+ }
- e = el;
- count_leading_zeros (c, e);
- e = (e << c) << 1; /* shift the exp bits to the left, lose msb */
- c = GMP_LIMB_BITS - 1 - c;
+ tp = TMP_ALLOC_LIMBS (2 * mn + 1);
+ xp = TMP_ALLOC_LIMBS (mn);
+ scratch = TMP_ALLOC_LIMBS (mn + 1);
- /* Main loop. */
+ MPN_COPY (xp, bp, bn);
+ xn = bn;
- /* If m is already normalized (high bit of high limb set), and b is the
- same size, but a bigger value, and e==1, then there's no modular
- reductions done and we can end up with a result out of range at the
- end. */
- if (c == 0)
- {
- if (xn == mn && mpn_cmp (xp, mp, mn) >= 0)
- mpn_sub_n (xp, xp, mp, mn);
- goto finishup;
- }
+ e = el;
+ count_leading_zeros (c, e);
+ e = (e << c) << 1; /* shift the exp bits to the left, lose msb */
+ c = GMP_LIMB_BITS - 1 - c;
- while (c != 0)
- {
- mpn_sqr (tp, xp, xn);
- tn = 2 * xn; tn -= tp[tn - 1] == 0;
- if (tn < mn)
+ if (c == 0)
{
- MPN_COPY (xp, tp, tn);
- xn = tn;
+ /* If m is already normalized (high bit of high limb set), and b is
+ the same size, but a bigger value, and e==1, then no modular
+ reductions are done and we can end up with a result out of
+ range at the end. */
+ if (xn == mn && mpn_cmp (xp, mp, mn) >= 0)
+ mpn_sub_n (xp, xp, mp, mn);
}
else
{
- mpn_tdiv_qr (qp, xp, 0L, tp, tn, mp, mn);
- xn = mn;
+ /* Main loop. */
+ do
+ {
+ mpn_sqr (tp, xp, xn);
+ tn = 2 * xn; tn -= tp[tn - 1] == 0;
+ if (tn < mn)
+ {
+ MPN_COPY (xp, tp, tn);
+ xn = tn;
+ }
+ else
+ {
+ mod (tp, tn, mp, mn, &dinv, scratch);
+ MPN_COPY (xp, tp, mn);
+ xn = mn;
+ }
+
+ if ((mp_limb_signed_t) e < 0)
+ {
+ mpn_mul (tp, xp, xn, bp, bn);
+ tn = xn + bn; tn -= tp[tn - 1] == 0;
+ if (tn < mn)
+ {
+ MPN_COPY (xp, tp, tn);
+ xn = tn;
+ }
+ else
+ {
+ mod (tp, tn, mp, mn, &dinv, scratch);
+ MPN_COPY (xp, tp, mn);
+ xn = mn;
+ }
+ }
+ e <<= 1;
+ c--;
+ }
+ while (c != 0);
}
- if ((mp_limb_signed_t) e < 0)
+ /* We shifted m left m_zero_cnt steps. Adjust the result by reducing it
+ with the original M. */
+ if (m_zero_cnt != 0)
{
- mpn_mul (tp, xp, xn, bp, bn);
- tn = xn + bn; tn -= tp[tn - 1] == 0;
- if (tn < mn)
+ mp_limb_t cy;
+ cy = mpn_lshift (tp, xp, xn, m_zero_cnt);
+ tp[xn] = cy; xn += cy != 0;
+
+ if (xn < mn)
{
- MPN_COPY (xp, tp, tn);
- xn = tn;
+ MPN_COPY (xp, tp, xn);
}
else
{
- mpn_tdiv_qr (qp, xp, 0L, tp, tn, mp, mn);
+ mod (tp, xn, mp, mn, &dinv, scratch);
+ MPN_COPY (xp, tp, mn);
xn = mn;
}
+ mpn_rshift (xp, xp, xn, m_zero_cnt);
}
- e <<= 1;
- c--;
- }
-
- finishup:
- /* We shifted m left m_zero_cnt steps. Adjust the result by reducing
- it with the original MOD. */
- if (m_zero_cnt != 0)
- {
- mp_limb_t cy;
- cy = mpn_lshift (tp, xp, xn, m_zero_cnt);
- tp[xn] = cy; xn += cy != 0;
+ MPN_NORMALIZE (xp, xn);
- if (xn < mn)
+ if ((el & 1) != 0 && SIZ(b) < 0 && xn != 0)
{
- MPN_COPY (xp, tp, xn);
- }
- else
- {
- mpn_tdiv_qr (qp, xp, 0L, tp, xn, mp, mn);
+ mp = PTR(m); /* want original, unnormalized m */
+ mpn_sub (xp, mp, mn, xp, xn);
xn = mn;
+ MPN_NORMALIZE (xp, xn);
}
- mpn_rshift (xp, xp, xn, m_zero_cnt);
- }
- MPN_NORMALIZE (xp, xn);
+ MPZ_REALLOC (r, xn);
+ SIZ (r) = xn;
+ MPN_COPY (PTR(r), xp, xn);
- if ((el & 1) != 0 && SIZ(b) < 0 && xn != 0)
+ TMP_FREE;
+ }
+ else
{
- mp = PTR(m); /* want original, unnormalized m */
- mpn_sub (xp, mp, mn, xp, xn);
- xn = mn;
- MPN_NORMALIZE (xp, xn);
+ /* For large exponents, fake a mpz_t exponent and deflect to the more
+ sophisticated mpz_powm. */
+ mpz_t e;
+ mp_limb_t ep[LIMBS_PER_ULONG];
+ MPZ_FAKE_UI (e, ep, el);
+ mpz_powm (r, b, e, m);
}
- MPZ_REALLOC (r, xn);
- SIZ (r) = xn;
- MPN_COPY (PTR(r), xp, xn);
-
- TMP_FREE;
}
#include "gmp-impl.h"
#include "longlong.h"
-static int isprime __GMP_PROTO ((unsigned long int));
+static int isprime (unsigned long int);
/* MPN_MOD_OR_MODEXACT_1_ODD can be used instead of mpn_mod_1 for the trial
/* Check if n has small factors. */
#if defined (PP_INVERTED)
r = MPN_MOD_OR_PREINV_MOD_1 (PTR(n), (mp_size_t) SIZ(n), (mp_limb_t) PP,
- (mp_limb_t) PP_INVERTED);
+ (mp_limb_t) PP_INVERTED);
#else
r = mpn_mod_1 (PTR(n), (mp_size_t) SIZ(n), (mp_limb_t) PP);
#endif
--- /dev/null
+/* mpz_primorial_ui(RESULT, N) -- Set RESULT to N#, the product of primes <= N.
+
+Contributed to the GNU project by Marco Bodrato.
+
+Copyright 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* TODO: Remove duplicated constants / macros / static functions...
+ */
+
+/*************************************************************/
+/* Section macros: common macros, for swing/fac/bin (&sieve) */
+/*************************************************************/
+
+#define FACTOR_LIST_STORE(P, PR, MAX_PR, VEC, I) \
+ do { \
+ if ((PR) > (MAX_PR)) { \
+ (VEC)[(I)++] = (PR); \
+ (PR) = (P); \
+ } else \
+ (PR) *= (P); \
+ } while (0)
+
+#define LOOP_ON_SIEVE_CONTINUE(prime,end,sieve) \
+ __max_i = (end); \
+ \
+ do { \
+ ++__i; \
+ if (((sieve)[__index] & __mask) == 0) \
+ { \
+ (prime) = id_to_n(__i)
+
+#define LOOP_ON_SIEVE_BEGIN(prime,start,end,off,sieve) \
+ do { \
+ mp_limb_t __mask, __index, __max_i, __i; \
+ \
+ __i = (start)-(off); \
+ __index = __i / GMP_LIMB_BITS; \
+ __mask = CNST_LIMB(1) << (__i % GMP_LIMB_BITS); \
+ __i += (off); \
+ \
+ LOOP_ON_SIEVE_CONTINUE(prime,end,sieve)
+
+#define LOOP_ON_SIEVE_STOP \
+ } \
+ __mask = __mask << 1 | __mask >> (GMP_LIMB_BITS-1); \
+ __index += __mask & 1; \
+ } while (__i <= __max_i) \
+
+#define LOOP_ON_SIEVE_END \
+ LOOP_ON_SIEVE_STOP; \
+ } while (0)
+
+/*********************************************************/
+/* Section sieve: sieving functions and tools for primes */
+/*********************************************************/
+
+#if WANT_ASSERT
+static mp_limb_t
+bit_to_n (mp_limb_t bit) { return (bit*3+4)|1; }
+#endif
+
+/* id_to_n (id) = bit_to_n (id-1) = (id*3+1)|1 */
+static mp_limb_t
+id_to_n (mp_limb_t id) { return id*3+1+(id&1); }
+
+/* n_to_bit (n) = ((n-1)&(-CNST_LIMB(2)))/3U-1 */
+static mp_limb_t
+n_to_bit (mp_limb_t n) { return ((n-5)|1)/3U; }
+
+#if WANT_ASSERT
+static mp_size_t
+primesieve_size (mp_limb_t n) { return n_to_bit(n) / GMP_LIMB_BITS + 1; }
+#endif
+
+/*********************************************************/
+/* Section primorial: implementation */
+/*********************************************************/
+
+void
+mpz_primorial_ui (mpz_ptr x, unsigned long n)
+{
+ static const mp_limb_t table[] = { 1, 1, 2, 6, 6 };
+
+ ASSERT (n <= GMP_NUMB_MAX);
+
+ if (n < numberof (table))
+ {
+ PTR (x)[0] = table[n];
+ SIZ (x) = 1;
+ }
+ else
+ {
+ mp_limb_t *sieve, *factors;
+ mp_size_t size;
+ mp_limb_t prod;
+ mp_limb_t j;
+ TMP_DECL;
+
+ size = 1 + n / GMP_NUMB_BITS + n / (2*GMP_NUMB_BITS);
+ ASSERT (size >= primesieve_size (n));
+ sieve = MPZ_REALLOC (x, size);
+ size = (gmp_primesieve (sieve, n) + 1) / log_n_max (n) + 1;
+
+ TMP_MARK;
+ factors = TMP_ALLOC_LIMBS (size);
+
+ j = 0;
+
+ prod = table[numberof (table)-1];
+
+ /* Store primes from 5 to n */
+ {
+ mp_limb_t prime, max_prod;
+
+ max_prod = GMP_NUMB_MAX / n;
+
+ LOOP_ON_SIEVE_BEGIN (prime, n_to_bit(numberof (table)), n_to_bit (n), 0, sieve);
+ FACTOR_LIST_STORE (prime, prod, max_prod, factors, j);
+ LOOP_ON_SIEVE_END;
+ }
+
+ if (j != 0)
+ {
+ factors[j++] = prod;
+ mpz_prodlimbs (x, factors, j);
+ }
+ else
+ {
+ PTR (x)[0] = prod;
+ SIZ (x) = 1;
+ }
+
+ TMP_FREE;
+ }
+}
--- /dev/null
+/* mpz_prodlimbs(RESULT, V, LEN) -- Set RESULT to V[0]*V[1]*...*V[LEN-1].
+
+Contributed to the GNU project by Marco Bodrato.
+
+THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.
+IT IS ONLY SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.
+IN FACT, IT IS ALMOST GUARANTEED THAT IT WILL CHANGE OR
+DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2010, 2011, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/*********************************************************/
+/* Section list-prod: product of a list -> mpz_t */
+/*********************************************************/
+
+/* FIXME: should be tuned */
+#ifndef RECURSIVE_PROD_THRESHOLD
+#define RECURSIVE_PROD_THRESHOLD (MUL_TOOM22_THRESHOLD)
+#endif
+
+/* Computes the product of the j>1 limbs pointed to by factors, puts the
+ * result in x. It assumes that all limbs are non-zero. Above
+ * Karatsuba's threshold it uses a binary splitting strategy, to gain
+ * speed by the asymptotically fast multiplication algorithms.
+ *
+ * The list in {factors, j} is overwritten.
+ * Returns the size of the result
+ */
+
+mp_size_t
+mpz_prodlimbs (mpz_ptr x, mp_ptr factors, mp_size_t j)
+{
+ mp_limb_t cy;
+ mp_size_t size, i;
+ mp_ptr prod;
+
+ ASSERT (j > 1);
+ ASSERT (RECURSIVE_PROD_THRESHOLD > 3);
+
+ if (BELOW_THRESHOLD (j, RECURSIVE_PROD_THRESHOLD)) {
+ j--;
+ size = 1;
+
+ for (i = 1; i < j; i++)
+ {
+ cy = mpn_mul_1 (factors, factors, size, factors[i]);
+ factors[size] = cy;
+ size += cy != 0;
+ };
+
+ prod = MPZ_NEWALLOC (x, size + 1);
+
+ cy = mpn_mul_1 (prod, factors, size, factors[i]);
+ prod[size] = cy;
+ return SIZ (x) = size + (cy != 0);
+ } else {
+ mpz_t x1, x2;
+ TMP_DECL;
+
+ i = j >> 1;
+ j -= i;
+ TMP_MARK;
+
+ MPZ_TMP_INIT (x2, j);
+
+ PTR (x1) = factors + i;
+ ALLOC (x1) = j;
+ j = mpz_prodlimbs (x2, factors + i, j);
+ i = mpz_prodlimbs (x1, factors, i);
+ size = i + j;
+ prod = MPZ_NEWALLOC (x, size);
+ if (i >= j)
+ cy = mpn_mul (prod, PTR(x1), i, PTR(x2), j);
+ else
+ cy = mpn_mul (prod, PTR(x2), j, PTR(x1), i);
+ TMP_FREE;
+
+ return SIZ (x) = size - (cy == 0);
+ }
+}
long runs of consecutive ones and zeros in the binary representation.
Meant for testing of other MP routines.
-Copyright 1991, 1993, 1994, 1996, 2001 Free Software Foundation, Inc.
+Copyright 1991, 1993, 1994, 1996, 2001, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
mpz_random2 (mpz_ptr x, mp_size_t size)
{
mp_size_t abs_size;
+ mp_ptr xp;
abs_size = ABS (size);
if (abs_size != 0)
{
- if (x->_mp_alloc < abs_size)
- _mpz_realloc (x, abs_size);
+ xp = MPZ_REALLOC (x, abs_size);
- mpn_random2 (x->_mp_d, abs_size);
+ mpn_random2 (xp, abs_size);
}
- x->_mp_size = size;
+ SIZ (x) = size;
}
/* mpz_remove -- divide out a factor and return its multiplicity.
-Copyright 1998, 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
+Copyright 1998, 1999, 2000, 2001, 2002, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
mp_bitcnt_t
mpz_remove (mpz_ptr dest, mpz_srcptr src, mpz_srcptr f)
{
- mpz_t fpow[GMP_LIMB_BITS]; /* Really MP_SIZE_T_BITS */
- mpz_t x, rem;
mp_bitcnt_t pwr;
- int p;
-
- if (mpz_cmp_ui (f, 1) <= 0)
- DIVIDE_BY_ZERO;
-
- if (SIZ (src) == 0)
+ mp_srcptr fp;
+ mp_size_t sn, fn, afn;
+ mp_limb_t fp0;
+
+ sn = SIZ (src);
+ fn = SIZ (f);
+ fp = PTR (f);
+ afn = ABS (fn);
+ fp0 = fp[0];
+
+ if (UNLIKELY ((afn <= (fp0 == 1)) /* mpz_cmpabs_ui (f, 1) <= 0 */
+ | (sn == 0)))
{
- if (src != dest)
- mpz_set (dest, src);
+ /* f = 0 or f = +- 1 or src = 0 */
+ if (afn == 0)
+ DIVIDE_BY_ZERO;
+ mpz_set (dest, src);
return 0;
}
- if (mpz_cmp_ui (f, 2) == 0)
- {
- mp_bitcnt_t s0;
- s0 = mpz_scan1 (src, 0);
- mpz_div_2exp (dest, src, s0);
- return s0;
- }
-
- /* We could perhaps compute mpz_scan1(src,0)/mpz_scan1(f,0). It is an
- upper bound of the result we're seeking. We could also shift down the
- operands so that they become odd, to make intermediate values smaller. */
+ if ((fp0 & 1) != 0)
+ { /* f is odd */
+ mp_ptr dp;
+ mp_size_t dn;
- mpz_init (rem);
- mpz_init (x);
+ dn = ABS (sn);
+ dp = MPZ_REALLOC (dest, dn);
- pwr = 0;
- mpz_init (fpow[0]);
- mpz_set (fpow[0], f);
- mpz_set (dest, src);
+ pwr = mpn_remove (dp, &dn, PTR(src), dn, PTR(f), afn, ~(mp_bitcnt_t) 0);
- /* Divide by f, f^2, ..., f^(2^k) until we get a remainder for f^(2^k). */
- for (p = 0;; p++)
- {
- mpz_tdiv_qr (x, rem, dest, fpow[p]);
- if (SIZ (rem) != 0)
- break;
- mpz_init (fpow[p + 1]);
- mpz_mul (fpow[p + 1], fpow[p], fpow[p]);
- mpz_set (dest, x);
+ SIZ (dest) = ((pwr & (fn < 0)) ^ (sn < 0)) ? -dn : dn;
}
+ else if (afn == (fp0 == 2))
+ { /* mpz_cmpabs_ui (f, 2) == 0 */
+ pwr = mpz_scan1 (src, 0);
+ mpz_div_2exp (dest, src, pwr);
+ if (pwr & (fn < 0)) /*((pwr % 2 == 1) && (SIZ (f) < 0))*/
+ mpz_neg (dest, dest);
+ }
+ else
+ { /* f != +-2 */
+ mpz_t fpow[GMP_LIMB_BITS]; /* Really MP_SIZE_T_BITS */
+ mpz_t x, rem;
+ int p;
+
+ /* We could perhaps compute mpz_scan1(src,0)/mpz_scan1(f,0). It is an
+ upper bound of the result we're seeking. We could also shift down the
+ operands so that they become odd, to make intermediate values
+ smaller. */
+
+ mpz_init (rem);
+ mpz_init (x);
+
+ pwr = 0;
+ mpz_init_set (fpow[0], f);
+ mpz_set (dest, src);
+
+ /* Divide by f, f^2 ... f^(2^k) until we get a remainder for f^(2^k). */
+ for (p = 0;; p++)
+ {
+ mpz_tdiv_qr (x, rem, dest, fpow[p]);
+ if (SIZ (rem) != 0)
+ break;
+ mpz_init (fpow[p + 1]);
+ mpz_mul (fpow[p + 1], fpow[p], fpow[p]);
+ mpz_set (dest, x);
+ }
- pwr = (1L << p) - 1;
+ pwr = ((mp_bitcnt_t)1 << p) - 1;
- mpz_clear (fpow[p]);
+ mpz_clear (fpow[p]);
- /* Divide by f^(2^(k-1)), f^(2^(k-2)), ..., f for all divisors that give a
- zero remainder. */
- while (--p >= 0)
- {
- mpz_tdiv_qr (x, rem, dest, fpow[p]);
- if (SIZ (rem) == 0)
+ /* Divide by f^(2^(k-1)), f^(2^(k-2)), ..., f for all divisors that give
+ a zero remainder. */
+ while (--p >= 0)
{
- pwr += 1L << p;
- mpz_set (dest, x);
+ mpz_tdiv_qr (x, rem, dest, fpow[p]);
+ if (SIZ (rem) == 0)
+ {
+ pwr += (mp_bitcnt_t)1 << p;
+ mpz_set (dest, x);
+ }
+ mpz_clear (fpow[p]);
}
- mpz_clear (fpow[p]);
+
+ mpz_clear (x);
+ mpz_clear (rem);
}
- mpz_clear (x);
- mpz_clear (rem);
return pwr;
}
/* mpz_root(root, u, nth) -- Set ROOT to floor(U^(1/nth)).
Return an indication if the result is exact.
-Copyright 1999, 2000, 2001, 2002, 2003, 2005 Free Software Foundation, Inc.
+Copyright 1999, 2000, 2001, 2002, 2003, 2005, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
us = SIZ(u);
/* even roots of negatives provoke an exception */
- if (us < 0 && (nth & 1) == 0)
+ if (UNLIKELY (us < 0 && (nth & 1) == 0))
SQRT_OF_NEGATIVE;
/* root extraction interpreted as c^(1/nth) means a zeroth root should
provoke a divide by zero, do this even if c==0 */
- if (nth == 0)
+ if (UNLIKELY (nth == 0))
DIVIDE_BY_ZERO;
if (us == 0)
-/* mpz_rootrem(root, rem, u, nth) -- Set ROOT to floor(U^(1/nth)) and
+/* mpz_rootrem(root, rem, u, nth) -- Set ROOT to trunc(U^(1/nth)) and
set REM to the remainder.
-Copyright 1999, 2000, 2001, 2002, 2003, 2005 Free Software Foundation, Inc.
+Copyright 1999, 2000, 2001, 2002, 2003, 2005, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
us = SIZ(u);
/* even roots of negatives provoke an exception */
- if (us < 0 && (nth & 1) == 0)
+ if (UNLIKELY (us < 0 && (nth & 1) == 0))
SQRT_OF_NEGATIVE;
/* root extraction interpreted as c^(1/nth) means a zeroth root should
provoke a divide by zero, do this even if c==0 */
- if (nth == 0)
+ if (UNLIKELY (nth == 0))
DIVIDE_BY_ZERO;
if (us == 0)
SIZ(root) = us >= 0 ? rootn : -rootn;
if (u == root)
MPN_COPY (up, rootp, rootn);
- else if (u == rem)
- MPN_COPY (up, remp, remn);
}
- SIZ(rem) = remn;
+ if (u == rem)
+ MPN_COPY (up, remp, remn);
+ SIZ(rem) = us >= 0 ? remn : -remn;
TMP_FREE;
}
long runs of consecutive ones and zeros in the binary representation.
Meant for testing of other MP routines.
-Copyright 2000, 2001, 2002, 2004 Free Software Foundation, Inc.
+Copyright 2000, 2001, 2002, 2004, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
#include "gmp.h"
#include "gmp-impl.h"
-static void gmp_rrandomb __GMP_PROTO ((mp_ptr, gmp_randstate_t, mp_bitcnt_t));
+static void gmp_rrandomb (mp_ptr, gmp_randstate_t, mp_bitcnt_t);
void
mpz_rrandomb (mpz_ptr x, gmp_randstate_t rstate, mp_bitcnt_t nbits)
{
mp_size_t nl;
+ mp_ptr xp;
nl = (nbits + GMP_NUMB_BITS - 1) / GMP_NUMB_BITS;
if (nbits != 0)
{
- MPZ_REALLOC (x, nl);
- gmp_rrandomb (PTR(x), rstate, nbits);
+ xp = MPZ_REALLOC (x, nl);
+ gmp_rrandomb (xp, rstate, nbits);
}
SIZ(x) = nl;
mp_srcptr u_ptr = PTR(u);
mp_size_t size = SIZ(u);
mp_size_t abs_size = ABS(size);
- mp_srcptr u_end = u_ptr + abs_size;
+ mp_srcptr u_end = u_ptr + abs_size - 1;
mp_size_t starting_limb = starting_bit / GMP_NUMB_BITS;
mp_srcptr p = u_ptr + starting_limb;
mp_limb_t limb;
if (starting_limb >= abs_size)
return (size >= 0 ? ~(mp_bitcnt_t) 0 : starting_bit);
+ /* This is an important case, where sign is not relevant! */
+ if (starting_bit == 0)
+ goto short_cut;
+
limb = *p;
if (size >= 0)
{
/* If it's the high limb which is zero after masking, then there's
no 1 bits after starting_bit. */
- p++;
if (p == u_end)
return ~(mp_bitcnt_t) 0;
/* Otherwise search further for a non-zero limb. The high limb is
non-zero, if nothing else. */
- for (;;)
+ search_nonzero:
+ do
{
- limb = *p;
- if (limb != 0)
- break;
+ ASSERT (p != u_end);
p++;
- ASSERT (p < u_end);
+ short_cut:
+ limb = *p;
}
+ while (limb == 0);
}
}
else
{
- mp_srcptr q;
-
/* If there's a non-zero limb before ours then we're in the ones
- complement region. Search from *(p-1) downwards since that might
- give better cache locality, and since a non-zero in the middle of a
- number is perhaps a touch more likely than at the end. */
- q = p;
- while (q != u_ptr)
- {
- q--;
- if (*q != 0)
- goto inverted;
- }
-
- if (limb == 0)
- {
- /* Skip zero limbs, to find the start of twos complement. The
- high limb is non-zero, if nothing else. This search is
- necessary so the -limb is applied at the right spot. */
- do
- {
- p++;
- ASSERT (p < u_end);
- limb = *p;
- }
- while (limb == 0);
-
- /* Apply twos complement, and look for a 1 bit in that. Since
- limb!=0 here, also have (-limb)!=0 so there's certainly a 1
- bit. */
- limb = -limb;
- goto got_limb;
- }
+ complement region. */
+ if (mpn_zero_p (u_ptr, starting_limb)) {
+ if (limb == 0)
+ /* Seek the first non-zero bit; it is the same for u and -u. */
+ goto search_nonzero;
- /* Adjust so ~limb implied by searching for 0 bit becomes -limb. */
- limb--;
+ /* Adjust so ~limb implied by searching for 0 bit becomes -limb. */
+ limb--;
+ }
- inverted:
/* Now seeking a 0 bit. */
/* Mask to 1 all bits before starting_bit, thus ignoring them. */
then the zero immediately past the end is the result. */
while (limb == GMP_NUMB_MAX)
{
- p++;
if (p == u_end)
return (mp_bitcnt_t) abs_size * GMP_NUMB_BITS;
+ p++;
limb = *p;
}
limb = ~limb;
}
- got_limb:
ASSERT (limb != 0);
count_trailing_zeros (cnt, limb);
return (mp_bitcnt_t) (p - u_ptr) * GMP_NUMB_BITS + cnt;
/* mpz_set (dest_integer, src_integer) -- Assign DEST_INTEGER from SRC_INTEGER.
-Copyright 1991, 1993, 1994, 1995, 2000 Free Software Foundation, Inc.
+Copyright 1991, 1993, 1994, 1995, 2000, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
#include "gmp-impl.h"
-#ifdef BERKELEY_MP
-#include "mp.h"
-#define FUNCTION move
-#define ARGUMENTS mpz_srcptr u, mpz_ptr w
-
-#else
-#define FUNCTION mpz_set
-#define ARGUMENTS mpz_ptr w, mpz_srcptr u
-
-#endif
-
-
void
-FUNCTION (ARGUMENTS)
+mpz_set (mpz_ptr w, mpz_srcptr u)
{
mp_ptr wp, up;
mp_size_t usize, size;
- usize = u->_mp_size;
+ usize = SIZ(u);
size = ABS (usize);
- if (w->_mp_alloc < size)
- _mpz_realloc (w, size);
+ wp = MPZ_REALLOC (w, size);
- wp = w->_mp_d;
- up = u->_mp_d;
+ up = PTR(u);
MPN_COPY (wp, up, size);
- w->_mp_size = usize;
+ SIZ(w) = usize;
}
mp_size_t rn;
DOUBLE_NAN_INF_ACTION (d,
- __gmp_invalid_operation (),
- __gmp_invalid_operation ());
+ __gmp_invalid_operation (),
+ __gmp_invalid_operation ());
negative = d < 0;
d = ABS (d);
/* mpz_set_f (dest_integer, src_float) -- Assign DEST_INTEGER from SRC_FLOAT.
-Copyright 1996, 2001 Free Software Foundation, Inc.
+Copyright 1996, 2001, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
return;
}
- MPZ_REALLOC (w, exp);
- wp = PTR(w);
+ wp = MPZ_REALLOC (w, exp);
up = PTR(u);
size = SIZ (u);
/* mpz_set_si(dest,val) -- Assign DEST with a small value VAL.
-Copyright 1991, 1993, 1994, 1995, 2000, 2001, 2002 Free Software Foundation,
-Inc.
+Copyright 1991, 1993, 1994, 1995, 2000, 2001, 2002, 2012 Free Software
+Foundation, Inc.
This file is part of the GNU MP Library.
vl = (mp_limb_t) ABS_CAST (unsigned long int, val);
- dest->_mp_d[0] = vl & GMP_NUMB_MASK;
+ PTR (dest)[0] = vl & GMP_NUMB_MASK;
size = vl != 0;
#if GMP_NAIL_BITS != 0
if (vl > GMP_NUMB_MAX)
{
MPZ_REALLOC (dest, 2);
- dest->_mp_d[1] = vl >> GMP_NUMB_BITS;
+ PTR (dest)[1] = vl >> GMP_NUMB_BITS;
size = 2;
}
#endif
- dest->_mp_size = val >= 0 ? size : -size;
+ SIZ (dest) = val >= 0 ? size : -size;
}
the base in the C standard way, i.e. 0xhh...h means base 16,
0oo...o means base 8, otherwise assume base 10.
-Copyright 1991, 1993, 1994, 1996, 1997, 1998, 2000, 2001, 2002, 2003, 2005
-Free Software Foundation, Inc.
+Copyright 1991, 1993, 1994, 1996, 1997, 1998, 2000, 2001, 2002, 2003, 2005,
+2011, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
#include <ctype.h>
#include "gmp.h"
#include "gmp-impl.h"
+#include "longlong.h"
-extern const unsigned char __gmp_digit_value_tab[];
#define digit_value_tab __gmp_digit_value_tab
int
/* Make sure the string does not become empty, mpn_set_str would fail. */
if (c == 0)
{
- x->_mp_size = 0;
+ SIZ (x) = 0;
return 0;
}
str_size = s - begs;
- xsize = 2 + (mp_size_t)
- (str_size / (GMP_NUMB_BITS * mp_bases[base].chars_per_bit_exactly));
+ LIMBS_PER_DIGIT_IN_BASE (xsize, str_size, base);
MPZ_REALLOC (x, xsize);
/* Convert the byte array in base BASE to our bignum format. */
- xsize = mpn_set_str (x->_mp_d, (unsigned char *) begs, str_size, base);
- x->_mp_size = negative ? -xsize : xsize;
+ xsize = mpn_set_str (PTR (x), (unsigned char *) begs, str_size, base);
+ SIZ (x) = negative ? -xsize : xsize;
TMP_FREE;
return 0;
/* mpz_set_ui(integer, val) -- Assign INTEGER with a small value VAL.
-Copyright 1991, 1993, 1994, 1995, 2001, 2002, 2004 Free Software Foundation,
-Inc.
+Copyright 1991, 1993, 1994, 1995, 2001, 2002, 2004, 2012 Free Software
+Foundation, Inc.
This file is part of the GNU MP Library.
{
mp_size_t size;
- dest->_mp_d[0] = val & GMP_NUMB_MASK;
+ PTR (dest)[0] = val & GMP_NUMB_MASK;
size = val != 0;
#if BITS_PER_ULONG > GMP_NUMB_BITS /* avoid warnings about shift amount */
if (val > GMP_NUMB_MAX)
{
MPZ_REALLOC (dest, 2);
- dest->_mp_d[1] = val >> GMP_NUMB_BITS;
+ PTR (dest)[1] = val >> GMP_NUMB_BITS;
size = 2;
}
#endif
- dest->_mp_size = size;
+ SIZ (dest) = size;
}
/* mpz_setbit -- set a specified bit.
-Copyright 1991, 1993, 1994, 1995, 1997, 1999, 2001, 2002 Free Software
+Copyright 1991, 1993, 1994, 1995, 1997, 1999, 2001, 2002, 2012 Free Software
Foundation, Inc.
This file is part of the GNU MP Library.
#include "gmp-impl.h"
void
-mpz_setbit (mpz_ptr d, mp_bitcnt_t bit_index)
+mpz_setbit (mpz_ptr d, mp_bitcnt_t bit_idx)
{
- mp_size_t dsize = d->_mp_size;
- mp_ptr dp = d->_mp_d;
- mp_size_t limb_index;
+ mp_size_t dsize = SIZ (d);
+ mp_ptr dp = PTR (d);
+ mp_size_t limb_idx;
+ mp_limb_t mask;
- limb_index = bit_index / GMP_NUMB_BITS;
+ limb_idx = bit_idx / GMP_NUMB_BITS;
+ mask = CNST_LIMB(1) << (bit_idx % GMP_NUMB_BITS);
if (dsize >= 0)
{
- if (limb_index < dsize)
+ if (limb_idx < dsize)
{
- dp[limb_index] |= (mp_limb_t) 1 << (bit_index % GMP_NUMB_BITS);
- d->_mp_size = dsize;
+ dp[limb_idx] |= mask;
}
else
{
/* Ugh. The bit should be set outside of the end of the
number. We have to increase the size of the number. */
- if (UNLIKELY (d->_mp_alloc < limb_index + 1))
- dp = _mpz_realloc (d, limb_index + 1);
- MPN_ZERO (dp + dsize, limb_index - dsize);
- dp[limb_index] = (mp_limb_t) 1 << (bit_index % GMP_NUMB_BITS);
- d->_mp_size = limb_index + 1;
+ dp = MPZ_REALLOC (d, limb_idx + 1);
+ SIZ (d) = limb_idx + 1;
+ MPN_ZERO (dp + dsize, limb_idx - dsize);
+ dp[limb_idx] = mask;
}
}
else
dsize = -dsize;
- /* No upper bound on this loop, we're sure there's a non-zero limb
- sooner ot later. */
- for (zero_bound = 0; ; zero_bound++)
- if (dp[zero_bound] != 0)
- break;
+ /* No index upper bound on this loop, we're sure there's a non-zero limb
+ sooner or later. */
+ zero_bound = 0;
+ while (dp[zero_bound] == 0)
+ zero_bound++;
- if (limb_index > zero_bound)
+ if (limb_idx > zero_bound)
{
- if (limb_index < dsize)
- {
- mp_limb_t dlimb;
- dlimb = dp[limb_index];
- dlimb &= ~((mp_limb_t) 1 << (bit_index % GMP_NUMB_BITS));
- dp[limb_index] = dlimb;
-
- if (UNLIKELY (dlimb == 0 && limb_index == dsize-1))
- {
- /* high limb became zero, must normalize */
- do {
- dsize--;
- } while (dsize > 0 && dp[dsize-1] == 0);
- d->_mp_size = -dsize;
- }
- }
- }
- else if (limb_index == zero_bound)
- {
- dp[limb_index] = ((dp[limb_index] - 1)
- & ~((mp_limb_t) 1 << (bit_index % GMP_NUMB_BITS))) + 1;
- if (dp[limb_index] == 0)
+ if (limb_idx < dsize)
{
- mp_size_t i;
- for (i = limb_index + 1; i < dsize; i++)
+ mp_limb_t dlimb;
+ dlimb = dp[limb_idx] & ~mask;
+ dp[limb_idx] = dlimb;
+
+ if (UNLIKELY (dlimb == 0 && limb_idx == dsize-1))
{
- dp[i] += 1;
- if (dp[i] != 0)
- goto fin;
+ /* high limb became zero, must normalize */
+ do {
+ dsize--;
+ } while (dsize > 0 && dp[dsize-1] == 0);
+ SIZ (d) = -dsize;
}
- /* We got carry all way out beyond the end of D. Increase
- its size (and allocation if necessary). */
- dsize++;
- if (UNLIKELY (d->_mp_alloc < dsize))
- dp = _mpz_realloc (d, dsize);
- dp[i] = 1;
- d->_mp_size = -dsize;
- fin:;
}
}
+ else if (limb_idx == zero_bound)
+ {
+ dp[limb_idx] = ((dp[limb_idx] - 1) & ~mask) + 1;
+ ASSERT (dp[limb_idx] != 0);
+ }
else
{
- mpn_decr_u (dp + limb_index,
- (mp_limb_t) 1 << (bit_index % GMP_NUMB_BITS));
+ MPN_DECR_U (dp + limb_idx, dsize - limb_idx, mask);
dsize -= dp[dsize - 1] == 0;
- d->_mp_size = -dsize;
+ SIZ (d) = -dsize;
}
}
}
/* mpz_sqrt(root, u) -- Set ROOT to floor(sqrt(U)).
-Copyright 1991, 1993, 1994, 1996, 2000, 2001, 2005 Free Software Foundation,
-Inc.
+Copyright 1991, 1993, 1994, 1996, 2000, 2001, 2005, 2012 Free Software
+Foundation, Inc.
This file is part of the GNU MP Library.
{
mp_size_t op_size, root_size;
mp_ptr root_ptr, op_ptr;
- mp_ptr free_me = NULL;
- mp_size_t free_me_size;
- TMP_DECL;
- TMP_MARK;
- op_size = op->_mp_size;
- if (op_size <= 0)
+ op_size = SIZ (op);
+ if (UNLIKELY (op_size <= 0))
{
if (op_size < 0)
- SQRT_OF_NEGATIVE;
+ SQRT_OF_NEGATIVE;
SIZ(root) = 0;
return;
}
/* The size of the root is accurate after this simple calculation. */
root_size = (op_size + 1) / 2;
+ SIZ (root) = root_size;
- root_ptr = root->_mp_d;
- op_ptr = op->_mp_d;
+ op_ptr = PTR (op);
- if (root->_mp_alloc < root_size)
+ if (root == op)
{
- if (root_ptr == op_ptr)
- {
- free_me = root_ptr;
- free_me_size = root->_mp_alloc;
- }
- else
- (*__gmp_free_func) (root_ptr, root->_mp_alloc * BYTES_PER_MP_LIMB);
+ /* Allocate temp space for the root, which we then copy to the
+ shared OP/ROOT variable. */
+ TMP_DECL;
+ TMP_MARK;
- root->_mp_alloc = root_size;
- root_ptr = (mp_ptr) (*__gmp_allocate_func) (root_size * BYTES_PER_MP_LIMB);
- root->_mp_d = root_ptr;
+ root_ptr = TMP_ALLOC_LIMBS (root_size);
+ mpn_sqrtrem (root_ptr, NULL, op_ptr, op_size);
+
+ MPN_COPY (op_ptr, root_ptr, root_size);
+
+ TMP_FREE;
}
else
{
- /* Make OP not overlap with ROOT. */
- if (root_ptr == op_ptr)
- {
- /* ROOT and OP are identical. Allocate temporary space for OP. */
- op_ptr = TMP_ALLOC_LIMBS (op_size);
- /* Copy to the temporary space. Hack: Avoid temporary variable
- by using ROOT_PTR. */
- MPN_COPY (op_ptr, root_ptr, op_size);
- }
- }
+ root_ptr = MPZ_REALLOC (root, root_size);
- mpn_sqrtrem (root_ptr, NULL, op_ptr, op_size);
-
- root->_mp_size = root_size;
-
- if (free_me != NULL)
- (*__gmp_free_func) (free_me, free_me_size * BYTES_PER_MP_LIMB);
- TMP_FREE;
+ mpn_sqrtrem (root_ptr, NULL, op_ptr, op_size);
+ }
}
/* mpz_sqrtrem(root,rem,x) -- Set ROOT to floor(sqrt(X)) and REM
to the remainder, i.e. X - ROOT**2.
-Copyright 1991, 1993, 1994, 1996, 2000, 2001, 2005 Free Software Foundation,
-Inc.
+Copyright 1991, 1993, 1994, 1996, 2000, 2001, 2005, 2011, 2012 Free Software
+Foundation, Inc.
This file is part of the GNU MP Library.
You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
-#include <stdio.h> /* for NULL */
#include "gmp.h"
#include "gmp-impl.h"
-#ifdef BERKELEY_MP
-#include "mp.h"
-#endif
void
-#ifndef BERKELEY_MP
mpz_sqrtrem (mpz_ptr root, mpz_ptr rem, mpz_srcptr op)
-#else /* BERKELEY_MP */
-msqrt (mpz_srcptr op, mpz_ptr root, mpz_ptr rem)
-#endif /* BERKELEY_MP */
{
mp_size_t op_size, root_size, rem_size;
- mp_ptr root_ptr, op_ptr;
- mp_ptr free_me = NULL;
- mp_size_t free_me_size;
- TMP_DECL;
-
- TMP_MARK;
- op_size = op->_mp_size;
- if (op_size <= 0)
+ mp_ptr root_ptr, op_ptr, rem_ptr;
+
+ op_size = SIZ (op);
+ if (UNLIKELY (op_size <= 0))
{
if (op_size < 0)
- SQRT_OF_NEGATIVE;
+ SQRT_OF_NEGATIVE;
SIZ(root) = 0;
SIZ(rem) = 0;
return;
}
- if (rem->_mp_alloc < op_size)
- _mpz_realloc (rem, op_size);
+ rem_ptr = MPZ_REALLOC (rem, op_size);
/* The size of the root is accurate after this simple calculation. */
root_size = (op_size + 1) / 2;
+ SIZ (root) = root_size;
- root_ptr = root->_mp_d;
- op_ptr = op->_mp_d;
+ op_ptr = PTR (op);
- if (root->_mp_alloc < root_size)
+ if (root == op)
{
- if (root_ptr == op_ptr)
- {
- free_me = root_ptr;
- free_me_size = root->_mp_alloc;
- }
- else
- (*__gmp_free_func) (root_ptr, root->_mp_alloc * BYTES_PER_MP_LIMB);
-
- root->_mp_alloc = root_size;
- root_ptr = (mp_ptr) (*__gmp_allocate_func) (root_size * BYTES_PER_MP_LIMB);
- root->_mp_d = root_ptr;
+ /* Allocate temp space for the root, which we then copy to the
+ shared OP/ROOT variable. */
+ TMP_DECL;
+ TMP_MARK;
+
+ root_ptr = TMP_ALLOC_LIMBS (root_size);
+ rem_size = mpn_sqrtrem (root_ptr, rem_ptr, op_ptr, op_size);
+
+ if (rem != root) /* Don't overwrite remainder */
+ MPN_COPY (op_ptr, root_ptr, root_size);
+
+ TMP_FREE;
}
else
{
- /* Make OP not overlap with ROOT. */
- if (root_ptr == op_ptr)
- {
- /* ROOT and OP are identical. Allocate temporary space for OP. */
- op_ptr = TMP_ALLOC_LIMBS (op_size);
- /* Copy to the temporary space. Hack: Avoid temporary variable
- by using ROOT_PTR. */
- MPN_COPY (op_ptr, root_ptr, op_size);
- }
- }
-
- rem_size = mpn_sqrtrem (root_ptr, rem->_mp_d, op_ptr, op_size);
+ root_ptr = MPZ_REALLOC (root, root_size);
- root->_mp_size = root_size;
-
- /* Write remainder size last, to enable us to define this function to
- give only the square root remainder, if the user calls if with
- ROOT == REM. */
- rem->_mp_size = rem_size;
+ rem_size = mpn_sqrtrem (root_ptr, rem_ptr, op_ptr, op_size);
+ }
- if (free_me != NULL)
- (*__gmp_free_func) (free_me, free_me_size * BYTES_PER_MP_LIMB);
- TMP_FREE;
+ /* Write remainder size last, to make this function give only the square root
+ remainder, when passed ROOT == REM. */
+ SIZ (rem) = rem_size;
}
/* mpz_swap (dest_integer, src_integer) -- Swap U and V.
-Copyright 1997, 1998, 2001 Free Software Foundation, Inc.
+Copyright 1997, 1998, 2001, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
mp_size_t usize, vsize;
mp_size_t ualloc, valloc;
- ualloc = u->_mp_alloc;
- valloc = v->_mp_alloc;
- v->_mp_alloc = ualloc;
- u->_mp_alloc = valloc;
+ ualloc = ALLOC (u);
+ valloc = ALLOC (v);
+ ALLOC (v) = ualloc;
+ ALLOC (u) = valloc;
- usize = u->_mp_size;
- vsize = v->_mp_size;
- v->_mp_size = usize;
- u->_mp_size = vsize;
+ usize = SIZ (u);
+ vsize = SIZ (v);
+ SIZ (v) = usize;
+ SIZ (u) = vsize;
- up = u->_mp_d;
- vp = v->_mp_d;
- v->_mp_d = up;
- u->_mp_d = vp;
+ up = PTR (u);
+ vp = PTR (v);
+ PTR (v) = up;
+ PTR (u) = vp;
}
/* mpz_tdiv_q -- divide two integers and produce a quotient.
-Copyright 1991, 1993, 1994, 1996, 2000, 2001, 2005, 2010 Free Software Foundation,
-Inc.
+Copyright 1991, 1993, 1994, 1996, 2000, 2001, 2005, 2010, 2012 Free Software
+Foundation, Inc.
This file is part of the GNU MP Library.
dl = ABS (ds);
ql = nl - dl + 1;
- if (dl == 0)
+ if (UNLIKELY (dl == 0))
DIVIDE_BY_ZERO;
if (ql <= 0)
return;
}
- MPZ_REALLOC (quot, ql);
+ qp = MPZ_REALLOC (quot, ql);
TMP_MARK;
- qp = PTR (quot);
np = PTR (num);
dp = PTR (den);
/* mpz_tdiv_q_2exp -- Divide an integer by 2**CNT. Round the quotient
towards zero.
-Copyright 1991, 1993, 1994, 1996, 2001, 2002 Free Software Foundation, Inc.
+Copyright 1991, 1993, 1994, 1996, 2001, 2002, 2012 Free Software Foundation,
+Inc.
This file is part of the GNU MP Library.
#include "gmp-impl.h"
void
-mpz_tdiv_q_2exp (mpz_ptr w, mpz_srcptr u, mp_bitcnt_t cnt)
+mpz_tdiv_q_2exp (mpz_ptr r, mpz_srcptr u, mp_bitcnt_t cnt)
{
- mp_size_t usize, wsize;
+ mp_size_t un, rn;
mp_size_t limb_cnt;
+ mp_ptr rp;
+ mp_srcptr up;
- usize = u->_mp_size;
+ un = SIZ(u);
limb_cnt = cnt / GMP_NUMB_BITS;
- wsize = ABS (usize) - limb_cnt;
- if (wsize <= 0)
- w->_mp_size = 0;
+ rn = ABS (un) - limb_cnt;
+
+ if (rn <= 0)
+ rn = 0;
else
{
- mp_ptr wp;
- mp_srcptr up;
-
- if (w->_mp_alloc < wsize)
- _mpz_realloc (w, wsize);
-
- wp = w->_mp_d;
- up = u->_mp_d;
+ rp = MPZ_REALLOC (r, rn);
+ up = PTR(u) + limb_cnt;
cnt %= GMP_NUMB_BITS;
if (cnt != 0)
{
- mpn_rshift (wp, up + limb_cnt, wsize, cnt);
- wsize -= wp[wsize - 1] == 0;
+ mpn_rshift (rp, up, rn, cnt);
+ rn -= rp[rn - 1] == 0;
}
else
{
- MPN_COPY_INCR (wp, up + limb_cnt, wsize);
+ MPN_COPY_INCR (rp, up, rn);
}
-
- w->_mp_size = usize >= 0 ? wsize : -wsize;
}
+
+ SIZ(r) = un >= 0 ? rn : -rn;
}
/* mpz_tdiv_q_ui(quot, dividend, divisor_limb)
-- Divide DIVIDEND by DIVISOR_LIMB and store the result in QUOT.
-Copyright 1991, 1993, 1994, 1996, 1998, 2001, 2002, 2004 Free Software
+Copyright 1991, 1993, 1994, 1996, 1998, 2001, 2002, 2004, 2012 Free Software
Foundation, Inc.
This file is part of the GNU MP Library.
mp_ptr np, qp;
mp_limb_t rl;
- if (divisor == 0)
+ if (UNLIKELY (divisor == 0))
DIVIDE_BY_ZERO;
ns = SIZ(dividend);
}
nn = ABS(ns);
- MPZ_REALLOC (quot, nn);
- qp = PTR(quot);
+ qp = MPZ_REALLOC (quot, nn);
np = PTR(dividend);
#if BITS_PER_ULONG > GMP_NUMB_BITS /* avoid warnings about shift amount */
/* mpz_tdiv_qr(quot,rem,dividend,divisor) -- Set QUOT to DIVIDEND/DIVISOR,
and REM to DIVIDEND mod DIVISOR.
-Copyright 1991, 1993, 1994, 2000, 2001, 2005 Free Software Foundation, Inc.
+Copyright 1991, 1993, 1994, 2000, 2001, 2005, 2011, 2012 Free Software
+Foundation, Inc.
This file is part of the GNU MP Library.
#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"
-#ifdef BERKELEY_MP
-#include "mp.h"
-#endif
void
-#ifndef BERKELEY_MP
mpz_tdiv_qr (mpz_ptr quot, mpz_ptr rem, mpz_srcptr num, mpz_srcptr den)
-#else /* BERKELEY_MP */
-mdiv (mpz_srcptr num, mpz_srcptr den, mpz_ptr quot, mpz_ptr rem)
-#endif /* BERKELEY_MP */
{
mp_size_t ql;
mp_size_t ns, ds, nl, dl;
dl = ABS (ds);
ql = nl - dl + 1;
- if (dl == 0)
+ if (UNLIKELY (dl == 0))
DIVIDE_BY_ZERO;
- MPZ_REALLOC (rem, dl);
+ rp = MPZ_REALLOC (rem, dl);
if (ql <= 0)
{
if (num != rem)
{
- mp_ptr np, rp;
np = PTR (num);
- rp = PTR (rem);
MPN_COPY (rp, np, nl);
SIZ (rem) = SIZ (num);
}
return;
}
- MPZ_REALLOC (quot, ql);
+ qp = MPZ_REALLOC (quot, ql);
TMP_MARK;
- qp = PTR (quot);
- rp = PTR (rem);
np = PTR (num);
dp = PTR (den);
Set QUOT to DIVIDEND / SHORT_DIVISOR
and REM to DIVIDEND mod SHORT_DIVISOR.
-Copyright 1991, 1993, 1994, 1996, 1998, 2001, 2002, 2004 Free Software
+Copyright 1991, 1993, 1994, 1996, 1998, 2001, 2002, 2004, 2012 Free Software
Foundation, Inc.
This file is part of the GNU MP Library.
mp_ptr np, qp;
mp_limb_t rl;
- if (divisor == 0)
+ if (UNLIKELY (divisor == 0))
DIVIDE_BY_ZERO;
ns = SIZ(dividend);
}
nn = ABS(ns);
- MPZ_REALLOC (quot, nn);
- qp = PTR(quot);
+ qp = MPZ_REALLOC (quot, nn);
np = PTR(dividend);
#if BITS_PER_ULONG > GMP_NUMB_BITS /* avoid warnings about shift amount */
return rl;
}
- MPZ_REALLOC (rem, 2);
- rp = PTR(rem);
+ rp = MPZ_REALLOC (rem, 2);
dp[0] = divisor & GMP_NUMB_MASK;
dp[1] = divisor >> GMP_NUMB_BITS;
/* mpz_tdiv_r(rem, dividend, divisor) -- Set REM to DIVIDEND mod DIVISOR.
-Copyright 1991, 1993, 1994, 2000, 2001, 2005 Free Software Foundation, Inc.
+Copyright 1991, 1993, 1994, 2000, 2001, 2005, 2012 Free Software Foundation,
+Inc.
This file is part of the GNU MP Library.
dl = ABS (ds);
ql = nl - dl + 1;
- if (dl == 0)
+ if (UNLIKELY (dl == 0))
DIVIDE_BY_ZERO;
- MPZ_REALLOC (rem, dl);
+ rp = MPZ_REALLOC (rem, dl);
if (ql <= 0)
{
if (num != rem)
{
- mp_ptr np, rp;
np = PTR (num);
- rp = PTR (rem);
MPN_COPY (rp, np, nl);
SIZ (rem) = SIZ (num);
}
TMP_MARK;
qp = TMP_ALLOC_LIMBS (ql);
- rp = PTR (rem);
np = PTR (num);
dp = PTR (den);
-/* mpz_tdiv_r_2exp -- Divide a integer by 2**CNT and produce a remainder.
+/* mpz_tdiv_r_2exp -- Divide an integer by 2**CNT and produce a remainder.
-Copyright 1991, 1993, 1994, 1995, 2001, 2002 Free Software Foundation, Inc.
+Copyright 1991, 1993, 1994, 1995, 2001, 2002, 2012 Free Software Foundation,
+Inc.
This file is part of the GNU MP Library.
void
mpz_tdiv_r_2exp (mpz_ptr res, mpz_srcptr in, mp_bitcnt_t cnt)
{
- mp_size_t in_size = ABS (in->_mp_size);
+ mp_size_t in_size = ABSIZ (in);
mp_size_t res_size;
mp_size_t limb_cnt = cnt / GMP_NUMB_BITS;
- mp_srcptr in_ptr = in->_mp_d;
+ mp_srcptr in_ptr = PTR (in);
if (in_size > limb_cnt)
{
if (x != 0)
{
res_size = limb_cnt + 1;
- if (res->_mp_alloc < res_size)
- _mpz_realloc (res, res_size);
+ MPZ_REALLOC (res, res_size);
- res->_mp_d[limb_cnt] = x;
+ PTR (res)[limb_cnt] = x;
}
else
{
res_size = limb_cnt;
MPN_NORMALIZE (in_ptr, res_size);
- if (res->_mp_alloc < res_size)
- _mpz_realloc (res, res_size);
+ MPZ_REALLOC (res, res_size);
limb_cnt = res_size;
}
/* The input operand is smaller than 2**CNT. We perform a no-op,
apart from that we might need to copy IN to RES. */
res_size = in_size;
- if (res->_mp_alloc < res_size)
- _mpz_realloc (res, res_size);
+ MPZ_REALLOC (res, res_size);
limb_cnt = res_size;
}
if (res != in)
- MPN_COPY (res->_mp_d, in->_mp_d, limb_cnt);
- res->_mp_size = in->_mp_size >= 0 ? res_size : -res_size;
+ MPN_COPY (PTR (res), PTR (in), limb_cnt);
+ SIZ (res) = SIZ (in) >= 0 ? res_size : -res_size;
}
/* mpz_tdiv_r_ui(rem, dividend, divisor_limb)
-- Set REM to DIVIDEND mod DIVISOR_LIMB.
-Copyright 1991, 1993, 1994, 1996, 1998, 2001, 2002, 2004, 2005 Free Software
-Foundation, Inc.
+Copyright 1991, 1993, 1994, 1996, 1998, 2001, 2002, 2004, 2005, 2012 Free
+Software Foundation, Inc.
This file is part of the GNU MP Library.
mp_ptr np;
mp_limb_t rl;
- if (divisor == 0)
+ if (UNLIKELY (divisor == 0))
DIVIDE_BY_ZERO;
ns = SIZ(dividend);
return rl;
}
- MPZ_REALLOC (rem, 2);
- rp = PTR(rem);
+ rp = MPZ_REALLOC (rem, 2);
TMP_MARK;
dp[0] = divisor & GMP_NUMB_MASK;
/* mpz_tdiv_ui(dividend, divisor_limb) -- Return DIVIDEND mod DIVISOR_LIMB.
-Copyright 1991, 1993, 1994, 1996, 1997, 1998, 2001, 2002, 2004, 2005 Free
-Software Foundation, Inc.
+Copyright 1991, 1993, 1994, 1996, 1997, 1998, 2001, 2002, 2004, 2005, 2012
+Free Software Foundation, Inc.
This file is part of the GNU MP Library.
mp_ptr np;
mp_limb_t rl;
- if (divisor == 0)
+ if (UNLIKELY (divisor == 0))
DIVIDE_BY_ZERO;
ns = SIZ(dividend);
limb = -limb; /* twos complement */
while (p != u_ptr)
- {
- p--;
- if (*p != 0)
- {
- limb--; /* make it a ones complement instead */
- break;
- }
- }
+ {
+ p--;
+ if (*p != 0)
+ {
+ limb--; /* make it a ones complement instead */
+ break;
+ }
+ }
}
return (limb >> (bit_index % GMP_NUMB_BITS)) & 1;
integer in the range 0 to N-1, using STATE as the random state
previously initialized by a call to gmp_randinit().
-Copyright 2000, 2002 Free Software Foundation, Inc.
+Copyright 2000, 2002, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
TMP_DECL;
size = ABSIZ (n);
- if (size == 0)
+ if (UNLIKELY (size == 0))
DIVIDE_BY_ZERO;
nlast = &PTR (n)[size - 1];
/* mpz_xor -- Logical xor.
-Copyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2005 Free Software
-Foundation, Inc.
+Copyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2005, 2012 Free
+Software Foundation, Inc.
This file is part of the GNU MP Library.
mp_size_t op1_size, op2_size;
mp_ptr res_ptr;
mp_size_t res_size, res_alloc;
- mp_size_t i;
TMP_DECL;
TMP_MARK;
if (res_ptr != op1_ptr)
MPN_COPY (res_ptr + op2_size, op1_ptr + op2_size,
op1_size - op2_size);
- for (i = op2_size - 1; i >= 0; i--)
- res_ptr[i] = op1_ptr[i] ^ op2_ptr[i];
+ if (LIKELY (op2_size != 0))
+ mpn_xor_n (res_ptr, op1_ptr, op2_ptr, op2_size);
res_size = op1_size;
}
else
if (res_ptr != op2_ptr)
MPN_COPY (res_ptr + op1_size, op2_ptr + op1_size,
op2_size - op1_size);
- for (i = op1_size - 1; i >= 0; i--)
- res_ptr[i] = op1_ptr[i] ^ op2_ptr[i];
+ if (LIKELY (op1_size != 0))
+ mpn_xor_n (res_ptr, op1_ptr, op2_ptr, op1_size);
res_size = op2_size;
}
{
if (op2_size < 0)
{
- mp_ptr opx;
+ mp_ptr opx, opy;
/* Both operands are negative, the result will be positive.
(-OP1) ^ (-OP2) =
/* Possible optimization: Decrease mpn_sub precision,
as we won't use the entire res of both. */
- opx = TMP_ALLOC_LIMBS (op1_size);
+ TMP_ALLOC_LIMBS_2 (opx, op1_size, opy, op2_size);
mpn_sub_1 (opx, op1_ptr, op1_size, (mp_limb_t) 1);
op1_ptr = opx;
- opx = TMP_ALLOC_LIMBS (op2_size);
- mpn_sub_1 (opx, op2_ptr, op2_size, (mp_limb_t) 1);
- op2_ptr = opx;
-
- res_alloc = MAX (op1_size, op2_size);
- if (ALLOC(res) < res_alloc)
- {
- _mpz_realloc (res, res_alloc);
- res_ptr = PTR(res);
- /* op1_ptr and op2_ptr point to temporary space. */
- }
+ mpn_sub_1 (opy, op2_ptr, op2_size, (mp_limb_t) 1);
+ op2_ptr = opy;
if (op1_size > op2_size)
- {
- MPN_COPY (res_ptr + op2_size, op1_ptr + op2_size,
- op1_size - op2_size);
- for (i = op2_size - 1; i >= 0; i--)
- res_ptr[i] = op1_ptr[i] ^ op2_ptr[i];
- res_size = op1_size;
- }
- else
- {
- MPN_COPY (res_ptr + op1_size, op2_ptr + op1_size,
- op2_size - op1_size);
- for (i = op1_size - 1; i >= 0; i--)
- res_ptr[i] = op1_ptr[i] ^ op2_ptr[i];
- res_size = op2_size;
- }
+ MPN_SRCPTR_SWAP (op1_ptr,op1_size, op2_ptr,op2_size);
+
+ res_alloc = op2_size;
+ res_ptr = MPZ_REALLOC (res, res_alloc);
+
+ MPN_COPY (res_ptr + op1_size, op2_ptr + op1_size,
+ op2_size - op1_size);
+ mpn_xor_n (res_ptr, op1_ptr, op2_ptr, op1_size);
+ res_size = op2_size;
MPN_NORMALIZE (res_ptr, res_size);
SIZ(res) = res_size;
{
/* We should compute -OP1 ^ OP2. Swap OP1 and OP2 and fall
through to the code that handles OP1 ^ -OP2. */
- MPZ_SRCPTR_SWAP (op1, op2);
- MPN_SRCPTR_SWAP (op1_ptr,op1_size, op2_ptr,op2_size);
+ MPZ_SRCPTR_SWAP (op1, op2);
+ MPN_SRCPTR_SWAP (op1_ptr,op1_size, op2_ptr,op2_size);
}
}
if (op1_size > op2_size)
{
MPN_COPY (res_ptr + op2_size, op1_ptr + op2_size, op1_size - op2_size);
- for (i = op2_size - 1; i >= 0; i--)
- res_ptr[i] = op1_ptr[i] ^ op2_ptr[i];
+ mpn_xor_n (res_ptr, op1_ptr, op2_ptr, op2_size);
res_size = op1_size;
}
else
{
MPN_COPY (res_ptr + op1_size, op2_ptr + op1_size, op2_size - op1_size);
- for (i = op1_size - 1; i >= 0; i--)
- res_ptr[i] = op1_ptr[i] ^ op2_ptr[i];
+ if (LIKELY (op1_size != 0))
+ mpn_xor_n (res_ptr, op1_ptr, op2_ptr, op1_size);
res_size = op2_size;
}
cy = mpn_add_1 (res_ptr, res_ptr, res_size, (mp_limb_t) 1);
- if (cy)
- {
- res_ptr[res_size] = cy;
- res_size++;
- }
+ res_ptr[res_size] = cy;
+ res_size += (cy != 0);
MPN_NORMALIZE (res_ptr, res_size);
SIZ(res) = -res_size;
License for more details.
You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
/*
Optimisation ideas:
--- /dev/null
+/* primesieve (BIT_ARRAY, N) -- Fills the BIT_ARRAY with a mask for primes up to N.
+
+Contributed to the GNU project by Marco Bodrato.
+
+THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.
+IT IS ONLY SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.
+IN FACT, IT IS ALMOST GUARANTEED THAT IT WILL CHANGE OR
+DISAPPEAR IN A FUTURE GNU MP RELEASE.
+
+Copyright 2010, 2011, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/**************************************************************/
+/* Section macros: common macros, for mswing/fac/bin (&sieve) */
+/**************************************************************/
+
+/* The LOOP_ON_SIEVE_* macros expand, together, to a loop that walks the
+   bits of SIEVE and runs the user-written body once for every bit that
+   is zero, after storing id_to_n (__i) in PRIME.  The body is written
+   between LOOP_ON_SIEVE_BEGIN (or _CONTINUE) and LOOP_ON_SIEVE_END (or
+   _STOP).
+
+   LOOP_ON_SIEVE_CONTINUE stores END in __max_i and opens the inner
+   do-loop and the "bit is zero" test.  It expects __i, __index, __mask
+   and __max_i to be in scope already.  Both the do block and the if
+   block are left open; LOOP_ON_SIEVE_STOP closes them.  __i is
+   incremented before the bit is tested.  */
+#define LOOP_ON_SIEVE_CONTINUE(prime,end,sieve) \
+ __max_i = (end); \
+ \
+ do { \
+ ++__i; \
+ if (((sieve)[__index] & __mask) == 0) \
+ { \
+ (prime) = id_to_n(__i)
+
+/* LOOP_ON_SIEVE_BEGIN opens an enclosing block, declares the loop
+   state, and starts the loop.  The first bit tested is bit START-OFF of
+   SIEVE (limb __index, bit selected by __mask), while __i itself counts
+   from START, so the first value passed to id_to_n is START+1.  */
+#define LOOP_ON_SIEVE_BEGIN(prime,start,end,off,sieve) \
+ do { \
+ mp_limb_t __mask, __index, __max_i, __i; \
+ \
+ __i = (start)-(off); \
+ __index = __i / GMP_LIMB_BITS; \
+ __mask = CNST_LIMB(1) << (__i % GMP_LIMB_BITS); \
+ __i += (off); \
+ \
+ LOOP_ON_SIEVE_CONTINUE(prime,end,sieve)
+
+/* LOOP_ON_SIEVE_STOP closes the if block, rotates __mask one bit to the
+   left, steps __index to the next limb when the mask wraps around to
+   bit 0, and closes the inner loop.  The loop repeats while
+   __i <= __max_i, so the last value of __i seen by the body is END+1.
+   A `continue' in the body jumps directly to the loop test and
+   therefore skips the __mask/__index update.  */
+#define LOOP_ON_SIEVE_STOP \
+ } \
+ __mask = __mask << 1 | __mask >> (GMP_LIMB_BITS-1); \
+ __index += __mask & 1; \
+ } while (__i <= __max_i) \
+
+/* LOOP_ON_SIEVE_END ends the inner loop and also closes the enclosing
+   block opened by LOOP_ON_SIEVE_BEGIN.  */
+#define LOOP_ON_SIEVE_END \
+ LOOP_ON_SIEVE_STOP; \
+ } while (0)
+
+/*********************************************************/
+/* Section sieve: sieving functions and tools for primes */
+/*********************************************************/
+
+#if 0
+static mp_limb_t
+bit_to_n (mp_limb_t bit) { return (bit*3+4)|1; }
+#endif
+
+/* id_to_n (id) = bit_to_n (id-1) = (id*3+1)|1
+   Maps a 1-based sieve index to the number it stands for:
+   1 -> 5, 2 -> 7, 3 -> 11, 4 -> 13, ...  The result is always odd. */
+static mp_limb_t
+id_to_n (mp_limb_t id) { return id*3+1+(id&1); }
+
+/* n_to_bit (n) = ((n-1)&(-CNST_LIMB(2)))/3U-1
+   Maps a number to a 0-based bit index in the sieve: 5 -> 0, 7 -> 1,
+   11 -> 2, 13 -> 3, ...  A number without its own bit maps to the bit
+   of the closest smaller number that has one (e.g. 8, 9, 10 -> 1).
+   The subtraction is done on an unsigned type, so n < 5 wraps around;
+   callers in this file assert n > 4. */
+static mp_limb_t
+n_to_bit (mp_limb_t n) { return ((n-5)|1)/3U; }
+
+#if 0
+static mp_size_t
+primesieve_size (mp_limb_t n) { return n_to_bit(n) / GMP_LIMB_BITS + 1; }
+#endif
+
+#if GMP_LIMB_BITS > 61
+#define SIEVE_SEED CNST_LIMB(0x3294C9E069128480)
+#define SEED_LIMIT 202
+#else
+#if GMP_LIMB_BITS > 30
+#define SIEVE_SEED CNST_LIMB(0x69128480)
+#define SEED_LIMIT 114
+#else
+#if GMP_LIMB_BITS > 15
+#define SIEVE_SEED CNST_LIMB(0x8480)
+#define SEED_LIMIT 54
+#else
+#if GMP_LIMB_BITS > 7
+#define SIEVE_SEED CNST_LIMB(0x80)
+#define SEED_LIMIT 34
+#else
+#define SIEVE_SEED CNST_LIMB(0x0)
+#define SEED_LIMIT 24
+#endif /* 7 */
+#endif /* 15 */
+#endif /* 30 */
+#endif /* 61 */
+
+/* Sieve the initial part of the bit array, covering the numbers up to n.
+   Writes n_to_bit(n) / GMP_LIMB_BITS + 1 limbs at bit_array; on return
+   a set bit marks a composite.  Bits of the last limb above
+   n_to_bit(n) are set to 1.  Requires n > 4.  */
+static void
+first_block_primesieve (mp_ptr bit_array, mp_limb_t n)
+{
+ mp_size_t bits, limbs;
+
+ ASSERT (n > 4);
+
+ /* bits is the index of the last bit that is in use.  */
+ bits = n_to_bit(n);
+ limbs = bits / GMP_LIMB_BITS + 1;
+
+ /* FIXME: We can skip 5 too, filling with a 5-part pattern. */
+ MPN_ZERO (bit_array, limbs);
+ /* NOTE(review): SIEVE_SEED is presumably the precomputed sieve of the
+    first limb, valid for n up to SEED_LIMIT -- confirm.  */
+ bit_array[0] = SIEVE_SEED;
+
+ /* Mark the unused high bits of the last limb.  */
+ if ((bits + 1) % GMP_LIMB_BITS != 0)
+ bit_array[limbs-1] |= MP_LIMB_T_MAX << ((bits + 1) % GMP_LIMB_BITS);
+
+ if (n > SEED_LIMIT) {
+ mp_limb_t mask, index, i;
+
+ ASSERT (n > 49);
+
+ /* index/mask select bit i-1; i is the 1-based id used by id_to_n.  */
+ mask = 1;
+ index = 0;
+ i = 1;
+ do {
+ if ((bit_array[index] & mask) == 0)
+ {
+ mp_size_t step, lindex;
+ mp_limb_t lmask;
+ unsigned maskrot;
+
+ step = id_to_n(i);
+/* lindex = n_to_bit(id_to_n(i)*id_to_n(i)); */
+ lindex = i*(step+1)-1+(-(i&1)&(i+1));
+/* lindex = i*(step+1+(i&1))-1+(i&1); */
+ /* The square is beyond the range; this is the only exit from the
+    loop.  */
+ if (lindex > bits)
+ break;
+
+ /* Marked bits are 2*id_to_n(i) positions apart.  maskrot keeps
+    lmask aligned with lindex % GMP_LIMB_BITS; it is non-zero since
+    step is twice an odd number (assumes GMP_LIMB_BITS is a multiple
+    of 4 -- TODO confirm).  */
+ step <<= 1;
+ maskrot = step % GMP_LIMB_BITS;
+
+ lmask = CNST_LIMB(1) << (lindex % GMP_LIMB_BITS);
+ do {
+ bit_array[lindex / GMP_LIMB_BITS] |= lmask;
+ lmask = lmask << maskrot | lmask >> (GMP_LIMB_BITS - maskrot);
+ lindex += step;
+ } while (lindex <= bits);
+
+/* lindex = n_to_bit(id_to_n(i)*bit_to_n(i)); */
+ lindex = i*(i*3+6)+(i&1);
+
+ /* Second run of multiples, with the same stride.  */
+ lmask = CNST_LIMB(1) << (lindex % GMP_LIMB_BITS);
+ for ( ; lindex <= bits; lindex += step) {
+ bit_array[lindex / GMP_LIMB_BITS] |= lmask;
+ lmask = lmask << maskrot | lmask >> (GMP_LIMB_BITS - maskrot);
+ };
+ }
+ /* Advance to the next bit, moving to the next limb when the mask
+    wraps around to bit 0.  */
+ mask = mask << 1 | mask >> (GMP_LIMB_BITS-1);
+ index += mask & 1;
+ i++;
+ } while (1);
+ }
+}
+
+/* Sieve a block of `limbs' limbs stored at bit_array.  Bit 0 of the
+   block has index `offset' in the numbering used by n_to_bit.  The
+   sieving numbers are read from the zero bits of `sieve', which is
+   scanned from bit 0 up to bit sieve_bits.  On return a set bit in
+   bit_array marks a composite.  Requires limbs > 0.  */
+static void
+block_resieve (mp_ptr bit_array, mp_size_t limbs, mp_limb_t offset,
+ mp_srcptr sieve, mp_limb_t sieve_bits)
+{
+ mp_size_t bits, step;
+
+ ASSERT (limbs > 0);
+
+ /* Index, relative to the block, of its last bit.  */
+ bits = limbs * GMP_LIMB_BITS - 1;
+
+ /* FIXME: We can skip 5 too, filling with a 5-part pattern. */
+ MPN_ZERO (bit_array, limbs);
+
+ /* In the body, step holds id_to_n (__i) for the current zero bit.  */
+ LOOP_ON_SIEVE_BEGIN(step,0,sieve_bits,0,sieve);
+ {
+ mp_size_t lindex;
+ mp_limb_t lmask;
+ unsigned maskrot;
+
+/* lindex = n_to_bit(id_to_n(i)*id_to_n(i)); */
+ lindex = __i*(step+1)-1+(-(__i&1)&(__i+1));
+/* lindex = __i*(step+1+(__i&1))-1+(__i&1); */
+ /* The square lies beyond the end of this block: stop sieving.  */
+ if (lindex > bits + offset)
+ break;
+
+ step <<= 1;
+ maskrot = step % GMP_LIMB_BITS;
+
+ /* Move forward, in multiples of step, to the first index that is
+    not below offset.  */
+ if (lindex < offset)
+ lindex += step * ((offset - lindex - 1) / step + 1);
+
+ /* From here on lindex is relative to the block.  */
+ lindex -= offset;
+
+ lmask = CNST_LIMB(1) << (lindex % GMP_LIMB_BITS);
+ for ( ; lindex <= bits; lindex += step) {
+ bit_array[lindex / GMP_LIMB_BITS] |= lmask;
+ lmask = lmask << maskrot | lmask >> (GMP_LIMB_BITS - maskrot);
+ };
+
+/* lindex = n_to_bit(id_to_n(i)*bit_to_n(i)); */
+ lindex = __i*(__i*3+6)+(__i&1);
+ /* NOTE(review): this `continue' reaches the loop test without the
+    __mask/__index update of LOOP_ON_SIEVE_STOP.  Presumably harmless,
+    because the square computed for the next __i is then beyond the
+    block and the `break' above ends the loop -- confirm.  */
+ if (lindex > bits + offset)
+ continue;
+
+ if (lindex < offset)
+ lindex += step * ((offset - lindex - 1) / step + 1);
+
+ lindex -= offset;
+
+ lmask = CNST_LIMB(1) << (lindex % GMP_LIMB_BITS);
+ for ( ; lindex <= bits; lindex += step) {
+ bit_array[lindex / GMP_LIMB_BITS] |= lmask;
+ lmask = lmask << maskrot | lmask >> (GMP_LIMB_BITS - maskrot);
+ };
+ }
+ LOOP_ON_SIEVE_END;
+}
+
+#define BLOCK_SIZE 2048
+
+/* Fills bit_array with the characteristic function of composite
+ numbers up to the parameter n. I.e. a bit set to "1" represents a
+ composite, a "0" represents a prime.
+
+ The primesieve_size(n) limbs pointed to by bit_array are
+ overwritten. The returned value counts prime integers in the
+ interval [4, n]. Note that n > 4.
+
+ Even numbers and multiples of 3 are excluded "a priori", only
+ numbers equivalent to +/- 1 mod 6 have their bit in the array.
+
+ Once sieved, if the bit b is ZERO it represents a prime; the
+ represented prime is bit_to_n(b), if the LSbit is bit 0, or
+ id_to_n(b), if you call "1" the first bit.
+ */
+
+mp_limb_t
+gmp_primesieve (mp_ptr bit_array, mp_limb_t n)
+{
+ mp_size_t size;
+ mp_limb_t bits;
+
+ ASSERT (n > 4);
+
+ /* bits: index of the last bit in use; size: number of limbs.  */
+ bits = n_to_bit(n);
+ size = bits / GMP_LIMB_BITS + 1;
+
+ if (size > BLOCK_SIZE * 2) {
+ mp_size_t off;
+ /* Sieve the first off limbs directly, with off chosen so that the
+    rest is a whole number of BLOCK_SIZE-limb blocks.  Each later
+    block is sieved using the limbs that precede it.  */
+ off = BLOCK_SIZE + (size % BLOCK_SIZE);
+ first_block_primesieve (bit_array, id_to_n (off * GMP_LIMB_BITS));
+ for ( ; off < size; off += BLOCK_SIZE)
+ block_resieve (bit_array + off, BLOCK_SIZE, off * GMP_LIMB_BITS, bit_array, off * GMP_LIMB_BITS - 1);
+ } else {
+ first_block_primesieve (bit_array, n);
+ }
+
+ /* Set the unused high bits of the last limb, so that they are not
+    counted as primes below.  */
+ if ((bits + 1) % GMP_LIMB_BITS != 0)
+ bit_array[size-1] |= MP_LIMB_T_MAX << ((bits + 1) % GMP_LIMB_BITS);
+
+
+ /* Number of zero bits in the array.  */
+ return size * GMP_LIMB_BITS - mpn_popcount (bit_array, size);
+}
+
+#undef BLOCK_SIZE
+#undef SEED_LIMIT
+#undef SIEVE_SEED
+#undef LOOP_ON_SIEVE_END
+#undef LOOP_ON_SIEVE_STOP
+#undef LOOP_ON_SIEVE_BEGIN
+#undef LOOP_ON_SIEVE_CONTINUE
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.6 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
# along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
VPATH = @srcdir@
+am__make_dryrun = \
+ { \
+ am__dry=no; \
+ case $$MAKEFLAGS in \
+ *\\[\ \ ]*) \
+ echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \
+ | grep '^AM OK$$' >/dev/null || am__dry=yes;; \
+ *) \
+ for am__flg in $$MAKEFLAGS; do \
+ case $$am__flg in \
+ *=*|--*) ;; \
+ *n*) am__dry=yes; break;; \
+ esac; \
+ done;; \
+ esac; \
+ test $$am__dry = yes; \
+ }
pkgdatadir = $(datadir)/@PACKAGE@
pkgincludedir = $(includedir)/@PACKAGE@
pkglibdir = $(libdir)/@PACKAGE@
POST_UNINSTALL = :
build_triplet = @build@
host_triplet = @host@
-ANSI2KNR = $(top_builddir)/ansi2knr
subdir = printf
DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
- $(top_srcdir)/configure.in
+ $(top_srcdir)/configure.ac
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
$(ACLOCAL_M4)
mkinstalldirs = $(install_sh) -d
CONFIG_CLEAN_VPATH_FILES =
LTLIBRARIES = $(noinst_LTLIBRARIES)
libprintf_la_LIBADD =
-am_libprintf_la_OBJECTS = asprintf$U.lo asprntffuns$U.lo doprnt$U.lo \
- doprntf$U.lo doprnti$U.lo fprintf$U.lo obprintf$U.lo \
- obvprintf$U.lo obprntffuns$U.lo printf$U.lo printffuns$U.lo \
- snprintf$U.lo snprntffuns$U.lo sprintf$U.lo sprintffuns$U.lo \
- vasprintf$U.lo vfprintf$U.lo vprintf$U.lo vsnprintf$U.lo \
- vsprintf$U.lo repl-vsnprintf$U.lo
+am_libprintf_la_OBJECTS = asprintf.lo asprntffuns.lo doprnt.lo \
+ doprntf.lo doprnti.lo fprintf.lo obprintf.lo obvprintf.lo \
+ obprntffuns.lo printf.lo printffuns.lo snprintf.lo \
+ snprntffuns.lo sprintf.lo sprintffuns.lo vasprintf.lo \
+ vfprintf.lo vprintf.lo vsnprintf.lo vsprintf.lo \
+ repl-vsnprintf.lo
libprintf_la_OBJECTS = $(am_libprintf_la_OBJECTS)
DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
depcomp =
$(LDFLAGS) -o $@
SOURCES = $(libprintf_la_SOURCES)
DIST_SOURCES = $(libprintf_la_SOURCES)
+am__can_run_installinfo = \
+ case $$AM_UPDATE_INFO_DIR in \
+ n|no|NO) false;; \
+ *) (install-info --version) >/dev/null 2>&1;; \
+ esac
ETAGS = etags
CTAGS = ctags
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
SPEED_CYCLECOUNTER_OBJ = @SPEED_CYCLECOUNTER_OBJ@
STRIP = @STRIP@
TAL_OBJECT = @TAL_OBJECT@
+TUNE_LIBS = @TUNE_LIBS@
TUNE_SQR_OBJ = @TUNE_SQR_OBJ@
-U = @U@
U_FOR_BUILD = @U_FOR_BUILD@
VERSION = @VERSION@
WITH_READLINE_01 = @WITH_READLINE_01@
mkdir_p = @mkdir_p@
mpn_objects = @mpn_objects@
mpn_objs_in_libgmp = @mpn_objs_in_libgmp@
-mpn_objs_in_libmp = @mpn_objs_in_libmp@
oldincludedir = @oldincludedir@
pdfdir = @pdfdir@
prefix = @prefix@
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-libprintf.la: $(libprintf_la_OBJECTS) $(libprintf_la_DEPENDENCIES)
+libprintf.la: $(libprintf_la_OBJECTS) $(libprintf_la_DEPENDENCIES) $(EXTRA_libprintf_la_DEPENDENCIES)
$(LINK) $(libprintf_la_OBJECTS) $(libprintf_la_LIBADD) $(LIBS)
mostlyclean-compile:
distclean-compile:
-rm -f *.tab.c
-$(top_builddir)/ansi2knr:
- $(am__cd) $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) ./ansi2knr
-
-mostlyclean-kr:
- -test "$U" = "" || rm -f *_.c
.c.o:
$(COMPILE) -c $<
.c.lo:
$(LTCOMPILE) -c -o $@ $<
-asprintf_.c: asprintf.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/asprintf.c; then echo $(srcdir)/asprintf.c; else echo asprintf.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-asprntffuns_.c: asprntffuns.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/asprntffuns.c; then echo $(srcdir)/asprntffuns.c; else echo asprntffuns.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-doprnt_.c: doprnt.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/doprnt.c; then echo $(srcdir)/doprnt.c; else echo doprnt.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-doprntf_.c: doprntf.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/doprntf.c; then echo $(srcdir)/doprntf.c; else echo doprntf.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-doprnti_.c: doprnti.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/doprnti.c; then echo $(srcdir)/doprnti.c; else echo doprnti.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-fprintf_.c: fprintf.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/fprintf.c; then echo $(srcdir)/fprintf.c; else echo fprintf.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-obprintf_.c: obprintf.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/obprintf.c; then echo $(srcdir)/obprintf.c; else echo obprintf.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-obprntffuns_.c: obprntffuns.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/obprntffuns.c; then echo $(srcdir)/obprntffuns.c; else echo obprntffuns.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-obvprintf_.c: obvprintf.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/obvprintf.c; then echo $(srcdir)/obvprintf.c; else echo obvprintf.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-printf_.c: printf.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/printf.c; then echo $(srcdir)/printf.c; else echo printf.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-printffuns_.c: printffuns.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/printffuns.c; then echo $(srcdir)/printffuns.c; else echo printffuns.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-repl-vsnprintf_.c: repl-vsnprintf.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/repl-vsnprintf.c; then echo $(srcdir)/repl-vsnprintf.c; else echo repl-vsnprintf.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-snprintf_.c: snprintf.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/snprintf.c; then echo $(srcdir)/snprintf.c; else echo snprintf.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-snprntffuns_.c: snprntffuns.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/snprntffuns.c; then echo $(srcdir)/snprntffuns.c; else echo snprntffuns.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-sprintf_.c: sprintf.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sprintf.c; then echo $(srcdir)/sprintf.c; else echo sprintf.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-sprintffuns_.c: sprintffuns.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sprintffuns.c; then echo $(srcdir)/sprintffuns.c; else echo sprintffuns.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-vasprintf_.c: vasprintf.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/vasprintf.c; then echo $(srcdir)/vasprintf.c; else echo vasprintf.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-vfprintf_.c: vfprintf.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/vfprintf.c; then echo $(srcdir)/vfprintf.c; else echo vfprintf.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-vprintf_.c: vprintf.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/vprintf.c; then echo $(srcdir)/vprintf.c; else echo vprintf.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-vsnprintf_.c: vsnprintf.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/vsnprintf.c; then echo $(srcdir)/vsnprintf.c; else echo vsnprintf.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-vsprintf_.c: vsprintf.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/vsprintf.c; then echo $(srcdir)/vsprintf.c; else echo vsprintf.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-asprintf_.$(OBJEXT) asprintf_.lo asprntffuns_.$(OBJEXT) \
-asprntffuns_.lo doprnt_.$(OBJEXT) doprnt_.lo doprntf_.$(OBJEXT) \
-doprntf_.lo doprnti_.$(OBJEXT) doprnti_.lo fprintf_.$(OBJEXT) \
-fprintf_.lo obprintf_.$(OBJEXT) obprintf_.lo obprntffuns_.$(OBJEXT) \
-obprntffuns_.lo obvprintf_.$(OBJEXT) obvprintf_.lo printf_.$(OBJEXT) \
-printf_.lo printffuns_.$(OBJEXT) printffuns_.lo \
-repl-vsnprintf_.$(OBJEXT) repl-vsnprintf_.lo snprintf_.$(OBJEXT) \
-snprintf_.lo snprntffuns_.$(OBJEXT) snprntffuns_.lo sprintf_.$(OBJEXT) \
-sprintf_.lo sprintffuns_.$(OBJEXT) sprintffuns_.lo \
-vasprintf_.$(OBJEXT) vasprintf_.lo vfprintf_.$(OBJEXT) vfprintf_.lo \
-vprintf_.$(OBJEXT) vprintf_.lo vsnprintf_.$(OBJEXT) vsnprintf_.lo \
-vsprintf_.$(OBJEXT) vsprintf_.lo : $(ANSI2KNR)
mostlyclean-libtool:
-rm -f *.lo
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
mostlyclean: mostlyclean-am
-mostlyclean-am: mostlyclean-compile mostlyclean-generic mostlyclean-kr \
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
mostlyclean-libtool
pdf: pdf-am
uninstall-am:
-.MAKE: $(top_builddir)/ansi2knr install-am install-strip
+.MAKE: install-am install-strip
.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
clean-libtool clean-noinstLTLIBRARIES ctags distclean \
install-pdf install-pdf-am install-ps install-ps-am \
install-strip installcheck installcheck-am installdirs \
maintainer-clean maintainer-clean-generic mostlyclean \
- mostlyclean-compile mostlyclean-generic mostlyclean-kr \
- mostlyclean-libtool pdf pdf-am ps ps-am tags uninstall \
- uninstall-am
+ mostlyclean-compile mostlyclean-generic mostlyclean-libtool \
+ pdf pdf-am ps ps-am tags uninstall uninstall-am
# Tell versions [3.59,3.63) of GNU make to not export all variables.
#include "gmp.h"
#include "gmp-impl.h"
+#include "longlong.h"
/* change this to "#define TRACE(x) x" for diagnostics */
/* SunOS 4 stdio.h doesn't provide a prototype for this */
#if ! HAVE_DECL_VFPRINTF
-int vfprintf __GMP_PROTO ((FILE *, const char *, va_list));
+int vfprintf (FILE *, const char *, va_list);
#endif
+++ /dev/null
-/* gmp_randinit (state, algorithm, ...) -- Initialize a random state.
-
-Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
-
-#include "config.h"
-
-#include <stdio.h> /* for NULL */
-
-#if HAVE_STDARG
-#include <stdarg.h>
-#else
-#include <varargs.h>
-#endif
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-void
-#if HAVE_STDARG
-gmp_randinit (gmp_randstate_t rstate,
- gmp_randalg_t alg,
- ...)
-#else
-gmp_randinit (va_alist)
- va_dcl
-#endif
-{
- va_list ap;
-#if HAVE_STDARG
- va_start (ap, alg);
-#else
- __gmp_randstate_struct *rstate;
- gmp_randalg_t alg;
- va_start (ap);
- rstate = va_arg (ap, __gmp_randstate_struct *);
- alg = va_arg (ap, gmp_randalg_t);
-#endif
-
- switch (alg) {
- case GMP_RAND_ALG_LC:
- if (! gmp_randinit_lc_2exp_size (rstate, va_arg (ap, unsigned long)))
- gmp_errno |= GMP_ERROR_INVALID_ARGUMENT;
- break;
- default:
- gmp_errno |= GMP_ERROR_UNSUPPORTED_ARGUMENT;
- break;
- }
- va_end (ap);
-}
--- /dev/null
+## Process this file with automake to generate Makefile.in
+
+# Copyright 2001, 2002, 2010 Free Software Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+
+INCLUDES = -D__GMP_WITHIN_GMP -I$(top_srcdir)
+
+noinst_LTLIBRARIES = librandom.la
+
+librandom_la_SOURCES = randmt.h \
+ rand.c randclr.c randdef.c randiset.c randlc2s.c randlc2x.c randmt.c \
+ randmts.c rands.c randsd.c randsdui.c randbui.c randmui.c
--- /dev/null
+# Makefile.in generated by automake 1.11.6 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+# Copyright 2001, 2002, 2010 Free Software Foundation, Inc.
+#
+# This file is part of the GNU MP Library.
+#
+# The GNU MP Library is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# The GNU MP Library is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+# License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+VPATH = @srcdir@
+am__make_dryrun = \
+ { \
+ am__dry=no; \
+ case $$MAKEFLAGS in \
+ *\\[\ \ ]*) \
+ echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \
+ | grep '^AM OK$$' >/dev/null || am__dry=yes;; \
+ *) \
+ for am__flg in $$MAKEFLAGS; do \
+ case $$am__flg in \
+ *=*|--*) ;; \
+ *n*) am__dry=yes; break;; \
+ esac; \
+ done;; \
+ esac; \
+ test $$am__dry = yes; \
+ }
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+subdir = rand
+DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
+ $(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+ $(ACLOCAL_M4)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/config.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+LTLIBRARIES = $(noinst_LTLIBRARIES)
+librandom_la_LIBADD =
+am_librandom_la_OBJECTS = rand.lo randclr.lo randdef.lo randiset.lo \
+ randlc2s.lo randlc2x.lo randmt.lo randmts.lo rands.lo \
+ randsd.lo randsdui.lo randbui.lo randmui.lo
+librandom_la_OBJECTS = $(am_librandom_la_OBJECTS)
+DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
+depcomp =
+am__depfiles_maybe =
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+ $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+ --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
+ $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+CCLD = $(CC)
+LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
+ --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \
+ $(LDFLAGS) -o $@
+SOURCES = $(librandom_la_SOURCES)
+DIST_SOURCES = $(librandom_la_SOURCES)
+am__can_run_installinfo = \
+ case $$AM_UPDATE_INFO_DIR in \
+ n|no|NO) false;; \
+ *) (install-info --version) >/dev/null 2>&1;; \
+ esac
+ETAGS = etags
+CTAGS = ctags
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ABI = @ABI@
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AR = @AR@
+AS = @AS@
+ASMFLAGS = @ASMFLAGS@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+CALLING_CONVENTIONS_OBJS = @CALLING_CONVENTIONS_OBJS@
+CC = @CC@
+CCAS = @CCAS@
+CC_FOR_BUILD = @CC_FOR_BUILD@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CPP_FOR_BUILD = @CPP_FOR_BUILD@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFN_LONG_LONG_LIMB = @DEFN_LONG_LONG_LIMB@
+DEFS = @DEFS@
+DLLTOOL = @DLLTOOL@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+EXEEXT_FOR_BUILD = @EXEEXT_FOR_BUILD@
+FGREP = @FGREP@
+GMP_LDFLAGS = @GMP_LDFLAGS@
+GMP_LIMB_BITS = @GMP_LIMB_BITS@
+GMP_NAIL_BITS = @GMP_NAIL_BITS@
+GREP = @GREP@
+HAVE_CLOCK_01 = @HAVE_CLOCK_01@
+HAVE_CPUTIME_01 = @HAVE_CPUTIME_01@
+HAVE_GETRUSAGE_01 = @HAVE_GETRUSAGE_01@
+HAVE_GETTIMEOFDAY_01 = @HAVE_GETTIMEOFDAY_01@
+HAVE_HOST_CPU_FAMILY_power = @HAVE_HOST_CPU_FAMILY_power@
+HAVE_HOST_CPU_FAMILY_powerpc = @HAVE_HOST_CPU_FAMILY_powerpc@
+HAVE_SIGACTION_01 = @HAVE_SIGACTION_01@
+HAVE_SIGALTSTACK_01 = @HAVE_SIGALTSTACK_01@
+HAVE_SIGSTACK_01 = @HAVE_SIGSTACK_01@
+HAVE_STACK_T_01 = @HAVE_STACK_T_01@
+HAVE_SYS_RESOURCE_H_01 = @HAVE_SYS_RESOURCE_H_01@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LEX = @LEX@
+LEXLIB = @LEXLIB@
+LEX_OUTPUT_ROOT = @LEX_OUTPUT_ROOT@
+LIBCURSES = @LIBCURSES@
+LIBGMPXX_LDFLAGS = @LIBGMPXX_LDFLAGS@
+LIBGMP_DLL = @LIBGMP_DLL@
+LIBGMP_LDFLAGS = @LIBGMP_LDFLAGS@
+LIBM = @LIBM@
+LIBM_FOR_BUILD = @LIBM_FOR_BUILD@
+LIBOBJS = @LIBOBJS@
+LIBREADLINE = @LIBREADLINE@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+M4 = @M4@
+MAINT = @MAINT@
+MAKEINFO = @MAKEINFO@
+MANIFEST_TOOL = @MANIFEST_TOOL@
+MKDIR_P = @MKDIR_P@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+RANLIB = @RANLIB@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+SPEED_CYCLECOUNTER_OBJ = @SPEED_CYCLECOUNTER_OBJ@
+STRIP = @STRIP@
+TAL_OBJECT = @TAL_OBJECT@
+TUNE_LIBS = @TUNE_LIBS@
+TUNE_SQR_OBJ = @TUNE_SQR_OBJ@
+U_FOR_BUILD = @U_FOR_BUILD@
+VERSION = @VERSION@
+WITH_READLINE_01 = @WITH_READLINE_01@
+YACC = @YACC@
+YFLAGS = @YFLAGS@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+am__leading_dot = @am__leading_dot@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+gmp_srclinks = @gmp_srclinks@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+mpn_objects = @mpn_objects@
+mpn_objs_in_libgmp = @mpn_objs_in_libgmp@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+INCLUDES = -D__GMP_WITHIN_GMP -I$(top_srcdir)
+noinst_LTLIBRARIES = librandom.la
+librandom_la_SOURCES = randmt.h \
+ rand.c randclr.c randdef.c randiset.c randlc2s.c randlc2x.c randmt.c \
+ randmts.c rands.c randsd.c randsdui.c randbui.c randmui.c
+
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .c .lo .o .obj
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps)
+ @for dep in $?; do \
+ case '$(am__configure_deps)' in \
+ *$$dep*) \
+ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+ && { if test -f $@; then exit 0; else break; fi; }; \
+ exit 1;; \
+ esac; \
+ done; \
+ echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu --ignore-deps rand/Makefile'; \
+ $(am__cd) $(top_srcdir) && \
+ $(AUTOMAKE) --gnu --ignore-deps rand/Makefile
+.PRECIOUS: Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+ @case '$?' in \
+ *config.status*) \
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+ *) \
+ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+ esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+
+clean-noinstLTLIBRARIES:
+ -test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES)
+ @list='$(noinst_LTLIBRARIES)'; for p in $$list; do \
+ dir="`echo $$p | sed -e 's|/[^/]*$$||'`"; \
+ test "$$dir" != "$$p" || dir=.; \
+ echo "rm -f \"$${dir}/so_locations\""; \
+ rm -f "$${dir}/so_locations"; \
+ done
+librandom.la: $(librandom_la_OBJECTS) $(librandom_la_DEPENDENCIES) $(EXTRA_librandom_la_DEPENDENCIES)
+ $(LINK) $(librandom_la_OBJECTS) $(librandom_la_LIBADD) $(LIBS)
+
+mostlyclean-compile:
+ -rm -f *.$(OBJEXT)
+
+distclean-compile:
+ -rm -f *.tab.c
+
+.c.o:
+ $(COMPILE) -c $<
+
+.c.obj:
+ $(COMPILE) -c `$(CYGPATH_W) '$<'`
+
+.c.lo:
+ $(LTCOMPILE) -c -o $@ $<
+
+mostlyclean-libtool:
+ -rm -f *.lo
+
+clean-libtool:
+ -rm -rf .libs _libs
+
+ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in files) print i; }; }'`; \
+ mkid -fID $$unique
+tags: TAGS
+
+TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
+ $(TAGS_FILES) $(LISP)
+ set x; \
+ here=`pwd`; \
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in files) print i; }; }'`; \
+ shift; \
+ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+ test -n "$$unique" || unique=$$empty_fix; \
+ if test $$# -gt 0; then \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ "$$@" $$unique; \
+ else \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ $$unique; \
+ fi; \
+ fi
+ctags: CTAGS
+CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
+ $(TAGS_FILES) $(LISP)
+ list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | \
+ $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in files) print i; }; }'`; \
+ test -z "$(CTAGS_ARGS)$$unique" \
+ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+ $$unique
+
+GTAGS:
+ here=`$(am__cd) $(top_builddir) && pwd` \
+ && $(am__cd) $(top_srcdir) \
+ && gtags -i $(GTAGS_ARGS) "$$here"
+
+distclean-tags:
+ -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+distdir: $(DISTFILES)
+ @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ list='$(DISTFILES)'; \
+ dist_files=`for file in $$list; do echo $$file; done | \
+ sed -e "s|^$$srcdirstrip/||;t" \
+ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+ case $$dist_files in \
+ */*) $(MKDIR_P) `echo "$$dist_files" | \
+ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+ sort -u` ;; \
+ esac; \
+ for file in $$dist_files; do \
+ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+ if test -d $$d/$$file; then \
+ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+ if test -d "$(distdir)/$$file"; then \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+ else \
+ test -f "$(distdir)/$$file" \
+ || cp -p $$d/$$file "$(distdir)/$$file" \
+ || exit 1; \
+ fi; \
+ done
+check-am: all-am
+check: check-am
+all-am: Makefile $(LTLIBRARIES)
+installdirs:
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+ -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+ -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+ @echo "This command is intended for maintainers to use"
+ @echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-generic clean-libtool clean-noinstLTLIBRARIES \
+ mostlyclean-am
+
+distclean: distclean-am
+ -rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+ distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am:
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+ -rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
+ mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am:
+
+.MAKE: install-am install-strip
+
+.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
+ clean-libtool clean-noinstLTLIBRARIES ctags distclean \
+ distclean-compile distclean-generic distclean-libtool \
+ distclean-tags distdir dvi dvi-am html html-am info info-am \
+ install install-am install-data install-data-am install-dvi \
+ install-dvi-am install-exec install-exec-am install-html \
+ install-html-am install-info install-info-am install-man \
+ install-pdf install-pdf-am install-ps install-ps-am \
+ install-strip installcheck installcheck-am installdirs \
+ maintainer-clean maintainer-clean-generic mostlyclean \
+ mostlyclean-compile mostlyclean-generic mostlyclean-libtool \
+ pdf pdf-am ps ps-am tags uninstall uninstall-am
+
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
--- /dev/null
+/* gmp_randinit (state, algorithm, ...) -- Initialize a random state.
+
+Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include "config.h"
+
+#include <stdio.h> /* for NULL */
+
+#if HAVE_STDARG
+#include <stdarg.h>
+#else
+#include <varargs.h>
+#endif
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Variadic front end that picks a generator from the algorithm code ALG.
+   The only algorithm handled is GMP_RAND_ALG_LC, whose single extra
+   argument is an unsigned long size passed to gmp_randinit_lc_2exp_size.
+   Failures are reported by or-ing a flag into gmp_errno:
+   GMP_ERROR_INVALID_ARGUMENT when that call returns zero, and
+   GMP_ERROR_UNSUPPORTED_ARGUMENT for every other ALG.  */
+void
+#if HAVE_STDARG
+gmp_randinit (gmp_randstate_t rstate,
+              gmp_randalg_t alg,
+              ...)
+#else
+gmp_randinit (va_alist)
+     va_dcl
+#endif
+{
+  va_list args;
+  unsigned long lc_size;
+#if HAVE_STDARG
+  va_start (args, alg);
+#else
+  __gmp_randstate_struct *rstate;
+  gmp_randalg_t alg;
+  va_start (args);
+  rstate = va_arg (args, __gmp_randstate_struct *);
+  alg = va_arg (args, gmp_randalg_t);
+#endif
+
+  if (alg == GMP_RAND_ALG_LC)
+    {
+      /* The size is fetched only on this path, exactly as before.  */
+      lc_size = va_arg (args, unsigned long);
+      if (gmp_randinit_lc_2exp_size (rstate, lc_size) == 0)
+        gmp_errno |= GMP_ERROR_INVALID_ARGUMENT;
+    }
+  else
+    gmp_errno |= GMP_ERROR_UNSUPPORTED_ARGUMENT;
+
+  va_end (args);
+}
--- /dev/null
+/* gmp_urandomb_ui -- random bits returned in a ulong.
+
+Copyright 2003, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* Return BITS random bits from RSTATE packed into an unsigned long.
+   Requests for more than BITS_PER_ULONG bits are quietly cut down to
+   BITS_PER_ULONG; maybe this should raise an exception or something.  */
+
+unsigned long
+gmp_urandomb_ui (gmp_randstate_ptr rstate, unsigned long bits)
+{
+  mp_limb_t limbs[LIMBS_PER_ULONG];
+  unsigned long nbits = MIN (bits, BITS_PER_ULONG);
+  unsigned long result;
+
+  /* Pre-clear the limbs: _gmp_rand stores nothing at all when nbits is 0,
+     and only limbs[0] when nbits <= GMP_NUMB_BITS.  */
+  limbs[0] = 0;
+#if LIMBS_PER_ULONG > 1
+  limbs[1] = 0;
+#endif
+
+  _gmp_rand (limbs, rstate, nbits);
+
+  result = limbs[0];
+#if LIMBS_PER_ULONG != 1
+  /* A ulong spans two limbs here; the second supplies the high part.  */
+  result |= limbs[1] << GMP_NUMB_BITS;
+#endif
+  return result;
+}
--- /dev/null
+/* gmp_randclear (state) -- Clear and deallocate random state STATE.
+
+Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Clear RSTATE by dispatching to the randclear_fn entry of the function
+   table that the generator stored behind RNG_FNPTR.  */
+void
+gmp_randclear (gmp_randstate_t rstate)
+{
+  gmp_randfnptr_t *ops = (gmp_randfnptr_t *) RNG_FNPTR (rstate);
+
+  (*ops->randclear_fn) (rstate);
+}
--- /dev/null
+/* gmp_randinit_default -- initialize a random state with a default algorithm.
+
+Copyright 2001, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+void
+gmp_randinit_default (gmp_randstate_t rstate)
+{
+ gmp_randinit_mt (rstate); /* the default algorithm is whatever gmp_randinit_mt sets up */
+}
--- /dev/null
+/* gmp_randinit_set -- initialize with a copy of another gmp_randstate_t.
+
+Copyright 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* Initialize DST as a copy of SRC.  The work is done by the randiset_fn
+   entry of the function table found through RNG_FNPTR (src).  */
+void
+gmp_randinit_set (gmp_randstate_ptr dst, gmp_randstate_srcptr src)
+{
+  gmp_randfnptr_t *ops = (gmp_randfnptr_t *) RNG_FNPTR (src);
+
+  (*ops->randiset_fn) (dst, src);
+}
--- /dev/null
+/* gmp_randinit_lc_2exp_size -- initialize a random state with a linear
+ congruential generator of a requested size.
+
+Copyright 1999, 2000, 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include <stdio.h> /* for NULL */
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* Array of LC-schemes, ordered in increasing order of the first
+ member (the 'm2exp' value). The end of the array is indicated with
+ an entry containing all zeros. */
+
+/* All multipliers are in the range from 0.01*m to 0.99*m, and are
+congruent to 5 (mod 8).
+They all pass the spectral test with Vt >= 2^(30/t) and merit >= 1.
+(Up to and including 196 bits, merit is >= 3.) */
+
+struct __gmp_rand_lc_scheme_struct
+{
+ unsigned long int m2exp; /* Modulus is 2 ^ m2exp; 0 marks the end-of-table entry. */
+ const char *astr; /* Multiplier as a digit string, parsed in base 16 by the lookup code. */
+ unsigned long int c; /* Addend. */
+};
+
+static const struct __gmp_rand_lc_scheme_struct __gmp_rand_lc_scheme[] =
+{
+ {32, "29CF535", 1},
+ {33, "51F666D", 1},
+ {34, "A3D73AD", 1},
+ {35, "147E5B85", 1},
+ {36, "28F725C5", 1},
+ {37, "51EE3105", 1},
+ {38, "A3DD5CDD", 1},
+ {39, "147AF833D", 1},
+ {40, "28F5DA175", 1},
+ {56, "AA7D735234C0DD", 1},
+ {64, "BAECD515DAF0B49D", 1},
+ {100, "292787EBD3329AD7E7575E2FD", 1},
+ {128, "48A74F367FA7B5C8ACBB36901308FA85", 1},
+ {156, "78A7FDDDC43611B527C3F1D760F36E5D7FC7C45", 1},
+ {196, "41BA2E104EE34C66B3520CE706A56498DE6D44721E5E24F5", 1},
+ {200, "4E5A24C38B981EAFE84CD9D0BEC48E83911362C114F30072C5", 1},
+ {256, "AF66BA932AAF58A071FD8F0742A99A0C76982D648509973DB802303128A14CB5", 1},
+ {0, NULL, 0} /* End of array: the scan stops at m2exp == 0. */
+};
+
+/* Initialize RSTATE as a linear congruential generator taken from the
+   scheme table: the first entry whose m2exp/2 is at least SIZE is used.
+   Returns 1 on success, or 0 (leaving RSTATE untouched) when no table
+   entry is big enough.  */
+int
+gmp_randinit_lc_2exp_size (gmp_randstate_t rstate, mp_bitcnt_t size)
+{
+  const struct __gmp_rand_lc_scheme_struct *scheme = __gmp_rand_lc_scheme;
+  mpz_t mult;
+
+  /* Walk the table until an entry is large enough or the all-zero
+     terminator is reached.  */
+  while (scheme->m2exp != 0 && scheme->m2exp / 2 < size)
+    scheme++;
+
+  if (scheme->m2exp == 0)
+    return 0;
+
+  /* Install the chosen scheme; the multiplier is stored as hex text.  */
+  mpz_init_set_str (mult, scheme->astr, 16);
+  gmp_randinit_lc_2exp (rstate, mult, scheme->c, scheme->m2exp);
+  mpz_clear (mult);
+  return 1;
+}
--- /dev/null
+/* Linear Congruential pseudo-random number generator functions.
+
+Copyright 1999, 2000, 2001, 2002, 2003, 2005 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* State structure for LC, the RNG_STATE() pointer in a gmp_randstate_t.
+
+ _mp_seed holds the current seed value, in the range 0 to 2^m2exp-1.
+ SIZ(_mp_seed) is fixed at BITS_TO_LIMBS(_mp_m2exp) and the value is
+ padded with high zero limbs if necessary. ALLOC(_mp_seed) is the current
+ size of PTR(_mp_seed) in the usual way. There only needs to be
+ BITS_TO_LIMBS(_mp_m2exp) allocated, but the mpz functions in the
+ initialization and seeding end up making it a bit more than this.
+
+ _mp_a is the "a" multiplier, in the range 0 to 2^m2exp-1. SIZ(_mp_a) is
+ the size of the value in the normal way for an mpz_t, except that a value
+ of zero is held with SIZ(_mp_a)==1 and PTR(_mp_a)[0]==0. This makes it
+ easy to call mpn_mul, and the case of a==0 is highly un-random and not
+ worth any trouble to optimize.
+
+ {_cp,_cn} is the "c" addend. Normally _cn is 1, but when nails are in
+ use a ulong can be bigger than one limb, and in this case _cn is 2 if
+ necessary. c==0 is stored as _cp[0]==0 and _cn==1, which makes it easy
+ to call __GMPN_ADD. c==0 is fairly un-random so isn't worth optimizing.
+
+ _mp_m2exp gives the modulus, namely 2^m2exp. We demand m2exp>=1, since
+ m2exp==0 would mean no bits at all out of each iteration, which makes no
+ sense. */
+
+typedef struct {
+ mpz_t _mp_seed; /* current seed; SIZ is held fixed at BITS_TO_LIMBS(_mp_m2exp) */
+ mpz_t _mp_a; /* multiplier "a"; a zero value is kept as one zero limb */
+ mp_size_t _cn; /* number of limbs of _cp in use */
+ mp_limb_t _cp[LIMBS_PER_ULONG]; /* addend "c" in limb form */
+ unsigned long _mp_m2exp; /* modulus is 2^_mp_m2exp; must be at least 1 */
+} gmp_rand_lc_struct;
+
+
+/* lc (rp, state) -- Generate next number in LC sequence. Return the
+ number of valid bits in the result. Discards the lower half of the
+ result.
+
+ The new value (a * seed + c) mod 2^m2exp is written back as the seed of
+ the generator state, and its bits from position m2exp/2 upwards are
+ copied to RP. */
+
+static unsigned long int
+lc (mp_ptr rp, gmp_randstate_t rstate)
+{
+ mp_ptr tp, seedp, ap;
+ mp_size_t ta;
+ mp_size_t tn, seedn, an;
+ unsigned long int m2exp;
+ unsigned long int bits;
+ int cy; /* carry out of __GMPN_ADD; assigned but not read afterwards */
+ mp_size_t xn;
+ gmp_rand_lc_struct *p;
+ TMP_DECL;
+
+ p = (gmp_rand_lc_struct *) RNG_STATE (rstate);
+
+ m2exp = p->_mp_m2exp;
+
+ seedp = PTR (p->_mp_seed);
+ seedn = SIZ (p->_mp_seed);
+
+ ap = PTR (p->_mp_a);
+ an = SIZ (p->_mp_a);
+
+ /* Allocate temporary storage. Let there be room for calculation of
+ (A * seed + C) % M, or M if bigger than that. */
+
+ TMP_MARK;
+
+ ta = an + seedn + 1; /* an + seedn limbs for the product, plus one */
+ tn = BITS_TO_LIMBS (m2exp);
+ if (ta <= tn) /* that is, if (ta < tn + 1) */
+ {
+ mp_size_t tmp = an + seedn;
+ ta = tn + 1;
+ tp = TMP_ALLOC_LIMBS (ta);
+ MPN_ZERO (&tp[tmp], ta - tmp); /* mpn_mul won't zero it out. */
+ }
+ else
+ tp = TMP_ALLOC_LIMBS (ta);
+
+ /* t = a * seed. NOTE: an is always > 0; see initialization. */
+ ASSERT (seedn >= an && an > 0);
+ mpn_mul (tp, seedp, seedn, ap, an);
+
+ /* t = t + c. NOTE: tn is always >= p->_cn (precondition for __GMPN_ADD);
+ see initialization. */
+ ASSERT (tn >= p->_cn);
+ __GMPN_ADD (cy, tp, tp, tn, p->_cp, p->_cn);
+
+ /* t = t % m */
+ tp[m2exp / GMP_NUMB_BITS] &= (CNST_LIMB (1) << m2exp % GMP_NUMB_BITS) - 1; /* clears bit m2exp and above within that limb */
+
+ /* Save result as next seed. */
+ MPN_COPY (PTR (p->_mp_seed), tp, tn);
+
+ /* Discard the lower m2exp/2 of the result. */
+ bits = m2exp / 2;
+ xn = bits / GMP_NUMB_BITS; /* whole limbs that are dropped */
+
+ tn -= xn; /* limbs left once the xn low limbs are skipped */
+ if (tn > 0)
+ {
+ unsigned int cnt = bits % GMP_NUMB_BITS;
+ if (cnt != 0)
+ {
+ mpn_rshift (tp, tp + xn, tn, cnt);
+ MPN_COPY_INCR (rp, tp, xn + 1);
+ }
+ else /* Even limb boundary. */
+ MPN_COPY_INCR (rp, tp + xn, tn);
+ }
+
+ TMP_FREE;
+
+ /* Return number of valid bits in the result. */
+ return (m2exp + 1) / 2;
+}
+
+
+/* Obtain a sequence of random numbers: fill RP with NBITS bits, built from
+ successive lc() results of m2exp/2 bits each. Whole chunks are placed
+ first, then a final partial chunk, after which the unused high bits of
+ the last limb are masked off. */
+static void
+randget_lc (gmp_randstate_t rstate, mp_ptr rp, unsigned long int nbits)
+{
+ unsigned long int rbitpos; /* number of bits of RP filled so far */
+ int chunk_nbits;
+ mp_ptr tp;
+ mp_size_t tn;
+ gmp_rand_lc_struct *p;
+ TMP_DECL;
+
+ p = (gmp_rand_lc_struct *) RNG_STATE (rstate);
+
+ TMP_MARK;
+
+ chunk_nbits = p->_mp_m2exp / 2; /* bits consumed from each lc() call */
+ tn = BITS_TO_LIMBS (chunk_nbits);
+
+ tp = TMP_ALLOC_LIMBS (tn);
+
+ rbitpos = 0;
+ while (rbitpos + chunk_nbits <= nbits)
+ {
+ mp_ptr r2p = rp + rbitpos / GMP_NUMB_BITS;
+
+ if (rbitpos % GMP_NUMB_BITS != 0)
+ {
+ mp_limb_t savelimb, rcy;
+ /* Target of new chunk is not bit aligned. Use temp space
+ and align things by shifting it up. */
+ lc (tp, rstate);
+ savelimb = r2p[0]; /* bits already stored in the shared low limb */
+ rcy = mpn_lshift (r2p, tp, tn, rbitpos % GMP_NUMB_BITS);
+ r2p[0] |= savelimb;
+ /* bogus */
+ if ((chunk_nbits % GMP_NUMB_BITS + rbitpos % GMP_NUMB_BITS)
+ > GMP_NUMB_BITS)
+ r2p[tn] = rcy;
+ }
+ else
+ {
+ /* Target of new chunk is bit aligned. Let `lc' put bits
+ directly into our target variable. */
+ lc (r2p, rstate);
+ }
+ rbitpos += chunk_nbits;
+ }
+
+ /* Handle last [0..chunk_nbits) bits. */
+ if (rbitpos != nbits)
+ {
+ mp_ptr r2p = rp + rbitpos / GMP_NUMB_BITS;
+ int last_nbits = nbits - rbitpos;
+ tn = BITS_TO_LIMBS (last_nbits);
+ lc (tp, rstate);
+ if (rbitpos % GMP_NUMB_BITS != 0)
+ {
+ mp_limb_t savelimb, rcy;
+ /* Target of new chunk is not bit aligned. Use temp space
+ and align things by shifting it up. */
+ savelimb = r2p[0];
+ rcy = mpn_lshift (r2p, tp, tn, rbitpos % GMP_NUMB_BITS);
+ r2p[0] |= savelimb;
+ if (rbitpos + tn * GMP_NUMB_BITS - rbitpos % GMP_NUMB_BITS < nbits)
+ r2p[tn] = rcy;
+ }
+ else
+ {
+ MPN_COPY (r2p, tp, tn);
+ }
+ /* Mask off top bits if needed. */
+ if (nbits % GMP_NUMB_BITS != 0)
+ rp[nbits / GMP_NUMB_BITS]
+ &= ~(~CNST_LIMB (0) << nbits % GMP_NUMB_BITS);
+ }
+
+ TMP_FREE;
+}
+
+
+/* randseed_fn for the LC generator: set the stored seed to SEED reduced
+   modulo 2^m2exp.  The seed is kept unnormalized, zero-padded to exactly
+   BITS_TO_LIMBS (m2exp) limbs, so its size can never be zero.  */
+static void
+randseed_lc (gmp_randstate_t rstate, mpz_srcptr seed)
+{
+  gmp_rand_lc_struct *lcg = (gmp_rand_lc_struct *) RNG_STATE (rstate);
+  mpz_ptr cur = lcg->_mp_seed;
+  mp_size_t full = BITS_TO_LIMBS (lcg->_mp_m2exp);
+  mp_size_t used;
+
+  mpz_fdiv_r_2exp (cur, seed, lcg->_mp_m2exp);
+
+  /* Zero the limbs above the normalized value, then claim all of them.  */
+  used = SIZ (cur);
+  MPN_ZERO (PTR (cur) + used, full - used);
+  SIZ (cur) = full;
+}
+
+
+/* randclear_fn for the LC generator: clear the two mpz_t members, then
+   hand the state structure itself back through __gmp_free_func.  */
+static void
+randclear_lc (gmp_randstate_t rstate)
+{
+  gmp_rand_lc_struct *lcg;
+
+  lcg = (gmp_rand_lc_struct *) RNG_STATE (rstate);
+  mpz_clear (lcg->_mp_seed);
+  mpz_clear (lcg->_mp_a);
+  (*__gmp_free_func) (lcg, sizeof (*lcg));
+}
+
+static void randiset_lc (gmp_randstate_ptr, gmp_randstate_srcptr); /* defined further down; declared here for the table */
+
+static const gmp_randfnptr_t Linear_Congruential_Generator = { /* function table stored into RNG_FNPTR by the LC initializers */
+ randseed_lc,
+ randget_lc,
+ randclear_lc,
+ randiset_lc
+};
+
+/* randiset_fn for the LC generator: give DST its own newly allocated
+   gmp_rand_lc_struct holding a copy of every field of SRC's state, and
+   point DST at the LC function table.  */
+static void
+randiset_lc (gmp_randstate_ptr dst, gmp_randstate_srcptr src)
+{
+  gmp_rand_lc_struct *from = (gmp_rand_lc_struct *) RNG_STATE (src);
+  gmp_rand_lc_struct *to;
+  int i;
+
+  to = (gmp_rand_lc_struct *) (*__gmp_allocate_func) (sizeof (gmp_rand_lc_struct));
+
+  RNG_STATE (dst) = (mp_limb_t *) (void *) to;
+  RNG_FNPTR (dst) = (void *) &Linear_Congruential_Generator;
+
+  /* The seed and the multiplier may be unnormalized (high zero limbs),
+     but mpz_init_set won't worry about that.  */
+  mpz_init_set (to->_mp_seed, from->_mp_seed);
+  mpz_init_set (to->_mp_a, from->_mp_a);
+
+  to->_cn = from->_cn;
+
+  /* Copy the whole addend array; usually that is only 1 or 2 limbs.  */
+  for (i = 0; i < LIMBS_PER_ULONG; i++)
+    to->_cp[i] = from->_cp[i];
+
+  to->_mp_m2exp = from->_mp_m2exp;
+}
+
+
+/* Initialize RSTATE as a linear congruential generator with multiplier A,
+   addend C and modulus 2^M2EXP.  M2EXP must be non-zero (checked with
+   ASSERT_ALWAYS).  The initial seed is 1.  */
+void
+gmp_randinit_lc_2exp (gmp_randstate_t rstate,
+                      mpz_srcptr a,
+                      unsigned long int c,
+                      mp_bitcnt_t m2exp)
+{
+  gmp_rand_lc_struct *lcg;
+  mp_size_t nlimbs = BITS_TO_LIMBS (m2exp);
+
+  ASSERT_ALWAYS (m2exp != 0);
+
+  lcg = __GMP_ALLOCATE_FUNC_TYPE (1, gmp_rand_lc_struct);
+  RNG_STATE (rstate) = (mp_limb_t *) (void *) lcg;
+  RNG_FNPTR (rstate) = (void *) &Linear_Congruential_Generator;
+
+  /* Seed: m2exp bits of space, all limbs zero except a low limb of 1,
+     with the size pinned at the full limb count.  */
+  mpz_init2 (lcg->_mp_seed, m2exp);
+  MPN_ZERO (PTR (lcg->_mp_seed), nlimbs);
+  PTR (lcg->_mp_seed)[0] = 1;
+  SIZ (lcg->_mp_seed) = nlimbs;
+
+  /* Multiplier, forced into the range 0 to 2^m2exp-1.  */
+  mpz_init (lcg->_mp_a);
+  mpz_fdiv_r_2exp (lcg->_mp_a, a, m2exp);
+
+  /* Hold a zero multiplier as a single zero limb so that lc() needs no
+     special case for SIZ == 0.  */
+  if (SIZ (lcg->_mp_a) == 0)
+    {
+      PTR (lcg->_mp_a)[0] = CNST_LIMB (0);
+      SIZ (lcg->_mp_a) = 1;
+    }
+
+  MPN_SET_UI (lcg->_cp, lcg->_cn, c);
+
+  /* Bits of c above m2exp may be discarded.  Shrinking _cn here keeps
+     the __GMPN_ADD in lc() within its precondition.  */
+  if (nlimbs < lcg->_cn)
+    lcg->_cn = (lcg->_cp[0] != 0);
+
+  lcg->_mp_m2exp = m2exp;
+}
--- /dev/null
+/* Mersenne Twister pseudo-random number generator functions.
+
+ THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY. THEY'RE ALMOST
+ CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
+ FUTURE GNU MP RELEASES.
+
+Copyright 2002, 2003, 2006 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include <stdio.h> /* for NULL */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "randmt.h"
+
+
+/* This code implements the Mersenne Twister pseudorandom number generator
+ by Takuji Nishimura and Makoto Matsumoto. The buffer initialization
+ function is different in order to permit seeds greater than 2^32-1.
+
+ This file contains a special __gmp_randinit_mt_noseed which excludes the
+ seeding function from the gmp_randfnptr_t routines. This is for use by
+ mpn_random and mpn_random2 on the global random generator. MT seeding
+ uses mpz functions, and we don't want mpn routines dragging mpz functions
+ into the link. */
+
+
+/* Default seed to use when the generator is not initialized.
+ NOTE(review): not referenced by the code visible in this file; presumably
+ it documents the seed behind default_state -- confirm. */
+#define DEFAULT_SEED 5489 /* was 4357 */
+
+/* Tempering masks, and-ed into the shifted value by NEXT_RANDOM
+ (MASK_1 with y << 7, MASK_2 with y << 15). */
+#define MASK_1 0x9D2C5680
+#define MASK_2 0xEFC60000
+
+/* Initial contents of the mt[] buffer for the default seed. It is copied
+ verbatim by __gmp_randinit_mt_noseed, so no seeding code has to run. */
+static const gmp_uint_least32_t default_state[N] =
+{
+ 0xD247B233,0x9E5AA8F1,0x0FFA981B,0x9DCB0980,0x74200F2B,0xA576D044,
+ 0xE9F05ADF,0x1538BFF5,0x59818BBF,0xCF9E58D8,0x09FCE032,0x6A1C663F,
+ 0x5116E78A,0x69B3E0FA,0x6D92D665,0xD0A8BE98,0xF669B734,0x41AC1B68,
+ 0x630423F1,0x4B8D6B8A,0xC2C46DD7,0x5680747D,0x43703E8F,0x3B6103D2,
+ 0x49E5EB3F,0xCBDAB4C1,0x9C988E23,0x747BEE0B,0x9111E329,0x9F031B5A,
+ 0xECCA71B9,0x2AFE4EF8,0x8421C7ED,0xAC89AFF1,0xAED90DF3,0x2DD74F01,
+ 0x14906A13,0x75873FA9,0xFF83F877,0x5028A0C9,0x11B4C41D,0x7CAEDBC4,
+ 0x8672D0A7,0x48A7C109,0x8320E59F,0xBC0B3D5F,0x75A30886,0xF9E0D128,
+ 0x41AF7580,0x239BB94D,0xC67A3C81,0x74EEBD6E,0xBC02B53C,0x727EA449,
+ 0x6B8A2806,0x5853B0DA,0xBDE032F4,0xCE234885,0x320D6145,0x48CC053F,
+ 0x00DBC4D2,0xD55A2397,0xE1059B6F,0x1C3E05D1,0x09657C64,0xD07CB661,
+ 0x6E982E34,0x6DD1D777,0xEDED1071,0xD79DFD65,0xF816DDCE,0xB6FAF1E4,
+ 0x1C771074,0x311835BD,0x18F952F7,0xF8F40350,0x4ECED354,0x7C8AC12B,
+ 0x31A9994D,0x4FD47747,0xDC227A23,0x6DFAFDDF,0x6796E748,0x0C6F634F,
+ 0xF992FA1D,0x4CF670C9,0x067DFD31,0xA7A3E1A5,0x8CD7D9DF,0x972CCB34,
+ 0x67C82156,0xD548F6A8,0x045CEC21,0xF3240BFB,0xDEF656A7,0x43DE08C5,
+ 0xDAD1F92F,0x3726C56B,0x1409F19A,0x942FD147,0xB926749C,0xADDC31B8,
+ 0x53D0D869,0xD1BA52FE,0x6722DF8C,0x22D95A74,0x7DC1B52A,0x1DEC6FD5,
+ 0x7262874D,0x0A725DC9,0xE6A8193D,0xA052835A,0xDC9AD928,0xE59EBB90,
+ 0x70DBA9FF,0xD612749D,0x5A5A638C,0x6086EC37,0x2A579709,0x1449EA3A,
+ 0xBC8E3C06,0x2F900666,0xFBE74FD1,0x6B35B911,0xF8335008,0xEF1E979D,
+ 0x738AB29D,0xA2DC0FDC,0x7696305D,0xF5429DAC,0x8C41813B,0x8073E02E,
+ 0xBEF83CCD,0x7B50A95A,0x05EE5862,0x00829ECE,0x8CA1958C,0xBE4EA2E2,
+ 0x4293BB73,0x656F7B23,0x417316D8,0x4467D7CF,0x2200E63B,0x109050C8,
+ 0x814CBE47,0x36B1D4A8,0x36AF9305,0x308327B3,0xEBCD7344,0xA738DE27,
+ 0x5A10C399,0x4142371D,0x64A18528,0x0B31E8B2,0x641057B9,0x6AFC363B,
+ 0x108AD953,0x9D4DA234,0x0C2D9159,0x1C8A1A1F,0x310C66BA,0x87AA1070,
+ 0xDAC832FF,0x0A433422,0x7AF15812,0x2D8D9BD0,0x995A25E9,0x25326CAC,
+ 0xA34384DB,0x4C8421CC,0x4F0315EC,0x29E8649E,0xA7732D6F,0x2E94D3E3,
+ 0x7D98A340,0x397C4D74,0x659DB4DE,0x747D4E9A,0xD9DB8435,0x4659DBE9,
+ 0x313E6DC5,0x29D104DC,0x9F226CBA,0x452F18B0,0xD0BC5068,0x844CA299,
+ 0x782B294E,0x4AE2EB7B,0xA4C475F8,0x70A81311,0x4B3E8BCC,0x7E20D4BA,
+ 0xABCA33C9,0x57BE2960,0x44F9B419,0x2E567746,0x72EB757A,0x102CC0E8,
+ 0xB07F32B9,0xD0DABD59,0xBA85AD6B,0xF3E20667,0x98D77D81,0x197AFA47,
+ 0x518EE9AC,0xE10CE5A2,0x01CF2C2A,0xD3A3AF3D,0x16DDFD65,0x669232F8,
+ 0x1C50A301,0xB93D9151,0x9354D3F4,0x847D79D0,0xD5FE2EC6,0x1F7B0610,
+ 0xFA6B90A5,0xC5879041,0x2E7DC05E,0x423F1F32,0xEF623DDB,0x49C13280,
+ 0x98714E92,0xC7B6E4AD,0xC4318466,0x0737F312,0x4D3C003F,0x9ACC1F1F,
+ 0x5F1C926D,0x085FA771,0x185A83A2,0xF9AA159D,0x0B0B0132,0xF98E7A43,
+ 0xCD9EBDBE,0x0190CB29,0x10D93FB6,0x3B8A4D97,0x66A65A41,0xE43E766F,
+ 0x77BE3C41,0xB9686364,0xCB36994D,0x6846A287,0x567E77F7,0x36178DD8,
+ 0xBDE6B1F2,0xB6EFDC64,0x82950324,0x42053F47,0xC09BE51C,0x0942D762,
+ 0x35F92C7F,0x367DEC61,0x6EE3D983,0xDBAAF78A,0x265D2C47,0x8EB4BF5C,
+ 0x33B232D7,0xB0137E77,0x373C39A7,0x8D2B2E76,0xC7510F01,0x50F9E032,
+ 0x7B1FDDDB,0x724C2AAE,0xB10ECB31,0xCCA3D1B8,0x7F0BCF10,0x4254BBBD,
+ 0xE3F93B97,0x2305039B,0x53120E22,0x1A2F3B9A,0x0FDDBD97,0x0118561E,
+ 0x0A798E13,0x9E0B3ACD,0xDB6C9F15,0xF512D0A2,0x9E8C3A28,0xEE2184AE,
+ 0x0051EC2F,0x2432F74F,0xB0AA66EA,0x55128D88,0xF7D83A38,0x4DAE8E82,
+ 0x3FDC98D6,0x5F0BD341,0x7244BE1D,0xC7B48E78,0x2D473053,0x43892E20,
+ 0xBA0F1F2A,0x524D4895,0x2E10BCB1,0x4C372D81,0x5C3E50CD,0xCF61CC2E,
+ 0x931709AB,0x81B3AEFC,0x39E9405E,0x7FFE108C,0x4FBB3FF8,0x06ABE450,
+ 0x7F5BF51E,0xA4E3CDFD,0xDB0F6C6F,0x159A1227,0x3B9FED55,0xD20B6F7F,
+ 0xFBE9CC83,0x64856619,0xBF52B8AF,0x9D7006B0,0x71165BC6,0xAE324AEE,
+ 0x29D27F2C,0x794C2086,0x74445CE2,0x782915CC,0xD4CE6886,0x3289AE7C,
+ 0x53DEF297,0x4185F7ED,0x88B72400,0x3C09DC11,0xBCE3AAB6,0x6A75934A,
+ 0xB267E399,0x000DF1BF,0x193BA5E2,0xFA3E1977,0x179E14F6,0x1EEDE298,
+ 0x691F0B06,0xB84F78AC,0xC1C15316,0xFFFF3AD6,0x0B457383,0x518CD612,
+ 0x05A00F3E,0xD5B7D275,0x4C5ECCD7,0xE02CD0BE,0x5558E9F2,0x0C89BBF0,
+ 0xA3D96227,0x2832D2B2,0xF667B897,0xD4556554,0xF9D2F01F,0xFA1E3FAE,
+ 0x52C2E1EE,0xE5451F31,0x7E849729,0xDABDB67A,0x54BF5E7E,0xF831C271,
+ 0x5F1A17E3,0x9D140AFE,0x92741C47,0x48CFABCE,0x9CBBE477,0x9C3EE57F,
+ 0xB07D4C39,0xCC21BCE2,0x697708B1,0x58DA2A6B,0x2370DB16,0x6E641948,
+ 0xACC5BD52,0x868F24CC,0xCA1DB0F5,0x4CADA492,0x3F443E54,0xC4A4D5E9,
+ 0xF00AD670,0xE93C86E0,0xFE90651A,0xDDE532A3,0xA66458DF,0xAB7D7151,
+ 0x0E2E775F,0xC9109F99,0x8D96D59F,0x73CEF14C,0xC74E88E9,0x02712DC0,
+ 0x04F41735,0x2E5914A2,0x59F4B2FB,0x0287FC83,0x80BC0343,0xF6B32559,
+ 0xC74178D4,0xF1D99123,0x383CCC07,0xACC0637D,0x0863A548,0xA6FCAC85,
+ 0x2A13EFF0,0xAF2EEDB1,0x41E72750,0xE0C6B342,0x5DA22B46,0x635559E0,
+ 0xD2EA40AC,0x10AA98C0,0x19096497,0x112C542B,0x2C85040C,0xA868E7D0,
+ 0x6E260188,0xF596D390,0xC3BB5D7A,0x7A2AA937,0xDFD15032,0x6780AE3B,
+ 0xDB5F9CD8,0x8BD266B0,0x7744AF12,0xB463B1B0,0x589629C9,0xE30DBC6E,
+ 0x880F5569,0x209E6E16,0x9DECA50C,0x02987A57,0xBED3EA57,0xD3A678AA,
+ 0x70DD030D,0x0CFD9C5D,0x92A18E99,0xF5740619,0x7F6F0A7D,0x134CAF9A,
+ 0x70F5BAE4,0x23DCA7B5,0x4D788FCD,0xC7F07847,0xBCF77DA1,0x9071D568,
+ 0xFC627EA1,0xAE004B77,0x66B54BCB,0x7EF2DAAC,0xDCD5AC30,0xB9BDF730,
+ 0x505A97A7,0x9D881FD3,0xADB796CC,0x94A1D202,0x97535D7F,0x31EC20C0,
+ 0xB1887A98,0xC1475069,0xA6F73AF3,0x71E4E067,0x46A569DE,0xD2ADE430,
+ 0x6F0762C7,0xF50876F4,0x53510542,0x03741C3E,0x53502224,0xD8E54D60,
+ 0x3C44AB1A,0x34972B46,0x74BFA89D,0xD7D768E0,0x37E605DC,0xE13D1BDF,
+ 0x5051C421,0xB9E057BE,0xB717A14C,0xA1730C43,0xB99638BE,0xB5D5F36D,
+ 0xE960D9EA,0x6B1388D3,0xECB6D3B6,0xBDBE8B83,0x2E29AFC5,0x764D71EC,
+ 0x4B8F4F43,0xC21DDC00,0xA63F657F,0x82678130,0xDBF535AC,0xA594FC58,
+ 0x942686BC,0xBD9B657B,0x4A0F9B61,0x44FF184F,0x38E10A2F,0x61910626,
+ 0x5E247636,0x7106D137,0xC62802F0,0xBD1D1F00,0x7CC0DCB2,0xED634909,
+ 0xDC13B24E,0x9799C499,0xD77E3D6A,0x14773B68,0x967A4FB7,0x35EECFB1,
+ 0x2A5110B8,0xE2F0AF94,0x9D09DEA5,0x20255D27,0x5771D34B,0xE1089EE4,
+ 0x246F330B,0x8F7CAEE5,0xD3064712,0x75CAFBEE,0xB94F7028,0xED953666,
+ 0x5D1975B4,0x5AF81271,0x13BE2025,0x85194659,0x30805331,0xEC9D46C0,
+ 0xBC027C36,0x2AF84188,0xC2141B80,0xC02B1E4A,0x04D36177,0xFC50E9D7,
+ 0x39CE79DA,0x917E0A00,0xEF7A0BF4,0xA98BD8D1,0x19424DD2,0x9439DF1F,
+ 0xC42AF746,0xADDBE83E,0x85221F0D,0x45563E90,0x9095EC52,0x77887B25,
+ 0x8AE46064,0xBD43B71A,0xBB541956,0x7366CF9D,0xEE8E1737,0xB5A727C9,
+ 0x5076B3E7,0xFC70BACA,0xCE135B75,0xC4E91AA3,0xF0341911,0x53430C3F,
+ 0x886B0824,0x6BB5B8B7,0x33E21254,0xF193B456,0x5B09617F,0x215FFF50,
+ 0x48D97EF1,0x356479AB,0x6EA9DDC4,0x0D352746,0xA2F5CE43,0xB226A1B3,
+ 0x1329EA3C,0x7A337CC2,0xB5CCE13D,0x563E3B5B,0x534E8E8F,0x561399C9,
+ 0xE1596392,0xB0F03125,0x4586645B,0x1F371847,0x94EAABD1,0x41F97EDD,
+ 0xE3E5A39B,0x71C774E2,0x507296F4,0x5960133B,0x7852C494,0x3F5B2691,
+ 0xA3F87774,0x5A7AF89E,0x17DA3F28,0xE9D9516D,0xFCC1C1D5,0xE4618628,
+ 0x04081047,0xD8E4DB5F,0xDC380416,0x8C4933E2,0x95074D53,0xB1B0032D,
+ 0xCC8102EA,0x71641243,0x98D6EB6A,0x90FEC945,0xA0914345,0x6FAB037D,
+ 0x70F49C4D,0x05BF5B0E,0x927AAF7F,0xA1940F61,0xFEE0756F,0xF815369F,
+ 0x5C00253B,0xF2B9762F,0x4AEB3CCC,0x1069F386,0xFBA4E7B9,0x70332665,
+ 0x6BCA810E,0x85AB8058,0xAE4B2B2F,0x9D120712,0xBEE8EACB,0x776A1112
+};
+
+/* Regenerate all N words of the state array MT in place.  Word i is
+   rebuilt from the top bit of mt[i], the low 31 bits of the following
+   word and the word M places further on, every index being taken
+   modulo N.  Words are processed in increasing order, so wrapped-around
+   indices read values already produced by this pass.  */
+void
+__gmp_mt_recalc_buffer (gmp_uint_least32_t mt[])
+{
+  int i;
+
+  for (i = 0; i < N; i++)
+    {
+      int succ = (i + 1) % N;
+      int src = (i + M) % N;
+      gmp_uint_least32_t mixed;
+      gmp_uint_least32_t twist;
+
+      mixed = (mt[i] & 0x80000000) | (mt[succ] & 0x7FFFFFFF);
+      twist = mixed >> 1;
+      if ((mixed & 0x01) != 0)
+        twist ^= MATRIX_A;
+      mt[i] = mt[src] ^ twist;
+    }
+}
+
+
+/* Get nbits bits of output from the generator into dest.
+ Note that Mersenne Twister is designed to produce outputs in
+ 32-bit words.
+
+ The whole limbs dest[0] .. dest[nlimbs-1] are filled first. When nbits is
+ not a multiple of GMP_NUMB_BITS, one more limb dest[nlimbs] receives the
+ remaining bits, with its bits above that count left at zero. */
+void
+__gmp_randget_mt (gmp_randstate_t rstate, mp_ptr dest, unsigned long int nbits)
+{
+ gmp_uint_least32_t y; /* receives each output of NEXT_RANDOM */
+ int rbits;
+ mp_size_t i;
+ mp_size_t nlimbs;
+ int *pmti;
+ gmp_uint_least32_t *mt;
+
+ pmti = &((gmp_rand_mt_struct *) RNG_STATE (rstate))->mti;
+ mt = ((gmp_rand_mt_struct *) RNG_STATE (rstate))->mt;
+
+ nlimbs = nbits / GMP_NUMB_BITS; /* whole limbs to fill */
+ rbits = nbits % GMP_NUMB_BITS; /* bits wanted in the final partial limb */
+
+#define NEXT_RANDOM /* next tempered word into y; refills the buffer once the index reaches N */ \
+ do \
+ { \
+ if (*pmti >= N) \
+ { \
+ __gmp_mt_recalc_buffer (mt); \
+ *pmti = 0; \
+ } \
+ y = mt[(*pmti)++]; \
+ y ^= (y >> 11); \
+ y ^= (y << 7) & MASK_1; \
+ y ^= (y << 15) & MASK_2; \
+ y ^= (y >> 18); \
+ } \
+ while (0)
+
+
+ /* Handle the common cases of 32- or 64-bit limbs with fast,
+ optimized routines, and the rest of cases with a general
+ routine. In all cases, no more than 31 bits are rejected
+ for the last limb so that every version of the code is
+ consistent with the others. */
+
+#if (GMP_NUMB_BITS == 32)
+
+ for (i = 0; i < nlimbs; i++)
+ {
+ NEXT_RANDOM;
+ dest[i] = (mp_limb_t) y;
+ }
+ if (rbits)
+ {
+ NEXT_RANDOM;
+ dest[nlimbs] = (mp_limb_t) (y & ~(ULONG_MAX << rbits));
+ }
+
+#else /* GMP_NUMB_BITS != 32 */
+#if (GMP_NUMB_BITS == 64)
+
+ for (i = 0; i < nlimbs; i++)
+ {
+ NEXT_RANDOM;
+ dest[i] = (mp_limb_t) y; /* first output is the low half */
+ NEXT_RANDOM;
+ dest[i] |= (mp_limb_t) y << 32; /* second output is the high half */
+ }
+ if (rbits)
+ {
+ if (rbits < 32)
+ {
+ NEXT_RANDOM;
+ dest[nlimbs] = (mp_limb_t) (y & ~(ULONG_MAX << rbits));
+ }
+ else
+ {
+ NEXT_RANDOM;
+ dest[nlimbs] = (mp_limb_t) y;
+ if (rbits > 32)
+ {
+ NEXT_RANDOM;
+ dest[nlimbs] |=
+ ((mp_limb_t) (y & ~(ULONG_MAX << (rbits-32)))) << 32;
+ }
+ }
+ }
+
+#else /* GMP_NUMB_BITS != 64 */
+
+ {
+ /* Fall back to a general algorithm. This algorithm works by
+ keeping a pool of up to 64 bits (2 outputs from MT) acting
+ as a shift register from which bits are consumed as needed.
+ Bits are consumed using the LSB bits of bitpool_l, and
+ inserted via bitpool_h and shifted to the right place. */
+
+ gmp_uint_least32_t bitpool_h = 0;
+ gmp_uint_least32_t bitpool_l = 0;
+ int bits_in_pool = 0; /* Holds number of valid bits in the pool. */
+ int bits_to_fill; /* Holds total number of bits to put in
+ destination. */
+ int bitidx; /* Holds the destination bit position. */
+ mp_size_t nlimbs2; /* Number of whole+partial limbs to fill. */
+
+ nlimbs2 = nlimbs + (rbits != 0);
+
+ for (i = 0; i < nlimbs2; i++)
+ {
+ bitidx = 0;
+ if (i < nlimbs)
+ bits_to_fill = GMP_NUMB_BITS;
+ else
+ bits_to_fill = rbits;
+
+ dest[i] = CNST_LIMB (0);
+ while (bits_to_fill >= 32) /* Process whole 32-bit blocks first. */
+ {
+ if (bits_in_pool < 32) /* Need more bits. */
+ {
+ /* 64-bit right shift. */
+ NEXT_RANDOM;
+ bitpool_h = y;
+ bitpool_l |= (bitpool_h << bits_in_pool) & 0xFFFFFFFF;
+ if (bits_in_pool == 0)
+ bitpool_h = 0;
+ else
+ bitpool_h >>= 32 - bits_in_pool;
+ bits_in_pool += 32; /* We've got 32 more bits. */
+ }
+
+ /* Fill a 32-bit chunk. */
+ dest[i] |= ((mp_limb_t) bitpool_l) << bitidx;
+ bitpool_l = bitpool_h;
+ bits_in_pool -= 32;
+ bits_to_fill -= 32;
+ bitidx += 32;
+ }
+
+ /* Cover the case where GMP_NUMB_BITS is not a multiple of 32. */
+ if (bits_to_fill != 0)
+ {
+ if (bits_in_pool < bits_to_fill)
+ {
+ NEXT_RANDOM;
+ bitpool_h = y;
+ bitpool_l |= (bitpool_h << bits_in_pool) & 0xFFFFFFFF;
+ if (bits_in_pool == 0)
+ bitpool_h = 0;
+ else
+ bitpool_h >>= 32 - bits_in_pool;
+ bits_in_pool += 32;
+ }
+
+ dest[i] |= (((mp_limb_t) bitpool_l
+ & ~(~CNST_LIMB (0) << bits_to_fill))
+ << bitidx);
+ bitpool_l = ((bitpool_l >> bits_to_fill)
+ | (bitpool_h << (32 - bits_to_fill))) & 0xFFFFFFFF;
+ bitpool_h >>= bits_to_fill;
+ bits_in_pool -= bits_to_fill;
+ }
+ }
+ }
+
+#endif /* GMP_NUMB_BITS != 64 */
+#endif /* GMP_NUMB_BITS != 32 */
+}
+
+/* Free the Mersenne Twister state block of RSTATE.  Its size in limbs was
+   recorded in ALLOC (rstate->_mp_seed) when the block was allocated.  */
+void
+__gmp_randclear_mt (gmp_randstate_t rstate)
+{
+  void *state = (void *) RNG_STATE (rstate);
+  size_t nbytes = ALLOC (rstate->_mp_seed) * BYTES_PER_MP_LIMB;
+
+  (*__gmp_free_func) (state, nbytes);
+}
+
+void __gmp_randiset_mt (gmp_randstate_ptr, gmp_randstate_srcptr); /* defined below; declared here for the table */
+
+static const gmp_randfnptr_t Mersenne_Twister_Generator_Noseed = {
+ NULL, /* no seeding function: this variant deliberately leaves it out (see the comment near the top of this file) */
+ __gmp_randget_mt,
+ __gmp_randclear_mt,
+ __gmp_randiset_mt
+};
+
+/* Make DST a Mersenne Twister state equal to SRC: install the no-seed
+   function table, allocate a fresh gmp_rand_mt_struct (rounded up to a
+   whole number of limbs) and copy the buffer and its index across.  */
+void
+__gmp_randiset_mt (gmp_randstate_ptr dst, gmp_randstate_srcptr src)
+{
+  const mp_size_t nlimbs = ((sizeof (gmp_rand_mt_struct) - 1) / BYTES_PER_MP_LIMB) + 1;
+  gmp_rand_mt_struct *to, *from;
+  mp_size_t k;
+
+  RNG_FNPTR (dst) = (void *) &Mersenne_Twister_Generator_Noseed;
+
+  to = (gmp_rand_mt_struct *) __GMP_ALLOCATE_FUNC_LIMBS (nlimbs);
+  RNG_STATE (dst) = (mp_ptr) to;
+  /* Record the allocation size; __gmp_randclear_mt frees by it.  */
+  ALLOC (dst->_mp_seed) = nlimbs;
+
+  from = (gmp_rand_mt_struct *) RNG_STATE (src);
+  k = 0;
+  while (k < N)
+    {
+      to->mt[k] = from->mt[k];
+      k++;
+    }
+  to->mti = from->mti;
+}
+
+/* Set DST up as a Mersenne Twister generator preloaded with the buffer
+   for the default seed, using the function table that has no seeding
+   entry.  */
+void
+__gmp_randinit_mt_noseed (gmp_randstate_ptr dst)
+{
+  /* Limbs required to hold one gmp_rand_mt_struct, rounded up.  */
+  const mp_size_t state_limbs
+    = 1 + (sizeof (gmp_rand_mt_struct) - 1) / BYTES_PER_MP_LIMB;
+  gmp_rand_mt_struct *st;
+  const gmp_uint_least32_t *from;
+  gmp_uint_least32_t *to;
+  mp_size_t left;
+
+  /* Install the generator functions.  */
+  RNG_FNPTR (dst) = (void *) &Mersenne_Twister_Generator_Noseed;
+
+  /* Get storage for the MT-specific state and hook it into DST.  The
+     alloc field records the limb count; __gmp_randclear_mt reads it back
+     to size the free call.  */
+  st = (gmp_rand_mt_struct *) __GMP_ALLOCATE_FUNC_LIMBS (state_limbs);
+  RNG_STATE (dst) = (mp_ptr) st;
+  ALLOC (dst->_mp_seed) = state_limbs;
+
+  /* Load the precomputed buffer for the default seed.  */
+  from = default_state;
+  to = st->mt;
+  for (left = N; left > 0; left--)
+    *to++ = *from++;
+
+  /* Same starting index that seeding leaves behind after its warm-up.  */
+  st->mti = WARM_UP % N;
+}
--- /dev/null
+/* Mersenne Twister pseudo-random number generator defines.
+
+Copyright 2002, 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+
+/* Number of extractions used to warm the buffer up. The seeding code
+ turns this into WARM_UP / N whole-buffer recalculations plus a starting
+ index of WARM_UP % N. */
+#define WARM_UP 2000
+
+/* Period parameters. */
+#define N 624 /* Number of elements in the state array. */
+#define M 397
+#define MATRIX_A 0x9908B0DF /* Constant vector a. */
+
+/* State structure for MT. A block of this type is what RNG_STATE points
+ at for a Mersenne Twister gmp_randstate_t. */
+typedef struct
+{
+ gmp_uint_least32_t mt[N]; /* State array. */
+ int mti; /* Index of current value. */
+} gmp_rand_mt_struct;
+
+
+/* Routines shared between the Mersenne Twister source files. */
+void __gmp_mt_recalc_buffer (gmp_uint_least32_t *);
+void __gmp_randget_mt (gmp_randstate_t, mp_ptr, unsigned long int);
+void __gmp_randclear_mt (gmp_randstate_t);
+void __gmp_randiset_mt (gmp_randstate_ptr, gmp_randstate_srcptr);
--- /dev/null
+/* Mersenne Twister pseudo-random number generator functions.
+
+Copyright 2002, 2003 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "randmt.h"
+
+
+/* Calculate (b^e) mod (2^n-k) for e=1074888996, n=19937 and k=20023,
+ needed by the seeding function below.
+
+ R receives the result and B_ORIG is the base; the two may be the same
+ variable since the base is copied first. The power is built by
+ left-to-right square-and-multiply over the bits of e. Reduction folds
+ the part of r at and above bit 19937 back in as a multiple of 20023
+ (2^19937 is congruent to 20023 modulo 2^19937-20023) and stops as soon
+ as the value fits in 19937 bits, so the result is below 2^19937 but is
+ not necessarily below the modulus itself. */
+static void
+mangle_seed (mpz_ptr r, mpz_srcptr b_orig)
+{
+ mpz_t t, b;
+ unsigned long e = 0x40118124; /* 1074888996, the exponent */
+ unsigned long bit = 0x20000000; /* bit of e just below its top set bit */
+
+ mpz_init (t);
+ mpz_init_set (b, b_orig); /* in case r==b_orig */
+
+ mpz_set (r, b); /* accounts for the top set bit of e */
+ do
+ {
+ mpz_mul (r, r, r);
+
+ reduce:
+ for (;;)
+ {
+ mpz_tdiv_q_2exp (t, r, 19937L); /* t = r / 2^19937, truncated */
+ if (mpz_sgn (t) == 0)
+ break;
+ mpz_tdiv_r_2exp (r, r, 19937L); /* r = r mod 2^19937 */
+ mpz_addmul_ui (r, t, 20023L); /* r += 20023 * t */
+ }
+
+ if ((e & bit) != 0)
+ {
+ e &= ~bit; /* so this test fails after the jump back */
+ mpz_mul (r, r, b);
+ goto reduce;
+ }
+
+ bit >>= 1;
+ }
+ while (bit != 0);
+
+ mpz_clear (t);
+ mpz_clear (b);
+}
+
+
+/* Seeding function. Uses powering modulo a non-Mersenne prime to obtain
+ a permutation of the input seed space. The modulus is 2^19937-20023,
+ which is probably prime. The power is 1074888996. In order to avoid
+ seeds 0 and 1 generating invalid or strange output, the input seed is
+ first manipulated as follows:
+
+ seed1 = seed mod (2^19937-20027) + 2
+
+ so that seed1 lies between 2 and 2^19937-20026 inclusive. Then the
+ powering is performed as follows:
+
+ seed2 = (seed1^1074888996) mod (2^19937-20023)
+
+ and then seed2 is used to bootstrap the buffer.
+
+ This method aims to give guarantees that:
+ a) seed2 will never be zero,
+ b) seed2 will very seldom have a very low population of ones in its
+ binary representation, and
+ c) every seed between 0 and 2^19937-20028 (inclusive) will yield a
+ different sequence.
+
+ CAVEATS:
+
+ The period of the seeding function is 2^19937-20027. This means that
+ with seeds 2^19937-20027, 2^19937-20026, ... the exact same sequences
+ are obtained as with seeds 0, 1, etc.; it also means that seed -1
+ produces the same sequence as seed 2^19937-20028, etc.
+ */
+
+/* Seed the Mersenne Twister state held in RSTATE from SEED: reduce SEED
+   modulo 2^19937-20027, add 2, mangle the value by powering, spread the
+   result over the state array, then warm the generator up.  */
+static void
+randseed_mt (gmp_randstate_t rstate, mpz_srcptr seed)
+{
+  gmp_rand_mt_struct *st = (gmp_rand_mt_struct *) RNG_STATE (rstate);
+  mpz_t modulus;		/* 2^19937-20027.  */
+  mpz_t mangled;		/* Seed after reduction and powering.  */
+  size_t used;			/* Elements of mt[] filled so far.  */
+  size_t idx;
+  int round;
+
+  mpz_init (modulus);
+  mpz_init (mangled);
+
+  /* modulus = 2^19937 - 20027.  */
+  mpz_set_ui (modulus, 0L);
+  mpz_setbit (modulus, 19937L);
+  mpz_sub_ui (modulus, modulus, 20027L);
+
+  /* mangled = mangle_seed ((seed mod modulus) + 2).  */
+  mpz_mod (mangled, seed, modulus);
+  mpz_add_ui (mangled, mangled, 2L);
+  mangle_seed (mangled, mangled);
+
+  /* Bit 19936 is moved into bit 31 of mt[0] and removed from the value.  */
+  if (mpz_tstbit (mangled, 19936L) != 0)
+    st->mt[0] = 0x80000000;
+  else
+    st->mt[0] = 0;
+  mpz_clrbit (mangled, 19936L);
+
+  /* The remaining bits go into mt[1] onwards, 32 bits per element; any
+     extra bits in an element wider than 32 are declared as nails.  */
+  mpz_export (&st->mt[1], &used, -1, sizeof (st->mt[1]), 0,
+	      8 * sizeof (st->mt[1]) - 32, mangled);
+  used++;			/* Count mt[0] as well.  */
+  ASSERT (used <= N);
+
+  /* Zero whatever part of the array the export did not reach.  */
+  for (idx = used; idx < N; idx++)
+    st->mt[idx] = 0;
+
+  mpz_clear (modulus);
+  mpz_clear (mangled);
+
+  /* Warm-up: WARM_UP / N whole-buffer recalculations, with the remainder
+     taken up by the starting index.  */
+  for (round = WARM_UP / N; round > 0; round--)
+    __gmp_mt_recalc_buffer (st->mt);
+
+  st->mti = WARM_UP % N;
+}
+
+
+/* Function table for a seedable Mersenne Twister state. It is installed
+ by gmp_randinit_mt and supplies randseed_mt as the seeding function;
+ the other three entries are the shared __gmp_*_mt routines. */
+static const gmp_randfnptr_t Mersenne_Twister_Generator = {
+ randseed_mt,
+ __gmp_randget_mt,
+ __gmp_randclear_mt,
+ __gmp_randiset_mt
+};
+
+/* Initialize MT-specific data. The state is first set up by
+ __gmp_randinit_mt_noseed, then the function table is replaced by the
+ one in this file, which includes the seeding function. */
+void
+gmp_randinit_mt (gmp_randstate_t rstate)
+{
+ __gmp_randinit_mt_noseed (rstate);
+ RNG_FNPTR (rstate) = (void *) &Mersenne_Twister_Generator; /* replaces the table set by the call above */
+}
--- /dev/null
+/* gmp_urandomm_ui -- uniform random number 0 to N-1 for ulong N.
+
+Copyright 2003, 2004 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+
+
+/* If n is a power of 2 then the test ret<n is always true and the loop is
+ unnecessary, but there's no need to add special code for this. Just get
+ the "bits" calculation correct and let it go through normally.
+
+ If n is 1 then will have bits==0 and _gmp_rand will produce no output and
+ we always return 0. Again there seems no need for a special case, just
+ initialize a[0]=0 and let it go through normally. */
+
+/* Upper bound on the number of draws attempted before giving up on
+   rejection and falling back to a subtraction.  */
+#define MAX_URANDOMM_ITER 80
+
+/* Return a random value in the range 0 to N-1 inclusive, drawn from
+   RSTATE.  N == 0 raises DIVIDE_BY_ZERO.  */
+unsigned long
+gmp_urandomm_ui (gmp_randstate_ptr rstate, unsigned long n)
+{
+  mp_limb_t limbs[LIMBS_PER_ULONG];
+  unsigned long candidate, nbits, lz;
+  int attempt;
+
+  if (UNLIKELY (n == 0))
+    DIVIDE_BY_ZERO;
+
+  /* Clear the buffer up front: _gmp_rand stores nothing at all when
+     nbits==0 (which arises when n==1), and only limbs[0] when
+     nbits <= GMP_NUMB_BITS.  */
+  limbs[0] = 0;
+#if LIMBS_PER_ULONG > 1
+  limbs[1] = 0;
+#endif
+
+  /* Number of random bits to request per draw; one fewer when n is a
+     power of 2.  */
+  count_leading_zeros (lz, (mp_limb_t) n);
+  nbits = GMP_LIMB_BITS - lz;
+  if (POW2_P (n))
+    nbits--;
+
+  for (attempt = 0; attempt < MAX_URANDOMM_ITER; attempt++)
+    {
+      _gmp_rand (limbs, rstate, nbits);
+#if LIMBS_PER_ULONG == 1
+      candidate = limbs[0];
+#else
+      candidate = limbs[0] | (limbs[1] << GMP_NUMB_BITS);
+#endif
+      if (LIKELY (candidate < n))	/* usually one iteration suffices */
+	return candidate;
+    }
+
+  /* Too many iterations, there must be something degenerate about the
+     rstate algorithm.  The last draw is >= n, so bring it into range by
+     subtracting n once.  */
+  candidate -= n;
+  ASSERT (candidate < n);
+  return candidate;
+}
--- /dev/null
+/* __gmp_rands -- global random state for old-style random functions.
+
+ EVERYTHING IN THIS FILE IS FOR INTERNAL USE ONLY. IT'S ALMOST CERTAIN TO
+ BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN FUTURE GNU
+ MP RELEASES. */
+
+/*
+Copyright 2001 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+
+/* Use this via the RANDS macro in gmp-impl.h */
+char __gmp_rands_initialized = 0; /* starts out 0; presumably set once __gmp_rands has been initialized (RANDS is not visible here) */
+gmp_randstate_t __gmp_rands; /* the global random state for old-style random functions */
--- /dev/null
+/* gmp_randseed (state, seed) -- Set initial seed SEED in random state STATE.
+
+Copyright 2000, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Seed RSTATE with SEED by dispatching to the seeding function stored in
+   the function table attached to RSTATE.  */
+void
+gmp_randseed (gmp_randstate_t rstate,
+	      mpz_srcptr seed)
+{
+  const gmp_randfnptr_t *fns = (const gmp_randfnptr_t *) RNG_FNPTR (rstate);
+
+  (*fns->randseed_fn) (rstate, seed);
+}
--- /dev/null
+/* gmp_randseed_ui (state, seed) -- Set initial seed SEED in random
+ state STATE.
+
+Copyright 2000, 2002 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+/* Seed RSTATE from the unsigned long SEED.  MPZ_FAKE_UI is handed a local
+   mpz_t, a local limb array and SEED, and the resulting mpz_t is passed
+   on to gmp_randseed.  */
+void
+gmp_randseed_ui (gmp_randstate_t rstate,
+		 unsigned long int seed)
+{
+  mp_limb_t seed_limbs[LIMBS_PER_ULONG];
+  mpz_t seed_z;
+
+  MPZ_FAKE_UI (seed_z, seed_limbs, seed);
+  gmp_randseed (rstate, seed_z);
+}
+++ /dev/null
-/* gmp_urandomb_ui -- random bits returned in a ulong.
-
-Copyright 2003, 2004 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-
-/* Currently bits>=BITS_PER_ULONG is quietly truncated to BITS_PER_ULONG,
- maybe this should raise an exception or something. */
-
-unsigned long
-gmp_urandomb_ui (gmp_randstate_ptr rstate, unsigned long bits)
-{
- mp_limb_t a[LIMBS_PER_ULONG];
-
- /* start with zeros, since if bits==0 then _gmp_rand will store nothing at
- all, or if bits <= GMP_NUMB_BITS then it will store only a[0] */
- a[0] = 0;
-#if LIMBS_PER_ULONG > 1
- a[1] = 0;
-#endif
-
- _gmp_rand (a, rstate, MIN (bits, BITS_PER_ULONG));
-
-#if LIMBS_PER_ULONG == 1
- return a[0];
-#else
- return a[0] | (a[1] << GMP_NUMB_BITS);
-#endif
-}
+++ /dev/null
-/* gmp_randclear (state) -- Clear and deallocate random state STATE.
-
-Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-void
-gmp_randclear (gmp_randstate_t rstate)
-{
- (*((gmp_randfnptr_t *) RNG_FNPTR (rstate))->randclear_fn) (rstate);
-}
+++ /dev/null
-/* gmp_randinit_default -- initialize a random state with a default algorithm.
-
-Copyright 2001, 2002 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-void
-gmp_randinit_default (gmp_randstate_t rstate)
-{
- gmp_randinit_mt (rstate);
-}
+++ /dev/null
-/* gmp_randinit_set -- initialize with a copy of another gmp_randstate_t.
-
-Copyright 2003 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-
-void
-gmp_randinit_set (gmp_randstate_ptr dst, gmp_randstate_srcptr src)
-{
- (*((gmp_randfnptr_t *) RNG_FNPTR (src))->randiset_fn) (dst, src);
-}
+++ /dev/null
-/* gmp_randinit_lc_2exp_size -- initialize a random state with a linear
- congruential generator of a requested size.
-
-Copyright 1999, 2000, 2001 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
-
-#include <stdio.h> /* for NULL */
-#include "gmp.h"
-#include "gmp-impl.h"
-
-
-/* Array of LC-schemes, ordered in increasing order of the first
- member (the 'm2exp' value). The end of the array is indicated with
- an entry containing all zeros. */
-
-/* All multipliers are in the range 0.01*m and 0.99*m, and are
-congruent to 5 (mod 8).
-They all pass the spectral test with Vt >= 2^(30/t) and merit >= 1.
-(Up to and including 196 bits, merit is >= 3.) */
-
-struct __gmp_rand_lc_scheme_struct
-{
- unsigned long int m2exp; /* Modulus is 2 ^ m2exp. */
- const char *astr; /* Multiplier in string form. */
- unsigned long int c; /* Addend. */
-};
-
-static const struct __gmp_rand_lc_scheme_struct __gmp_rand_lc_scheme[] =
-{
- {32, "29CF535", 1},
- {33, "51F666D", 1},
- {34, "A3D73AD", 1},
- {35, "147E5B85", 1},
- {36, "28F725C5", 1},
- {37, "51EE3105", 1},
- {38, "A3DD5CDD", 1},
- {39, "147AF833D", 1},
- {40, "28F5DA175", 1},
- {56, "AA7D735234C0DD", 1},
- {64, "BAECD515DAF0B49D", 1},
- {100, "292787EBD3329AD7E7575E2FD", 1},
- {128, "48A74F367FA7B5C8ACBB36901308FA85", 1},
- {156, "78A7FDDDC43611B527C3F1D760F36E5D7FC7C45", 1},
- {196, "41BA2E104EE34C66B3520CE706A56498DE6D44721E5E24F5", 1},
- {200, "4E5A24C38B981EAFE84CD9D0BEC48E83911362C114F30072C5", 1},
- {256, "AF66BA932AAF58A071FD8F0742A99A0C76982D648509973DB802303128A14CB5", 1},
- {0, NULL, 0} /* End of array. */
-};
-
-int
-gmp_randinit_lc_2exp_size (gmp_randstate_t rstate, mp_bitcnt_t size)
-{
- const struct __gmp_rand_lc_scheme_struct *sp;
- mpz_t a;
-
- /* Pick a scheme. */
- for (sp = __gmp_rand_lc_scheme; sp->m2exp != 0; sp++)
- if (sp->m2exp / 2 >= size)
- goto found;
- return 0;
-
- found:
- /* Install scheme. */
- mpz_init_set_str (a, sp->astr, 16);
- gmp_randinit_lc_2exp (rstate, a, sp->c, sp->m2exp);
- mpz_clear (a);
- return 1;
-}
+++ /dev/null
-/* Linear Congruential pseudo-random number generator functions.
-
-Copyright 1999, 2000, 2001, 2002, 2003, 2005 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-
-/* State structure for LC, the RNG_STATE() pointer in a gmp_randstate_t.
-
- _mp_seed holds the current seed value, in the range 0 to 2^m2exp-1.
- SIZ(_mp_seed) is fixed at BITS_TO_LIMBS(_mp_m2exp) and the value is
- padded with high zero limbs if necessary. ALLOC(_mp_seed) is the current
- size of PTR(_mp_seed) in the usual way. There only needs to be
- BITS_TO_LIMBS(_mp_m2exp) allocated, but the mpz functions in the
- initialization and seeding end up making it a bit more than this.
-
- _mp_a is the "a" multiplier, in the range 0 to 2^m2exp-1. SIZ(_mp_a) is
- the size of the value in the normal way for an mpz_t, except that a value
- of zero is held with SIZ(_mp_a)==1 and PTR(_mp_a)[0]==0. This makes it
- easy to call mpn_mul, and the case of a==0 is highly un-random and not
- worth any trouble to optimize.
-
- {_cp,_cn} is the "c" addend. Normally _cn is 1, but when nails are in
- use a ulong can be bigger than one limb, and in this case _cn is 2 if
- necessary. c==0 is stored as _cp[0]==0 and _cn==1, which makes it easy
- to call __GMPN_ADD. c==0 is fairly un-random so isn't worth optimizing.
-
- _mp_m2exp gives the modulus, namely 2^m2exp. We demand m2exp>=1, since
- m2exp==0 would mean no bits at all out of each iteration, which makes no
- sense. */
-
-typedef struct {
- mpz_t _mp_seed;
- mpz_t _mp_a;
- mp_size_t _cn;
- mp_limb_t _cp[LIMBS_PER_ULONG];
- unsigned long _mp_m2exp;
-} gmp_rand_lc_struct;
-
-
-/* lc (rp, state) -- Generate next number in LC sequence. Return the
- number of valid bits in the result. Discards the lower half of the
- result. */
-
-static unsigned long int
-lc (mp_ptr rp, gmp_randstate_t rstate)
-{
- mp_ptr tp, seedp, ap;
- mp_size_t ta;
- mp_size_t tn, seedn, an;
- unsigned long int m2exp;
- unsigned long int bits;
- int cy;
- mp_size_t xn;
- gmp_rand_lc_struct *p;
- TMP_DECL;
-
- p = (gmp_rand_lc_struct *) RNG_STATE (rstate);
-
- m2exp = p->_mp_m2exp;
-
- seedp = PTR (p->_mp_seed);
- seedn = SIZ (p->_mp_seed);
-
- ap = PTR (p->_mp_a);
- an = SIZ (p->_mp_a);
-
- /* Allocate temporary storage. Let there be room for calculation of
- (A * seed + C) % M, or M if bigger than that. */
-
- TMP_MARK;
-
- ta = an + seedn + 1;
- tn = BITS_TO_LIMBS (m2exp);
- if (ta <= tn) /* that is, if (ta < tn + 1) */
- {
- mp_size_t tmp = an + seedn;
- ta = tn + 1;
- tp = TMP_ALLOC_LIMBS (ta);
- MPN_ZERO (&tp[tmp], ta - tmp); /* mpn_mul won't zero it out. */
- }
- else
- tp = TMP_ALLOC_LIMBS (ta);
-
- /* t = a * seed. NOTE: an is always > 0; see initialization. */
- ASSERT (seedn >= an && an > 0);
- mpn_mul (tp, seedp, seedn, ap, an);
-
- /* t = t + c. NOTE: tn is always >= p->_cn (precondition for __GMPN_ADD);
- see initialization. */
- ASSERT (tn >= p->_cn);
- __GMPN_ADD (cy, tp, tp, tn, p->_cp, p->_cn);
-
- /* t = t % m */
- tp[m2exp / GMP_NUMB_BITS] &= (CNST_LIMB (1) << m2exp % GMP_NUMB_BITS) - 1;
-
- /* Save result as next seed. */
- MPN_COPY (PTR (p->_mp_seed), tp, tn);
-
- /* Discard the lower m2exp/2 of the result. */
- bits = m2exp / 2;
- xn = bits / GMP_NUMB_BITS;
-
- tn -= xn;
- if (tn > 0)
- {
- unsigned int cnt = bits % GMP_NUMB_BITS;
- if (cnt != 0)
- {
- mpn_rshift (tp, tp + xn, tn, cnt);
- MPN_COPY_INCR (rp, tp, xn + 1);
- }
- else /* Even limb boundary. */
- MPN_COPY_INCR (rp, tp + xn, tn);
- }
-
- TMP_FREE;
-
- /* Return number of valid bits in the result. */
- return (m2exp + 1) / 2;
-}
-
-
-/* Obtain a sequence of random numbers. */
-static void
-randget_lc (gmp_randstate_t rstate, mp_ptr rp, unsigned long int nbits)
-{
- unsigned long int rbitpos;
- int chunk_nbits;
- mp_ptr tp;
- mp_size_t tn;
- gmp_rand_lc_struct *p;
- TMP_DECL;
-
- p = (gmp_rand_lc_struct *) RNG_STATE (rstate);
-
- TMP_MARK;
-
- chunk_nbits = p->_mp_m2exp / 2;
- tn = BITS_TO_LIMBS (chunk_nbits);
-
- tp = TMP_ALLOC_LIMBS (tn);
-
- rbitpos = 0;
- while (rbitpos + chunk_nbits <= nbits)
- {
- mp_ptr r2p = rp + rbitpos / GMP_NUMB_BITS;
-
- if (rbitpos % GMP_NUMB_BITS != 0)
- {
- mp_limb_t savelimb, rcy;
- /* Target of new chunk is not bit aligned. Use temp space
- and align things by shifting it up. */
- lc (tp, rstate);
- savelimb = r2p[0];
- rcy = mpn_lshift (r2p, tp, tn, rbitpos % GMP_NUMB_BITS);
- r2p[0] |= savelimb;
- /* bogus */
- if ((chunk_nbits % GMP_NUMB_BITS + rbitpos % GMP_NUMB_BITS)
- > GMP_NUMB_BITS)
- r2p[tn] = rcy;
- }
- else
- {
- /* Target of new chunk is bit aligned. Let `lc' put bits
- directly into our target variable. */
- lc (r2p, rstate);
- }
- rbitpos += chunk_nbits;
- }
-
- /* Handle last [0..chunk_nbits) bits. */
- if (rbitpos != nbits)
- {
- mp_ptr r2p = rp + rbitpos / GMP_NUMB_BITS;
- int last_nbits = nbits - rbitpos;
- tn = BITS_TO_LIMBS (last_nbits);
- lc (tp, rstate);
- if (rbitpos % GMP_NUMB_BITS != 0)
- {
- mp_limb_t savelimb, rcy;
- /* Target of new chunk is not bit aligned. Use temp space
- and align things by shifting it up. */
- savelimb = r2p[0];
- rcy = mpn_lshift (r2p, tp, tn, rbitpos % GMP_NUMB_BITS);
- r2p[0] |= savelimb;
- if (rbitpos + tn * GMP_NUMB_BITS - rbitpos % GMP_NUMB_BITS < nbits)
- r2p[tn] = rcy;
- }
- else
- {
- MPN_COPY (r2p, tp, tn);
- }
- /* Mask off top bits if needed. */
- if (nbits % GMP_NUMB_BITS != 0)
- rp[nbits / GMP_NUMB_BITS]
- &= ~(~CNST_LIMB (0) << nbits % GMP_NUMB_BITS);
- }
-
- TMP_FREE;
-}
-
-
-static void
-randseed_lc (gmp_randstate_t rstate, mpz_srcptr seed)
-{
- gmp_rand_lc_struct *p = (gmp_rand_lc_struct *) RNG_STATE (rstate);
- mpz_ptr seedz = p->_mp_seed;
- mp_size_t seedn = BITS_TO_LIMBS (p->_mp_m2exp);
-
- /* Store p->_mp_seed as an unnormalized integer with size enough
- for numbers up to 2^m2exp-1. That size can't be zero. */
- mpz_fdiv_r_2exp (seedz, seed, p->_mp_m2exp);
- MPN_ZERO (&PTR (seedz)[SIZ (seedz)], seedn - SIZ (seedz));
- SIZ (seedz) = seedn;
-}
-
-
-static void
-randclear_lc (gmp_randstate_t rstate)
-{
- gmp_rand_lc_struct *p = (gmp_rand_lc_struct *) RNG_STATE (rstate);
-
- mpz_clear (p->_mp_seed);
- mpz_clear (p->_mp_a);
- (*__gmp_free_func) (p, sizeof (gmp_rand_lc_struct));
-}
-
-static void randiset_lc __GMP_PROTO ((gmp_randstate_ptr dst, gmp_randstate_srcptr src));
-
-static const gmp_randfnptr_t Linear_Congruential_Generator = {
- randseed_lc,
- randget_lc,
- randclear_lc,
- randiset_lc
-};
-
-static void
-randiset_lc (gmp_randstate_ptr dst, gmp_randstate_srcptr src)
-{
- gmp_rand_lc_struct *dstp, *srcp;
-
- srcp = (gmp_rand_lc_struct *) RNG_STATE (src);
- dstp = (*__gmp_allocate_func) (sizeof (gmp_rand_lc_struct));
-
- RNG_STATE (dst) = (void *) dstp;
- RNG_FNPTR (dst) = (void *) &Linear_Congruential_Generator;
-
- /* _mp_seed and _mp_a might be unnormalized (high zero limbs), but
- mpz_init_set won't worry about that */
- mpz_init_set (dstp->_mp_seed, srcp->_mp_seed);
- mpz_init_set (dstp->_mp_a, srcp->_mp_a);
-
- dstp->_cn = srcp->_cn;
-
- dstp->_cp[0] = srcp->_cp[0];
- if (LIMBS_PER_ULONG > 1)
- dstp->_cp[1] = srcp->_cp[1];
- if (LIMBS_PER_ULONG > 2) /* usually there's only 1 or 2 */
- MPN_COPY (dstp->_cp + 2, srcp->_cp + 2, LIMBS_PER_ULONG - 2);
-
- dstp->_mp_m2exp = srcp->_mp_m2exp;
-}
-
-
-void
-gmp_randinit_lc_2exp (gmp_randstate_t rstate,
- mpz_srcptr a,
- unsigned long int c,
- mp_bitcnt_t m2exp)
-{
- gmp_rand_lc_struct *p;
- mp_size_t seedn = BITS_TO_LIMBS (m2exp);
-
- ASSERT_ALWAYS (m2exp != 0);
-
- p = __GMP_ALLOCATE_FUNC_TYPE (1, gmp_rand_lc_struct);
- RNG_STATE (rstate) = (void *) p;
- RNG_FNPTR (rstate) = (void *) &Linear_Congruential_Generator;
-
- /* allocate m2exp bits of space for p->_mp_seed, and initial seed "1" */
- mpz_init2 (p->_mp_seed, m2exp);
- MPN_ZERO (PTR (p->_mp_seed), seedn);
- SIZ (p->_mp_seed) = seedn;
- PTR (p->_mp_seed)[0] = 1;
-
- /* "a", forced to 0 to 2^m2exp-1 */
- mpz_init (p->_mp_a);
- mpz_fdiv_r_2exp (p->_mp_a, a, m2exp);
-
- /* Avoid SIZ(a) == 0 to avoid checking for special case in lc(). */
- if (SIZ (p->_mp_a) == 0)
- {
- SIZ (p->_mp_a) = 1;
- PTR (p->_mp_a)[0] = CNST_LIMB (0);
- }
-
- MPN_SET_UI (p->_cp, p->_cn, c);
-
- /* Internally we may discard any bits of c above m2exp. The following
- code ensures that __GMPN_ADD in lc() will always work. */
- if (seedn < p->_cn)
- p->_cn = (p->_cp[0] != 0);
-
- p->_mp_m2exp = m2exp;
-}
+++ /dev/null
-/* Mersenne Twister pseudo-random number generator functions.
-
- THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY. THEY'RE ALMOST
- CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
- FUTURE GNU MP RELEASES.
-
-Copyright 2002, 2003, 2006 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
-
-#include <stdio.h> /* for NULL */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "randmt.h"
-
-
-/* This code implements the Mersenne Twister pseudorandom number generator
- by Takuji Nishimura and Makoto Matsumoto. The buffer initialization
- function is different in order to permit seeds greater than 2^32-1.
-
- This file contains a special __gmp_randinit_mt_noseed which excludes the
- seeding function from the gmp_randfnptr_t routines. This is for use by
- mpn_random and mpn_random2 on the global random generator. MT seeding
- uses mpz functions, and we don't want mpn routines dragging mpz functions
- into the link. */
-
-
-/* Default seed to use when the generator is not initialized. */
-#define DEFAULT_SEED 5489 /* was 4357 */
-
-/* Tempering masks. */
-#define MASK_1 0x9D2C5680
-#define MASK_2 0xEFC60000
-
-/* Initial state of buffer when initialized with default seed. */
-static const gmp_uint_least32_t default_state[N] =
-{
- 0xD247B233,0x9E5AA8F1,0x0FFA981B,0x9DCB0980,0x74200F2B,0xA576D044,
- 0xE9F05ADF,0x1538BFF5,0x59818BBF,0xCF9E58D8,0x09FCE032,0x6A1C663F,
- 0x5116E78A,0x69B3E0FA,0x6D92D665,0xD0A8BE98,0xF669B734,0x41AC1B68,
- 0x630423F1,0x4B8D6B8A,0xC2C46DD7,0x5680747D,0x43703E8F,0x3B6103D2,
- 0x49E5EB3F,0xCBDAB4C1,0x9C988E23,0x747BEE0B,0x9111E329,0x9F031B5A,
- 0xECCA71B9,0x2AFE4EF8,0x8421C7ED,0xAC89AFF1,0xAED90DF3,0x2DD74F01,
- 0x14906A13,0x75873FA9,0xFF83F877,0x5028A0C9,0x11B4C41D,0x7CAEDBC4,
- 0x8672D0A7,0x48A7C109,0x8320E59F,0xBC0B3D5F,0x75A30886,0xF9E0D128,
- 0x41AF7580,0x239BB94D,0xC67A3C81,0x74EEBD6E,0xBC02B53C,0x727EA449,
- 0x6B8A2806,0x5853B0DA,0xBDE032F4,0xCE234885,0x320D6145,0x48CC053F,
- 0x00DBC4D2,0xD55A2397,0xE1059B6F,0x1C3E05D1,0x09657C64,0xD07CB661,
- 0x6E982E34,0x6DD1D777,0xEDED1071,0xD79DFD65,0xF816DDCE,0xB6FAF1E4,
- 0x1C771074,0x311835BD,0x18F952F7,0xF8F40350,0x4ECED354,0x7C8AC12B,
- 0x31A9994D,0x4FD47747,0xDC227A23,0x6DFAFDDF,0x6796E748,0x0C6F634F,
- 0xF992FA1D,0x4CF670C9,0x067DFD31,0xA7A3E1A5,0x8CD7D9DF,0x972CCB34,
- 0x67C82156,0xD548F6A8,0x045CEC21,0xF3240BFB,0xDEF656A7,0x43DE08C5,
- 0xDAD1F92F,0x3726C56B,0x1409F19A,0x942FD147,0xB926749C,0xADDC31B8,
- 0x53D0D869,0xD1BA52FE,0x6722DF8C,0x22D95A74,0x7DC1B52A,0x1DEC6FD5,
- 0x7262874D,0x0A725DC9,0xE6A8193D,0xA052835A,0xDC9AD928,0xE59EBB90,
- 0x70DBA9FF,0xD612749D,0x5A5A638C,0x6086EC37,0x2A579709,0x1449EA3A,
- 0xBC8E3C06,0x2F900666,0xFBE74FD1,0x6B35B911,0xF8335008,0xEF1E979D,
- 0x738AB29D,0xA2DC0FDC,0x7696305D,0xF5429DAC,0x8C41813B,0x8073E02E,
- 0xBEF83CCD,0x7B50A95A,0x05EE5862,0x00829ECE,0x8CA1958C,0xBE4EA2E2,
- 0x4293BB73,0x656F7B23,0x417316D8,0x4467D7CF,0x2200E63B,0x109050C8,
- 0x814CBE47,0x36B1D4A8,0x36AF9305,0x308327B3,0xEBCD7344,0xA738DE27,
- 0x5A10C399,0x4142371D,0x64A18528,0x0B31E8B2,0x641057B9,0x6AFC363B,
- 0x108AD953,0x9D4DA234,0x0C2D9159,0x1C8A1A1F,0x310C66BA,0x87AA1070,
- 0xDAC832FF,0x0A433422,0x7AF15812,0x2D8D9BD0,0x995A25E9,0x25326CAC,
- 0xA34384DB,0x4C8421CC,0x4F0315EC,0x29E8649E,0xA7732D6F,0x2E94D3E3,
- 0x7D98A340,0x397C4D74,0x659DB4DE,0x747D4E9A,0xD9DB8435,0x4659DBE9,
- 0x313E6DC5,0x29D104DC,0x9F226CBA,0x452F18B0,0xD0BC5068,0x844CA299,
- 0x782B294E,0x4AE2EB7B,0xA4C475F8,0x70A81311,0x4B3E8BCC,0x7E20D4BA,
- 0xABCA33C9,0x57BE2960,0x44F9B419,0x2E567746,0x72EB757A,0x102CC0E8,
- 0xB07F32B9,0xD0DABD59,0xBA85AD6B,0xF3E20667,0x98D77D81,0x197AFA47,
- 0x518EE9AC,0xE10CE5A2,0x01CF2C2A,0xD3A3AF3D,0x16DDFD65,0x669232F8,
- 0x1C50A301,0xB93D9151,0x9354D3F4,0x847D79D0,0xD5FE2EC6,0x1F7B0610,
- 0xFA6B90A5,0xC5879041,0x2E7DC05E,0x423F1F32,0xEF623DDB,0x49C13280,
- 0x98714E92,0xC7B6E4AD,0xC4318466,0x0737F312,0x4D3C003F,0x9ACC1F1F,
- 0x5F1C926D,0x085FA771,0x185A83A2,0xF9AA159D,0x0B0B0132,0xF98E7A43,
- 0xCD9EBDBE,0x0190CB29,0x10D93FB6,0x3B8A4D97,0x66A65A41,0xE43E766F,
- 0x77BE3C41,0xB9686364,0xCB36994D,0x6846A287,0x567E77F7,0x36178DD8,
- 0xBDE6B1F2,0xB6EFDC64,0x82950324,0x42053F47,0xC09BE51C,0x0942D762,
- 0x35F92C7F,0x367DEC61,0x6EE3D983,0xDBAAF78A,0x265D2C47,0x8EB4BF5C,
- 0x33B232D7,0xB0137E77,0x373C39A7,0x8D2B2E76,0xC7510F01,0x50F9E032,
- 0x7B1FDDDB,0x724C2AAE,0xB10ECB31,0xCCA3D1B8,0x7F0BCF10,0x4254BBBD,
- 0xE3F93B97,0x2305039B,0x53120E22,0x1A2F3B9A,0x0FDDBD97,0x0118561E,
- 0x0A798E13,0x9E0B3ACD,0xDB6C9F15,0xF512D0A2,0x9E8C3A28,0xEE2184AE,
- 0x0051EC2F,0x2432F74F,0xB0AA66EA,0x55128D88,0xF7D83A38,0x4DAE8E82,
- 0x3FDC98D6,0x5F0BD341,0x7244BE1D,0xC7B48E78,0x2D473053,0x43892E20,
- 0xBA0F1F2A,0x524D4895,0x2E10BCB1,0x4C372D81,0x5C3E50CD,0xCF61CC2E,
- 0x931709AB,0x81B3AEFC,0x39E9405E,0x7FFE108C,0x4FBB3FF8,0x06ABE450,
- 0x7F5BF51E,0xA4E3CDFD,0xDB0F6C6F,0x159A1227,0x3B9FED55,0xD20B6F7F,
- 0xFBE9CC83,0x64856619,0xBF52B8AF,0x9D7006B0,0x71165BC6,0xAE324AEE,
- 0x29D27F2C,0x794C2086,0x74445CE2,0x782915CC,0xD4CE6886,0x3289AE7C,
- 0x53DEF297,0x4185F7ED,0x88B72400,0x3C09DC11,0xBCE3AAB6,0x6A75934A,
- 0xB267E399,0x000DF1BF,0x193BA5E2,0xFA3E1977,0x179E14F6,0x1EEDE298,
- 0x691F0B06,0xB84F78AC,0xC1C15316,0xFFFF3AD6,0x0B457383,0x518CD612,
- 0x05A00F3E,0xD5B7D275,0x4C5ECCD7,0xE02CD0BE,0x5558E9F2,0x0C89BBF0,
- 0xA3D96227,0x2832D2B2,0xF667B897,0xD4556554,0xF9D2F01F,0xFA1E3FAE,
- 0x52C2E1EE,0xE5451F31,0x7E849729,0xDABDB67A,0x54BF5E7E,0xF831C271,
- 0x5F1A17E3,0x9D140AFE,0x92741C47,0x48CFABCE,0x9CBBE477,0x9C3EE57F,
- 0xB07D4C39,0xCC21BCE2,0x697708B1,0x58DA2A6B,0x2370DB16,0x6E641948,
- 0xACC5BD52,0x868F24CC,0xCA1DB0F5,0x4CADA492,0x3F443E54,0xC4A4D5E9,
- 0xF00AD670,0xE93C86E0,0xFE90651A,0xDDE532A3,0xA66458DF,0xAB7D7151,
- 0x0E2E775F,0xC9109F99,0x8D96D59F,0x73CEF14C,0xC74E88E9,0x02712DC0,
- 0x04F41735,0x2E5914A2,0x59F4B2FB,0x0287FC83,0x80BC0343,0xF6B32559,
- 0xC74178D4,0xF1D99123,0x383CCC07,0xACC0637D,0x0863A548,0xA6FCAC85,
- 0x2A13EFF0,0xAF2EEDB1,0x41E72750,0xE0C6B342,0x5DA22B46,0x635559E0,
- 0xD2EA40AC,0x10AA98C0,0x19096497,0x112C542B,0x2C85040C,0xA868E7D0,
- 0x6E260188,0xF596D390,0xC3BB5D7A,0x7A2AA937,0xDFD15032,0x6780AE3B,
- 0xDB5F9CD8,0x8BD266B0,0x7744AF12,0xB463B1B0,0x589629C9,0xE30DBC6E,
- 0x880F5569,0x209E6E16,0x9DECA50C,0x02987A57,0xBED3EA57,0xD3A678AA,
- 0x70DD030D,0x0CFD9C5D,0x92A18E99,0xF5740619,0x7F6F0A7D,0x134CAF9A,
- 0x70F5BAE4,0x23DCA7B5,0x4D788FCD,0xC7F07847,0xBCF77DA1,0x9071D568,
- 0xFC627EA1,0xAE004B77,0x66B54BCB,0x7EF2DAAC,0xDCD5AC30,0xB9BDF730,
- 0x505A97A7,0x9D881FD3,0xADB796CC,0x94A1D202,0x97535D7F,0x31EC20C0,
- 0xB1887A98,0xC1475069,0xA6F73AF3,0x71E4E067,0x46A569DE,0xD2ADE430,
- 0x6F0762C7,0xF50876F4,0x53510542,0x03741C3E,0x53502224,0xD8E54D60,
- 0x3C44AB1A,0x34972B46,0x74BFA89D,0xD7D768E0,0x37E605DC,0xE13D1BDF,
- 0x5051C421,0xB9E057BE,0xB717A14C,0xA1730C43,0xB99638BE,0xB5D5F36D,
- 0xE960D9EA,0x6B1388D3,0xECB6D3B6,0xBDBE8B83,0x2E29AFC5,0x764D71EC,
- 0x4B8F4F43,0xC21DDC00,0xA63F657F,0x82678130,0xDBF535AC,0xA594FC58,
- 0x942686BC,0xBD9B657B,0x4A0F9B61,0x44FF184F,0x38E10A2F,0x61910626,
- 0x5E247636,0x7106D137,0xC62802F0,0xBD1D1F00,0x7CC0DCB2,0xED634909,
- 0xDC13B24E,0x9799C499,0xD77E3D6A,0x14773B68,0x967A4FB7,0x35EECFB1,
- 0x2A5110B8,0xE2F0AF94,0x9D09DEA5,0x20255D27,0x5771D34B,0xE1089EE4,
- 0x246F330B,0x8F7CAEE5,0xD3064712,0x75CAFBEE,0xB94F7028,0xED953666,
- 0x5D1975B4,0x5AF81271,0x13BE2025,0x85194659,0x30805331,0xEC9D46C0,
- 0xBC027C36,0x2AF84188,0xC2141B80,0xC02B1E4A,0x04D36177,0xFC50E9D7,
- 0x39CE79DA,0x917E0A00,0xEF7A0BF4,0xA98BD8D1,0x19424DD2,0x9439DF1F,
- 0xC42AF746,0xADDBE83E,0x85221F0D,0x45563E90,0x9095EC52,0x77887B25,
- 0x8AE46064,0xBD43B71A,0xBB541956,0x7366CF9D,0xEE8E1737,0xB5A727C9,
- 0x5076B3E7,0xFC70BACA,0xCE135B75,0xC4E91AA3,0xF0341911,0x53430C3F,
- 0x886B0824,0x6BB5B8B7,0x33E21254,0xF193B456,0x5B09617F,0x215FFF50,
- 0x48D97EF1,0x356479AB,0x6EA9DDC4,0x0D352746,0xA2F5CE43,0xB226A1B3,
- 0x1329EA3C,0x7A337CC2,0xB5CCE13D,0x563E3B5B,0x534E8E8F,0x561399C9,
- 0xE1596392,0xB0F03125,0x4586645B,0x1F371847,0x94EAABD1,0x41F97EDD,
- 0xE3E5A39B,0x71C774E2,0x507296F4,0x5960133B,0x7852C494,0x3F5B2691,
- 0xA3F87774,0x5A7AF89E,0x17DA3F28,0xE9D9516D,0xFCC1C1D5,0xE4618628,
- 0x04081047,0xD8E4DB5F,0xDC380416,0x8C4933E2,0x95074D53,0xB1B0032D,
- 0xCC8102EA,0x71641243,0x98D6EB6A,0x90FEC945,0xA0914345,0x6FAB037D,
- 0x70F49C4D,0x05BF5B0E,0x927AAF7F,0xA1940F61,0xFEE0756F,0xF815369F,
- 0x5C00253B,0xF2B9762F,0x4AEB3CCC,0x1069F386,0xFBA4E7B9,0x70332665,
- 0x6BCA810E,0x85AB8058,0xAE4B2B2F,0x9D120712,0xBEE8EACB,0x776A1112
-};
-
-void
-__gmp_mt_recalc_buffer (gmp_uint_least32_t mt[])
-{
- gmp_uint_least32_t y;
- int kk;
-
- for (kk = 0; kk < N - M; kk++)
- {
- y = (mt[kk] & 0x80000000) | (mt[kk + 1] & 0x7FFFFFFF);
- mt[kk] = mt[kk + M] ^ (y >> 1) ^ ((y & 0x01) != 0 ? MATRIX_A : 0);
- }
- for (; kk < N - 1; kk++)
- {
- y = (mt[kk] & 0x80000000) | (mt[kk + 1] & 0x7FFFFFFF);
- mt[kk] = mt[kk - (N - M)] ^ (y >> 1) ^ ((y & 0x01) != 0 ? MATRIX_A : 0);
- }
-
- y = (mt[N - 1] & 0x80000000) | (mt[0] & 0x7FFFFFFF);
- mt[N - 1] = mt[M - 1] ^ (y >> 1) ^ ((y & 0x01) != 0 ? MATRIX_A : 0);
-}
-
-
-/* Get nbits bits of output from the generator into dest.
- Note that Mersenne Twister is designed to produce outputs in
- 32-bit words. */
-void
-__gmp_randget_mt (gmp_randstate_t rstate, mp_ptr dest, unsigned long int nbits)
-{
- gmp_uint_least32_t y;
- int rbits;
- mp_size_t i;
- mp_size_t nlimbs;
- int *pmti;
- gmp_uint_least32_t *mt;
-
- pmti = &((gmp_rand_mt_struct *) RNG_STATE (rstate))->mti;
- mt = ((gmp_rand_mt_struct *) RNG_STATE (rstate))->mt;
-
- nlimbs = nbits / GMP_NUMB_BITS;
- rbits = nbits % GMP_NUMB_BITS;
-
-#define NEXT_RANDOM \
- do \
- { \
- if (*pmti >= N) \
- { \
- __gmp_mt_recalc_buffer (mt); \
- *pmti = 0; \
- } \
- y = mt[(*pmti)++]; \
- y ^= (y >> 11); \
- y ^= (y << 7) & MASK_1; \
- y ^= (y << 15) & MASK_2; \
- y ^= (y >> 18); \
- } \
- while (0)
-
-
- /* Handle the common cases of 32- or 64-bit limbs with fast,
- optimized routines, and the rest of cases with a general
- routine. In all cases, no more than 31 bits are rejected
- for the last limb so that every version of the code is
- consistent with the others. */
-
-#if (GMP_NUMB_BITS == 32)
-
- for (i = 0; i < nlimbs; i++)
- {
- NEXT_RANDOM;
- dest[i] = (mp_limb_t) y;
- }
- if (rbits)
- {
- NEXT_RANDOM;
- dest[nlimbs] = (mp_limb_t) (y & ~(ULONG_MAX << rbits));
- }
-
-#else /* GMP_NUMB_BITS != 32 */
-#if (GMP_NUMB_BITS == 64)
-
- for (i = 0; i < nlimbs; i++)
- {
- NEXT_RANDOM;
- dest[i] = (mp_limb_t) y;
- NEXT_RANDOM;
- dest[i] |= (mp_limb_t) y << 32;
- }
- if (rbits)
- {
- if (rbits < 32)
- {
- NEXT_RANDOM;
- dest[nlimbs] = (mp_limb_t) (y & ~(ULONG_MAX << rbits));
- }
- else
- {
- NEXT_RANDOM;
- dest[nlimbs] = (mp_limb_t) y;
- if (rbits > 32)
- {
- NEXT_RANDOM;
- dest[nlimbs] |=
- ((mp_limb_t) (y & ~(ULONG_MAX << (rbits-32)))) << 32;
- }
- }
- }
-
-#else /* GMP_NUMB_BITS != 64 */
-
- {
- /* Fall back to a general algorithm. This algorithm works by
- keeping a pool of up to 64 bits (2 outputs from MT) acting
- as a shift register from which bits are consumed as needed.
- Bits are consumed using the LSB bits of bitpool_l, and
- inserted via bitpool_h and shifted to the right place. */
-
- gmp_uint_least32_t bitpool_h = 0;
- gmp_uint_least32_t bitpool_l = 0;
- int bits_in_pool = 0; /* Holds number of valid bits in the pool. */
- int bits_to_fill; /* Holds total number of bits to put in
- destination. */
- int bitidx; /* Holds the destination bit position. */
- mp_size_t nlimbs2; /* Number of whole+partial limbs to fill. */
-
- nlimbs2 = nlimbs + (rbits != 0);
-
- for (i = 0; i < nlimbs2; i++)
- {
- bitidx = 0;
- if (i < nlimbs)
- bits_to_fill = GMP_NUMB_BITS;
- else
- bits_to_fill = rbits;
-
- dest[i] = CNST_LIMB (0);
- while (bits_to_fill >= 32) /* Process whole 32-bit blocks first. */
- {
- if (bits_in_pool < 32) /* Need more bits. */
- {
- /* 64-bit right shift. */
- NEXT_RANDOM;
- bitpool_h = y;
- bitpool_l |= (bitpool_h << bits_in_pool) & 0xFFFFFFFF;
- if (bits_in_pool == 0)
- bitpool_h = 0;
- else
- bitpool_h >>= 32 - bits_in_pool;
- bits_in_pool += 32; /* We've got 32 more bits. */
- }
-
- /* Fill a 32-bit chunk. */
- dest[i] |= ((mp_limb_t) bitpool_l) << bitidx;
- bitpool_l = bitpool_h;
- bits_in_pool -= 32;
- bits_to_fill -= 32;
- bitidx += 32;
- }
-
- /* Cover the case where GMP_NUMB_BITS is not a multiple of 32. */
- if (bits_to_fill != 0)
- {
- if (bits_in_pool < bits_to_fill)
- {
- NEXT_RANDOM;
- bitpool_h = y;
- bitpool_l |= (bitpool_h << bits_in_pool) & 0xFFFFFFFF;
- if (bits_in_pool == 0)
- bitpool_h = 0;
- else
- bitpool_h >>= 32 - bits_in_pool;
- bits_in_pool += 32;
- }
-
- dest[i] |= (((mp_limb_t) bitpool_l
- & ~(~CNST_LIMB (0) << bits_to_fill))
- << bitidx);
- bitpool_l = ((bitpool_l >> bits_to_fill)
- | (bitpool_h << (32 - bits_to_fill))) & 0xFFFFFFFF;
- bitpool_h >>= bits_to_fill;
- bits_in_pool -= bits_to_fill;
- }
- }
- }
-
-#endif /* GMP_NUMB_BITS != 64 */
-#endif /* GMP_NUMB_BITS != 32 */
-}
-
-void
-__gmp_randclear_mt (gmp_randstate_t rstate)
-{
- (*__gmp_free_func) ((void *) RNG_STATE (rstate),
- ALLOC (rstate->_mp_seed) * BYTES_PER_MP_LIMB);
-}
-
-void __gmp_randiset_mt __GMP_PROTO ((gmp_randstate_ptr dst, gmp_randstate_srcptr src));
-
-static const gmp_randfnptr_t Mersenne_Twister_Generator_Noseed = {
- NULL,
- __gmp_randget_mt,
- __gmp_randclear_mt,
- __gmp_randiset_mt
-};
-
-void
-__gmp_randiset_mt (gmp_randstate_ptr dst, gmp_randstate_srcptr src)
-{
- const mp_size_t sz = ((sizeof (gmp_rand_mt_struct) - 1) / BYTES_PER_MP_LIMB) + 1;
- gmp_rand_mt_struct *dstp, *srcp;
- mp_size_t i;
-
- /* Set the generator functions. */
- RNG_FNPTR (dst) = (void *) &Mersenne_Twister_Generator_Noseed;
-
- /* Allocate the MT-specific state. */
- dstp = (gmp_rand_mt_struct *) __GMP_ALLOCATE_FUNC_LIMBS (sz);
- RNG_STATE (dst) = (mp_ptr) dstp;
- ALLOC (dst->_mp_seed) = sz; /* Initialize alloc field to placate Camm. */
-
- /* Copy state. */
- srcp = (gmp_rand_mt_struct *) RNG_STATE (src);
- for (i = 0; i < N; i++)
- dstp->mt[i] = srcp->mt[i];
-
- dstp->mti = srcp->mti;
-}
-
-void
-__gmp_randinit_mt_noseed (gmp_randstate_ptr dst)
-{
- const mp_size_t sz = ((sizeof (gmp_rand_mt_struct) - 1) / BYTES_PER_MP_LIMB) + 1;
- gmp_rand_mt_struct *dstp;
- mp_size_t i;
-
- /* Set the generator functions. */
- RNG_FNPTR (dst) = (void *) &Mersenne_Twister_Generator_Noseed;
-
- /* Allocate the MT-specific state. */
- dstp = (gmp_rand_mt_struct *) __GMP_ALLOCATE_FUNC_LIMBS (sz);
- RNG_STATE (dst) = (mp_ptr) dstp;
- ALLOC (dst->_mp_seed) = sz; /* Initialize alloc field to placate Camm. */
-
- /* Set state for default seed. */
- for (i = 0; i < N; i++)
- dstp->mt[i] = default_state[i];
-
- dstp->mti = WARM_UP % N;
-}
+++ /dev/null
-/* Mersenne Twister pseudo-random number generator defines.
-
-Copyright 2002, 2003 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
-
-
-/* Number of extractions used to warm the buffer up. */
-#define WARM_UP 2000
-
-/* Period parameters. */
-#define N 624
-#define M 397
-#define MATRIX_A 0x9908B0DF /* Constant vector a. */
-
-/* State structure for MT. */
-typedef struct
-{
- gmp_uint_least32_t mt[N]; /* State array. */
- int mti; /* Index of current value. */
-} gmp_rand_mt_struct;
-
-
-void __gmp_mt_recalc_buffer __GMP_PROTO ((gmp_uint_least32_t *));
-void __gmp_randget_mt __GMP_PROTO ((gmp_randstate_t, mp_ptr, unsigned long int));
-void __gmp_randclear_mt __GMP_PROTO ((gmp_randstate_t rstate));
-void __gmp_randiset_mt __GMP_PROTO ((gmp_randstate_ptr, gmp_randstate_srcptr));
+++ /dev/null
-/* Mersenne Twister pseudo-random number generator functions.
-
-Copyright 2002, 2003 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "randmt.h"
-
-
-/* Calculate (b^e) mod (2^n-k) for e=1074888996, n=19937 and k=20023,
- needed by the seeding function below. */
-static void
-mangle_seed (mpz_ptr r, mpz_srcptr b_orig)
-{
- mpz_t t, b;
- unsigned long e = 0x40118124;
- unsigned long bit = 0x20000000;
-
- mpz_init (t);
- mpz_init_set (b, b_orig); /* in case r==b_orig */
-
- mpz_set (r, b);
- do
- {
- mpz_mul (r, r, r);
-
- reduce:
- for (;;)
- {
- mpz_tdiv_q_2exp (t, r, 19937L);
- if (mpz_sgn (t) == 0)
- break;
- mpz_tdiv_r_2exp (r, r, 19937L);
- mpz_addmul_ui (r, t, 20023L);
- }
-
- if ((e & bit) != 0)
- {
- e &= ~bit;
- mpz_mul (r, r, b);
- goto reduce;
- }
-
- bit >>= 1;
- }
- while (bit != 0);
-
- mpz_clear (t);
- mpz_clear (b);
-}
-
-
-/* Seeding function. Uses powering modulo a non-Mersenne prime to obtain
- a permutation of the input seed space. The modulus is 2^19937-20023,
- which is probably prime. The power is 1074888996. In order to avoid
- seeds 0 and 1 generating invalid or strange output, the input seed is
- first manipulated as follows:
-
- seed1 = seed mod (2^19937-20027) + 2
-
- so that seed1 lies between 2 and 2^19937-20026 inclusive. Then the
- powering is performed as follows:
-
- seed2 = (seed1^1074888996) mod (2^19937-20023)
-
- and then seed2 is used to bootstrap the buffer.
-
- This method aims to give guarantees that:
- a) seed2 will never be zero,
- b) seed2 will very seldom have a very low population of ones in its
- binary representation, and
- c) every seed between 0 and 2^19937-20028 (inclusive) will yield a
- different sequence.
-
- CAVEATS:
-
- The period of the seeding function is 2^19937-20027. This means that
- with seeds 2^19937-20027, 2^19937-20026, ... the exact same sequences
- are obtained as with seeds 0, 1, etc.; it also means that seed -1
- produces the same sequence as seed 2^19937-20028, etc.
- */
-
-static void
-randseed_mt (gmp_randstate_t rstate, mpz_srcptr seed)
-{
- int i;
- size_t cnt;
-
- gmp_rand_mt_struct *p;
- mpz_t mod; /* Modulus. */
- mpz_t seed1; /* Intermediate result. */
-
- p = (gmp_rand_mt_struct *) RNG_STATE (rstate);
-
- mpz_init (mod);
- mpz_init (seed1);
-
- mpz_set_ui (mod, 0L);
- mpz_setbit (mod, 19937L);
- mpz_sub_ui (mod, mod, 20027L);
- mpz_mod (seed1, seed, mod); /* Reduce `seed' modulo `mod'. */
- mpz_add_ui (seed1, seed1, 2L); /* seed1 is now ready. */
- mangle_seed (seed1, seed1); /* Perform the mangling by powering. */
-
- /* Copy the last bit into bit 31 of mt[0] and clear it. */
- p->mt[0] = (mpz_tstbit (seed1, 19936L) != 0) ? 0x80000000 : 0;
- mpz_clrbit (seed1, 19936L);
-
- /* Split seed1 into N-1 32-bit chunks. */
- mpz_export (&p->mt[1], &cnt, -1, sizeof (p->mt[1]), 0,
- 8 * sizeof (p->mt[1]) - 32, seed1);
- cnt++;
- ASSERT (cnt <= N);
- while (cnt < N)
- p->mt[cnt++] = 0;
-
- mpz_clear (mod);
- mpz_clear (seed1);
-
- /* Warm the generator up if necessary. */
- if (WARM_UP != 0)
- for (i = 0; i < WARM_UP / N; i++)
- __gmp_mt_recalc_buffer (p->mt);
-
- p->mti = WARM_UP % N;
-}
-
-
-static const gmp_randfnptr_t Mersenne_Twister_Generator = {
- randseed_mt,
- __gmp_randget_mt,
- __gmp_randclear_mt,
- __gmp_randiset_mt
-};
-
-/* Initialize MT-specific data. */
-void
-gmp_randinit_mt (gmp_randstate_t rstate)
-{
- __gmp_randinit_mt_noseed (rstate);
- RNG_FNPTR (rstate) = (void *) &Mersenne_Twister_Generator;
-}
+++ /dev/null
-/* gmp_urandomm_ui -- uniform random number 0 to N-1 for ulong N.
-
-Copyright 2003, 2004 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "longlong.h"
-
-
-/* If n is a power of 2 then the test ret<n is always true and the loop is
- unnecessary, but there's no need to add special code for this. Just get
- the "bits" calculation correct and let it go through normally.
-
- If n is 1 then will have bits==0 and _gmp_rand will produce no output and
- we always return 0. Again there seems no need for a special case, just
- initialize a[0]=0 and let it go through normally. */
-
-#define MAX_URANDOMM_ITER 80
-
-unsigned long
-gmp_urandomm_ui (gmp_randstate_ptr rstate, unsigned long n)
-{
- mp_limb_t a[LIMBS_PER_ULONG];
- unsigned long ret, bits, leading;
- int i;
-
- if (UNLIKELY (n == 0))
- DIVIDE_BY_ZERO;
-
- /* start with zeros, since if bits==0 then _gmp_rand will store nothing at
- all (bits==0 arises when n==1), or if bits <= GMP_NUMB_BITS then it
- will store only a[0]. */
- a[0] = 0;
-#if LIMBS_PER_ULONG > 1
- a[1] = 0;
-#endif
-
- count_leading_zeros (leading, (mp_limb_t) n);
- bits = GMP_LIMB_BITS - leading - (POW2_P(n) != 0);
-
- for (i = 0; i < MAX_URANDOMM_ITER; i++)
- {
- _gmp_rand (a, rstate, bits);
-#if LIMBS_PER_ULONG == 1
- ret = a[0];
-#else
- ret = a[0] | (a[1] << GMP_NUMB_BITS);
-#endif
- if (LIKELY (ret < n)) /* usually one iteration suffices */
- goto done;
- }
-
- /* Too many iterations, there must be something degenerate about the
- rstate algorithm. Return r%n. */
- ret -= n;
- ASSERT (ret < n);
-
- done:
- return ret;
-}
+++ /dev/null
-/* __gmp_rands -- global random state for old-style random functions.
-
- EVERYTHING IN THIS FILE IS FOR INTERNAL USE ONLY. IT'S ALMOST CERTAIN TO
- BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN FUTURE GNU
- MP RELEASES. */
-
-/*
-Copyright 2001 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-
-/* Use this via the RANDS macro in gmp-impl.h */
-char __gmp_rands_initialized = 0;
-gmp_randstate_t __gmp_rands;
+++ /dev/null
-/* gmp_randseed (state, seed) -- Set initial seed SEED in random state STATE.
-
-Copyright 2000, 2002 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-void
-gmp_randseed (gmp_randstate_t rstate,
- mpz_srcptr seed)
-{
- (*((gmp_randfnptr_t *) RNG_FNPTR (rstate))->randseed_fn) (rstate, seed);
-}
+++ /dev/null
-/* gmp_randseed_ui (state, seed) -- Set initial seed SEED in random
- state STATE.
-
-Copyright 2000, 2002 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
-
-#include "gmp.h"
-#include "gmp-impl.h"
-
-void
-gmp_randseed_ui (gmp_randstate_t rstate,
- unsigned long int seed)
-{
- mpz_t zseed;
- mp_limb_t zlimbs[LIMBS_PER_ULONG];
-
- MPZ_FAKE_UI (zseed, zlimbs, seed);
- gmp_randseed (rstate, zseed);
-}
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.6 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
# along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
VPATH = @srcdir@
+am__make_dryrun = \
+ { \
+ am__dry=no; \
+ case $$MAKEFLAGS in \
+ *\\[\ \ ]*) \
+ echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \
+ | grep '^AM OK$$' >/dev/null || am__dry=yes;; \
+ *) \
+ for am__flg in $$MAKEFLAGS; do \
+ case $$am__flg in \
+ *=*|--*) ;; \
+ *n*) am__dry=yes; break;; \
+ esac; \
+ done;; \
+ esac; \
+ test $$am__dry = yes; \
+ }
pkgdatadir = $(datadir)/@PACKAGE@
pkgincludedir = $(includedir)/@PACKAGE@
pkglibdir = $(libdir)/@PACKAGE@
POST_UNINSTALL = :
build_triplet = @build@
host_triplet = @host@
-ANSI2KNR = $(top_builddir)/ansi2knr
subdir = scanf
DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
- $(top_srcdir)/configure.in
+ $(top_srcdir)/configure.ac
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
$(ACLOCAL_M4)
mkinstalldirs = $(install_sh) -d
CONFIG_CLEAN_VPATH_FILES =
LTLIBRARIES = $(noinst_LTLIBRARIES)
libscanf_la_LIBADD =
-am_libscanf_la_OBJECTS = doscan$U.lo fscanf$U.lo fscanffuns$U.lo \
- scanf$U.lo sscanf$U.lo sscanffuns$U.lo vfscanf$U.lo \
- vscanf$U.lo vsscanf$U.lo
+am_libscanf_la_OBJECTS = doscan.lo fscanf.lo fscanffuns.lo scanf.lo \
+ sscanf.lo sscanffuns.lo vfscanf.lo vscanf.lo vsscanf.lo
libscanf_la_OBJECTS = $(am_libscanf_la_OBJECTS)
DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
depcomp =
$(LDFLAGS) -o $@
SOURCES = $(libscanf_la_SOURCES)
DIST_SOURCES = $(libscanf_la_SOURCES)
+am__can_run_installinfo = \
+ case $$AM_UPDATE_INFO_DIR in \
+ n|no|NO) false;; \
+ *) (install-info --version) >/dev/null 2>&1;; \
+ esac
ETAGS = etags
CTAGS = ctags
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
SPEED_CYCLECOUNTER_OBJ = @SPEED_CYCLECOUNTER_OBJ@
STRIP = @STRIP@
TAL_OBJECT = @TAL_OBJECT@
+TUNE_LIBS = @TUNE_LIBS@
TUNE_SQR_OBJ = @TUNE_SQR_OBJ@
-U = @U@
U_FOR_BUILD = @U_FOR_BUILD@
VERSION = @VERSION@
WITH_READLINE_01 = @WITH_READLINE_01@
mkdir_p = @mkdir_p@
mpn_objects = @mpn_objects@
mpn_objs_in_libgmp = @mpn_objs_in_libgmp@
-mpn_objs_in_libmp = @mpn_objs_in_libmp@
oldincludedir = @oldincludedir@
pdfdir = @pdfdir@
prefix = @prefix@
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-libscanf.la: $(libscanf_la_OBJECTS) $(libscanf_la_DEPENDENCIES)
+libscanf.la: $(libscanf_la_OBJECTS) $(libscanf_la_DEPENDENCIES) $(EXTRA_libscanf_la_DEPENDENCIES)
$(LINK) $(libscanf_la_OBJECTS) $(libscanf_la_LIBADD) $(LIBS)
mostlyclean-compile:
distclean-compile:
-rm -f *.tab.c
-$(top_builddir)/ansi2knr:
- $(am__cd) $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) ./ansi2knr
-
-mostlyclean-kr:
- -test "$U" = "" || rm -f *_.c
.c.o:
$(COMPILE) -c $<
.c.lo:
$(LTCOMPILE) -c -o $@ $<
-doscan_.c: doscan.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/doscan.c; then echo $(srcdir)/doscan.c; else echo doscan.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-fscanf_.c: fscanf.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/fscanf.c; then echo $(srcdir)/fscanf.c; else echo fscanf.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-fscanffuns_.c: fscanffuns.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/fscanffuns.c; then echo $(srcdir)/fscanffuns.c; else echo fscanffuns.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-scanf_.c: scanf.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/scanf.c; then echo $(srcdir)/scanf.c; else echo scanf.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-sscanf_.c: sscanf.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sscanf.c; then echo $(srcdir)/sscanf.c; else echo sscanf.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-sscanffuns_.c: sscanffuns.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sscanffuns.c; then echo $(srcdir)/sscanffuns.c; else echo sscanffuns.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-vfscanf_.c: vfscanf.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/vfscanf.c; then echo $(srcdir)/vfscanf.c; else echo vfscanf.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-vscanf_.c: vscanf.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/vscanf.c; then echo $(srcdir)/vscanf.c; else echo vscanf.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-vsscanf_.c: vsscanf.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/vsscanf.c; then echo $(srcdir)/vsscanf.c; else echo vsscanf.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-doscan_.$(OBJEXT) doscan_.lo fscanf_.$(OBJEXT) fscanf_.lo \
-fscanffuns_.$(OBJEXT) fscanffuns_.lo scanf_.$(OBJEXT) scanf_.lo \
-sscanf_.$(OBJEXT) sscanf_.lo sscanffuns_.$(OBJEXT) sscanffuns_.lo \
-vfscanf_.$(OBJEXT) vfscanf_.lo vscanf_.$(OBJEXT) vscanf_.lo \
-vsscanf_.$(OBJEXT) vsscanf_.lo : $(ANSI2KNR)
mostlyclean-libtool:
-rm -f *.lo
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
mostlyclean: mostlyclean-am
-mostlyclean-am: mostlyclean-compile mostlyclean-generic mostlyclean-kr \
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
mostlyclean-libtool
pdf: pdf-am
uninstall-am:
-.MAKE: $(top_builddir)/ansi2knr install-am install-strip
+.MAKE: install-am install-strip
.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
clean-libtool clean-noinstLTLIBRARIES ctags distclean \
install-pdf install-pdf-am install-ps install-ps-am \
install-strip installcheck installcheck-am installdirs \
maintainer-clean maintainer-clean-generic mostlyclean \
- mostlyclean-compile mostlyclean-generic mostlyclean-kr \
- mostlyclean-libtool pdf pdf-am ps ps-am tags uninstall \
- uninstall-am
+ mostlyclean-compile mostlyclean-generic mostlyclean-libtool \
+ pdf pdf-am ps ps-am tags uninstall uninstall-am
# Tell versions [3.59,3.63) of GNU make to not export all variables.
/* SunOS 4 stdio.h doesn't provide prototypes for these */
#if ! HAVE_DECL_FGETC
-int fgetc __GMP_PROTO ((FILE *fp));
+int fgetc (FILE *);
#endif
#if ! HAVE_DECL_FSCANF
-int fscanf __GMP_PROTO ((FILE *fp, const char *fmt, ...));
+int fscanf (FILE *, const char *, ...);
#endif
#if ! HAVE_DECL_UNGETC
-int ungetc __GMP_PROTO ((int c, FILE *fp));
+int ungetc (int, FILE *);
#endif
#define P ((struct tmp_reentrant_t *) p)
total_size = size + HSIZ;
- p = (*__gmp_allocate_func) (total_size);
+ p = (char *) (*__gmp_allocate_func) (total_size);
P->size = total_size;
P->next = *markp;
*markp = P;
## Process this file with automake to generate Makefile.in
-# Copyright 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
+# Copyright 2000, 2001, 2002, 2003, 2004, 2013 Free Software Foundation, Inc.
#
-# This file is part of the GNU MP Library.
+# This file is part of the GNU MP Library test suite.
#
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 3 of the License, or (at your
-# option) any later version.
+# The GNU MP Library test suite is free software; you can redistribute it
+# and/or modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 3 of the License,
+# or (at your option) any later version.
#
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-# License for more details.
+# The GNU MP Library test suite is distributed in the hope that it will be
+# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+# Public License for more details.
#
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+# You should have received a copy of the GNU General Public License along with
+# the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/.
-SUBDIRS = . devel mpn mpz mpq mpf rand misc cxx mpbsd
+SUBDIRS = . devel mpn mpz mpq mpf rand misc cxx
include ../mpn/Makeasm.am
check_LTLIBRARIES = libtests.la
-EXTRA_libtests_la_SOURCES = amd64call.asm amd64check.c x86call.asm x86check.c
+EXTRA_libtests_la_SOURCES = amd64call.asm amd64check.c x86call.asm x86check.c \
+ arm32call.asm arm32check.c
libtests_la_SOURCES = tests.h \
memory.c misc.c refmpf.c refmpn.c refmpq.c refmpz.c spinner.c trace.c
libtests_la_DEPENDENCIES = @CALLING_CONVENTIONS_OBJS@
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.6 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@SET_MAKE@
-# Copyright 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
+# Copyright 2000, 2001, 2002, 2003, 2004, 2013 Free Software Foundation, Inc.
#
-# This file is part of the GNU MP Library.
+# This file is part of the GNU MP Library test suite.
#
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 3 of the License, or (at your
-# option) any later version.
+# The GNU MP Library test suite is free software; you can redistribute it
+# and/or modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 3 of the License,
+# or (at your option) any later version.
#
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-# License for more details.
+# The GNU MP Library test suite is distributed in the hope that it will be
+# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+# Public License for more details.
#
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+# You should have received a copy of the GNU General Public License along with
+# the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/.
# Copyright 1996, 1998, 1999, 2000, 2001, 2002 Free Software Foundation,
# Inc.
# You should have received a copy of the GNU Lesser General Public License
# along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
VPATH = @srcdir@
+am__make_dryrun = \
+ { \
+ am__dry=no; \
+ case $$MAKEFLAGS in \
+ *\\[\ \ ]*) \
+ echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \
+ | grep '^AM OK$$' >/dev/null || am__dry=yes;; \
+ *) \
+ for am__flg in $$MAKEFLAGS; do \
+ case $$am__flg in \
+ *=*|--*) ;; \
+ *n*) am__dry=yes; break;; \
+ esac; \
+ done;; \
+ esac; \
+ test $$am__dry = yes; \
+ }
pkgdatadir = $(datadir)/@PACKAGE@
pkgincludedir = $(includedir)/@PACKAGE@
pkglibdir = $(libdir)/@PACKAGE@
POST_UNINSTALL = :
build_triplet = @build@
host_triplet = @host@
-ANSI2KNR = $(top_builddir)/ansi2knr
DIST_COMMON = $(srcdir)/../mpn/Makeasm.am $(srcdir)/Makefile.am \
$(srcdir)/Makefile.in
check_PROGRAMS = t-bswap$(EXEEXT) t-constants$(EXEEXT) \
subdir = tests
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
- $(top_srcdir)/configure.in
+ $(top_srcdir)/configure.ac
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
$(ACLOCAL_M4)
mkinstalldirs = $(install_sh) -d
CONFIG_CLEAN_FILES =
CONFIG_CLEAN_VPATH_FILES =
am__DEPENDENCIES_1 =
-am_libtests_la_OBJECTS = memory$U.lo misc$U.lo refmpf$U.lo refmpn$U.lo \
- refmpq$U.lo refmpz$U.lo spinner$U.lo trace$U.lo
+am_libtests_la_OBJECTS = memory.lo misc.lo refmpf.lo refmpn.lo \
+ refmpq.lo refmpz.lo spinner.lo trace.lo
libtests_la_OBJECTS = $(am_libtests_la_OBJECTS)
t_bswap_SOURCES = t-bswap.c
-t_bswap_OBJECTS = t-bswap$U.$(OBJEXT)
+t_bswap_OBJECTS = t-bswap.$(OBJEXT)
t_bswap_LDADD = $(LDADD)
t_bswap_DEPENDENCIES = libtests.la $(top_builddir)/libgmp.la
t_constants_SOURCES = t-constants.c
-t_constants_OBJECTS = t-constants$U.$(OBJEXT)
+t_constants_OBJECTS = t-constants.$(OBJEXT)
t_constants_LDADD = $(LDADD)
t_constants_DEPENDENCIES = libtests.la $(top_builddir)/libgmp.la
t_count_zeros_SOURCES = t-count_zeros.c
-t_count_zeros_OBJECTS = t-count_zeros$U.$(OBJEXT)
+t_count_zeros_OBJECTS = t-count_zeros.$(OBJEXT)
t_count_zeros_LDADD = $(LDADD)
t_count_zeros_DEPENDENCIES = libtests.la $(top_builddir)/libgmp.la
t_gmpmax_SOURCES = t-gmpmax.c
-t_gmpmax_OBJECTS = t-gmpmax$U.$(OBJEXT)
+t_gmpmax_OBJECTS = t-gmpmax.$(OBJEXT)
t_gmpmax_LDADD = $(LDADD)
t_gmpmax_DEPENDENCIES = libtests.la $(top_builddir)/libgmp.la
t_hightomask_SOURCES = t-hightomask.c
-t_hightomask_OBJECTS = t-hightomask$U.$(OBJEXT)
+t_hightomask_OBJECTS = t-hightomask.$(OBJEXT)
t_hightomask_LDADD = $(LDADD)
t_hightomask_DEPENDENCIES = libtests.la $(top_builddir)/libgmp.la
t_modlinv_SOURCES = t-modlinv.c
-t_modlinv_OBJECTS = t-modlinv$U.$(OBJEXT)
+t_modlinv_OBJECTS = t-modlinv.$(OBJEXT)
t_modlinv_LDADD = $(LDADD)
t_modlinv_DEPENDENCIES = libtests.la $(top_builddir)/libgmp.la
t_parity_SOURCES = t-parity.c
-t_parity_OBJECTS = t-parity$U.$(OBJEXT)
+t_parity_OBJECTS = t-parity.$(OBJEXT)
t_parity_LDADD = $(LDADD)
t_parity_DEPENDENCIES = libtests.la $(top_builddir)/libgmp.la
t_popc_SOURCES = t-popc.c
-t_popc_OBJECTS = t-popc$U.$(OBJEXT)
+t_popc_OBJECTS = t-popc.$(OBJEXT)
t_popc_LDADD = $(LDADD)
t_popc_DEPENDENCIES = libtests.la $(top_builddir)/libgmp.la
t_sub_SOURCES = t-sub.c
-t_sub_OBJECTS = t-sub$U.$(OBJEXT)
+t_sub_OBJECTS = t-sub.$(OBJEXT)
t_sub_LDADD = $(LDADD)
t_sub_DEPENDENCIES = libtests.la $(top_builddir)/libgmp.la
DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
install-pdf-recursive install-ps-recursive install-recursive \
installcheck-recursive installdirs-recursive pdf-recursive \
ps-recursive uninstall-recursive
+am__can_run_installinfo = \
+ case $$AM_UPDATE_INFO_DIR in \
+ n|no|NO) false;; \
+ *) (install-info --version) >/dev/null 2>&1;; \
+ esac
RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \
distclean-recursive maintainer-clean-recursive
AM_RECURSIVE_TARGETS = $(RECURSIVE_TARGETS:-recursive=) \
SPEED_CYCLECOUNTER_OBJ = @SPEED_CYCLECOUNTER_OBJ@
STRIP = @STRIP@
TAL_OBJECT = @TAL_OBJECT@
+TUNE_LIBS = @TUNE_LIBS@
TUNE_SQR_OBJ = @TUNE_SQR_OBJ@
-U = @U@
U_FOR_BUILD = @U_FOR_BUILD@
VERSION = @VERSION@
WITH_READLINE_01 = @WITH_READLINE_01@
mkdir_p = @mkdir_p@
mpn_objects = @mpn_objects@
mpn_objs_in_libgmp = @mpn_objs_in_libgmp@
-mpn_objs_in_libmp = @mpn_objs_in_libmp@
oldincludedir = @oldincludedir@
pdfdir = @pdfdir@
prefix = @prefix@
top_build_prefix = @top_build_prefix@
top_builddir = @top_builddir@
top_srcdir = @top_srcdir@
-SUBDIRS = . devel mpn mpz mpq mpf rand misc cxx mpbsd
+SUBDIRS = . devel mpn mpz mpq mpf rand misc cxx
# COMPILE minus CC.
#
INCLUDES = -I$(top_srcdir)
LDADD = libtests.la $(top_builddir)/libgmp.la
check_LTLIBRARIES = libtests.la
-EXTRA_libtests_la_SOURCES = amd64call.asm amd64check.c x86call.asm x86check.c
+EXTRA_libtests_la_SOURCES = amd64call.asm amd64check.c x86call.asm x86check.c \
+ arm32call.asm arm32check.c
+
libtests_la_SOURCES = tests.h \
memory.c misc.c refmpf.c refmpn.c refmpq.c refmpz.c spinner.c trace.c
echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
esac;
+$(srcdir)/../mpn/Makeasm.am:
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
echo "rm -f \"$${dir}/so_locations\""; \
rm -f "$${dir}/so_locations"; \
done
-libtests.la: $(libtests_la_OBJECTS) $(libtests_la_DEPENDENCIES)
+libtests.la: $(libtests_la_OBJECTS) $(libtests_la_DEPENDENCIES) $(EXTRA_libtests_la_DEPENDENCIES)
$(LINK) $(libtests_la_OBJECTS) $(libtests_la_LIBADD) $(LIBS)
clean-checkPROGRAMS:
list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
echo " rm -f" $$list; \
rm -f $$list
-t-bswap$(EXEEXT): $(t_bswap_OBJECTS) $(t_bswap_DEPENDENCIES)
+t-bswap$(EXEEXT): $(t_bswap_OBJECTS) $(t_bswap_DEPENDENCIES) $(EXTRA_t_bswap_DEPENDENCIES)
@rm -f t-bswap$(EXEEXT)
$(LINK) $(t_bswap_OBJECTS) $(t_bswap_LDADD) $(LIBS)
-t-constants$(EXEEXT): $(t_constants_OBJECTS) $(t_constants_DEPENDENCIES)
+t-constants$(EXEEXT): $(t_constants_OBJECTS) $(t_constants_DEPENDENCIES) $(EXTRA_t_constants_DEPENDENCIES)
@rm -f t-constants$(EXEEXT)
$(LINK) $(t_constants_OBJECTS) $(t_constants_LDADD) $(LIBS)
-t-count_zeros$(EXEEXT): $(t_count_zeros_OBJECTS) $(t_count_zeros_DEPENDENCIES)
+t-count_zeros$(EXEEXT): $(t_count_zeros_OBJECTS) $(t_count_zeros_DEPENDENCIES) $(EXTRA_t_count_zeros_DEPENDENCIES)
@rm -f t-count_zeros$(EXEEXT)
$(LINK) $(t_count_zeros_OBJECTS) $(t_count_zeros_LDADD) $(LIBS)
-t-gmpmax$(EXEEXT): $(t_gmpmax_OBJECTS) $(t_gmpmax_DEPENDENCIES)
+t-gmpmax$(EXEEXT): $(t_gmpmax_OBJECTS) $(t_gmpmax_DEPENDENCIES) $(EXTRA_t_gmpmax_DEPENDENCIES)
@rm -f t-gmpmax$(EXEEXT)
$(LINK) $(t_gmpmax_OBJECTS) $(t_gmpmax_LDADD) $(LIBS)
-t-hightomask$(EXEEXT): $(t_hightomask_OBJECTS) $(t_hightomask_DEPENDENCIES)
+t-hightomask$(EXEEXT): $(t_hightomask_OBJECTS) $(t_hightomask_DEPENDENCIES) $(EXTRA_t_hightomask_DEPENDENCIES)
@rm -f t-hightomask$(EXEEXT)
$(LINK) $(t_hightomask_OBJECTS) $(t_hightomask_LDADD) $(LIBS)
-t-modlinv$(EXEEXT): $(t_modlinv_OBJECTS) $(t_modlinv_DEPENDENCIES)
+t-modlinv$(EXEEXT): $(t_modlinv_OBJECTS) $(t_modlinv_DEPENDENCIES) $(EXTRA_t_modlinv_DEPENDENCIES)
@rm -f t-modlinv$(EXEEXT)
$(LINK) $(t_modlinv_OBJECTS) $(t_modlinv_LDADD) $(LIBS)
-t-parity$(EXEEXT): $(t_parity_OBJECTS) $(t_parity_DEPENDENCIES)
+t-parity$(EXEEXT): $(t_parity_OBJECTS) $(t_parity_DEPENDENCIES) $(EXTRA_t_parity_DEPENDENCIES)
@rm -f t-parity$(EXEEXT)
$(LINK) $(t_parity_OBJECTS) $(t_parity_LDADD) $(LIBS)
-t-popc$(EXEEXT): $(t_popc_OBJECTS) $(t_popc_DEPENDENCIES)
+t-popc$(EXEEXT): $(t_popc_OBJECTS) $(t_popc_DEPENDENCIES) $(EXTRA_t_popc_DEPENDENCIES)
@rm -f t-popc$(EXEEXT)
$(LINK) $(t_popc_OBJECTS) $(t_popc_LDADD) $(LIBS)
-t-sub$(EXEEXT): $(t_sub_OBJECTS) $(t_sub_DEPENDENCIES)
+t-sub$(EXEEXT): $(t_sub_OBJECTS) $(t_sub_DEPENDENCIES) $(EXTRA_t_sub_DEPENDENCIES)
@rm -f t-sub$(EXEEXT)
$(LINK) $(t_sub_OBJECTS) $(t_sub_LDADD) $(LIBS)
distclean-compile:
-rm -f *.tab.c
-$(top_builddir)/ansi2knr:
- $(am__cd) $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) ./ansi2knr
-
-mostlyclean-kr:
- -test "$U" = "" || rm -f *_.c
.c.o:
$(COMPILE) -c $<
.c.lo:
$(LTCOMPILE) -c -o $@ $<
-amd64check_.c: amd64check.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/amd64check.c; then echo $(srcdir)/amd64check.c; else echo amd64check.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-memory_.c: memory.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/memory.c; then echo $(srcdir)/memory.c; else echo memory.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-misc_.c: misc.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/misc.c; then echo $(srcdir)/misc.c; else echo misc.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-refmpf_.c: refmpf.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/refmpf.c; then echo $(srcdir)/refmpf.c; else echo refmpf.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-refmpn_.c: refmpn.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/refmpn.c; then echo $(srcdir)/refmpn.c; else echo refmpn.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-refmpq_.c: refmpq.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/refmpq.c; then echo $(srcdir)/refmpq.c; else echo refmpq.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-refmpz_.c: refmpz.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/refmpz.c; then echo $(srcdir)/refmpz.c; else echo refmpz.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-spinner_.c: spinner.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/spinner.c; then echo $(srcdir)/spinner.c; else echo spinner.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-bswap_.c: t-bswap.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-bswap.c; then echo $(srcdir)/t-bswap.c; else echo t-bswap.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-constants_.c: t-constants.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-constants.c; then echo $(srcdir)/t-constants.c; else echo t-constants.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-count_zeros_.c: t-count_zeros.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-count_zeros.c; then echo $(srcdir)/t-count_zeros.c; else echo t-count_zeros.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-gmpmax_.c: t-gmpmax.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-gmpmax.c; then echo $(srcdir)/t-gmpmax.c; else echo t-gmpmax.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-hightomask_.c: t-hightomask.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-hightomask.c; then echo $(srcdir)/t-hightomask.c; else echo t-hightomask.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-modlinv_.c: t-modlinv.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-modlinv.c; then echo $(srcdir)/t-modlinv.c; else echo t-modlinv.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-parity_.c: t-parity.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-parity.c; then echo $(srcdir)/t-parity.c; else echo t-parity.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-popc_.c: t-popc.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-popc.c; then echo $(srcdir)/t-popc.c; else echo t-popc.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-sub_.c: t-sub.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-sub.c; then echo $(srcdir)/t-sub.c; else echo t-sub.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-trace_.c: trace.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/trace.c; then echo $(srcdir)/trace.c; else echo trace.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-x86check_.c: x86check.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/x86check.c; then echo $(srcdir)/x86check.c; else echo x86check.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-amd64check_.$(OBJEXT) amd64check_.lo memory_.$(OBJEXT) memory_.lo \
-misc_.$(OBJEXT) misc_.lo refmpf_.$(OBJEXT) refmpf_.lo \
-refmpn_.$(OBJEXT) refmpn_.lo refmpq_.$(OBJEXT) refmpq_.lo \
-refmpz_.$(OBJEXT) refmpz_.lo spinner_.$(OBJEXT) spinner_.lo \
-t-bswap_.$(OBJEXT) t-bswap_.lo t-constants_.$(OBJEXT) t-constants_.lo \
-t-count_zeros_.$(OBJEXT) t-count_zeros_.lo t-gmpmax_.$(OBJEXT) \
-t-gmpmax_.lo t-hightomask_.$(OBJEXT) t-hightomask_.lo \
-t-modlinv_.$(OBJEXT) t-modlinv_.lo t-parity_.$(OBJEXT) t-parity_.lo \
-t-popc_.$(OBJEXT) t-popc_.lo t-sub_.$(OBJEXT) t-sub_.lo \
-trace_.$(OBJEXT) trace_.lo x86check_.$(OBJEXT) x86check_.lo : \
-$(ANSI2KNR)
mostlyclean-libtool:
-rm -f *.lo
fi; \
dashes=`echo "$$dashes" | sed s/./=/g`; \
if test "$$failed" -eq 0; then \
- echo "$$grn$$dashes"; \
+ col="$$grn"; \
else \
- echo "$$red$$dashes"; \
+ col="$$red"; \
fi; \
- echo "$$banner"; \
- test -z "$$skipped" || echo "$$skipped"; \
- test -z "$$report" || echo "$$report"; \
- echo "$$dashes$$std"; \
+ echo "$${col}$$dashes$${std}"; \
+ echo "$${col}$$banner$${std}"; \
+ test -z "$$skipped" || echo "$${col}$$skipped$${std}"; \
+ test -z "$$report" || echo "$${col}$$report$${std}"; \
+ echo "$${col}$$dashes$${std}"; \
test "$$failed" -eq 0; \
else :; fi
done
@list='$(DIST_SUBDIRS)'; for subdir in $$list; do \
if test "$$subdir" = .; then :; else \
- test -d "$(distdir)/$$subdir" \
- || $(MKDIR_P) "$(distdir)/$$subdir" \
- || exit 1; \
- fi; \
- done
- @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \
- if test "$$subdir" = .; then :; else \
+ $(am__make_dryrun) \
+ || test -d "$(distdir)/$$subdir" \
+ || $(MKDIR_P) "$(distdir)/$$subdir" \
+ || exit 1; \
dir1=$$subdir; dir2="$(distdir)/$$subdir"; \
$(am__relativize); \
new_distdir=$$reldir; \
installcheck: installcheck-recursive
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
mostlyclean: mostlyclean-recursive
-mostlyclean-am: mostlyclean-compile mostlyclean-generic mostlyclean-kr \
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
mostlyclean-libtool
pdf: pdf-recursive
uninstall-am:
-.MAKE: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) \
- $(top_builddir)/ansi2knr check-am ctags-recursive install-am \
- install-strip tags-recursive
+.MAKE: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) check-am \
+ ctags-recursive install-am install-strip tags-recursive
.PHONY: $(RECURSIVE_CLEAN_TARGETS) $(RECURSIVE_TARGETS) CTAGS GTAGS \
all all-am check check-TESTS check-am clean \
install-strip installcheck installcheck-am installdirs \
installdirs-am maintainer-clean maintainer-clean-generic \
mostlyclean mostlyclean-compile mostlyclean-generic \
- mostlyclean-kr mostlyclean-libtool pdf pdf-am ps ps-am tags \
- tags-recursive uninstall uninstall-am
+ mostlyclean-libtool pdf pdf-am ps ps-am tags tags-recursive \
+ uninstall uninstall-am
# .s assembler, no preprocessing.
dnl AMD64 calling conventions checking.
-dnl Copyright 2000, 2003, 2004, 2006, 2007 Free Software Foundation, Inc.
-dnl
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or
-dnl modify it under the terms of the GNU Lesser General Public License as
+dnl Copyright 2000, 2003, 2004, 2006, 2007, 2010 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library test suite.
+
+dnl The GNU MP Library test suite is free software; you can redistribute it
+dnl and/or modify it under the terms of the GNU General Public License as
dnl published by the Free Software Foundation; either version 3 of the
dnl License, or (at your option) any later version.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful,
-dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
-dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-dnl Lesser General Public License for more details.
-dnl
-dnl You should have received a copy of the GNU Lesser General Public License
-dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+
+dnl The GNU MP Library test suite is distributed in the hope that it will be
+dnl useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+dnl Public License for more details.
+
+dnl You should have received a copy of the GNU General Public License along
+dnl with the GNU MP Library test suite. If not, see
+dnl http://www.gnu.org/licenses/.
+dnl The current version of the code attempts to keep the call/return
+dnl prediction stack valid, by matching calls and returns.
+
include(`../config.m4')
C Execute an fstcw, returning the current x87 control word.
PROLOGUE(x86_fstcw)
- movq $0, -8(%rsp)
- fstcw -8(%rsp)
- movq -8(%rsp), %rax
+ movq $0, -8(%rsp)
+ fstcw -8(%rsp)
+ movq -8(%rsp), %rax
ret
EPILOGUE()
-dnl Instrumented profiling won't come out quite right below, since we don't
-dnl do an actual "ret". There's only a few instructions here, so there's
-dnl no great need to get them separately accounted, just let them get
-dnl attributed to the caller.
+dnl Instrumented profiling won't come out quite right below, since we don't do
+dnl an actual "ret". There are only a few instructions here, so there's no
+dnl great need to get them separately accounted, just let them get attributed
+dnl to the caller. FIXME this comment might no longer be true.
ifelse(WANT_PROFILING,instrument,
`define(`WANT_PROFILING',no)')
TEXT
ALIGN(32)
PROLOGUE(calling_conventions)
- push %rdi
- movq G(calling_conventions_values)@GOTPCREL(%rip), %rdi
-
- movq 8(%rsp), %rax
- movq %rax, RETADDR(%rdi)
-
- leaq L(return)(%rip), %rax
- movq %rax, 8(%rsp)
-
- movq %rbx, SAVE_RBX(%rdi)
- movq %rbp, SAVE_RBP(%rdi)
- movq %r12, SAVE_R12(%rdi)
- movq %r13, SAVE_R13(%rdi)
- movq %r14, SAVE_R14(%rdi)
- movq %r15, SAVE_R15(%rdi)
-
- C values we expect to see unchanged, as per amd64check.c
- movq WANT_RBX(%rdi), %rbx
- movq WANT_RBP(%rdi), %rbp
- movq WANT_R12(%rdi), %r12
- movq WANT_R13(%rdi), %r13
- movq WANT_R14(%rdi), %r14
- movq WANT_R15(%rdi), %r15
-
- C Try to provoke a problem by starting with junk in the registers,
- C especially %rax which will be the return value.
- C
- C ENHANCE-ME: If we knew how many of the parameter registers were
- C actually being used we could put junk in the rest. Maybe we could
- C get try.c to communicate this to us.
-C movq JUNK_RAX(%rdi), %rax C overwritten below anyway
- movq JUNK_R10(%rdi), %r10
- movq JUNK_R11(%rdi), %r11
+ movq G(calling_conventions_values)@GOTPCREL(%rip), %rax
+ popq RETADDR(%rax)
+
+ movq %rbx, SAVE_RBX(%rax)
+ movq %rbp, SAVE_RBP(%rax)
+ movq %r12, SAVE_R12(%rax)
+ movq %r13, SAVE_R13(%rax)
+ movq %r14, SAVE_R14(%rax)
+ movq %r15, SAVE_R15(%rax)
+
+ C Values we expect to see unchanged, as per amd64check.c
+ movq WANT_RBX(%rax), %rbx
+ movq WANT_RBP(%rax), %rbp
+ movq WANT_R12(%rax), %r12
+ movq WANT_R13(%rax), %r13
+ movq WANT_R14(%rax), %r14
+ movq WANT_R15(%rax), %r15
+
+ C Try to provoke a problem by starting with junk in the caller-saves
+ C registers, especially %rax which will be the return value.
+C movq JUNK_RAX(%rax), %rax C overwritten below anyway
+ movq JUNK_R10(%rax), %r10
+ movq JUNK_R11(%rax), %r11
movq G(calling_conventions_function)@GOTPCREL(%rip), %rax
- pop %rdi
- jmp *(%rax)
+ call *(%rax)
-L(return):
- movq G(calling_conventions_values)@GOTPCREL(%rip), %rdi
+ movq G(calling_conventions_values)@GOTPCREL(%rip), %rcx
- movq %rbx, RBX(%rdi)
- movq %rbp, RBP(%rdi)
- movq %r12, R12(%rdi)
- movq %r13, R13(%rdi)
- movq %r14, R14(%rdi)
- movq %r15, R15(%rdi)
+ movq %rbx, RBX(%rcx)
+ movq %rbp, RBP(%rcx)
+ movq %r12, R12(%rcx)
+ movq %r13, R13(%rcx)
+ movq %r14, R14(%rcx)
+ movq %r15, R15(%rcx)
pushfq
popq %rbx
- movq %rbx, RFLAGS(%rdi)
-
- movq G(calling_conventions_fenv)@GOTPCREL(%rip), %rbx
- fstenv (%rbx)
+ movq %rbx, RFLAGS(%rcx)
+
+ movq SAVE_RBX(%rcx), %rbx
+ movq SAVE_RBP(%rcx), %rbp
+ movq SAVE_R12(%rcx), %r12
+ movq SAVE_R13(%rcx), %r13
+ movq SAVE_R14(%rcx), %r14
+ movq SAVE_R15(%rcx), %r15
+
+ C Overwrite parameter registers
+C mov JUNK_R9(%rcx), %r9
+C mov JUNK_R8(%rcx), %r8
+C mov JUNK_RCX(%rcx), %rcx
+C mov JUNK_RDX(%rcx), %rdx
+C mov JUNK_RSI(%rcx), %rsi
+C mov JUNK_RDI(%rcx), %rdi
+
+ pushq RETADDR(%rcx)
+
+ movq G(calling_conventions_fenv)@GOTPCREL(%rip), %rcx
+ fstenv (%rcx)
finit
- movq SAVE_RBX(%rdi), %rbx
- movq SAVE_RBP(%rdi), %rbp
- movq SAVE_R12(%rdi), %r12
- movq SAVE_R13(%rdi), %r13
- movq SAVE_R14(%rdi), %r14
- movq SAVE_R15(%rdi), %r15
-
- jmp *RETADDR(%rdi)
+ ret
EPILOGUE()
Copyright 2000, 2001, 2004, 2007 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include "gmp.h"
} calling_conventions_fenv;
-char *regname[6] = {"rbx", "rbp", "r12", "r13", "r14", "r15"};
+const char *regname[6] = {"rbx", "rbp", "r12", "r13", "r14", "r15"};
#define DIR_BIT(rflags) (((rflags) & (1<<10)) != 0)
--- /dev/null
+dnl ARM32 calling conventions checking.
+
+dnl Copyright 2000, 2003, 2004, 2006, 2007, 2010, 2013 Free Software
+dnl Foundation, Inc.
+
+dnl This file is part of the GNU MP Library test suite.
+
+dnl The GNU MP Library test suite is free software; you can redistribute it
+dnl and/or modify it under the terms of the GNU General Public License as
+dnl published by the Free Software Foundation; either version 3 of the
+dnl License, or (at your option) any later version.
+
+dnl The GNU MP Library test suite is distributed in the hope that it will be
+dnl useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+dnl Public License for more details.
+
+dnl You should have received a copy of the GNU General Public License along
+dnl with the GNU MP Library test suite. If not, see
+dnl http://www.gnu.org/licenses/.
+
+
+dnl The current version of the code attempts to keep the call/return
+dnl prediction stack valid, by matching calls and returns.
+
+include(`../config.m4')
+
+
+C int calling_conventions (...);
+C
+C The global variable "calling_conventions_function" is the function to
+C call, with the arguments as passed here.
+
+define(`WANT_CALLEE_SAVES', eval(4*0))
+define(`SAVE_CALLEE_SAVES', eval(4*8))
+define(`RETADDR', eval(4*16))
+define(`GOT_CALLEE_SAVES', eval(4*17))
+define(`JUNK_PARAMS', eval(4*25))
+
+ TEXT
+ ALIGN(32)
+PROLOGUE(calling_conventions)
+ LEA( r12, calling_conventions_values)
+
+ C Preserve callee-saves registers, including the link register r14
+ add r12, r12, #SAVE_CALLEE_SAVES
+ stm r12, {r4-r11,r14}
+ sub r12, r12, #SAVE_CALLEE_SAVES
+
+ C Put chosen junk into callee-saves registers
+ add r12, r12, #WANT_CALLEE_SAVES
+ ldm r12, {r4-r11}
+ sub r12, r12, #WANT_CALLEE_SAVES
+
+ C No caller-saves registers on arm except r12 and parameter registers
+ C
+
+ C Make the actual call
+ LEA( r12, calling_conventions_function)
+ ldr r12, [r12]
+ mov r14, pc
+ bx r12
+
+ LEA( r12, calling_conventions_values)
+
+ C Save callee-saves registers after call
+ add r12, r12, #GOT_CALLEE_SAVES
+ stm r12, {r4-r11}
+ sub r12, r12, #GOT_CALLEE_SAVES
+
+ C Restore callee-saves registers, including the link register r14
+ add r12, r12, #SAVE_CALLEE_SAVES
+ ldm r12, {r4-r11,r14}
+ sub r12, r12, #SAVE_CALLEE_SAVES
+
+ C Overwrite parameter registers. Note that we overwrite r1, which
+ C could hold one half of a 64-bit return value, since we don't use that
+ C in GMP.
+ add r12, r12, #JUNK_PARAMS
+ ldm r12, {r1-r3}
+
+ bx r14
+EPILOGUE()
--- /dev/null
+/* ARM32 calling conventions checking.
+
+Copyright 2000, 2001, 2004, 2007 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
+
+#include <stdio.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+/* Vector of constants and register values. */
+mp_limb_t calling_conventions_values[29] =
+{
+ 0x12345678, /* 0 want_r4 */
+ 0x87654321, /* 1 want_r5 */
+ 0x89ABCDEF, /* 2 want_r6 */
+ 0xFEDCBA98, /* 3 want_r7 */
+ 0xDEADBEEF, /* 4 want_r8 */
+ 0xBADECAFE, /* 5 want_r9 */
+ 0xFFEEDDCC, /* 6 want_r10 */
+ 0xBBAA9988, /* 7 want_r11 */
+
+ 0x00000000, /* 8 save_r4 */
+ 0x00000000, /* 9 save_r5 */
+ 0x00000000, /* 10 save_r6 */
+ 0x00000000, /* 11 save_r7 */
+ 0x00000000, /* 12 save_r8 */
+ 0x00000000, /* 13 save_r9 */
+ 0x00000000, /* 14 save_r10 */
+ 0x00000000, /* 15 save_r11 */
+ 0x00000000, /* 16 save_r14 */
+
+ 0x00000000, /* 17 got_r4 */
+ 0x00000000, /* 18 got_r5 */
+ 0x00000000, /* 19 got_r6 */
+ 0x00000000, /* 20 got_r7 */
+ 0x00000000, /* 21 got_r8 */
+ 0x00000000, /* 22 got_r9 */
+ 0x00000000, /* 23 got_r10 */
+ 0x00000000, /* 24 got_r11 */
+
+ 0x00112233, /* 25 junk_r0 */
+ 0x44556677, /* 26 junk_r1 */
+ 0x12344321, /* 27 junk_r2 */
+ 0x56788765, /* 28 junk_r3 */
+};
+
+/* Index starts for various regions in above vector. */
+#define WANT_CALLEE_SAVES 0
+#define SAVE_CALLEE_SAVES 8
+#define RETADDR 16
+#define GOT_CALLEE_SAVES 17
+#define JUNK_PARAMS 25
+
+/* Verify that the function last invoked through calling_conventions()
+   preserved the callee-saves registers r4-r11.
+
+   For each of the eight registers, the value stored in the
+   GOT_CALLEE_SAVES region of calling_conventions_values (written after
+   the call) is compared with the corresponding entry in the
+   WANT_CALLEE_SAVES region (the junk loaded before the call).  Every
+   mismatch is reported on stdout.
+
+   Return 1 if ok, 0 if not.  */
+
+int
+calling_conventions_check (void)
+{
+  /* Printed ahead of the first mismatch only; cleared once emitted.  */
+  const char *header = "Violated calling conventions:\n";
+  int ret = 1;
+  int i;
+
+  /* The limb values are cast to unsigned long so that the arguments
+     match the %lX conversions whichever integer type mp_limb_t is
+     configured as.  The do/while wrapper makes the macro behave as a
+     single statement.  */
+#define CHECK(callreg, regnum, value)                                  \
+  do                                                                   \
+    {                                                                  \
+      if ((callreg) != (value))                                        \
+        {                                                              \
+          printf ("%s r%d got 0x%08lX want 0x%08lX\n",                 \
+                  header, (int) (regnum),                              \
+                  (unsigned long) (callreg),                           \
+                  (unsigned long) (value));                            \
+          header = "";                                                 \
+          ret = 0;                                                     \
+        }                                                              \
+    }                                                                  \
+  while (0)
+
+  /* Slot i of each region corresponds to register r(i+4).  */
+  for (i = 0; i < 8; i++)
+    {
+      CHECK (calling_conventions_values[GOT_CALLEE_SAVES + i],
+             i + 4,
+             calling_conventions_values[WANT_CALLEE_SAVES + i]);
+    }
+
+  return ret;
+}
# Copyright 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
#
-# This file is part of the GNU MP Library.
+# This file is part of the GNU MP Library test suite.
#
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 3 of the License, or (at your
-# option) any later version.
+# The GNU MP Library test suite is free software; you can redistribute it
+# and/or modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 3 of the License,
+# or (at your option) any later version.
#
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-# License for more details.
+# The GNU MP Library test suite is distributed in the hope that it will be
+# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+# Public License for more details.
#
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+# You should have received a copy of the GNU General Public License along with
+# the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/.
# LDADD has an explicit -L of $(top_builddir)/.libs for the benefit of gcc
# 3.2 on itanium2-hp-hpux11.22. Without this option, the libgmp.sl.6
# required by libgmpxx.sl (ie. in its NEEDED records) is not found by the
# linker. FIXME: Presumably libtool should do something about this itself.
+# -lm is needed for t-ops2 which compares the results of trunc and mpf_trunc.
#
INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/tests
LDADD = -L$(top_builddir)/.libs \
$(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmpxx.la \
- $(top_builddir)/libgmp.la
+ $(top_builddir)/libgmp.la \
+ -lm
if WANT_CXX
-check_PROGRAMS = t-assign t-binary t-cast t-constr t-headers \
- t-istream t-locale t-misc t-ops t-ostream t-prec t-rand t-ternary t-unary
+check_PROGRAMS = t-binary t-cast t-cxx11 \
+ t-headers t-iostream t-istream t-locale t-misc t-mix \
+ t-ops t-ops2 t-ops3 t-ostream t-prec \
+ t-ternary t-unary \
+ t-do-exceptions-work-at-all-with-this-compiler \
+ t-assign t-constr t-rand
TESTS = $(check_PROGRAMS)
endif
t_binary_SOURCES = t-binary.cc
t_cast_SOURCES = t-cast.cc
t_constr_SOURCES = t-constr.cc
+t_cxx11_SOURCES = t-cxx11.cc
t_headers_SOURCES = t-headers.cc
+t_iostream_SOURCES= t-iostream.cc
t_istream_SOURCES = t-istream.cc
t_locale_SOURCES = t-locale.cc clocale.c
t_misc_SOURCES = t-misc.cc
+t_mix_SOURCES = t-mix.cc
t_ops_SOURCES = t-ops.cc
+t_ops2_SOURCES = t-ops2.cc
+t_ops3_SOURCES = t-ops3.cc
t_ostream_SOURCES = t-ostream.cc
t_prec_SOURCES = t-prec.cc
t_rand_SOURCES = t-rand.cc
t_ternary_SOURCES = t-ternary.cc
t_unary_SOURCES = t-unary.cc
+t_do_exceptions_work_at_all_with_this_compiler_SOURCES = \
+ t-do-exceptions-work-at-all-with-this-compiler.cc
$(top_builddir)/tests/libtests.la:
cd $(top_builddir)/tests; $(MAKE) $(AM_MAKEFLAGS) libtests.la
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.6 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
# Copyright 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
#
-# This file is part of the GNU MP Library.
+# This file is part of the GNU MP Library test suite.
#
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 3 of the License, or (at your
-# option) any later version.
+# The GNU MP Library test suite is free software; you can redistribute it
+# and/or modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 3 of the License,
+# or (at your option) any later version.
#
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-# License for more details.
+# The GNU MP Library test suite is distributed in the hope that it will be
+# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+# Public License for more details.
#
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+# You should have received a copy of the GNU General Public License along with
+# the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/.
VPATH = @srcdir@
+am__make_dryrun = \
+ { \
+ am__dry=no; \
+ case $$MAKEFLAGS in \
+ *\\[\ \ ]*) \
+ echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \
+ | grep '^AM OK$$' >/dev/null || am__dry=yes;; \
+ *) \
+ for am__flg in $$MAKEFLAGS; do \
+ case $$am__flg in \
+ *=*|--*) ;; \
+ *n*) am__dry=yes; break;; \
+ esac; \
+ done;; \
+ esac; \
+ test $$am__dry = yes; \
+ }
pkgdatadir = $(datadir)/@PACKAGE@
pkgincludedir = $(includedir)/@PACKAGE@
pkglibdir = $(libdir)/@PACKAGE@
POST_UNINSTALL = :
build_triplet = @build@
host_triplet = @host@
-ANSI2KNR = $(top_builddir)/ansi2knr
-@WANT_CXX_TRUE@check_PROGRAMS = t-assign$(EXEEXT) t-binary$(EXEEXT) \
-@WANT_CXX_TRUE@ t-cast$(EXEEXT) t-constr$(EXEEXT) \
-@WANT_CXX_TRUE@ t-headers$(EXEEXT) t-istream$(EXEEXT) \
+@WANT_CXX_TRUE@check_PROGRAMS = t-binary$(EXEEXT) t-cast$(EXEEXT) \
+@WANT_CXX_TRUE@ t-cxx11$(EXEEXT) t-headers$(EXEEXT) \
+@WANT_CXX_TRUE@ t-iostream$(EXEEXT) t-istream$(EXEEXT) \
@WANT_CXX_TRUE@ t-locale$(EXEEXT) t-misc$(EXEEXT) \
-@WANT_CXX_TRUE@ t-ops$(EXEEXT) t-ostream$(EXEEXT) \
-@WANT_CXX_TRUE@ t-prec$(EXEEXT) t-rand$(EXEEXT) \
-@WANT_CXX_TRUE@ t-ternary$(EXEEXT) t-unary$(EXEEXT)
+@WANT_CXX_TRUE@ t-mix$(EXEEXT) t-ops$(EXEEXT) t-ops2$(EXEEXT) \
+@WANT_CXX_TRUE@ t-ops3$(EXEEXT) t-ostream$(EXEEXT) \
+@WANT_CXX_TRUE@ t-prec$(EXEEXT) t-ternary$(EXEEXT) \
+@WANT_CXX_TRUE@ t-unary$(EXEEXT) \
+@WANT_CXX_TRUE@ t-do-exceptions-work-at-all-with-this-compiler$(EXEEXT) \
+@WANT_CXX_TRUE@ t-assign$(EXEEXT) t-constr$(EXEEXT) \
+@WANT_CXX_TRUE@ t-rand$(EXEEXT)
subdir = tests/cxx
DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
- $(top_srcdir)/configure.in
+ $(top_srcdir)/configure.ac
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
$(ACLOCAL_M4)
mkinstalldirs = $(install_sh) -d
t_constr_LDADD = $(LDADD)
t_constr_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmpxx.la $(top_builddir)/libgmp.la
+am_t_cxx11_OBJECTS = t-cxx11.$(OBJEXT)
+t_cxx11_OBJECTS = $(am_t_cxx11_OBJECTS)
+t_cxx11_LDADD = $(LDADD)
+t_cxx11_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+ $(top_builddir)/libgmpxx.la $(top_builddir)/libgmp.la
+am_t_do_exceptions_work_at_all_with_this_compiler_OBJECTS = \
+ t-do-exceptions-work-at-all-with-this-compiler.$(OBJEXT)
+t_do_exceptions_work_at_all_with_this_compiler_OBJECTS = \
+ $(am_t_do_exceptions_work_at_all_with_this_compiler_OBJECTS)
+t_do_exceptions_work_at_all_with_this_compiler_LDADD = $(LDADD)
+t_do_exceptions_work_at_all_with_this_compiler_DEPENDENCIES = \
+ $(top_builddir)/tests/libtests.la $(top_builddir)/libgmpxx.la \
+ $(top_builddir)/libgmp.la
am_t_headers_OBJECTS = t-headers.$(OBJEXT)
t_headers_OBJECTS = $(am_t_headers_OBJECTS)
t_headers_LDADD = $(LDADD)
t_headers_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmpxx.la $(top_builddir)/libgmp.la
+am_t_iostream_OBJECTS = t-iostream.$(OBJEXT)
+t_iostream_OBJECTS = $(am_t_iostream_OBJECTS)
+t_iostream_LDADD = $(LDADD)
+t_iostream_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+ $(top_builddir)/libgmpxx.la $(top_builddir)/libgmp.la
am_t_istream_OBJECTS = t-istream.$(OBJEXT)
t_istream_OBJECTS = $(am_t_istream_OBJECTS)
t_istream_LDADD = $(LDADD)
t_istream_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmpxx.la $(top_builddir)/libgmp.la
-am_t_locale_OBJECTS = t-locale.$(OBJEXT) clocale$U.$(OBJEXT)
+am_t_locale_OBJECTS = t-locale.$(OBJEXT) clocale.$(OBJEXT)
t_locale_OBJECTS = $(am_t_locale_OBJECTS)
t_locale_LDADD = $(LDADD)
t_locale_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
t_misc_LDADD = $(LDADD)
t_misc_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmpxx.la $(top_builddir)/libgmp.la
+am_t_mix_OBJECTS = t-mix.$(OBJEXT)
+t_mix_OBJECTS = $(am_t_mix_OBJECTS)
+t_mix_LDADD = $(LDADD)
+t_mix_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+ $(top_builddir)/libgmpxx.la $(top_builddir)/libgmp.la
am_t_ops_OBJECTS = t-ops.$(OBJEXT)
t_ops_OBJECTS = $(am_t_ops_OBJECTS)
t_ops_LDADD = $(LDADD)
t_ops_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmpxx.la $(top_builddir)/libgmp.la
+am_t_ops2_OBJECTS = t-ops2.$(OBJEXT)
+t_ops2_OBJECTS = $(am_t_ops2_OBJECTS)
+t_ops2_LDADD = $(LDADD)
+t_ops2_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+ $(top_builddir)/libgmpxx.la $(top_builddir)/libgmp.la
+am_t_ops3_OBJECTS = t-ops3.$(OBJEXT)
+t_ops3_OBJECTS = $(am_t_ops3_OBJECTS)
+t_ops3_LDADD = $(LDADD)
+t_ops3_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+ $(top_builddir)/libgmpxx.la $(top_builddir)/libgmp.la
am_t_ostream_OBJECTS = t-ostream.$(OBJEXT)
t_ostream_OBJECTS = $(am_t_ostream_OBJECTS)
t_ostream_LDADD = $(LDADD)
--mode=link $(CXXLD) $(AM_CXXFLAGS) $(CXXFLAGS) $(AM_LDFLAGS) \
$(LDFLAGS) -o $@
SOURCES = $(t_assign_SOURCES) $(t_binary_SOURCES) $(t_cast_SOURCES) \
- $(t_constr_SOURCES) $(t_headers_SOURCES) $(t_istream_SOURCES) \
- $(t_locale_SOURCES) $(t_misc_SOURCES) $(t_ops_SOURCES) \
- $(t_ostream_SOURCES) $(t_prec_SOURCES) $(t_rand_SOURCES) \
- $(t_ternary_SOURCES) $(t_unary_SOURCES)
+ $(t_constr_SOURCES) $(t_cxx11_SOURCES) \
+ $(t_do_exceptions_work_at_all_with_this_compiler_SOURCES) \
+ $(t_headers_SOURCES) $(t_iostream_SOURCES) \
+ $(t_istream_SOURCES) $(t_locale_SOURCES) $(t_misc_SOURCES) \
+ $(t_mix_SOURCES) $(t_ops_SOURCES) $(t_ops2_SOURCES) \
+ $(t_ops3_SOURCES) $(t_ostream_SOURCES) $(t_prec_SOURCES) \
+ $(t_rand_SOURCES) $(t_ternary_SOURCES) $(t_unary_SOURCES)
DIST_SOURCES = $(t_assign_SOURCES) $(t_binary_SOURCES) \
- $(t_cast_SOURCES) $(t_constr_SOURCES) $(t_headers_SOURCES) \
+ $(t_cast_SOURCES) $(t_constr_SOURCES) $(t_cxx11_SOURCES) \
+ $(t_do_exceptions_work_at_all_with_this_compiler_SOURCES) \
+ $(t_headers_SOURCES) $(t_iostream_SOURCES) \
$(t_istream_SOURCES) $(t_locale_SOURCES) $(t_misc_SOURCES) \
- $(t_ops_SOURCES) $(t_ostream_SOURCES) $(t_prec_SOURCES) \
+ $(t_mix_SOURCES) $(t_ops_SOURCES) $(t_ops2_SOURCES) \
+ $(t_ops3_SOURCES) $(t_ostream_SOURCES) $(t_prec_SOURCES) \
$(t_rand_SOURCES) $(t_ternary_SOURCES) $(t_unary_SOURCES)
+am__can_run_installinfo = \
+ case $$AM_UPDATE_INFO_DIR in \
+ n|no|NO) false;; \
+ *) (install-info --version) >/dev/null 2>&1;; \
+ esac
ETAGS = etags
CTAGS = ctags
am__tty_colors = \
SPEED_CYCLECOUNTER_OBJ = @SPEED_CYCLECOUNTER_OBJ@
STRIP = @STRIP@
TAL_OBJECT = @TAL_OBJECT@
+TUNE_LIBS = @TUNE_LIBS@
TUNE_SQR_OBJ = @TUNE_SQR_OBJ@
-U = @U@
U_FOR_BUILD = @U_FOR_BUILD@
VERSION = @VERSION@
WITH_READLINE_01 = @WITH_READLINE_01@
mkdir_p = @mkdir_p@
mpn_objects = @mpn_objects@
mpn_objs_in_libgmp = @mpn_objs_in_libgmp@
-mpn_objs_in_libmp = @mpn_objs_in_libmp@
oldincludedir = @oldincludedir@
pdfdir = @pdfdir@
prefix = @prefix@
# 3.2 on itanium2-hp-hpux11.22. Without this option, the libgmp.sl.6
# required by libgmpxx.sl (ie. in its NEEDED records) is not found by the
# linker. FIXME: Presumably libtool should do something about this itself.
+# -lm is needed for t-ops2 which compares the results of trunc and mpf_trunc.
#
INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/tests
LDADD = -L$(top_builddir)/.libs \
$(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmpxx.la \
- $(top_builddir)/libgmp.la
+ $(top_builddir)/libgmp.la \
+ -lm
@WANT_CXX_TRUE@TESTS = $(check_PROGRAMS)
t_assign_SOURCES = t-assign.cc
t_binary_SOURCES = t-binary.cc
t_cast_SOURCES = t-cast.cc
t_constr_SOURCES = t-constr.cc
+t_cxx11_SOURCES = t-cxx11.cc
t_headers_SOURCES = t-headers.cc
+t_iostream_SOURCES = t-iostream.cc
t_istream_SOURCES = t-istream.cc
t_locale_SOURCES = t-locale.cc clocale.c
t_misc_SOURCES = t-misc.cc
+t_mix_SOURCES = t-mix.cc
t_ops_SOURCES = t-ops.cc
+t_ops2_SOURCES = t-ops2.cc
+t_ops3_SOURCES = t-ops3.cc
t_ostream_SOURCES = t-ostream.cc
t_prec_SOURCES = t-prec.cc
t_rand_SOURCES = t-rand.cc
t_ternary_SOURCES = t-ternary.cc
t_unary_SOURCES = t-unary.cc
+t_do_exceptions_work_at_all_with_this_compiler_SOURCES = \
+ t-do-exceptions-work-at-all-with-this-compiler.cc
+
# Libtool (1.5) somehow botches its uninstalled shared library setups on
# OpenBSD 3.2, making the C++ test programs here fail. libgmpxx.so ends up
list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
echo " rm -f" $$list; \
rm -f $$list
-t-assign$(EXEEXT): $(t_assign_OBJECTS) $(t_assign_DEPENDENCIES)
+t-assign$(EXEEXT): $(t_assign_OBJECTS) $(t_assign_DEPENDENCIES) $(EXTRA_t_assign_DEPENDENCIES)
@rm -f t-assign$(EXEEXT)
$(CXXLINK) $(t_assign_OBJECTS) $(t_assign_LDADD) $(LIBS)
-t-binary$(EXEEXT): $(t_binary_OBJECTS) $(t_binary_DEPENDENCIES)
+t-binary$(EXEEXT): $(t_binary_OBJECTS) $(t_binary_DEPENDENCIES) $(EXTRA_t_binary_DEPENDENCIES)
@rm -f t-binary$(EXEEXT)
$(CXXLINK) $(t_binary_OBJECTS) $(t_binary_LDADD) $(LIBS)
-t-cast$(EXEEXT): $(t_cast_OBJECTS) $(t_cast_DEPENDENCIES)
+t-cast$(EXEEXT): $(t_cast_OBJECTS) $(t_cast_DEPENDENCIES) $(EXTRA_t_cast_DEPENDENCIES)
@rm -f t-cast$(EXEEXT)
$(CXXLINK) $(t_cast_OBJECTS) $(t_cast_LDADD) $(LIBS)
-t-constr$(EXEEXT): $(t_constr_OBJECTS) $(t_constr_DEPENDENCIES)
+t-constr$(EXEEXT): $(t_constr_OBJECTS) $(t_constr_DEPENDENCIES) $(EXTRA_t_constr_DEPENDENCIES)
@rm -f t-constr$(EXEEXT)
$(CXXLINK) $(t_constr_OBJECTS) $(t_constr_LDADD) $(LIBS)
-t-headers$(EXEEXT): $(t_headers_OBJECTS) $(t_headers_DEPENDENCIES)
+t-cxx11$(EXEEXT): $(t_cxx11_OBJECTS) $(t_cxx11_DEPENDENCIES) $(EXTRA_t_cxx11_DEPENDENCIES)
+ @rm -f t-cxx11$(EXEEXT)
+ $(CXXLINK) $(t_cxx11_OBJECTS) $(t_cxx11_LDADD) $(LIBS)
+t-do-exceptions-work-at-all-with-this-compiler$(EXEEXT): $(t_do_exceptions_work_at_all_with_this_compiler_OBJECTS) $(t_do_exceptions_work_at_all_with_this_compiler_DEPENDENCIES) $(EXTRA_t_do_exceptions_work_at_all_with_this_compiler_DEPENDENCIES)
+ @rm -f t-do-exceptions-work-at-all-with-this-compiler$(EXEEXT)
+ $(CXXLINK) $(t_do_exceptions_work_at_all_with_this_compiler_OBJECTS) $(t_do_exceptions_work_at_all_with_this_compiler_LDADD) $(LIBS)
+t-headers$(EXEEXT): $(t_headers_OBJECTS) $(t_headers_DEPENDENCIES) $(EXTRA_t_headers_DEPENDENCIES)
@rm -f t-headers$(EXEEXT)
$(CXXLINK) $(t_headers_OBJECTS) $(t_headers_LDADD) $(LIBS)
-t-istream$(EXEEXT): $(t_istream_OBJECTS) $(t_istream_DEPENDENCIES)
+t-iostream$(EXEEXT): $(t_iostream_OBJECTS) $(t_iostream_DEPENDENCIES) $(EXTRA_t_iostream_DEPENDENCIES)
+ @rm -f t-iostream$(EXEEXT)
+ $(CXXLINK) $(t_iostream_OBJECTS) $(t_iostream_LDADD) $(LIBS)
+t-istream$(EXEEXT): $(t_istream_OBJECTS) $(t_istream_DEPENDENCIES) $(EXTRA_t_istream_DEPENDENCIES)
@rm -f t-istream$(EXEEXT)
$(CXXLINK) $(t_istream_OBJECTS) $(t_istream_LDADD) $(LIBS)
-t-locale$(EXEEXT): $(t_locale_OBJECTS) $(t_locale_DEPENDENCIES)
+t-locale$(EXEEXT): $(t_locale_OBJECTS) $(t_locale_DEPENDENCIES) $(EXTRA_t_locale_DEPENDENCIES)
@rm -f t-locale$(EXEEXT)
$(CXXLINK) $(t_locale_OBJECTS) $(t_locale_LDADD) $(LIBS)
-t-misc$(EXEEXT): $(t_misc_OBJECTS) $(t_misc_DEPENDENCIES)
+t-misc$(EXEEXT): $(t_misc_OBJECTS) $(t_misc_DEPENDENCIES) $(EXTRA_t_misc_DEPENDENCIES)
@rm -f t-misc$(EXEEXT)
$(CXXLINK) $(t_misc_OBJECTS) $(t_misc_LDADD) $(LIBS)
-t-ops$(EXEEXT): $(t_ops_OBJECTS) $(t_ops_DEPENDENCIES)
+t-mix$(EXEEXT): $(t_mix_OBJECTS) $(t_mix_DEPENDENCIES) $(EXTRA_t_mix_DEPENDENCIES)
+ @rm -f t-mix$(EXEEXT)
+ $(CXXLINK) $(t_mix_OBJECTS) $(t_mix_LDADD) $(LIBS)
+t-ops$(EXEEXT): $(t_ops_OBJECTS) $(t_ops_DEPENDENCIES) $(EXTRA_t_ops_DEPENDENCIES)
@rm -f t-ops$(EXEEXT)
$(CXXLINK) $(t_ops_OBJECTS) $(t_ops_LDADD) $(LIBS)
-t-ostream$(EXEEXT): $(t_ostream_OBJECTS) $(t_ostream_DEPENDENCIES)
+t-ops2$(EXEEXT): $(t_ops2_OBJECTS) $(t_ops2_DEPENDENCIES) $(EXTRA_t_ops2_DEPENDENCIES)
+ @rm -f t-ops2$(EXEEXT)
+ $(CXXLINK) $(t_ops2_OBJECTS) $(t_ops2_LDADD) $(LIBS)
+t-ops3$(EXEEXT): $(t_ops3_OBJECTS) $(t_ops3_DEPENDENCIES) $(EXTRA_t_ops3_DEPENDENCIES)
+ @rm -f t-ops3$(EXEEXT)
+ $(CXXLINK) $(t_ops3_OBJECTS) $(t_ops3_LDADD) $(LIBS)
+t-ostream$(EXEEXT): $(t_ostream_OBJECTS) $(t_ostream_DEPENDENCIES) $(EXTRA_t_ostream_DEPENDENCIES)
@rm -f t-ostream$(EXEEXT)
$(CXXLINK) $(t_ostream_OBJECTS) $(t_ostream_LDADD) $(LIBS)
-t-prec$(EXEEXT): $(t_prec_OBJECTS) $(t_prec_DEPENDENCIES)
+t-prec$(EXEEXT): $(t_prec_OBJECTS) $(t_prec_DEPENDENCIES) $(EXTRA_t_prec_DEPENDENCIES)
@rm -f t-prec$(EXEEXT)
$(CXXLINK) $(t_prec_OBJECTS) $(t_prec_LDADD) $(LIBS)
-t-rand$(EXEEXT): $(t_rand_OBJECTS) $(t_rand_DEPENDENCIES)
+t-rand$(EXEEXT): $(t_rand_OBJECTS) $(t_rand_DEPENDENCIES) $(EXTRA_t_rand_DEPENDENCIES)
@rm -f t-rand$(EXEEXT)
$(CXXLINK) $(t_rand_OBJECTS) $(t_rand_LDADD) $(LIBS)
-t-ternary$(EXEEXT): $(t_ternary_OBJECTS) $(t_ternary_DEPENDENCIES)
+t-ternary$(EXEEXT): $(t_ternary_OBJECTS) $(t_ternary_DEPENDENCIES) $(EXTRA_t_ternary_DEPENDENCIES)
@rm -f t-ternary$(EXEEXT)
$(CXXLINK) $(t_ternary_OBJECTS) $(t_ternary_LDADD) $(LIBS)
-t-unary$(EXEEXT): $(t_unary_OBJECTS) $(t_unary_DEPENDENCIES)
+t-unary$(EXEEXT): $(t_unary_OBJECTS) $(t_unary_DEPENDENCIES) $(EXTRA_t_unary_DEPENDENCIES)
@rm -f t-unary$(EXEEXT)
$(CXXLINK) $(t_unary_OBJECTS) $(t_unary_LDADD) $(LIBS)
distclean-compile:
-rm -f *.tab.c
-$(top_builddir)/ansi2knr:
- $(am__cd) $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) ./ansi2knr
-
-mostlyclean-kr:
- -test "$U" = "" || rm -f *_.c
.c.o:
$(COMPILE) -c $<
.c.lo:
$(LTCOMPILE) -c -o $@ $<
-clocale_.c: clocale.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/clocale.c; then echo $(srcdir)/clocale.c; else echo clocale.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-clocale_.$(OBJEXT) clocale_.lo : $(ANSI2KNR)
.cc.o:
$(CXXCOMPILE) -c -o $@ $<
fi; \
dashes=`echo "$$dashes" | sed s/./=/g`; \
if test "$$failed" -eq 0; then \
- echo "$$grn$$dashes"; \
+ col="$$grn"; \
else \
- echo "$$red$$dashes"; \
+ col="$$red"; \
fi; \
- echo "$$banner"; \
- test -z "$$skipped" || echo "$$skipped"; \
- test -z "$$report" || echo "$$report"; \
- echo "$$dashes$$std"; \
+ echo "$${col}$$dashes$${std}"; \
+ echo "$${col}$$banner$${std}"; \
+ test -z "$$skipped" || echo "$${col}$$skipped$${std}"; \
+ test -z "$$report" || echo "$${col}$$report$${std}"; \
+ echo "$${col}$$dashes$${std}"; \
test "$$failed" -eq 0; \
else :; fi
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
mostlyclean: mostlyclean-am
-mostlyclean-am: mostlyclean-compile mostlyclean-generic mostlyclean-kr \
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
mostlyclean-libtool
pdf: pdf-am
uninstall-am:
-.MAKE: $(top_builddir)/ansi2knr check-am install-am install-strip
+.MAKE: check-am install-am install-strip
.PHONY: CTAGS GTAGS all all-am check check-TESTS check-am clean \
clean-checkPROGRAMS clean-generic clean-libtool ctags \
install-ps install-ps-am install-strip installcheck \
installcheck-am installdirs maintainer-clean \
maintainer-clean-generic mostlyclean mostlyclean-compile \
- mostlyclean-generic mostlyclean-kr mostlyclean-libtool pdf \
- pdf-am ps ps-am tags uninstall uninstall-am
+ mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
+ tags uninstall uninstall-am
$(top_builddir)/tests/libtests.la:
Copyright 2001, 2002 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include "config.h"
Copyright 2001, 2002, 2003 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include "config.h"
#include "gmp-impl.h"
#include "tests.h"
-using namespace std;
+using std::string;
+using std::invalid_argument;
void
} catch (invalid_argument) {
}
}
+
+ // swap(mpz_class &)
+ {
+ mpz_class a(123);
+ mpz_class b(456);
+ a.swap(b);
+ a.swap(a);
+ ASSERT_ALWAYS(a == 456);
+ ASSERT_ALWAYS(b == 123);
+ }
+
+ // swap(mpz_class &, mpz_class &)
+ {
+ mpz_class a(123);
+ mpz_class b(456);
+ ::swap(a, b);
+ ::swap(a, a);
+ ASSERT_ALWAYS(a == 456);
+ ASSERT_ALWAYS(b == 123);
+ }
+ {
+ using std::swap;
+ mpz_class a(123);
+ mpz_class b(456);
+ swap(a, b);
+ swap(a, a);
+ ASSERT_ALWAYS(a == 456);
+ ASSERT_ALWAYS(b == 123);
+ }
}
void
// operator=(unsigned short int)
{
unsigned short int a = 54321u;
- mpz_class b;
+ mpq_class b;
b = a; ASSERT_ALWAYS(b == 54321u);
}
} catch (invalid_argument) {
}
}
+
+ // swap(mpq_class &)
+ {
+ mpq_class a(3, 2);
+ mpq_class b(-1, 4);
+ a.swap(b);
+ a.swap(a);
+ ASSERT_ALWAYS(a == -.25);
+ ASSERT_ALWAYS(b == 1.5);
+ }
+
+ // swap(mpq_class &, mpq_class &)
+ {
+ mpq_class a(3, 2);
+ mpq_class b(-1, 4);
+ ::swap(a, b);
+ ::swap(a, a);
+ ASSERT_ALWAYS(a == -.25);
+ ASSERT_ALWAYS(b == 1.5);
+ }
+ {
+ using std::swap;
+ mpq_class a(3, 2);
+ mpq_class b(-1, 4);
+ swap(a, b);
+ swap(a, a);
+ ASSERT_ALWAYS(a == -.25);
+ ASSERT_ALWAYS(b == 1.5);
+ }
}
void
} catch (invalid_argument) {
}
}
+
+ // swap(mpf_class &)
+ {
+ mpf_class a(123);
+ mpf_class b(456);
+ a.swap(b);
+ a.swap(a);
+ ASSERT_ALWAYS(a == 456);
+ ASSERT_ALWAYS(b == 123);
+ }
+
+ // swap(mpf_class &, mpf_class &)
+ {
+ mpf_class a(123);
+ mpf_class b(456);
+ ::swap(a, b);
+ ::swap(a, a);
+ ASSERT_ALWAYS(a == 456);
+ ASSERT_ALWAYS(b == 123);
+ }
+ {
+ using std::swap;
+ mpf_class a(123);
+ mpf_class b(456);
+ swap(a, b);
+ swap(a, a);
+ ASSERT_ALWAYS(a == 456);
+ ASSERT_ALWAYS(b == 123);
+ }
}
/* Test mp*_class binary expressions.
-Copyright 2001, 2002, 2003, 2008 Free Software Foundation, Inc.
+Copyright 2001, 2002, 2003, 2008, 2012 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include "config.h"
{
mpz_class a(3), b(4);
mpz_class c(a * (-b)); ASSERT_ALWAYS(c == -12);
+ c = c * (-b); ASSERT_ALWAYS(c == 48);
}
// template <class T, class U, class Op>
mpz_class a(3), b(2), c(1);
mpz_class d;
d = (a % b) + c; ASSERT_ALWAYS(d == 2);
+ d = (a % b) + d; ASSERT_ALWAYS(d == 3);
}
// template <class T, class U, class V, class Op>
mpz_class a(3), b(5), c(7);
mpz_class d;
d = (a - b) * (-c); ASSERT_ALWAYS(d == 14);
+ d = (b - d) * (-a); ASSERT_ALWAYS(d == 27);
+ d = (a - b) * (-d); ASSERT_ALWAYS(d == 54);
}
{
double c = 2.0;
mpq_class d;
d = a * (b + c); ASSERT_ALWAYS(d == 2);
+ d = d * (b + c); ASSERT_ALWAYS(d == 6);
}
// template <class T, class U, class V, class Op>
mpz_class b(1), c(4);
mpq_class d;
d = (b - c) * a; ASSERT_ALWAYS(d == -2);
+ d = (b - c) * d; ASSERT_ALWAYS(d == 6);
}
// template <class T, class U, class Op>
mpq_class a(1, 3), b(3, 4);
mpq_class c;
c = a * (-b); ASSERT_ALWAYS(c == -0.25);
+ a = a * (-b); ASSERT_ALWAYS(a == -0.25);
}
// template <class T, class U, class Op>
{
mpq_class a(1, 3), b(2, 3), c(1, 4);
mpq_class d((a / b) + c); ASSERT_ALWAYS(d == 0.75);
+ c = (a / b) + c; ASSERT_ALWAYS(c == 0.75);
}
// template <class T, class U, class V, class Op>
mpq_class a(1, 2), b(1, 4);
mpz_class c(1);
mpq_class d((a + b) - c); ASSERT_ALWAYS(d == -0.25);
+ d = (a + d) - c; ASSERT_ALWAYS(d == -0.75);
+ d = (a + d) - d.get_num(); ASSERT_ALWAYS(d == 2.75);
+ d = (2 * d) * d.get_den(); ASSERT_ALWAYS(d == 22);
+ d = (b * d) / -d.get_num(); ASSERT_ALWAYS(d == -0.25);
}
{
mpq_class a(1, 3), b(3, 2);
mpz_class c(2), d(4);
mpq_class e;
e = (a * b) / (c - d); ASSERT_ALWAYS(e == -0.25);
+ e = (2 * e) / (c - d); ASSERT_ALWAYS(e == 0.25);
}
// template <class T, class U, class V, class W, class Op>
signed int d = 4;
mpq_class e;
e = (c % d) / (a * b); ASSERT_ALWAYS(e == 10);
+ e = (e.get_num() % d) / (2 / e); ASSERT_ALWAYS(e == 10);
}
// template <class T, class U, class V, class Op>
mpq_class a(1, 3), b(3, 4), c(2, 5);
mpq_class d;
d = (a * b) / (-c); ASSERT_ALWAYS(d == -0.625);
+ d = (c * d) / (-b); ASSERT_ALWAYS(3 * d == 1);
+ d = (a * c) / (-d); ASSERT_ALWAYS(5 * d == -2);
}
}
mpz_class b(2), c(1);
mpf_class d;
d = a * (b + c); ASSERT_ALWAYS(d == 9);
+ a = a * (b + c); ASSERT_ALWAYS(a == 9);
}
// template <class T, class U, class V, class Op>
mpf_class a(2), b(-3);
mpf_class c;
c = a * (-b); ASSERT_ALWAYS(c == 6);
+ c = c * (-b); ASSERT_ALWAYS(c == 18);
}
// template <class T, class U, class Op>
mpf_class a(3), b(4), c(5);
mpf_class d;
d = (a / b) - c; ASSERT_ALWAYS(d == -4.25);
+ c = (a / b) - c; ASSERT_ALWAYS(c == -4.25);
}
// template <class T, class U, class V, class Op>
Copyright 2003 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include "gmp.h"
#include "gmpxx.h"
Copyright 2001, 2002, 2003 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include "config.h"
const char *a = "FFFF";
int base = 16;
mpq_class b(a, base); ASSERT_ALWAYS(b == 65535u);
+ mpq_class c(0, 1); ASSERT_ALWAYS(c == 0);
}
// mpq_class(const std::string &)
--- /dev/null
+/* Test C++11 features
+
+Copyright 2011, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
+
+#include "config.h"
+
+#include "gmp.h"
+#include "gmpxx.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+#if __GMPXX_USE_CXX11
+
+#include <utility>
+#include <type_traits>
+
+// Compile-time check that move assignment of the mp*_class types is
+// declared noexcept.  Nothing is asserted at run time.
+void check_noexcept ()
+{
+  mpz_class zdst, zsrc;
+  mpq_class qdst, qsrc;
+  mpf_class fdst, fsrc;
+
+  // Move assignment between objects of the same type.
+  static_assert (noexcept (zdst = std::move (zsrc)), "sorry");
+  static_assert (noexcept (qdst = std::move (qsrc)), "sorry");
+  static_assert (noexcept (fdst = std::move (fsrc)), "sorry");
+
+  // Moving an mpz_class into an mpq_class.
+  static_assert (noexcept (qdst = std::move (zdst)), "sorry");
+}
+
+void check_common_type () /* Compile-time checks of std::common_type.
+   CHECK_COMMON_TYPE(T, U, Res) asserts that std::common_type<T, U>::type
+   is exactly Res.  CHECK_COMMON_TYPE_BUILTIN1 pairs each listed builtin
+   arithmetic type (as first argument) with T, CHECK_COMMON_TYPE_BUILTIN2
+   does the same with T as first argument, and CHECK_COMMON_TYPE_BUILTIN
+   applies both.  All checks are static_asserts; z, q and f below exist
+   only so that decltype can name the types of expressions such as -z
+   or z+z.  */
+{
+#define CHECK_COMMON_TYPE(T, U, Res) \
+ static_assert(std::is_same<std::common_type<T, U>::type, Res>::value, "sorry")
+#define CHECK_COMMON_TYPE_BUILTIN1(T, Res) \
+ CHECK_COMMON_TYPE( signed char , T, Res); \
+ CHECK_COMMON_TYPE(unsigned char , T, Res); \
+ CHECK_COMMON_TYPE( signed short, T, Res); \
+ CHECK_COMMON_TYPE(unsigned short, T, Res); \
+ CHECK_COMMON_TYPE( signed int , T, Res); \
+ CHECK_COMMON_TYPE(unsigned int , T, Res); \
+ CHECK_COMMON_TYPE( signed long , T, Res); \
+ CHECK_COMMON_TYPE(unsigned long , T, Res); \
+ CHECK_COMMON_TYPE(float , T, Res); \
+ CHECK_COMMON_TYPE(double, T, Res)
+#define CHECK_COMMON_TYPE_BUILTIN2(T, Res) \
+ CHECK_COMMON_TYPE(T, signed char , Res); \
+ CHECK_COMMON_TYPE(T, unsigned char , Res); \
+ CHECK_COMMON_TYPE(T, signed short, Res); \
+ CHECK_COMMON_TYPE(T, unsigned short, Res); \
+ CHECK_COMMON_TYPE(T, signed int , Res); \
+ CHECK_COMMON_TYPE(T, unsigned int , Res); \
+ CHECK_COMMON_TYPE(T, signed long , Res); \
+ CHECK_COMMON_TYPE(T, unsigned long , Res); \
+ CHECK_COMMON_TYPE(T, float , Res); \
+ CHECK_COMMON_TYPE(T, double, Res)
+#define CHECK_COMMON_TYPE_BUILTIN(T, Res) \
+ CHECK_COMMON_TYPE_BUILTIN1(T, Res); \
+ CHECK_COMMON_TYPE_BUILTIN2(T, Res)
+ /* These would just work with implicit conversions */
+ CHECK_COMMON_TYPE (mpz_class, mpq_class, mpq_class);
+ CHECK_COMMON_TYPE (mpz_class, mpf_class, mpf_class);
+ CHECK_COMMON_TYPE (mpf_class, mpq_class, mpf_class);
+
+ CHECK_COMMON_TYPE_BUILTIN (mpz_class, mpz_class);
+ CHECK_COMMON_TYPE_BUILTIN (mpq_class, mpq_class);
+ CHECK_COMMON_TYPE_BUILTIN (mpf_class, mpf_class);
+
+ mpz_class z; mpq_class q; mpf_class f; // only used inside decltype below
+
+ CHECK_COMMON_TYPE (decltype(-z), mpz_class, mpz_class);
+ CHECK_COMMON_TYPE (decltype(-q), mpq_class, mpq_class);
+ CHECK_COMMON_TYPE (decltype(-f), mpf_class, mpf_class);
+
+ CHECK_COMMON_TYPE (decltype(-z), mpq_class, mpq_class);
+ CHECK_COMMON_TYPE (decltype(-z), mpf_class, mpf_class);
+ CHECK_COMMON_TYPE (decltype(-q), mpf_class, mpf_class);
+
+ /* These require a common_type specialization */
+ CHECK_COMMON_TYPE (decltype(-z), decltype(z+z), mpz_class);
+ CHECK_COMMON_TYPE (decltype(-q), decltype(q+q), mpq_class);
+ CHECK_COMMON_TYPE (decltype(-f), decltype(f+f), mpf_class);
+
+ CHECK_COMMON_TYPE (decltype(-q), mpz_class, mpq_class);
+ CHECK_COMMON_TYPE (decltype(-f), mpz_class, mpf_class);
+ CHECK_COMMON_TYPE (decltype(-f), mpq_class, mpf_class);
+
+ CHECK_COMMON_TYPE (decltype(-z), decltype(-q), mpq_class);
+ CHECK_COMMON_TYPE (decltype(-z), decltype(-f), mpf_class);
+ CHECK_COMMON_TYPE (decltype(-q), decltype(-f), mpf_class);
+
+ /* These could be broken by a naive common_type specialization:
+ the common type of an expression type with itself must stay that type */
+ CHECK_COMMON_TYPE (decltype(-z), decltype(-z), decltype(-z));
+ CHECK_COMMON_TYPE (decltype(-q), decltype(-q), decltype(-q));
+ CHECK_COMMON_TYPE (decltype(-f), decltype(-f), decltype(-f));
+
+ /* Painful: expression types against every builtin arithmetic type
+ listed in the macros above, in both argument orders */
+ CHECK_COMMON_TYPE_BUILTIN (decltype(-z), mpz_class);
+ CHECK_COMMON_TYPE_BUILTIN (decltype(-q), mpq_class);
+ CHECK_COMMON_TYPE_BUILTIN (decltype(-f), mpf_class);
+}
+
+// Move-construct a U from a T and verify the value arrives intact.
+// The moved-from T must still be destructible and assignable.
+template<class T, class U = T>
+void check_move_init ()
+{
+  // Case 1: the moved-from source is only destroyed (at end of scope).
+  {
+    T source = 3;
+    U target = std::move (source);
+    ASSERT_ALWAYS (target == 3);
+  }
+
+  // Case 2: the moved-from source receives a fresh value afterwards,
+  // which must not disturb the object it was moved into.
+  {
+    T source = 2;
+    U target = std::move (source);
+    source = -7;
+    ASSERT_ALWAYS (source == -7);
+    ASSERT_ALWAYS (target == 2);
+  }
+}
+
+// Move-assign a T into a default-constructed U and verify the value.
+// The moved-from T must still be destructible and assignable.
+template<class T, class U = T>
+void check_move_assign ()
+{
+  // Case 1: the moved-from source is only destroyed (at end of scope).
+  {
+    T source = 3;
+    U target;
+    target = std::move (source);
+    ASSERT_ALWAYS (target == 3);
+  }
+
+  // Case 2: the moved-from source receives a fresh value afterwards,
+  // which must not disturb the object it was moved into.
+  {
+    T source = 2;
+    U target;
+    target = std::move (source);
+    source = -7;
+    ASSERT_ALWAYS (source == -7);
+    ASSERT_ALWAYS (target == 2);
+  }
+
+  // Case 3: self move-assign (not necessary, but it happens to work...)
+  {
+    T self = 4;
+    self = std::move (self);
+    ASSERT_ALWAYS (self == 4);
+  }
+}
+
+// Exercise the _mpz, _mpq and _mpf user-defined literal suffixes.
+void check_user_defined_literal ()
+{
+  // 35 hexadecimal digits parsed from a string, used as the reference
+  // for the same digits written as an _mpz literal below.
+  mpz_class expected ("123456789abcdef0123456789abcdef0123", 16);
+
+  ASSERT_ALWAYS (123_mpz % 5 == 3);
+  ASSERT_ALWAYS (-11_mpq / 22 == -.5);
+  ASSERT_ALWAYS (112.5e-1_mpf * 4 == 45);
+  ASSERT_ALWAYS (0x123456789abcdef0123456789abcdef0123_mpz == expected);
+}
+
+// Check for explicit conversion to bool.
+// Overload pair used as a probe: if an argument converted to bool
+// implicitly, the bool overload (returning void) would be selected;
+// otherwise only the variadic overload (returning int) is viable.
+void implicit_bool(bool);
+int implicit_bool(...);
+
+void check_bool_conversion ()
+{
+  // One negative, one zero and one positive value of each class.
+  const mpz_class zn = -2, z0 = 0, zp = +2;
+  const mpq_class qn = -2, q0 = 0, qp = +2;
+  const mpf_class fn = -2, f0 = 0, fp = +2;
+
+  // In a boolean context nonzero values must be true and zero false.
+  if (!(zn && qn && fn && zp && qp && fp && !z0 && !q0 && !f0))
+    ASSERT_ALWAYS(false);
+  if (z0 || q0 || f0)
+    ASSERT_ALWAYS(false);
+
+  // These declarations only compile when the probe returns int, i.e.
+  // when the bool overload was not selected for an mp*_class argument.
+  decltype(implicit_bool(zn)) z_probe = 1;
+  decltype(implicit_bool(qn)) q_probe = 1;
+  decltype(implicit_bool(fn)) f_probe = 1;
+  (void)(z_probe+q_probe+f_probe);
+}
+
+// Run every C++11 check between tests_start() and tests_end().
+int
+main (void)
+{
+  tests_start();
+
+  // Checks built on static_assert.
+  check_noexcept();
+  check_common_type();
+
+  // Move construction, source and destination of the same class.
+  check_move_init<mpz_class>();
+  check_move_init<mpq_class>();
+  check_move_init<mpf_class>();
+
+  // Move assignment, source and destination of the same class.
+  check_move_assign<mpz_class>();
+  check_move_assign<mpq_class>();
+  check_move_assign<mpf_class>();
+
+  // Moving an mpz_class into an mpq_class.
+  check_move_init<mpz_class,mpq_class>();
+  check_move_assign<mpz_class,mpq_class>();
+
+  check_user_defined_literal();
+  check_bool_conversion();
+
+  tests_end();
+  return 0;
+}
+
+#else
+int main () { return 0; } // __GMPXX_USE_CXX11 is false: nothing to test, report success
+#endif
--- /dev/null
+/* Test if the compiler has working try / throw / catch.
+
+Copyright 2013 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
+
+#include <stdexcept>
+
+// Unconditionally throws std::invalid_argument with the message "Test".
+inline void
+throw_expr ()
+{
+  throw std::invalid_argument ("Test");
+}
+
+using namespace std;
+
+// Throw an exception through throw_expr() and catch it here.
+// Returns 0 once the exception has been caught.
+int
+main ()
+{
+  try
+    {
+      throw_expr();
+    }
+  // Catch by const reference: catching by value copies the exception
+  // object and would slice a derived exception type.
+  catch (const invalid_argument &) { }
+
+  return 0;
+}
Copyright 2001, 2002 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include "gmpxx.h"
--- /dev/null
+/* Test stream formatted input and output on mp*_class
+
+Copyright 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
+
+#include <sstream>
+
+#include "gmp.h"
+#include "gmpxx.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+using namespace std;
+
+// The tests are extremely basic. These functions just forward to the
+// ones tested in t-istream.cc and t-ostream.cc; we rely on those for
+// advanced tests and only check the syntax here.
+
+// Read one value of each mp*_class from a string stream with operator>>
+// and compare it with the expected number.
+void
+checki ()
+{
+  // Integer.
+  {
+    mpz_class value;
+    istringstream in ("123");
+    in >> value;
+    ASSERT_ALWAYS (value == 123);
+  }
+
+  // Rational, written as numerator/denominator.
+  {
+    mpq_class value;
+    istringstream in ("3/4");
+    in >> value;
+    ASSERT_ALWAYS (value == .75);
+  }
+
+  // Floating point.
+  {
+    mpf_class value;
+    istringstream in ("1.5");
+    in >> value;
+    ASSERT_ALWAYS (value == 1.5);
+  }
+}
+
+// Write each mp*_class to a string stream with operator<<, once as a
+// plain object and once as the expression (object + 1), and compare
+// the produced text.
+void
+checko ()
+{
+  // mpz_class object.
+  {
+    mpz_class value=123;
+    ostringstream out;
+    out << value;
+    ASSERT_ALWAYS (out.str() == "123");
+  }
+  // mpz_class expression.
+  {
+    mpz_class value=123;
+    ostringstream out;
+    out << (value+1);
+    ASSERT_ALWAYS (out.str() == "124");
+  }
+
+  // mpq_class object.
+  {
+    mpq_class value(3,4);
+    ostringstream out;
+    out << value;
+    ASSERT_ALWAYS (out.str() == "3/4");
+  }
+  // mpq_class expression.
+  {
+    mpq_class value(3,4);
+    ostringstream out;
+    out << (value+1);
+    ASSERT_ALWAYS (out.str() == "7/4");
+  }
+
+  // mpf_class object.
+  {
+    mpf_class value=1.5;
+    ostringstream out;
+    out << value;
+    ASSERT_ALWAYS (out.str() == "1.5");
+  }
+  // mpf_class expression.
+  {
+    mpf_class value=1.5;
+    ostringstream out;
+    out << (value+1);
+    ASSERT_ALWAYS (out.str() == "2.5");
+  }
+}
+
+// Run the input checks, then the output checks.  The command-line
+// arguments are accepted but not consulted.
+int
+main (int argc, char *argv[])
+{
+  tests_start ();
+  checki ();	// formatted input, operator>>
+  checko ();	// formatted output, operator<<
+  tests_end ();
+
+  return 0;
+}
Copyright 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <iostream>
#include <cstdlib>
// since there's no mantissa digits, but g++ reads the whole thing and only
// then decides it's bad.
-int option_check_standard = 0;
+bool option_check_standard = false;
// On some versions of g++ 2.96 it's been observed that putback() may leave
// result of a bug, since for instance it's ok in g++ 2.95 and g++ 3.3. We
// detect the problem at runtime and disable affected checks.
-int putback_tellg_works = 1;
+bool putback_tellg_works = true;
void
check_putback_tellg (void)
{
cout << "Warning, istringstream has a bug: putback() doesn't update tellg().\n";;
cout << "Tests on tellg() will be skipped.\n";
- putback_tellg_works = 0;
+ putback_tellg_works = false;
}
}
};
mpz_t got, want;
- int got_ok, want_ok;
+ bool got_ok, want_ok;
+ bool got_eof, want_eof;
long got_si, want_si;
streampos init_tellg, got_pos, want_pos;
for (size_t i = 0; i < numberof (data); i++)
{
+ size_t input_length = strlen (data[i].input);
want_pos = (data[i].want_pos == -1
- ? strlen (data[i].input) : data[i].want_pos);
+ ? input_length : data[i].want_pos);
+ want_eof = (want_pos == streampos(input_length));
want_ok = (data[i].want != NULL);
want_si = mpz_get_si (want);
input >> got_si;
- got_ok = (input ? 1 : 0);
+ got_ok = !input.fail();
+ got_eof = input.eof();
input.clear();
got_pos = input.tellg() - init_tellg;
cout << " got_si: " << got_si << "\n";
cout << " want_si: " << want_si << "\n";
}
+ if (want_ok && got_eof != want_eof)
+ {
+ WRONG ("stdc++ operator>> wrong EOF state, check_mpz");
+ cout << " got_eof: " << got_eof << "\n";
+ cout << " want_eof: " << want_eof << "\n";
+ }
if (putback_tellg_works && got_pos != want_pos)
{
WRONG ("stdc++ operator>> wrong position, check_mpz");
mpz_set_ui (got, 0xDEAD);
input >> got;
- got_ok = (input ? 1 : 0);
+ got_ok = !input.fail();
+ got_eof = input.eof();
input.clear();
got_pos = input.tellg() - init_tellg;
mpz_trace (" want", want);
abort ();
}
+ if (want_ok && got_eof != want_eof)
+ {
+ WRONG ("mpz operator>> wrong EOF state");
+ cout << " want_eof: " << want_eof << "\n";
+ cout << " got_eof: " << got_eof << "\n";
+ abort ();
+ }
if (putback_tellg_works && got_pos != want_pos)
{
WRONG ("mpz operator>> wrong position");
{ " 123", 0, NULL, (ios::fmtflags) 0 }, // not without skipws
{ " 123", -1, "123", ios::skipws },
+
+ { "123 /456", 3, "123", (ios::fmtflags) 0 },
+ { "123/ 456", 4, NULL, (ios::fmtflags) 0 },
+ { "123/" , -1, NULL, (ios::fmtflags) 0 },
+ { "123 /456", 3, "123", ios::skipws },
+ { "123/ 456", 4, NULL, ios::skipws },
};
mpq_t got, want;
- int got_ok, want_ok;
+ bool got_ok, want_ok;
+ bool got_eof, want_eof;
long got_si, want_si;
streampos init_tellg, got_pos, want_pos;
for (size_t i = 0; i < numberof (data); i++)
{
+ size_t input_length = strlen (data[i].input);
want_pos = (data[i].want_pos == -1
- ? strlen (data[i].input) : data[i].want_pos);
+ ? input_length : data[i].want_pos);
+ want_eof = (want_pos == streampos(input_length));
want_ok = (data[i].want != NULL);
if (option_check_standard
&& mpz_fits_slong_p (mpq_numref(want))
- && mpz_cmp_ui (mpq_denref(want), 1L) == 0)
+ && mpz_cmp_ui (mpq_denref(want), 1L) == 0
+ && strchr (data[i].input, '/') == NULL)
{
istringstream input (data[i].input);
input.flags (data[i].flags);
want_si = mpz_get_si (mpq_numref(want));
input >> got_si;
- got_ok = (input ? 1 : 0);
+ got_ok = !input.fail();
+ got_eof = input.eof();
input.clear();
got_pos = input.tellg() - init_tellg;
cout << " got_si: " << got_si << "\n";
cout << " want_si: " << want_si << "\n";
}
+ if (want_ok && got_eof != want_eof)
+ {
+ WRONG ("stdc++ operator>> wrong EOF state, check_mpq");
+ cout << " got_eof: " << got_eof << "\n";
+ cout << " want_eof: " << want_eof << "\n";
+ }
if (putback_tellg_works && got_pos != want_pos)
{
WRONG ("stdc++ operator>> wrong position, check_mpq");
mpq_set_si (got, 0xDEAD, 0xBEEF);
input >> got;
- got_ok = (input ? 1 : 0);
+ got_ok = !input.fail();
+ got_eof = input.eof();
input.clear();
got_pos = input.tellg() - init_tellg;
mpq_trace (" want", want);
abort ();
}
+ if (want_ok && got_eof != want_eof)
+ {
+ WRONG ("mpq operator>> wrong EOF state");
+ cout << " want_eof: " << want_eof << "\n";
+ cout << " got_eof: " << got_eof << "\n";
+ abort ();
+ }
if (putback_tellg_works && got_pos != want_pos)
{
WRONG ("mpq operator>> wrong position");
};
mpf_t got, want;
- int got_ok, want_ok;
+ bool got_ok, want_ok;
+ bool got_eof, want_eof;
double got_d, want_d;
streampos init_tellg, got_pos, want_pos;
for (size_t i = 0; i < numberof (data); i++)
{
+ size_t input_length = strlen (data[i].input);
want_pos = (data[i].want_pos == -1
- ? strlen (data[i].input) : data[i].want_pos);
+ ? input_length : data[i].want_pos);
+ want_eof = (want_pos == streampos(input_length));
want_ok = (data[i].want != NULL);
init_tellg = input.tellg();
input >> got_d;
- got_ok = (input ? 1 : 0);
+ got_ok = !input.fail();
+ got_eof = input.eof();
input.clear();
got_pos = input.tellg() - init_tellg;
cout << " got: " << got_d << "\n";
cout << " want: " << want_d << "\n";
}
+ if (want_ok && got_eof != want_eof)
+ {
+ WRONG ("stdc++ operator>> wrong EOF state, check_mpf");
+ cout << " got_eof: " << got_eof << "\n";
+ cout << " want_eof: " << want_eof << "\n";
+ }
if (putback_tellg_works && got_pos != want_pos)
{
WRONG ("stdc++ operator>> wrong position, check_mpf");
mpf_set_ui (got, 0xDEAD);
input >> got;
- got_ok = (input ? 1 : 0);
+ got_ok = !input.fail();
+ got_eof = input.eof();
input.clear();
got_pos = input.tellg() - init_tellg;
mpf_trace (" want", want);
abort ();
}
+ if (want_ok && got_eof != want_eof)
+ {
+ WRONG ("mpf operator>> wrong EOF state");
+ cout << " want_eof: " << want_eof << "\n";
+ cout << " got_eof: " << got_eof << "\n";
+ abort ();
+ }
if (putback_tellg_works && got_pos != want_pos)
{
WRONG ("mpf operator>> wrong position");
main (int argc, char *argv[])
{
if (argc > 1 && strcmp (argv[1], "-s") == 0)
- option_check_standard = 1;
+ option_check_standard = true;
tests_start ();
Copyright 2001, 2002, 2003, 2007 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <clocale>
#include <iostream>
Copyright 2002, 2003 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
/* Note that we don't use <climits> for LONG_MIN, but instead our own
{
// mpz_class::fits_sint_p
{
- int fits;
+ bool fits;
mpz_class z;
z = INT_MIN; fits = z.fits_sint_p(); ASSERT_ALWAYS (fits);
z--; fits = z.fits_sint_p(); ASSERT_ALWAYS (! fits);
// mpz_class::fits_uint_p
{
- int fits;
+ bool fits;
mpz_class z;
z = 0; fits = z.fits_uint_p(); ASSERT_ALWAYS (fits);
z--; fits = z.fits_uint_p(); ASSERT_ALWAYS (! fits);
// mpz_class::fits_slong_p
{
- int fits;
+ bool fits;
mpz_class z;
z = LONG_MIN; fits = z.fits_slong_p(); ASSERT_ALWAYS (fits);
z--; fits = z.fits_slong_p(); ASSERT_ALWAYS (! fits);
// mpz_class::fits_ulong_p
{
- int fits;
+ bool fits;
mpz_class z;
z = 0; fits = z.fits_ulong_p(); ASSERT_ALWAYS (fits);
z--; fits = z.fits_ulong_p(); ASSERT_ALWAYS (! fits);
// mpz_class::fits_sshort_p
{
- int fits;
+ bool fits;
mpz_class z;
z = SHRT_MIN; fits = z.fits_sshort_p(); ASSERT_ALWAYS (fits);
z--; fits = z.fits_sshort_p(); ASSERT_ALWAYS (! fits);
// mpz_class::fits_ushort_p
{
- int fits;
+ bool fits;
mpz_class z;
z = 0; fits = z.fits_ushort_p(); ASSERT_ALWAYS (fits);
z--; fits = z.fits_ushort_p(); ASSERT_ALWAYS (! fits);
// mpq_class::get_num, mpq_class::get_den
{
- mpq_class q(4,5);
+ const mpq_class q(4,5);
mpz_class z;
z = q.get_num(); ASSERT_ALWAYS (z == 4);
z = q.get_den(); ASSERT_ALWAYS (z == 5);
p = q.get_den_mpz_t(); ASSERT_ALWAYS (mpz_cmp_ui (p, 5) == 0);
}
{
- mpq_class q(4,5);
+ const mpq_class q(4,5);
mpz_srcptr p;
p = q.get_num_mpz_t(); ASSERT_ALWAYS (mpz_cmp_ui (p, 4) == 0);
p = q.get_den_mpz_t(); ASSERT_ALWAYS (mpz_cmp_ui (p, 5) == 0);
{
// mpf_class::fits_sint_p
{
- int fits;
+ bool fits;
mpf_class f (0, 2*8*sizeof(int));
f = INT_MIN; fits = f.fits_sint_p(); ASSERT_ALWAYS (fits);
f--; fits = f.fits_sint_p(); ASSERT_ALWAYS (! fits);
// mpf_class::fits_uint_p
{
- int fits;
+ bool fits;
mpf_class f (0, 2*8*sizeof(int));
f = 0; fits = f.fits_uint_p(); ASSERT_ALWAYS (fits);
f--; fits = f.fits_uint_p(); ASSERT_ALWAYS (! fits);
// mpf_class::fits_slong_p
{
- int fits;
+ bool fits;
mpf_class f (0, 2*8*sizeof(long));
f = LONG_MIN; fits = f.fits_slong_p(); ASSERT_ALWAYS (fits);
f--; fits = f.fits_slong_p(); ASSERT_ALWAYS (! fits);
// mpf_class::fits_ulong_p
{
- int fits;
+ bool fits;
mpf_class f (0, 2*8*sizeof(long));
f = 0; fits = f.fits_ulong_p(); ASSERT_ALWAYS (fits);
f--; fits = f.fits_ulong_p(); ASSERT_ALWAYS (! fits);
// mpf_class::fits_sshort_p
{
- int fits;
+ bool fits;
mpf_class f (0, 2*8*sizeof(short));
f = SHRT_MIN; fits = f.fits_sshort_p(); ASSERT_ALWAYS (fits);
f--; fits = f.fits_sshort_p(); ASSERT_ALWAYS (! fits);
// mpf_class::fits_ushort_p
{
- int fits;
+ bool fits;
mpf_class f (0, 2*8*sizeof(short));
f = 0; fits = f.fits_ushort_p(); ASSERT_ALWAYS (fits);
f--; fits = f.fits_ushort_p(); ASSERT_ALWAYS (! fits);
}
}
+// std::numeric_limits: checks that the specializations for the gmpxx classes can be used from ordinary code.
+void
+check_limits (void)
+{
+ // Check that the content is not private: is_integer must be readable here, true for mpz_class and false for mpf_class.
+ ASSERT_ALWAYS ( std::numeric_limits<mpz_class>::is_integer);
+ ASSERT_ALWAYS (!std::numeric_limits<mpf_class>::is_integer);
+
+ // Check that symbols are emitted: taking the addresses odr-uses the static members, so they must be defined and not merely declared.
+ ASSERT_ALWAYS (&std::numeric_limits<mpz_class>::is_integer
+ != &std::numeric_limits<mpq_class>::is_integer);
+}
int
main (void)
check_mpz();
check_mpq();
check_mpf();
+ check_limits();
tests_end();
return 0;
--- /dev/null
+/* Test legality of conversion between the different mp*_class
+
+Copyright 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
+
+#include "config.h"
+
+#include "gmp.h"
+#include "gmpxx.h"
+#include "gmp-impl.h"
+#include "tests.h"
+// Overload sets used by check(): every function returns 0, 1 or 2 depending on whether its mpz_class, mpq_class or mpf_class overload was selected.
+int f_z (mpz_class){return 0;}
+int f_q (mpq_class){return 1;}
+int f_f (mpf_class){return 2;}
+int f_zq (mpz_class){return 0;}
+int f_zq (mpq_class){return 1;}
+int f_zf (mpz_class){return 0;}
+int f_zf (mpf_class){return 2;}
+int f_qf (mpq_class){return 1;}
+int f_qf (mpf_class){return 2;}
+int f_zqf(mpz_class){return 0;}
+int f_zqf(mpq_class){return 1;}
+int f_zqf(mpf_class){return 2;}
+// Passes mp*_class values and their negations to the f_* overload sets and asserts, via the returned tag, which parameter type was chosen.
+void
+check (void)
+{
+ mpz_class z=42;
+ mpq_class q=33;
+ mpf_class f=18;
+ // Implicit conversions: plain objects and negated expressions (-z, -q, -f) passed directly as arguments.
+ ASSERT_ALWAYS(f_z (z)==0); ASSERT_ALWAYS(f_z (-z)==0);
+ ASSERT_ALWAYS(f_q (z)==1); ASSERT_ALWAYS(f_q (-z)==1);
+ ASSERT_ALWAYS(f_q (q)==1); ASSERT_ALWAYS(f_q (-q)==1);
+ ASSERT_ALWAYS(f_f (z)==2); ASSERT_ALWAYS(f_f (-z)==2);
+ ASSERT_ALWAYS(f_f (q)==2); ASSERT_ALWAYS(f_f (-q)==2);
+ ASSERT_ALWAYS(f_f (f)==2); ASSERT_ALWAYS(f_f (-f)==2);
+ ASSERT_ALWAYS(f_zq (z)==0);
+ ASSERT_ALWAYS(f_zq (q)==1); ASSERT_ALWAYS(f_zq (-q)==1);
+ ASSERT_ALWAYS(f_zf (z)==0);
+ ASSERT_ALWAYS(f_zf (f)==2); ASSERT_ALWAYS(f_zf (-f)==2);
+ ASSERT_ALWAYS(f_qf (q)==1);
+ ASSERT_ALWAYS(f_qf (f)==2); ASSERT_ALWAYS(f_qf (-f)==2);
+ ASSERT_ALWAYS(f_zqf(z)==0);
+ ASSERT_ALWAYS(f_zqf(q)==1);
+ ASSERT_ALWAYS(f_zqf(f)==2); ASSERT_ALWAYS(f_zqf(-f)==2);
+ // Explicit conversions: constructing each class from every class (and from negations) must pick the overload of the constructed type.
+ ASSERT_ALWAYS(f_zqf(mpz_class(z))==0); ASSERT_ALWAYS(f_zqf(mpz_class(-z))==0);
+ ASSERT_ALWAYS(f_zqf(mpz_class(q))==0); ASSERT_ALWAYS(f_zqf(mpz_class(-q))==0);
+ ASSERT_ALWAYS(f_zqf(mpz_class(f))==0); ASSERT_ALWAYS(f_zqf(mpz_class(-f))==0);
+ ASSERT_ALWAYS(f_zqf(mpq_class(z))==1); ASSERT_ALWAYS(f_zqf(mpq_class(-z))==1);
+ ASSERT_ALWAYS(f_zqf(mpq_class(q))==1); ASSERT_ALWAYS(f_zqf(mpq_class(-q))==1);
+ ASSERT_ALWAYS(f_zqf(mpq_class(f))==1); ASSERT_ALWAYS(f_zqf(mpq_class(-f))==1);
+ ASSERT_ALWAYS(f_zqf(mpf_class(z))==2); ASSERT_ALWAYS(f_zqf(mpf_class(-z))==2);
+ ASSERT_ALWAYS(f_zqf(mpf_class(q))==2); ASSERT_ALWAYS(f_zqf(mpf_class(-q))==2);
+ ASSERT_ALWAYS(f_zqf(mpf_class(f))==2); ASSERT_ALWAYS(f_zqf(mpf_class(-f))==2);
+}
+// Test driver: runs check() between tests_start() and tests_end() and returns 0.
+int
+main (void)
+{
+ tests_start();
+
+ check();
+
+ tests_end();
+ return 0;
+}
Copyright 2001, 2002, 2003 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include "config.h"
mpz_class a(-123);
mpz_class b;
b = abs(a); ASSERT_ALWAYS(b == 123);
+ a <<= 300;
+ b = abs(a); ASSERT_ALWAYS(a + b == 0);
}
// sqrt
mpq_class a(1, 3), b(3, 4);
mpq_class c;
c = a * b; ASSERT_ALWAYS(c == 0.25);
+ c = b * b; ASSERT_ALWAYS(c == 0.5625);
}
// operator/
--- /dev/null
+/* Test mp*_class operators and functions.
+
+Copyright 2011, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
+
+#include "config.h"
+
+#include <math.h>
+
+#include "gmp.h"
+#include "gmpxx.h"
+#include "gmp-impl.h"
+#include "tests.h"
+// CHECK1(Type,a,fun) asserts fun((Type)(a))==fun(a); CHECK(Type1,Type2,a,b,op) asserts that op on the converted operands equals op on the raw operands a and b.
+// The other macros only expand to CHECK with fixed partner types (mostly in both operand orders), with the signs of a and b flipped, or once per comparison operator.
+#define CHECK1(Type,a,fun) \
+ ASSERT_ALWAYS(fun((Type)(a))==fun(a))
+#define CHECK(Type1,Type2,a,b,op) \
+ ASSERT_ALWAYS(((Type1)(a) op (Type2)(b))==((a) op (b)))
+#define CHECK_G(Type,a,b,op) \
+ CHECK(Type,Type,a,b,op)
+#define CHECK_UI(Type,a,b,op) \
+ CHECK(Type,unsigned long,a,b,op); \
+ CHECK(unsigned long,Type,a,b,op)
+#define CHECK_SI(Type,a,b,op) \
+ CHECK(Type,long,a,b,op); \
+ CHECK(long,Type,a,b,op)
+#define CHECK_D(Type,a,b,op) \
+ CHECK(Type,double,a,b,op); \
+ CHECK(double,Type,a,b,op)
+#define CHECK_MPZ(Type,a,b,op) \
+ CHECK(Type,mpz_class,a,b,op); \
+ CHECK(mpz_class,Type,a,b,op)
+#define CHECK_MPQ(Type,a,b,op) \
+ CHECK(Type,mpq_class,a,b,op); \
+ CHECK(mpq_class,Type,a,b,op)
+#define CHECK_ALL_SIGNED(Type,a,b,op) \
+ CHECK_G(Type,a,b,op); \
+ CHECK_SI(Type,a,b,op); \
+ CHECK_D(Type,a,b,op)
+#define CHECK_ALL_SIGNS(Type,a,b,op) \
+ CHECK_ALL_SIGNED(Type,a,b,op); \
+ CHECK_ALL_SIGNED(Type,-(a),b,op); \
+ CHECK_ALL_SIGNED(Type,a,-(b),op); \
+ CHECK_ALL_SIGNED(Type,-(a),-(b),op)
+#define CHECK_ALL(Type,a,b,op) \
+ CHECK_ALL_SIGNED(Type,a,b,op); \
+ CHECK_UI(Type,a,b,op)
+#define CHECK_ALL_SIGNED_COMPARISONS(Type,a,b) \
+ CHECK_ALL_SIGNED(Type,a,b,<); \
+ CHECK_ALL_SIGNED(Type,a,b,>); \
+ CHECK_ALL_SIGNED(Type,a,b,<=); \
+ CHECK_ALL_SIGNED(Type,a,b,>=); \
+ CHECK_ALL_SIGNED(Type,a,b,==); \
+ CHECK_ALL_SIGNED(Type,a,b,!=)
+#define CHECK_ALL_SIGNS_COMPARISONS(Type,a,b) \
+ CHECK_ALL_SIGNS(Type,a,b,<); \
+ CHECK_ALL_SIGNS(Type,a,b,>); \
+ CHECK_ALL_SIGNS(Type,a,b,<=); \
+ CHECK_ALL_SIGNS(Type,a,b,>=); \
+ CHECK_ALL_SIGNS(Type,a,b,==); \
+ CHECK_ALL_SIGNS(Type,a,b,!=)
+#define CHECK_ALL_COMPARISONS(Type,a,b) \
+ CHECK_ALL(Type,a,b,<); \
+ CHECK_ALL(Type,a,b,>); \
+ CHECK_ALL(Type,a,b,<=); \
+ CHECK_ALL(Type,a,b,>=); \
+ CHECK_ALL(Type,a,b,==); \
+ CHECK_ALL(Type,a,b,!=)
+
+// mpz_class checks: arithmetic, comparison, bitwise, shift, increment/decrement and unary operators plus abs, sqrt and sgn, all on small literal operands.
+void checkz (){
+ CHECK_ALL(mpz_class,5,2,+);
+ CHECK_ALL(mpz_class,5,2,-);
+ CHECK_ALL(mpz_class,5,2,*);
+ CHECK_ALL(mpz_class,5,2,/);
+ CHECK_ALL(mpz_class,5,2,%);
+ CHECK_ALL_COMPARISONS(mpz_class,5,2);
+ CHECK_ALL_SIGNS(mpz_class,11,3,+);
+ CHECK_ALL_SIGNS(mpz_class,11,3,-);
+ CHECK_ALL_SIGNS(mpz_class,11,3,*);
+ CHECK_ALL_SIGNS(mpz_class,11,3,/);
+ CHECK_ALL_SIGNS(mpz_class,11,3,%);
+ CHECK_ALL_SIGNS(mpz_class,17,2,*);
+ CHECK_ALL_SIGNS(mpz_class,17,2,/);
+ CHECK_ALL_SIGNS(mpz_class,17,2,%);
+ CHECK(unsigned long,mpz_class,5,-2,/); // unsigned long dividend, negative mpz_class divisor
+ CHECK(unsigned long,mpz_class,5,-2,%);
+ ASSERT_ALWAYS(7ul/mpz_class(1e35)==0); // divisor built from 1e35, far larger in magnitude than the dividend 7
+ ASSERT_ALWAYS(7ul%mpz_class(1e35)==7);
+ ASSERT_ALWAYS(7ul/mpz_class(-1e35)==0);
+ ASSERT_ALWAYS(7ul%mpz_class(-1e35)==7);
+ CHECK_ALL_SIGNS_COMPARISONS(mpz_class,11,3);
+ CHECK_ALL(mpz_class,6,3,&);
+ CHECK_ALL(mpz_class,6,3,|);
+ CHECK_ALL(mpz_class,6,3,^);
+ CHECK(mpz_class,unsigned long,6,2,<<); // shifts are checked with the count as unsigned long only
+ CHECK(mpz_class,unsigned long,6,2,>>);
+ CHECK(mpz_class,unsigned long,-13,2,<<);
+ CHECK(mpz_class,unsigned long,-13,2,>>);
+ ASSERT_ALWAYS(++mpz_class(7)==8);
+ ASSERT_ALWAYS(++mpz_class(-8)==-7);
+ ASSERT_ALWAYS(--mpz_class(8)==7);
+ ASSERT_ALWAYS(--mpz_class(-7)==-8);
+ ASSERT_ALWAYS(~mpz_class(7)==-8);
+ ASSERT_ALWAYS(~mpz_class(-8)==7);
+ ASSERT_ALWAYS(+mpz_class(7)==7);
+ ASSERT_ALWAYS(+mpz_class(-8)==-8);
+ ASSERT_ALWAYS(-mpz_class(7)==-7);
+ ASSERT_ALWAYS(-mpz_class(-8)==8);
+ ASSERT_ALWAYS(abs(mpz_class(7))==7);
+ ASSERT_ALWAYS(abs(mpz_class(-8))==8);
+ ASSERT_ALWAYS(sqrt(mpz_class(7))==2); // integer square root: 7 gives 2
+ ASSERT_ALWAYS(sqrt(mpz_class(0))==0);
+ ASSERT_ALWAYS(sgn(mpz_class(0))==0);
+ ASSERT_ALWAYS(sgn(mpz_class(9))==1);
+ ASSERT_ALWAYS(sgn(mpz_class(-17))==-1);
+}
+// Operator checks written once for the class T under test; operands are chosen so the expected results are exactly representable as doubles.
+template<class T>
+void checkqf (){
+ CHECK_ALL(T,5.,2,+); CHECK_MPZ(T,5.,2,+);
+ CHECK_ALL(T,5.,2,-); CHECK_MPZ(T,5.,2,-);
+ CHECK_ALL(T,5.,2,*); CHECK_MPZ(T,5.,2,*);
+ CHECK_ALL(T,5.,2,/); CHECK_MPZ(T,5.,2,/);
+ CHECK_ALL(T,0.,2,/); // zero dividend
+ CHECK_ALL_SIGNS(T,11.,3,+);
+ CHECK_ALL_SIGNS(T,11.,3,-);
+ CHECK_ALL_SIGNS(T,11.,3,*);
+ CHECK_ALL_SIGNS(T,11.,4,/); // divisor 4 rather than 3, so the double quotient is exact
+ CHECK_SI(T,LONG_MIN,1,*); // most negative long as an operand
+ CHECK_SI(T,0,3,*);
+ CHECK_ALL_COMPARISONS(T,5.,2);
+ CHECK_ALL_SIGNS_COMPARISONS(T,11.,3);
+ CHECK_MPZ(T,5,-2,<);
+ CHECK_MPZ(T,5,-2,>);
+ CHECK_MPZ(T,5,-2,<=);
+ CHECK_MPZ(T,5,-2,>=);
+ CHECK_MPZ(T,5,-2,==);
+ CHECK_MPZ(T,5,-2,!=);
+ CHECK_MPZ(T,0,0,<);
+ CHECK_MPZ(T,0,0,>);
+ CHECK_MPZ(T,0,0,<=);
+ CHECK_MPZ(T,0,0,>=);
+ CHECK_MPZ(T,0,0,==);
+ CHECK_MPZ(T,0,0,!=);
+ ASSERT_ALWAYS(T(6)<<2==6.*4); // << 2 and >> 2 must equal multiplying and dividing by 4
+ ASSERT_ALWAYS(T(6)>>2==6./4);
+ ASSERT_ALWAYS(T(-13)<<2==-13.*4);
+ ASSERT_ALWAYS(T(-13)>>2==-13./4);
+ ASSERT_ALWAYS(++T(7)==8);
+ ASSERT_ALWAYS(++T(-8)==-7);
+ ASSERT_ALWAYS(--T(8)==7);
+ ASSERT_ALWAYS(--T(-7)==-8);
+ ASSERT_ALWAYS(+T(7)==7);
+ ASSERT_ALWAYS(+T(-8)==-8);
+ ASSERT_ALWAYS(-T(7)==-7);
+ ASSERT_ALWAYS(-T(-8)==8);
+ ASSERT_ALWAYS(abs(T(7))==7);
+ ASSERT_ALWAYS(abs(T(-8))==8);
+ ASSERT_ALWAYS(sgn(T(0))==0);
+ ASSERT_ALWAYS(sgn(T(9))==1);
+ ASSERT_ALWAYS(sgn(T(-17))==-1);
+}
+// mpf_class-only checks: sqrt bounds, trunc/floor/ceil compared with the double versions, hypot with mixed argument types, and mixed mpf_class/mpq_class operators.
+void checkf (){
+ ASSERT_ALWAYS(sqrt(mpf_class(7))>2.64); // sqrt(7) is only bracketed, not compared for equality
+ ASSERT_ALWAYS(sqrt(mpf_class(7))<2.65);
+ ASSERT_ALWAYS(sqrt(mpf_class(0))==0);
+ // TODO: add some consistency checks, as described in
+ // http://gmplib.org/list-archives/gmp-bugs/2013-February/002940.html
+ CHECK1(mpf_class,1.9,trunc); // CHECK1 compares fun(mpf_class(a)) with fun(a) on the double literal
+ CHECK1(mpf_class,1.9,floor);
+ CHECK1(mpf_class,1.9,ceil);
+ CHECK1(mpf_class,4.3,trunc);
+ CHECK1(mpf_class,4.3,floor);
+ CHECK1(mpf_class,4.3,ceil);
+ CHECK1(mpf_class,-7.1,trunc);
+ CHECK1(mpf_class,-7.1,floor);
+ CHECK1(mpf_class,-7.1,ceil);
+ CHECK1(mpf_class,-2.8,trunc);
+ CHECK1(mpf_class,-2.8,floor);
+ CHECK1(mpf_class,-2.8,ceil);
+ CHECK1(mpf_class,-1.5,trunc);
+ CHECK1(mpf_class,-1.5,floor);
+ CHECK1(mpf_class,-1.5,ceil);
+ CHECK1(mpf_class,2.5,trunc);
+ CHECK1(mpf_class,2.5,floor);
+ CHECK1(mpf_class,2.5,ceil);
+ ASSERT_ALWAYS(hypot(mpf_class(-3),mpf_class(4))>4.9); // 3-4-5 triangle, result bracketed around 5
+ ASSERT_ALWAYS(hypot(mpf_class(-3),mpf_class(4))<5.1);
+ ASSERT_ALWAYS(hypot(mpf_class(-3),4.)>4.9); // the other argument as double, long and unsigned long, on either side
+ ASSERT_ALWAYS(hypot(-3.,mpf_class(4))<5.1);
+ ASSERT_ALWAYS(hypot(mpf_class(-3),4l)>4.9);
+ ASSERT_ALWAYS(hypot(-3l,mpf_class(4))<5.1);
+ ASSERT_ALWAYS(hypot(mpf_class(-3),4ul)>4.9);
+ ASSERT_ALWAYS(hypot(3ul,mpf_class(4))<5.1);
+ CHECK(mpf_class,mpq_class,1.5,2.25,+);
+ CHECK(mpf_class,mpq_class,1.5,2.25,-);
+ CHECK(mpf_class,mpq_class,1.5,-2.25,*);
+ CHECK(mpf_class,mpq_class,1.5,-2,/);
+ CHECK_MPQ(mpf_class,-5.5,-2.25,+); // CHECK_MPQ tests both mpf op mpq and mpq op mpf
+ CHECK_MPQ(mpf_class,-5.5,-2.25,-);
+ CHECK_MPQ(mpf_class,-5.5,-2.25,*);
+ CHECK_MPQ(mpf_class,-5.25,-0.5,/);
+ CHECK_MPQ(mpf_class,5,-2,<);
+ CHECK_MPQ(mpf_class,5,-2,>);
+ CHECK_MPQ(mpf_class,5,-2,<=);
+ CHECK_MPQ(mpf_class,5,-2,>=);
+ CHECK_MPQ(mpf_class,5,-2,==);
+ CHECK_MPQ(mpf_class,5,-2,!=);
+ CHECK_MPQ(mpf_class,0,0,<);
+ CHECK_MPQ(mpf_class,0,0,>);
+ CHECK_MPQ(mpf_class,0,0,<=);
+ CHECK_MPQ(mpf_class,0,0,>=);
+ CHECK_MPQ(mpf_class,0,0,==);
+ CHECK_MPQ(mpf_class,0,0,!=);
+}
+// Test driver: runs checkz, checkqf for mpq_class and mpf_class, and checkf between tests_start() and tests_end().
+int
+main (void)
+{
+ tests_start();
+
+ checkz();
+ checkqf<mpq_class>();
+ checkqf<mpf_class>();
+ checkf();
+
+ tests_end();
+ return 0;
+}
--- /dev/null
+/* Test mp*_class assignment operators (+=, -=, etc)
+
+Copyright 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
+
+#include "config.h"
+
+#include "gmp.h"
+#include "gmpxx.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+using namespace std;
+// Type lists: each FOR_ALL_* macro applies the caller-supplied macro F once to every type it names.
+#define FOR_ALL_SIGNED_BUILTIN(F) \
+ F(signed char) \
+ F(signed short) \
+ F(signed int) \
+ F(signed long) \
+ F(float) \
+ F(double)
+
+#define FOR_ALL_BUILTIN(F) \
+ FOR_ALL_SIGNED_BUILTIN(F) \
+ F(char) \
+ F(unsigned char) \
+ F(unsigned short) \
+ F(unsigned int) \
+ F(unsigned long)
+
+#define FOR_ALL_GMPXX(F) \
+ F(mpz_class) \
+ F(mpq_class) \
+ F(mpf_class)
+// Checks +=, -=, *=, /=, <<= and >>= on a copy of t: each must both return and store the value of the matching binary operator, and each inverse step must restore t.
+template<class T,class U> void f(T t, U u){
+ T a=t;
+ ASSERT_ALWAYS((a+=u)==(t+u)); ASSERT_ALWAYS(a==(t+u));
+ ASSERT_ALWAYS((a-=u)==t); ASSERT_ALWAYS(a==t);
+ ASSERT_ALWAYS((a*=u)==(t*u)); ASSERT_ALWAYS(a==(t*u));
+ ASSERT_ALWAYS((a/=u)==t); ASSERT_ALWAYS(a==t);
+ ASSERT_ALWAYS((a<<=5)==(t<<5)); ASSERT_ALWAYS(a==(t<<5));
+ ASSERT_ALWAYS((a>>=5)==t); ASSERT_ALWAYS(a==t);
+}
+// Checks %=, &=, |= and ^= against the matching binary operators; a is reset to t between operators because none of these steps is undone.
+template<class T,class U> void g(T t, U u){
+ T a=t;
+ ASSERT_ALWAYS((a%=u)==(t%u)); ASSERT_ALWAYS(a==(t%u));
+ a=t;
+ ASSERT_ALWAYS((a&=u)==(t&u)); ASSERT_ALWAYS(a==(t&u));
+ a=t;
+ ASSERT_ALWAYS((a|=u)==(t|u)); ASSERT_ALWAYS(a==(t|u));
+ a=t;
+ ASSERT_ALWAYS((a^=u)==(t^u)); ASSERT_ALWAYS(a==(t^u));
+}
+// Shift-assignment check only: a<<=5 must match t<<5, and the following a>>=5 must bring a back to t.
+template<class T> void h(T t){
+ T a=t;
+ ASSERT_ALWAYS((a<<=5)==(t<<5)); ASSERT_ALWAYS(a==(t<<5));
+ ASSERT_ALWAYS((a>>=5)==t); ASSERT_ALWAYS(a==t);
+}
+// Signed variant: calls f(t,(V)u) with u cast to every signed builtin type and every gmpxx class, then f(t,-(V)u) for the gmpxx classes.
+template<class T, class U> void ffs(T t, U u){
+#define F(V) f(t,(V)u);
+ FOR_ALL_SIGNED_BUILTIN(F)
+ FOR_ALL_GMPXX(F)
+#undef F
+#define F(V) f(t,-(V)u);
+ FOR_ALL_GMPXX(F)
+#undef F
+}
+// Full variant: calls f(t,(V)u) with u cast to every builtin type (signed and unsigned) and every gmpxx class, then f(t,-(V)u) for the gmpxx classes.
+template<class T, class U> void ff(T t, U u){
+#define F(V) f(t,(V)u);
+ FOR_ALL_BUILTIN(F)
+ FOR_ALL_GMPXX(F)
+#undef F
+#define F(V) f(t,-(V)u);
+ FOR_ALL_GMPXX(F)
+#undef F
+}
+// Signed variant for the mpz_class-only operators: calls g(t,(V)u) for every signed builtin type, then g with u as mpz_class and with its negation.
+template<class U> void ggs(mpz_class t, U u){
+#define F(V) g(t,(V)u);
+ FOR_ALL_SIGNED_BUILTIN(F)
+#undef F
+ g(t,(mpz_class)u);
+ g(t,-(mpz_class)u);
+}
+// Full variant for the mpz_class-only operators: calls g(t,(V)u) for every builtin type, then g with u as mpz_class and with its negation.
+template<class U> void gg(mpz_class t, U u){
+#define F(V) g(t,(V)u);
+ FOR_ALL_BUILTIN(F)
+#undef F
+ g(t,(mpz_class)u);
+ g(t,-(mpz_class)u);
+}
+// Runs the helpers on one mpz_class (18), one mpq_class (7/2) and one mpf_class (3.375); negative right-hand operands go through the signed-only variants ffs and ggs.
+void check(){
+ mpz_class z=18;
+ mpq_class q(7,2);
+ mpf_class d=3.375;
+ h(z); h(q); h(d);
+ ff(z,13); ff(q,13); ff(d,13);
+ ffs(z,-42); ffs(q,-42); ffs(d,-42);
+ gg(z,33); ggs(z,-22); // g's operators (%=, &=, |=, ^=) are exercised on mpz_class only
+}
+
+// Test driver: runs check() between tests_start() and tests_end() and returns 0.
+int
+main (void)
+{
+ tests_start();
+
+ check();
+
+ tests_end();
+ return 0;
+}
Copyright 2001, 2002 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <iostream>
#include <cstdlib>
using namespace std;
-int option_check_standard = 0;
+bool option_check_standard = false;
#define CALL(expr) \
main (int argc, char *argv[])
{
if (argc > 1 && strcmp (argv[1], "-s") == 0)
- option_check_standard = 1;
+ option_check_standard = true;
tests_start ();
Copyright 2001, 2002, 2003, 2008 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include "config.h"
Copyright 2002, 2003 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include "gmp.h"
#include "gmpxx.h"
r.seed(a);
mpf_class b;
b = r.get_f();
+ mpf_class c(r.get_f());
+ ASSERT_ALWAYS (c.get_prec() == mpf_get_default_prec());
+ mpf_class d(r.get_f(),212);
+ ASSERT_ALWAYS (d.get_prec() >= 212);
}
{
gmp_randclass r(gmp_randinit_default);
- int a = 123, b = 128;
+ int a = 123, b = 198;
r.seed(a);
mpf_class c;
c = r.get_f(b);
+ ASSERT_ALWAYS (c.get_prec() == mpf_get_default_prec());
+ mpf_class d(r.get_f(b));
+ ASSERT_ALWAYS (d.get_prec() >= 198);
+ mpf_class e(r.get_f(b)-r.get_f());
+ ASSERT_ALWAYS (e.get_prec() >= 198);
+ mpf_class f(r.get_f(60),300);
+ ASSERT_ALWAYS (f.get_prec() >= 300);
}
}
Copyright 2001, 2002, 2003 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include "config.h"
Copyright 2001, 2002, 2003 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include "config.h"
# Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
#
-# This file is part of the GNU MP Library.
+# This file is part of the GNU MP Library test suite.
#
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 3 of the License, or (at your
-# option) any later version.
+# The GNU MP Library test suite is free software; you can redistribute it
+# and/or modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 3 of the License,
+# or (at your option) any later version.
#
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-# License for more details.
+# The GNU MP Library test suite is distributed in the hope that it will be
+# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+# Public License for more details.
#
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+# You should have received a copy of the GNU General Public License along with
+# the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/.
INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/tests
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.6 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
# Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
#
-# This file is part of the GNU MP Library.
+# This file is part of the GNU MP Library test suite.
#
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 3 of the License, or (at your
-# option) any later version.
+# The GNU MP Library test suite is free software; you can redistribute it
+# and/or modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 3 of the License,
+# or (at your option) any later version.
#
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-# License for more details.
+# The GNU MP Library test suite is distributed in the hope that it will be
+# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+# Public License for more details.
#
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+# You should have received a copy of the GNU General Public License along with
+# the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/.
VPATH = @srcdir@
+am__make_dryrun = \
+ { \
+ am__dry=no; \
+ case $$MAKEFLAGS in \
+ *\\[\ \ ]*) \
+ echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \
+ | grep '^AM OK$$' >/dev/null || am__dry=yes;; \
+ *) \
+ for am__flg in $$MAKEFLAGS; do \
+ case $$am__flg in \
+ *=*|--*) ;; \
+ *n*) am__dry=yes; break;; \
+ esac; \
+ done;; \
+ esac; \
+ test $$am__dry = yes; \
+ }
pkgdatadir = $(datadir)/@PACKAGE@
pkgincludedir = $(includedir)/@PACKAGE@
pkglibdir = $(libdir)/@PACKAGE@
POST_UNINSTALL = :
build_triplet = @build@
host_triplet = @host@
-ANSI2KNR = $(top_builddir)/ansi2knr
EXTRA_PROGRAMS = aors_n$(EXEEXT) anymul_1$(EXEEXT) copy$(EXEEXT) \
divmod_1$(EXEEXT) divrem$(EXEEXT) shift$(EXEEXT) \
logops_n$(EXEEXT) tst-addsub$(EXEEXT) try$(EXEEXT)
DIST_COMMON = README $(srcdir)/Makefile.am $(srcdir)/Makefile.in
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
- $(top_srcdir)/configure.in
+ $(top_srcdir)/configure.ac
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
$(ACLOCAL_M4)
mkinstalldirs = $(install_sh) -d
CONFIG_CLEAN_FILES =
CONFIG_CLEAN_VPATH_FILES =
anymul_1_SOURCES = anymul_1.c
-anymul_1_OBJECTS = anymul_1$U.$(OBJEXT)
+anymul_1_OBJECTS = anymul_1.$(OBJEXT)
anymul_1_LDADD = $(LDADD)
anymul_1_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
aors_n_SOURCES = aors_n.c
-aors_n_OBJECTS = aors_n$U.$(OBJEXT)
+aors_n_OBJECTS = aors_n.$(OBJEXT)
aors_n_LDADD = $(LDADD)
aors_n_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
copy_SOURCES = copy.c
-copy_OBJECTS = copy$U.$(OBJEXT)
+copy_OBJECTS = copy.$(OBJEXT)
copy_LDADD = $(LDADD)
copy_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
divmod_1_SOURCES = divmod_1.c
-divmod_1_OBJECTS = divmod_1$U.$(OBJEXT)
+divmod_1_OBJECTS = divmod_1.$(OBJEXT)
divmod_1_LDADD = $(LDADD)
divmod_1_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
divrem_SOURCES = divrem.c
-divrem_OBJECTS = divrem$U.$(OBJEXT)
+divrem_OBJECTS = divrem.$(OBJEXT)
divrem_LDADD = $(LDADD)
divrem_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
logops_n_SOURCES = logops_n.c
-logops_n_OBJECTS = logops_n$U.$(OBJEXT)
+logops_n_OBJECTS = logops_n.$(OBJEXT)
logops_n_LDADD = $(LDADD)
logops_n_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
shift_SOURCES = shift.c
-shift_OBJECTS = shift$U.$(OBJEXT)
+shift_OBJECTS = shift.$(OBJEXT)
shift_LDADD = $(LDADD)
shift_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
try_SOURCES = try.c
-try_OBJECTS = try$U.$(OBJEXT)
+try_OBJECTS = try.$(OBJEXT)
try_LDADD = $(LDADD)
try_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
tst_addsub_SOURCES = tst-addsub.c
-tst_addsub_OBJECTS = tst-addsub$U.$(OBJEXT)
+tst_addsub_OBJECTS = tst-addsub.$(OBJEXT)
tst_addsub_LDADD = $(LDADD)
tst_addsub_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
shift.c try.c tst-addsub.c
DIST_SOURCES = anymul_1.c aors_n.c copy.c divmod_1.c divrem.c \
logops_n.c shift.c try.c tst-addsub.c
+am__can_run_installinfo = \
+ case $$AM_UPDATE_INFO_DIR in \
+ n|no|NO) false;; \
+ *) (install-info --version) >/dev/null 2>&1;; \
+ esac
ETAGS = etags
CTAGS = ctags
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
SPEED_CYCLECOUNTER_OBJ = @SPEED_CYCLECOUNTER_OBJ@
STRIP = @STRIP@
TAL_OBJECT = @TAL_OBJECT@
+TUNE_LIBS = @TUNE_LIBS@
TUNE_SQR_OBJ = @TUNE_SQR_OBJ@
-U = @U@
U_FOR_BUILD = @U_FOR_BUILD@
VERSION = @VERSION@
WITH_READLINE_01 = @WITH_READLINE_01@
mkdir_p = @mkdir_p@
mpn_objects = @mpn_objects@
mpn_objs_in_libgmp = @mpn_objs_in_libgmp@
-mpn_objs_in_libmp = @mpn_objs_in_libmp@
oldincludedir = @oldincludedir@
pdfdir = @pdfdir@
prefix = @prefix@
$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(am__aclocal_m4_deps):
-anymul_1$(EXEEXT): $(anymul_1_OBJECTS) $(anymul_1_DEPENDENCIES)
+anymul_1$(EXEEXT): $(anymul_1_OBJECTS) $(anymul_1_DEPENDENCIES) $(EXTRA_anymul_1_DEPENDENCIES)
@rm -f anymul_1$(EXEEXT)
$(LINK) $(anymul_1_OBJECTS) $(anymul_1_LDADD) $(LIBS)
-aors_n$(EXEEXT): $(aors_n_OBJECTS) $(aors_n_DEPENDENCIES)
+aors_n$(EXEEXT): $(aors_n_OBJECTS) $(aors_n_DEPENDENCIES) $(EXTRA_aors_n_DEPENDENCIES)
@rm -f aors_n$(EXEEXT)
$(LINK) $(aors_n_OBJECTS) $(aors_n_LDADD) $(LIBS)
-copy$(EXEEXT): $(copy_OBJECTS) $(copy_DEPENDENCIES)
+copy$(EXEEXT): $(copy_OBJECTS) $(copy_DEPENDENCIES) $(EXTRA_copy_DEPENDENCIES)
@rm -f copy$(EXEEXT)
$(LINK) $(copy_OBJECTS) $(copy_LDADD) $(LIBS)
-divmod_1$(EXEEXT): $(divmod_1_OBJECTS) $(divmod_1_DEPENDENCIES)
+divmod_1$(EXEEXT): $(divmod_1_OBJECTS) $(divmod_1_DEPENDENCIES) $(EXTRA_divmod_1_DEPENDENCIES)
@rm -f divmod_1$(EXEEXT)
$(LINK) $(divmod_1_OBJECTS) $(divmod_1_LDADD) $(LIBS)
-divrem$(EXEEXT): $(divrem_OBJECTS) $(divrem_DEPENDENCIES)
+divrem$(EXEEXT): $(divrem_OBJECTS) $(divrem_DEPENDENCIES) $(EXTRA_divrem_DEPENDENCIES)
@rm -f divrem$(EXEEXT)
$(LINK) $(divrem_OBJECTS) $(divrem_LDADD) $(LIBS)
-logops_n$(EXEEXT): $(logops_n_OBJECTS) $(logops_n_DEPENDENCIES)
+logops_n$(EXEEXT): $(logops_n_OBJECTS) $(logops_n_DEPENDENCIES) $(EXTRA_logops_n_DEPENDENCIES)
@rm -f logops_n$(EXEEXT)
$(LINK) $(logops_n_OBJECTS) $(logops_n_LDADD) $(LIBS)
-shift$(EXEEXT): $(shift_OBJECTS) $(shift_DEPENDENCIES)
+shift$(EXEEXT): $(shift_OBJECTS) $(shift_DEPENDENCIES) $(EXTRA_shift_DEPENDENCIES)
@rm -f shift$(EXEEXT)
$(LINK) $(shift_OBJECTS) $(shift_LDADD) $(LIBS)
-try$(EXEEXT): $(try_OBJECTS) $(try_DEPENDENCIES)
+try$(EXEEXT): $(try_OBJECTS) $(try_DEPENDENCIES) $(EXTRA_try_DEPENDENCIES)
@rm -f try$(EXEEXT)
$(LINK) $(try_OBJECTS) $(try_LDADD) $(LIBS)
-tst-addsub$(EXEEXT): $(tst_addsub_OBJECTS) $(tst_addsub_DEPENDENCIES)
+tst-addsub$(EXEEXT): $(tst_addsub_OBJECTS) $(tst_addsub_DEPENDENCIES) $(EXTRA_tst_addsub_DEPENDENCIES)
@rm -f tst-addsub$(EXEEXT)
$(LINK) $(tst_addsub_OBJECTS) $(tst_addsub_LDADD) $(LIBS)
distclean-compile:
-rm -f *.tab.c
-$(top_builddir)/ansi2knr:
- $(am__cd) $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) ./ansi2knr
-
-mostlyclean-kr:
- -test "$U" = "" || rm -f *_.c
.c.o:
$(COMPILE) -c $<
.c.lo:
$(LTCOMPILE) -c -o $@ $<
-anymul_1_.c: anymul_1.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/anymul_1.c; then echo $(srcdir)/anymul_1.c; else echo anymul_1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-aors_n_.c: aors_n.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/aors_n.c; then echo $(srcdir)/aors_n.c; else echo aors_n.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-copy_.c: copy.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/copy.c; then echo $(srcdir)/copy.c; else echo copy.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-divmod_1_.c: divmod_1.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/divmod_1.c; then echo $(srcdir)/divmod_1.c; else echo divmod_1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-divrem_.c: divrem.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/divrem.c; then echo $(srcdir)/divrem.c; else echo divrem.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-logops_n_.c: logops_n.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/logops_n.c; then echo $(srcdir)/logops_n.c; else echo logops_n.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-shift_.c: shift.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/shift.c; then echo $(srcdir)/shift.c; else echo shift.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-try_.c: try.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/try.c; then echo $(srcdir)/try.c; else echo try.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-tst-addsub_.c: tst-addsub.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/tst-addsub.c; then echo $(srcdir)/tst-addsub.c; else echo tst-addsub.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-anymul_1_.$(OBJEXT) anymul_1_.lo aors_n_.$(OBJEXT) aors_n_.lo \
-copy_.$(OBJEXT) copy_.lo divmod_1_.$(OBJEXT) divmod_1_.lo \
-divrem_.$(OBJEXT) divrem_.lo logops_n_.$(OBJEXT) logops_n_.lo \
-shift_.$(OBJEXT) shift_.lo try_.$(OBJEXT) try_.lo \
-tst-addsub_.$(OBJEXT) tst-addsub_.lo : $(ANSI2KNR)
mostlyclean-libtool:
-rm -f *.lo
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
mostlyclean: mostlyclean-am
-mostlyclean-am: mostlyclean-compile mostlyclean-generic mostlyclean-kr \
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
mostlyclean-libtool
pdf: pdf-am
uninstall-am:
-.MAKE: $(top_builddir)/ansi2knr install-am install-strip
+.MAKE: install-am install-strip
.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
clean-libtool ctags distclean distclean-compile \
install-pdf-am install-ps install-ps-am install-strip \
installcheck installcheck-am installdirs maintainer-clean \
maintainer-clean-generic mostlyclean mostlyclean-compile \
- mostlyclean-generic mostlyclean-kr mostlyclean-libtool pdf \
- pdf-am ps ps-am tags uninstall uninstall-am
+ mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
+ tags uninstall uninstall-am
allprogs: $(EXTRA_PROGRAMS)
Copyright 2000, 2001 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/.
Copyright 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2004, 2006, 2007, 2008
Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdlib.h>
#include <string.h>
#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"
-#include "tests.h"
+#include "tests/tests.h"
#ifdef OPERATION_mul_1
#define func __gmpn_mul_1
}
#endif
+#ifdef PLAIN_RANDOM
+#define MPN_RANDOM mpn_random
+#else
+#define MPN_RANDOM mpn_random2
+#endif
+
#ifdef RANDOM
size = random () % SIZE + 1;
#else
#ifdef FIXED_XLIMB
xlimb = FIXED_XLIMB;
#else
- mpn_random2 (&xlimb, 1);
+ MPN_RANDOM (&xlimb, 1);
#endif
#if TIMES != 1
#endif
#ifndef NOCHECK
- mpn_random2 (s1, size);
+ MPN_RANDOM (s1, size);
#ifdef ZERO
memset (rp, 0, size * sizeof *rp);
#else
- mpn_random2 (rp, size);
+ MPN_RANDOM (rp, size);
#endif
#if defined (PRINT) || defined (XPRINT)
printf ("xlimb=");
/*
-Copyright 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2009
+Copyright 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2009, 2011
Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdlib.h>
+#include <string.h>
#include <stdio.h>
#include "gmp.h"
#include "gmp-impl.h"
}
#endif
+static void print_posneg (mp_limb_t);
static void mpn_print (mp_ptr, mp_size_t);
+#define LXW ((int) (2 * sizeof (mp_limb_t)))
#define M * 1000000
#ifndef CLOCK
if (cyx != cyy || mpn_cmp (dx, dy, size+2) != 0
|| dx[0] != 0x87654321 || dx[size+1] != 0x12345678)
{
+ mp_size_t s, e;
+ for (s = 0;; s++)
+ if ((unsigned long long) (dx+1)[s] != (unsigned long long) (dy+1)[s])
+ break;
+ for (e = size - 1;; e--)
+ if ((unsigned long long) (dx+1)[e] != (unsigned long long) (dy+1)[e])
+ break;
#ifndef PRINT
- mpn_print (&cyx, 1);
- mpn_print (dx+1, size);
- mpn_print (&cyy, 1);
- mpn_print (dy+1, size);
+ for (i = s; i <= e; i++)
+ {
+ printf ("%6d: ", i);
+ printf ("%0*llX ", LXW, (unsigned long long) (dx+1)[i]);
+ printf ("%0*llX ", LXW, (unsigned long long) (dy+1)[i]);
+ print_posneg ((dy+1)[i] - (dx+1)[i]);
+ printf ("\n");
+ }
+ printf ("%6s: ", "retval");
+ printf ("%0*llX ", LXW, (unsigned long long) cyx);
+ printf ("%0*llX ", LXW, (unsigned long long) cyy);
+ print_posneg (cyx - cyy);
#endif
printf ("\n");
if (dy[0] != 0x87654321)
exit (0);
}
+static void /* Print D as a sign-prefixed hex magnitude, right-aligned so every case occupies LXW+1 columns. */
+print_posneg (mp_limb_t d) /* d: a limb difference computed with unsigned wrap-around */
+{
+ char buf[LXW + 2]; /* room for LXW hex digits and the NUL, plus one spare byte (assumes a limb is no wider than unsigned long long) */
+ if (d == 0)
+ printf (" %*X", LXW, 0); /* zero gets no sign; the leading blank stands in for it */
+ else if (-d < d) /* the negation is the smaller magnitude, so show D as a negative offset */
+ {
+ sprintf (buf, "%llX", (unsigned long long) -d);
+ printf ("%*s-%s", LXW - (int) strlen (buf), "", buf); /* left padding first, then the sign directly before the digits */
+ }
+ else /* includes the tie -d == d, which is printed with '+' */
+ {
+ sprintf (buf, "%llX", (unsigned long long) d);
+ printf ("%*s+%s", LXW - (int) strlen (buf), "", buf);
+ }
+}
+
static void
mpn_print (mp_ptr p, mp_size_t size)
{
#ifdef _LONG_LONG_LIMB
printf ("%0*lX%0*lX", (int) (sizeof(mp_limb_t)),
(unsigned long) (p[i] >> (GMP_LIMB_BITS/2)),
- (int) (sizeof(mp_limb_t)), (unsigned long) (p[i]));
+ (int) (sizeof(mp_limb_t)), (unsigned long) (p[i]));
#else
printf ("%0*lX", (int) (2 * sizeof(mp_limb_t)), p[i]);
#endif
/*
-Copyright 1999, 2000, 2001, 2004, 2009 Free Software Foundation, Inc.
+Copyright 1999, 2000, 2001, 2004, 2009, 2011 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdlib.h>
+#include <string.h>
#include <stdio.h>
#include "gmp.h"
#include "gmp-impl.h"
}
#endif
+static void print_posneg (mp_limb_t);
static void mpn_print (mp_ptr, mp_size_t);
+#define LXW ((int) (2 * sizeof (mp_limb_t)))
#define M * 1000000
#ifndef CLOCK
#endif
#ifndef OPS
-#define OPS (CLOCK/2)
+#define OPS (CLOCK/5)
#endif
#ifndef SIZE
#define SIZE 496
if (mpn_cmp (dx, dy, size+2) != 0
|| dx[0] != 0x87654321 || dx[size+1] != 0x12345678)
{
+ mp_size_t s, e;
+ for (s = 0;; s++)
+ if ((unsigned long long) (dx+1)[s] != (unsigned long long) (dy+1)[s])
+ break;
+ for (e = size - 1;; e--)
+ if ((unsigned long long) (dx+1)[e] != (unsigned long long) (dy+1)[e])
+ break;
#ifndef PRINT
- mpn_print (dx+1, size);
- mpn_print (dy+1, size);
+ for (i = s; i <= e; i++)
+ {
+ printf ("%6d: ", i);
+ printf ("%0*llX ", LXW, (unsigned long long) (dx+1)[i]);
+ printf ("%0*llX ", LXW, (unsigned long long) (dy+1)[i]);
+ print_posneg ((dy+1)[i] - (dx+1)[i]);
+ printf ("\n");
+ }
#endif
printf ("\n");
if (dy[0] != 0x87654321)
exit (0);
}
+static void /* Print D as a sign-prefixed hex magnitude, right-aligned so every case occupies LXW+1 columns. */
+print_posneg (mp_limb_t d) /* d: a limb difference computed with unsigned wrap-around */
+{
+ char buf[LXW + 2]; /* room for LXW hex digits and the NUL, plus one spare byte (assumes a limb is no wider than unsigned long long) */
+ if (d == 0)
+ printf (" %*X", LXW, 0); /* zero gets no sign; the leading blank stands in for it */
+ else if (-d < d) /* the negation is the smaller magnitude, so show D as a negative offset */
+ {
+ sprintf (buf, "%llX", (unsigned long long) -d);
+ printf ("%*s-%s", LXW - (int) strlen (buf), "", buf); /* left padding first, then the sign directly before the digits */
+ }
+ else /* includes the tie -d == d, which is printed with '+' */
+ {
+ sprintf (buf, "%llX", (unsigned long long) d);
+ printf ("%*s+%s", LXW - (int) strlen (buf), "", buf);
+ }
+}
+
static void
mpn_print (mp_ptr p, mp_size_t size)
{
#ifdef _LONG_LONG_LIMB
printf ("%0*lX%0*lX", (int) (sizeof(mp_limb_t)),
(unsigned long) (p[i] >> (GMP_LIMB_BITS/2)),
- (int) (sizeof(mp_limb_t)), (unsigned long) (p[i]));
+ (int) (sizeof(mp_limb_t)), (unsigned long) (p[i]));
#else
printf ("%0*lX", (int) (2 * sizeof(mp_limb_t)), p[i]);
#endif
/*
Copyright 1996, 1998, 2000, 2001, 2007 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
Copyright 1996, 1997, 1998, 2000, 2001, 2007, 2009 Free Software Foundation,
Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
Copyright 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2009 Free
Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdlib.h>
#include <stdio.h>
/*
-Copyright 1996, 1998, 1999, 2000, 2001, 2004, 2007, 2009 Free Software
+Copyright 1996, 1998, 1999, 2000, 2001, 2004, 2007, 2009, 2011 Free Software
Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdlib.h>
+#include <string.h>
#include <stdio.h>
#include "gmp.h"
#include "gmp-impl.h"
}
#endif
+static void print_posneg (mp_limb_t);
static void mpn_print (mp_ptr, mp_size_t);
+#define LXW ((int) (2 * sizeof (mp_limb_t)))
#define M * 1000000
#ifndef CLOCK
if (cyx != cyy || mpn_cmp (dx, dy, size+2) != 0
|| dx[0] != 0x87654321 || dx[size+1] != 0x12345678)
{
+ mp_size_t s, e;
+ for (s = 0;; s++)
+ if ((unsigned long long) (dx+1)[s] != (unsigned long long) (dy+1)[s])
+ break;
+ for (e = size - 1;; e--)
+ if ((unsigned long long) (dx+1)[e] != (unsigned long long) (dy+1)[e])
+ break;
#ifndef PRINT
- mpn_print (&cyx, 1);
- mpn_print (dx+1, size);
- mpn_print (&cyy, 1);
- mpn_print (dy+1, size);
+ printf ("cnt=%-*d\n", (int) (2 * sizeof(mp_limb_t)) - 4, cnt);
+ for (i = s; i <= e; i++)
+ {
+ printf ("%6d: ", i);
+ printf ("%0*llX ", LXW, (unsigned long long) (dx+1)[i]);
+ printf ("%0*llX ", LXW, (unsigned long long) (dy+1)[i]);
+ print_posneg ((dy+1)[i] - (dx+1)[i]);
+ printf ("\n");
+ }
+ printf ("%6s: ", "retval");
+ printf ("%0*llX ", LXW, (unsigned long long) cyx);
+ printf ("%0*llX ", LXW, (unsigned long long) cyy);
+ print_posneg (cyx - cyy);
#endif
printf ("\n");
if (dy[0] != 0x87654321)
exit (0);
}
+static void /* Print D as a sign-prefixed hex magnitude, right-aligned so every case occupies LXW+1 columns. */
+print_posneg (mp_limb_t d) /* d: a limb difference computed with unsigned wrap-around */
+{
+ char buf[LXW + 2]; /* room for LXW hex digits and the NUL, plus one spare byte (assumes a limb is no wider than unsigned long long) */
+ if (d == 0)
+ printf (" %*X", LXW, 0); /* zero gets no sign; the leading blank stands in for it */
+ else if (-d < d) /* the negation is the smaller magnitude, so show D as a negative offset */
+ {
+ sprintf (buf, "%llX", (unsigned long long) -d);
+ printf ("%*s-%s", LXW - (int) strlen (buf), "", buf); /* left padding first, then the sign directly before the digits */
+ }
+ else /* includes the tie -d == d, which is printed with '+' */
+ {
+ sprintf (buf, "%llX", (unsigned long long) d);
+ printf ("%*s+%s", LXW - (int) strlen (buf), "", buf);
+ }
+}
+
static void
mpn_print (mp_ptr p, mp_size_t size)
{
THIS IS A TEST PROGRAM USED ONLY FOR DEVELOPMENT. IT'S ALMOST CERTAIN TO
BE SUBJECT TO INCOMPATIBLE CHANGES IN FUTURE VERSIONS OF GMP.
-Copyright 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2008, 2009 Free Software
-Foundation, Inc.
+Copyright 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2008, 2009, 2011, 2012
+Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
/* Usage: try [options] <function>...
int trap_location = TRAP_NOWHERE;
-#define NUM_SOURCES 2
+#define NUM_SOURCES 5
#define NUM_DESTS 2
struct source_t {
#define SRC_SIZE(n) ((n) == 1 && tr->size2 ? size2 : size)
-void validate_fail __GMP_PROTO ((void));
+void validate_fail (void);
#if HAVE_TRY_NEW_C
#endif
-typedef mp_limb_t (*tryfun_t) __GMP_PROTO ((ANYARGS));
+typedef mp_limb_t (*tryfun_t) (ANYARGS);
struct try_t {
char retval;
- char src[2];
- char dst[2];
+ char src[NUM_SOURCES];
+ char dst[NUM_DESTS];
#define SIZE_YES 1
#define SIZE_ALLOW_ZERO 2
#define SIZE_1 3 /* 1 limb */
#define SIZE_2 4 /* 2 limbs */
#define SIZE_3 5 /* 3 limbs */
-#define SIZE_FRACTION 6 /* size2 is fraction for divrem etc */
-#define SIZE_SIZE2 7
-#define SIZE_PLUS_1 8
-#define SIZE_SUM 9
-#define SIZE_DIFF 10
-#define SIZE_DIFF_PLUS_1 11
-#define SIZE_RETVAL 12
-#define SIZE_CEIL_HALF 13
-#define SIZE_GET_STR 14
-#define SIZE_PLUS_MSIZE_SUB_1 15 /* size+msize-1 */
+#define SIZE_4 6 /* 4 limbs */
+#define SIZE_6 7 /* 6 limbs */
+#define SIZE_FRACTION 8 /* size2 is fraction for divrem etc */
+#define SIZE_SIZE2 9
+#define SIZE_PLUS_1 10
+#define SIZE_SUM 11
+#define SIZE_DIFF 12
+#define SIZE_DIFF_PLUS_1 13
+#define SIZE_DIFF_PLUS_3 14
+#define SIZE_RETVAL 15
+#define SIZE_CEIL_HALF 16
+#define SIZE_GET_STR 17
+#define SIZE_PLUS_MSIZE_SUB_1 18 /* size+msize-1 */
+#define SIZE_ODD 19
char size;
char size2;
- char dst_size[2];
+ char dst_size[NUM_DESTS];
/* multiplier_N size in limbs */
mp_size_t msize;
- char dst_bytes[2];
+ char dst_bytes[NUM_DESTS];
char dst0_from_src1;
#define DATA_SRC0_ODD 3
#define DATA_SRC0_HIGHBIT 4
#define DATA_SRC1_ODD 5
-#define DATA_SRC1_HIGHBIT 6
-#define DATA_MULTIPLE_DIVISOR 7
-#define DATA_UDIV_QRNND 8
+#define DATA_SRC1_ODD_PRIME 6
+#define DATA_SRC1_HIGHBIT 7
+#define DATA_MULTIPLE_DIVISOR 8
+#define DATA_UDIV_QRNND 9
char data;
/* Default is allow full overlap. */
#define OVERLAP_HIGH_TO_LOW 3
#define OVERLAP_NOT_SRCS 4
#define OVERLAP_NOT_SRC2 8
+#define OVERLAP_NOT_DST2 16
char overlap;
tryfun_t reference;
const char *reference_name;
- void (*validate) __GMP_PROTO ((void));
+ void (*validate) (void);
const char *validate_name;
};
validate_fail ();
}
+/* Check an mpn_bdiv_q_1 result.  The quotient in fun.d[0] is multiplied by
+   divisor and the product must equal the source operand s[0] over `size`
+   limbs, once the bits of s[0] selected by LOW_ZEROS_MASK (divisor) have
+   been OR-ed into the low product limb.  On a mismatch both operands are
+   traced and validate_fail is called.  */
+void
+validate_bdiv_q_1 (void)
+{
+  mp_srcptr numer = s[0].p;
+  mp_srcptr quot = fun.d[0].p;
+  mp_ptr prod;
+  int matches;
+
+  ASSERT (size >= 1);
+
+  prod = refmpn_malloc_limbs (size + 1);
+  refmpn_mul_1 (prod, quot, size, divisor);
+
+  /* Set ignored low bits, taking them from the source operand.  */
+  prod[0] |= numer[0] & LOW_ZEROS_MASK (divisor);
+
+  matches = refmpn_equal_anynail (prod, numer, size);
+  if (! matches)
+    {
+      printf ("Bdiv wrong: res * divisor != src (mod B^size)\n");
+      mpn_trace ("res ", quot, size);
+      mpn_trace ("src ", numer, size);
+    }
+
+  /* Release the scratch product before reporting, as validate_fail is
+     called last.  */
+  free (prod);
+
+  if (! matches)
+    validate_fail ();
+}
+
void
validate_modexact_1c_odd (void)
as they're all distinct and within the size of param[]. Renumber
whenever necessary or desired. */
-#define TYPE_ADD 1
-#define TYPE_ADD_N 2
-#define TYPE_ADD_NC 3
-#define TYPE_SUB 4
-#define TYPE_SUB_N 5
-#define TYPE_SUB_NC 6
-
-#define TYPE_MUL_1 7
-#define TYPE_MUL_1C 8
-
-#define TYPE_MUL_2 9
-#define TYPE_MUL_3 92
-#define TYPE_MUL_4 93
-
-#define TYPE_ADDMUL_1 10
-#define TYPE_ADDMUL_1C 11
-#define TYPE_SUBMUL_1 12
-#define TYPE_SUBMUL_1C 13
-
-#define TYPE_ADDMUL_2 14
-#define TYPE_ADDMUL_3 15
-#define TYPE_ADDMUL_4 16
-#define TYPE_ADDMUL_5 17
-#define TYPE_ADDMUL_6 18
-#define TYPE_ADDMUL_7 19
-#define TYPE_ADDMUL_8 20
-
-#define TYPE_ADDSUB_N 21
-#define TYPE_ADDSUB_NC 22
-
-#define TYPE_RSHIFT 23
-#define TYPE_LSHIFT 24
-#define TYPE_LSHIFTC 25
-
-#define TYPE_COPY 26
-#define TYPE_COPYI 27
-#define TYPE_COPYD 28
-#define TYPE_COM 29
-
-#define TYPE_ADDLSH1_N 30
-#define TYPE_ADDLSH2_N 48
-#define TYPE_ADDLSH_N 49
-#define TYPE_SUBLSH1_N 31
-#define TYPE_SUBLSH_N 130
-#define TYPE_RSBLSH1_N 34
-#define TYPE_RSBLSH2_N 46
-#define TYPE_RSBLSH_N 47
-#define TYPE_RSH1ADD_N 32
-#define TYPE_RSH1SUB_N 33
-
-#define TYPE_MOD_1 35
-#define TYPE_MOD_1C 36
-#define TYPE_DIVMOD_1 37
-#define TYPE_DIVMOD_1C 38
-#define TYPE_DIVREM_1 39
-#define TYPE_DIVREM_1C 40
-#define TYPE_PREINV_DIVREM_1 41
-#define TYPE_PREINV_MOD_1 42
-#define TYPE_MOD_34LSUB1 43
-#define TYPE_UDIV_QRNND 44
-#define TYPE_UDIV_QRNND_R 45
-
-#define TYPE_DIVEXACT_1 50
-#define TYPE_DIVEXACT_BY3 51
-#define TYPE_DIVEXACT_BY3C 52
-#define TYPE_MODEXACT_1_ODD 53
-#define TYPE_MODEXACT_1C_ODD 54
-
-#define TYPE_INVERT 55
-#define TYPE_BINVERT 56
-
-#define TYPE_GCD 60
-#define TYPE_GCD_1 61
-#define TYPE_GCD_FINDA 62
-#define TYPE_MPZ_JACOBI 63
-#define TYPE_MPZ_KRONECKER 64
-#define TYPE_MPZ_KRONECKER_UI 65
-#define TYPE_MPZ_KRONECKER_SI 66
-#define TYPE_MPZ_UI_KRONECKER 67
-#define TYPE_MPZ_SI_KRONECKER 68
-
-#define TYPE_AND_N 70
-#define TYPE_NAND_N 71
-#define TYPE_ANDN_N 72
-#define TYPE_IOR_N 73
-#define TYPE_IORN_N 74
-#define TYPE_NIOR_N 75
-#define TYPE_XOR_N 76
-#define TYPE_XNOR_N 77
-
-#define TYPE_MUL_MN 80
-#define TYPE_MUL_N 81
-#define TYPE_SQR 82
-#define TYPE_UMUL_PPMM 83
-#define TYPE_UMUL_PPMM_R 84
-#define TYPE_MULLO_N 85
-
-#define TYPE_SBPI1_DIV_QR 90
-#define TYPE_TDIV_QR 91
-
-#define TYPE_SQRTREM 100
-#define TYPE_ZERO 101
-#define TYPE_GET_STR 102
-#define TYPE_POPCOUNT 103
-#define TYPE_HAMDIST 104
-
-#define TYPE_EXTRA 110
-
-struct try_t param[150];
+enum {  /* Operation type codes, used as indices into param[].  Enumerators
+           count up consecutively from TYPE_ADD = 1, so inserting a new one
+           renumbers every code after it.  TYPE_EXTRA must stay last: it
+           gives param[] its length.
+           NOTE(review): the replaced code had TYPE_EXTRA 110 together with
+           param[150]; param[TYPE_EXTRA] leaves no slot at or above
+           TYPE_EXTRA -- confirm nothing indexes param[] with it.  */
+ TYPE_ADD = 1, TYPE_ADD_N, TYPE_ADD_NC, TYPE_SUB, TYPE_SUB_N, TYPE_SUB_NC,
+
+ TYPE_ADD_ERR1_N, TYPE_ADD_ERR2_N, TYPE_ADD_ERR3_N,
+ TYPE_SUB_ERR1_N, TYPE_SUB_ERR2_N, TYPE_SUB_ERR3_N,
+
+ TYPE_MUL_1, TYPE_MUL_1C,
+
+ TYPE_MUL_2, TYPE_MUL_3, TYPE_MUL_4, TYPE_MUL_5, TYPE_MUL_6,
+
+ TYPE_ADDMUL_1, TYPE_ADDMUL_1C, TYPE_SUBMUL_1, TYPE_SUBMUL_1C,
+
+ TYPE_ADDMUL_2, TYPE_ADDMUL_3, TYPE_ADDMUL_4, TYPE_ADDMUL_5, TYPE_ADDMUL_6,
+ TYPE_ADDMUL_7, TYPE_ADDMUL_8,
+
+ TYPE_ADDSUB_N, TYPE_ADDSUB_NC,
+
+ TYPE_RSHIFT, TYPE_LSHIFT, TYPE_LSHIFTC,
+
+ TYPE_COPY, TYPE_COPYI, TYPE_COPYD, TYPE_COM,
+
+ TYPE_ADDLSH1_N, TYPE_ADDLSH2_N, TYPE_ADDLSH_N,
+ TYPE_ADDLSH1_N_IP1, TYPE_ADDLSH2_N_IP1, TYPE_ADDLSH_N_IP1,
+ TYPE_ADDLSH1_N_IP2, TYPE_ADDLSH2_N_IP2, TYPE_ADDLSH_N_IP2,
+ TYPE_SUBLSH1_N, TYPE_SUBLSH2_N, TYPE_SUBLSH_N,
+ TYPE_SUBLSH1_N_IP1, TYPE_SUBLSH2_N_IP1, TYPE_SUBLSH_N_IP1,
+ TYPE_RSBLSH1_N, TYPE_RSBLSH2_N, TYPE_RSBLSH_N,
+ TYPE_RSH1ADD_N, TYPE_RSH1SUB_N,
+
+ TYPE_ADDLSH1_NC, TYPE_ADDLSH2_NC, TYPE_ADDLSH_NC,
+ TYPE_SUBLSH1_NC, TYPE_SUBLSH2_NC, TYPE_SUBLSH_NC,
+ TYPE_RSBLSH1_NC, TYPE_RSBLSH2_NC, TYPE_RSBLSH_NC,
+
+ TYPE_ADDCND_N, TYPE_SUBCND_N,
+
+ TYPE_MOD_1, TYPE_MOD_1C, TYPE_DIVMOD_1, TYPE_DIVMOD_1C, TYPE_DIVREM_1,
+ TYPE_DIVREM_1C, TYPE_PREINV_DIVREM_1, TYPE_DIVREM_2, TYPE_PREINV_MOD_1,
+ TYPE_MOD_34LSUB1, TYPE_UDIV_QRNND, TYPE_UDIV_QRNND_R,
+
+ TYPE_DIVEXACT_1, TYPE_BDIV_Q_1, TYPE_DIVEXACT_BY3, TYPE_DIVEXACT_BY3C,
+ TYPE_MODEXACT_1_ODD, TYPE_MODEXACT_1C_ODD,
+
+ TYPE_INVERT, TYPE_BINVERT,
+
+ TYPE_GCD, TYPE_GCD_1, TYPE_GCD_FINDA, TYPE_MPZ_JACOBI, TYPE_MPZ_KRONECKER,
+ TYPE_MPZ_KRONECKER_UI, TYPE_MPZ_KRONECKER_SI, TYPE_MPZ_UI_KRONECKER,
+ TYPE_MPZ_SI_KRONECKER, TYPE_MPZ_LEGENDRE,
+
+ TYPE_AND_N, TYPE_NAND_N, TYPE_ANDN_N, TYPE_IOR_N, TYPE_IORN_N, TYPE_NIOR_N,
+ TYPE_XOR_N, TYPE_XNOR_N,
+
+ TYPE_MUL_MN, TYPE_MUL_N, TYPE_SQR, TYPE_UMUL_PPMM, TYPE_UMUL_PPMM_R,
+ TYPE_MULLO_N, TYPE_MULMID_MN, TYPE_MULMID_N,
+
+ TYPE_SBPI1_DIV_QR, TYPE_TDIV_QR,
+
+ TYPE_SQRTREM, TYPE_ZERO, TYPE_GET_STR, TYPE_POPCOUNT, TYPE_HAMDIST,
+
+ TYPE_EXTRA
+};
+
+struct try_t param[TYPE_EXTRA];
void
REFERENCE (refmpn_sub);
+ p = ¶m[TYPE_ADD_ERR1_N];
+ p->retval = 1;
+ p->dst[0] = 1;
+ p->dst[1] = 1;
+ p->src[0] = 1;
+ p->src[1] = 1;
+ p->src[2] = 1;
+ p->dst_size[1] = SIZE_2;
+ p->carry = CARRY_BIT;
+ p->overlap = OVERLAP_NOT_DST2;
+ REFERENCE (refmpn_add_err1_n);
+
+ p = ¶m[TYPE_SUB_ERR1_N];
+ COPY (TYPE_ADD_ERR1_N);
+ REFERENCE (refmpn_sub_err1_n);
+
+ p = ¶m[TYPE_ADD_ERR2_N];
+ COPY (TYPE_ADD_ERR1_N);
+ p->src[3] = 1;
+ p->dst_size[1] = SIZE_4;
+ REFERENCE (refmpn_add_err2_n);
+
+ p = ¶m[TYPE_SUB_ERR2_N];
+ COPY (TYPE_ADD_ERR2_N);
+ REFERENCE (refmpn_sub_err2_n);
+
+ p = ¶m[TYPE_ADD_ERR3_N];
+ COPY (TYPE_ADD_ERR2_N);
+ p->src[4] = 1;
+ p->dst_size[1] = SIZE_6;
+ REFERENCE (refmpn_add_err3_n);
+
+ p = ¶m[TYPE_SUB_ERR3_N];
+ COPY (TYPE_ADD_ERR3_N);
+ REFERENCE (refmpn_sub_err3_n);
+
+ p = ¶m[TYPE_ADDCND_N];
+ COPY (TYPE_ADD_N);
+ p->carry = CARRY_BIT;
+ REFERENCE (refmpn_addcnd_n);
+
+ p = ¶m[TYPE_SUBCND_N];
+ COPY (TYPE_ADD_N);
+ p->carry = CARRY_BIT;
+ REFERENCE (refmpn_subcnd_n);
+
+
p = ¶m[TYPE_MUL_1];
p->retval = 1;
p->dst[0] = 1;
p->msize = 4;
REFERENCE (refmpn_mul_4);
+ p = ¶m[TYPE_MUL_5];
+ COPY (TYPE_MUL_2);
+ p->msize = 5;
+ REFERENCE (refmpn_mul_5);
+
+ p = ¶m[TYPE_MUL_6];
+ COPY (TYPE_MUL_2);
+ p->msize = 6;
+ REFERENCE (refmpn_mul_6);
+
p = ¶m[TYPE_ADDMUL_1];
p->retval = 1;
p->src[1] = 1;
p->msize = 2;
p->dst0_from_src1 = 1;
- p->overlap = OVERLAP_NOT_SRC2;
+ p->overlap = OVERLAP_NONE;
REFERENCE (refmpn_addmul_2);
p = ¶m[TYPE_ADDMUL_3];
p->shift = 1;
REFERENCE (refmpn_addlsh_n);
+ p = ¶m[TYPE_ADDLSH1_N_IP1];
+ p->retval = 1;
+ p->dst[0] = 1;
+ p->src[0] = 1;
+ p->dst0_from_src1 = 1;
+ REFERENCE (refmpn_addlsh1_n_ip1);
+
+ p = ¶m[TYPE_ADDLSH2_N_IP1];
+ COPY (TYPE_ADDLSH1_N_IP1);
+ REFERENCE (refmpn_addlsh2_n_ip1);
+
+ p = ¶m[TYPE_ADDLSH_N_IP1];
+ COPY (TYPE_ADDLSH1_N_IP1);
+ p->shift = 1;
+ REFERENCE (refmpn_addlsh_n_ip1);
+
+ p = ¶m[TYPE_ADDLSH1_N_IP2];
+ COPY (TYPE_ADDLSH1_N_IP1);
+ REFERENCE (refmpn_addlsh1_n_ip2);
+
+ p = ¶m[TYPE_ADDLSH2_N_IP2];
+ COPY (TYPE_ADDLSH1_N_IP1);
+ REFERENCE (refmpn_addlsh2_n_ip2);
+
+ p = ¶m[TYPE_ADDLSH_N_IP2];
+ COPY (TYPE_ADDLSH_N_IP1);
+ REFERENCE (refmpn_addlsh_n_ip2);
+
p = ¶m[TYPE_SUBLSH1_N];
COPY (TYPE_ADD_N);
REFERENCE (refmpn_sublsh1_n);
+ p = ¶m[TYPE_SUBLSH2_N];
+ COPY (TYPE_ADD_N);
+ REFERENCE (refmpn_sublsh2_n);
+
p = ¶m[TYPE_SUBLSH_N];
COPY (TYPE_ADDLSH_N);
REFERENCE (refmpn_sublsh_n);
+ p = ¶m[TYPE_SUBLSH1_N_IP1];
+ COPY (TYPE_ADDLSH1_N_IP1);
+ REFERENCE (refmpn_sublsh1_n_ip1);
+
+ p = ¶m[TYPE_SUBLSH2_N_IP1];
+ COPY (TYPE_ADDLSH1_N_IP1);
+ REFERENCE (refmpn_sublsh2_n_ip1);
+
+ p = ¶m[TYPE_SUBLSH_N_IP1];
+ COPY (TYPE_ADDLSH_N_IP1);
+ REFERENCE (refmpn_sublsh_n_ip1);
+
p = ¶m[TYPE_RSBLSH1_N];
COPY (TYPE_ADD_N);
REFERENCE (refmpn_rsblsh1_n);
REFERENCE (refmpn_rsh1sub_n);
+ p = ¶m[TYPE_ADDLSH1_NC];
+ COPY (TYPE_ADDLSH1_N);
+ p->carry = CARRY_3;
+ REFERENCE (refmpn_addlsh1_nc);
+
+ p = ¶m[TYPE_ADDLSH2_NC];
+ COPY (TYPE_ADDLSH2_N);
+ p->carry = CARRY_4; /* FIXME */
+ REFERENCE (refmpn_addlsh2_nc);
+
+ p = ¶m[TYPE_ADDLSH_NC];
+ COPY (TYPE_ADDLSH_N);
+ p->carry = CARRY_BIT; /* FIXME */
+ REFERENCE (refmpn_addlsh_nc);
+
+ p = ¶m[TYPE_SUBLSH1_NC];
+ COPY (TYPE_ADDLSH1_NC);
+ REFERENCE (refmpn_sublsh1_nc);
+
+ p = ¶m[TYPE_SUBLSH2_NC];
+ COPY (TYPE_ADDLSH2_NC);
+ REFERENCE (refmpn_sublsh2_nc);
+
+ p = ¶m[TYPE_SUBLSH_NC];
+ COPY (TYPE_ADDLSH_NC);
+ REFERENCE (refmpn_sublsh_nc);
+
+ p = ¶m[TYPE_RSBLSH1_NC];
+ COPY (TYPE_RSBLSH1_N);
+ p->carry = CARRY_BIT; /* FIXME */
+ REFERENCE (refmpn_rsblsh1_nc);
+
+ p = ¶m[TYPE_RSBLSH2_NC];
+ COPY (TYPE_RSBLSH2_N);
+ p->carry = CARRY_4; /* FIXME */
+ REFERENCE (refmpn_rsblsh2_nc);
+
+ p = ¶m[TYPE_RSBLSH_NC];
+ COPY (TYPE_RSBLSH_N);
+ p->carry = CARRY_BIT; /* FIXME */
+ REFERENCE (refmpn_rsblsh_nc);
+
+
p = ¶m[TYPE_MOD_1];
p->retval = 1;
p->src[0] = 1;
VALIDATE (validate_divexact_1);
REFERENCE (refmpn_divmod_1);
+ p = ¶m[TYPE_BDIV_Q_1];
+ p->dst[0] = 1;
+ p->src[0] = 1;
+ p->divisor = DIVISOR_LIMB;
+ VALIDATE (validate_bdiv_q_1);
p = ¶m[TYPE_DIVEXACT_BY3];
p->retval = 1;
REFERENCE (refmpn_gcd);
+ p = ¶m[TYPE_MPZ_LEGENDRE];
+ p->retval = 1;
+ p->src[0] = 1;
+ p->size = SIZE_ALLOW_ZERO;
+ p->src[1] = 1;
+ p->data = DATA_SRC1_ODD_PRIME;
+ p->size2 = 1;
+ p->carry = CARRY_BIT;
+ p->carry_sign = 1;
+ REFERENCE (refmpz_legendre);
+
p = ¶m[TYPE_MPZ_JACOBI];
p->retval = 1;
p->src[0] = 1;
p->src[1] = 1;
p->data = DATA_SRC1_ODD;
p->size2 = 1;
- p->carry = CARRY_4;
+ p->carry = CARRY_BIT;
p->carry_sign = 1;
REFERENCE (refmpz_jacobi);
p = ¶m[TYPE_MPZ_KRONECKER];
- COPY (TYPE_MPZ_JACOBI);
- p->data = 0; /* clear inherited DATA_SRC1_ODD */
+ p->retval = 1;
+ p->src[0] = 1;
+ p->size = SIZE_ALLOW_ZERO;
+ p->src[1] = 1;
+ p->data = 0;
+ p->size2 = 1;
+ p->carry = CARRY_4;
+ p->carry_sign = 1;
REFERENCE (refmpz_kronecker);
p->size2 = 1;
REFERENCE (refmpn_mul_basecase);
+ p = ¶m[TYPE_MULMID_MN];
+ COPY (TYPE_MUL_MN);
+ p->dst_size[0] = SIZE_DIFF_PLUS_3;
+ REFERENCE (refmpn_mulmid_basecase);
+
+ p = ¶m[TYPE_MULMID_N];
+ COPY (TYPE_MUL_N);
+ p->size = SIZE_ODD;
+ p->size2 = SIZE_CEIL_HALF;
+ p->dst_size[0] = SIZE_DIFF_PLUS_3;
+ REFERENCE (refmpn_mulmid_n);
+
p = ¶m[TYPE_UMUL_PPMM];
p->retval = 1;
p->src[0] = 1;
return mpn_divexact_by3 (rp, sp, size);
}
+#if HAVE_NATIVE_mpn_addlsh1_n_ip1  /* Each *_fun definition from here down
+   to mpn_sublsh_n_ip1_fun forwards its arguments unchanged to the mpn
+   routine named without the _fun suffix and returns that routine's result.
+   Each one is compiled only under its own HAVE_NATIVE_ test.  The variants
+   without a digit after "lsh" take an extra shift count `sh`.
+   NOTE(review): presumably these exist so that a real function address is
+   available for the test table -- confirm.  */
+mp_limb_t
+mpn_addlsh1_n_ip1_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
+{
+ return mpn_addlsh1_n_ip1 (rp, sp, size);
+}
+#endif
+#if HAVE_NATIVE_mpn_addlsh2_n_ip1
+mp_limb_t
+mpn_addlsh2_n_ip1_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
+{
+ return mpn_addlsh2_n_ip1 (rp, sp, size);
+}
+#endif
+#if HAVE_NATIVE_mpn_addlsh_n_ip1
+mp_limb_t
+mpn_addlsh_n_ip1_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned int sh)
+{
+ return mpn_addlsh_n_ip1 (rp, sp, size, sh);
+}
+#endif
+#if HAVE_NATIVE_mpn_addlsh1_n_ip2
+mp_limb_t
+mpn_addlsh1_n_ip2_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
+{
+ return mpn_addlsh1_n_ip2 (rp, sp, size);
+}
+#endif
+#if HAVE_NATIVE_mpn_addlsh2_n_ip2
+mp_limb_t
+mpn_addlsh2_n_ip2_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
+{
+ return mpn_addlsh2_n_ip2 (rp, sp, size);
+}
+#endif
+#if HAVE_NATIVE_mpn_addlsh_n_ip2
+mp_limb_t
+mpn_addlsh_n_ip2_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned int sh)
+{
+ return mpn_addlsh_n_ip2 (rp, sp, size, sh);
+}
+#endif
+#if HAVE_NATIVE_mpn_sublsh1_n_ip1
+mp_limb_t
+mpn_sublsh1_n_ip1_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
+{
+ return mpn_sublsh1_n_ip1 (rp, sp, size);
+}
+#endif
+#if HAVE_NATIVE_mpn_sublsh2_n_ip1
+mp_limb_t
+mpn_sublsh2_n_ip1_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size)
+{
+ return mpn_sublsh2_n_ip1 (rp, sp, size);
+}
+#endif
+#if HAVE_NATIVE_mpn_sublsh_n_ip1
+mp_limb_t
+mpn_sublsh_n_ip1_fun (mp_ptr rp, mp_srcptr sp, mp_size_t size, unsigned int sh)
+{
+ return mpn_sublsh_n_ip1 (rp, sp, size, sh);
+}
+#endif
+
mp_limb_t
mpn_modexact_1_odd_fun (mp_srcptr ptr, mp_size_t size, mp_limb_t divisor)
{
TMP_FREE;
}
+/* Wrapper letting mpn_toom42_mulmid be called with (dst, src1, src2, size)
+   only.  It allocates the scratch area whose length is reported by
+   mpn_toom42_mulmid_itch (size), runs the multiplication into dst, and
+   releases the scratch with TMP_FREE before returning.  */
+void
+mpn_toom42_mulmid_fun (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
+                       mp_size_t size)
+{
+  mp_ptr tspace;
+  TMP_DECL;
+  TMP_MARK;
+  /* Scratch space is sized by the routine's own itch function.  */
+  tspace = TMP_ALLOC_LIMBS (mpn_toom42_mulmid_itch (size));
+  mpn_toom42_mulmid (dst, src1, src2, size, tspace);
+  TMP_FREE;
+}
+
mp_limb_t
umul_ppmm_fun (mp_limb_t *lowptr, mp_limb_t m1, mp_limb_t m2)
{
{ TRY(mpn_add_n_sub_nc), TYPE_ADDSUB_NC },
#endif
+ { TRY(mpn_add_err1_n), TYPE_ADD_ERR1_N },
+ { TRY(mpn_sub_err1_n), TYPE_SUB_ERR1_N },
+ { TRY(mpn_add_err2_n), TYPE_ADD_ERR2_N },
+ { TRY(mpn_sub_err2_n), TYPE_SUB_ERR2_N },
+ { TRY(mpn_add_err3_n), TYPE_ADD_ERR3_N },
+ { TRY(mpn_sub_err3_n), TYPE_SUB_ERR3_N },
+
{ TRY(mpn_addmul_1), TYPE_ADDMUL_1 },
{ TRY(mpn_submul_1), TYPE_SUBMUL_1 },
#if HAVE_NATIVE_mpn_addmul_1c
{ TRY(mpn_copyd), TYPE_COPYD },
#endif
+ { TRY(mpn_addcnd_n), TYPE_ADDCND_N },
+ { TRY(mpn_subcnd_n), TYPE_SUBCND_N },
#if HAVE_NATIVE_mpn_addlsh1_n
{ TRY(mpn_addlsh1_n), TYPE_ADDLSH1_N },
#endif
#if HAVE_NATIVE_mpn_addlsh_n
{ TRY(mpn_addlsh_n), TYPE_ADDLSH_N },
#endif
+#if HAVE_NATIVE_mpn_addlsh1_n_ip1
+ { TRY_FUNFUN(mpn_addlsh1_n_ip1), TYPE_ADDLSH1_N_IP1 },
+#endif
+#if HAVE_NATIVE_mpn_addlsh2_n_ip1
+ { TRY_FUNFUN(mpn_addlsh2_n_ip1), TYPE_ADDLSH2_N_IP1 },
+#endif
+#if HAVE_NATIVE_mpn_addlsh_n_ip1
+ { TRY_FUNFUN(mpn_addlsh_n_ip1), TYPE_ADDLSH_N_IP1 },
+#endif
+#if HAVE_NATIVE_mpn_addlsh1_n_ip2
+ { TRY_FUNFUN(mpn_addlsh1_n_ip2), TYPE_ADDLSH1_N_IP2 },
+#endif
+#if HAVE_NATIVE_mpn_addlsh2_n_ip2
+ { TRY_FUNFUN(mpn_addlsh2_n_ip2), TYPE_ADDLSH2_N_IP2 },
+#endif
+#if HAVE_NATIVE_mpn_addlsh_n_ip2
+ { TRY_FUNFUN(mpn_addlsh_n_ip2), TYPE_ADDLSH_N_IP2 },
+#endif
#if HAVE_NATIVE_mpn_sublsh1_n
{ TRY(mpn_sublsh1_n), TYPE_SUBLSH1_N },
#endif
+#if HAVE_NATIVE_mpn_sublsh2_n
+ { TRY(mpn_sublsh2_n), TYPE_SUBLSH2_N },
+#endif
#if HAVE_NATIVE_mpn_sublsh_n
{ TRY(mpn_sublsh_n), TYPE_SUBLSH_N },
#endif
+#if HAVE_NATIVE_mpn_sublsh1_n_ip1
+ { TRY_FUNFUN(mpn_sublsh1_n_ip1), TYPE_SUBLSH1_N_IP1 },
+#endif
+#if HAVE_NATIVE_mpn_sublsh2_n_ip1
+ { TRY_FUNFUN(mpn_sublsh2_n_ip1), TYPE_SUBLSH2_N_IP1 },
+#endif
+#if HAVE_NATIVE_mpn_sublsh_n_ip1
+ { TRY_FUNFUN(mpn_sublsh_n_ip1), TYPE_SUBLSH_N_IP1 },
+#endif
#if HAVE_NATIVE_mpn_rsblsh1_n
{ TRY(mpn_rsblsh1_n), TYPE_RSBLSH1_N },
#endif
{ TRY(mpn_rsh1sub_n), TYPE_RSH1SUB_N },
#endif
+#if HAVE_NATIVE_mpn_addlsh1_nc
+ { TRY(mpn_addlsh1_nc), TYPE_ADDLSH1_NC },
+#endif
+#if HAVE_NATIVE_mpn_addlsh2_nc
+ { TRY(mpn_addlsh2_nc), TYPE_ADDLSH2_NC },
+#endif
+#if HAVE_NATIVE_mpn_addlsh_nc
+ { TRY(mpn_addlsh_nc), TYPE_ADDLSH_NC },
+#endif
+#if HAVE_NATIVE_mpn_sublsh1_nc
+ { TRY(mpn_sublsh1_nc), TYPE_SUBLSH1_NC },
+#endif
+#if HAVE_NATIVE_mpn_sublsh2_nc
+ { TRY(mpn_sublsh2_nc), TYPE_SUBLSH2_NC },
+#endif
+#if HAVE_NATIVE_mpn_sublsh_nc
+ { TRY(mpn_sublsh_nc), TYPE_SUBLSH_NC },
+#endif
+#if HAVE_NATIVE_mpn_rsblsh1_nc
+ { TRY(mpn_rsblsh1_nc), TYPE_RSBLSH1_NC },
+#endif
+#if HAVE_NATIVE_mpn_rsblsh2_nc
+ { TRY(mpn_rsblsh2_nc), TYPE_RSBLSH2_NC },
+#endif
+#if HAVE_NATIVE_mpn_rsblsh_nc
+ { TRY(mpn_rsblsh_nc), TYPE_RSBLSH_NC },
+#endif
+
{ TRY_FUNFUN(mpn_and_n), TYPE_AND_N },
{ TRY_FUNFUN(mpn_andn_n), TYPE_ANDN_N },
{ TRY_FUNFUN(mpn_nand_n), TYPE_NAND_N },
#endif
{ TRY(mpn_divexact_1), TYPE_DIVEXACT_1 },
+ { TRY(mpn_bdiv_q_1), TYPE_BDIV_Q_1 },
{ TRY_FUNFUN(mpn_divexact_by3), TYPE_DIVEXACT_BY3 },
{ TRY(mpn_divexact_by3c), TYPE_DIVEXACT_BY3C },
#if HAVE_NATIVE_mpn_mul_4
{ TRY(mpn_mul_4), TYPE_MUL_4, 4 },
#endif
+#if HAVE_NATIVE_mpn_mul_5
+ { TRY(mpn_mul_5), TYPE_MUL_5, 5 },
+#endif
+#if HAVE_NATIVE_mpn_mul_6
+ { TRY(mpn_mul_6), TYPE_MUL_6, 6 },
+#endif
{ TRY(mpn_rshift), TYPE_RSHIFT },
{ TRY(mpn_lshift), TYPE_LSHIFT },
{ TRY(mpn_mul_basecase), TYPE_MUL_MN },
+ { TRY(mpn_mulmid_basecase), TYPE_MULMID_MN },
{ TRY(mpn_mullo_basecase), TYPE_MULLO_N },
#if SQR_TOOM2_THRESHOLD > 0
{ TRY(mpn_sqr_basecase), TYPE_SQR },
{ TRY_FUNFUN(mpn_toom44_mul), TYPE_MUL_N, MPN_TOOM44_MUL_MINSIZE },
{ TRY_FUNFUN(mpn_toom4_sqr), TYPE_SQR, MPN_TOOM4_SQR_MINSIZE },
+ { TRY(mpn_mulmid_n), TYPE_MULMID_N, 1 },
+ { TRY(mpn_mulmid), TYPE_MULMID_MN, 1 },
+ { TRY_FUNFUN(mpn_toom42_mulmid), TYPE_MULMID_N,
+ (2 * MPN_TOOM42_MULMID_MINSIZE - 1) },
+
{ TRY(mpn_gcd_1), TYPE_GCD_1 },
{ TRY(mpn_gcd), TYPE_GCD },
+ { TRY(mpz_legendre), TYPE_MPZ_LEGENDRE },
{ TRY(mpz_jacobi), TYPE_MPZ_JACOBI },
+ { TRY(mpz_kronecker), TYPE_MPZ_KRONECKER },
{ TRY(mpz_kronecker_ui), TYPE_MPZ_KRONECKER_UI },
{ TRY(mpz_kronecker_si), TYPE_MPZ_KRONECKER_SI },
{ TRY(mpz_ui_kronecker), TYPE_MPZ_UI_KRONECKER },
#if HAVE_MMAP && defined (MAP_ANON)
/* note must pass fd=-1 for MAP_ANON on BSD */
- p = mmap (NULL, nbytes, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0);
+ p = (mp_ptr) mmap (NULL, nbytes, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0);
if (p == (void *) -1)
{
fprintf (stderr, "Cannot mmap %#x anon bytes: %s\n",
ASSERT_ALWAYS (p != NULL);
#endif
- p = align_pointer (p, pagesize);
+ p = (mp_ptr) align_pointer (p, pagesize);
mprotect_maybe (p, REDZONE_BYTES, PROT_NONE);
p += REDZONE_LIMBS;
struct overlap_t {
int s[NUM_SOURCES];
} overlap_array[] = {
- { { -1, -1 } },
- { { 0, -1 } },
- { { -1, 0 } },
- { { 0, 0 } },
- { { 1, -1 } },
- { { -1, 1 } },
- { { 1, 1 } },
- { { 0, 1 } },
- { { 1, 0 } },
+ { { -1, -1, -1, -1, -1 } },
+ { { 0, -1, -1, -1, -1 } },
+ { { -1, 0, -1, -1, -1 } },
+ { { 0, 0, -1, -1, -1 } },
+ { { 1, -1, -1, -1, -1 } },
+ { { -1, 1, -1, -1, -1 } },
+ { { 1, 1, -1, -1, -1 } },
+ { { 0, 1, -1, -1, -1 } },
+ { { 1, 0, -1, -1, -1 } },
};
struct overlap_t *overlap, *overlap_limit;
(tr->overlap & OVERLAP_NONE ? 1 \
: tr->overlap & OVERLAP_NOT_SRCS ? 3 \
: tr->overlap & OVERLAP_NOT_SRC2 ? 2 \
+ : tr->overlap & OVERLAP_NOT_DST2 ? 4 \
: tr->dst[1] ? 9 \
: tr->src[1] ? 4 \
: tr->dst[0] ? 2 \
case TYPE_ADDLSH1_N:
case TYPE_ADDLSH2_N:
case TYPE_SUBLSH1_N:
+ case TYPE_SUBLSH2_N:
case TYPE_RSBLSH1_N:
case TYPE_RSBLSH2_N:
case TYPE_RSH1ADD_N:
e->retval = CALLING_CONVENTIONS (function)
(e->d[0].p, e->s[0].p, e->s[1].p, size, shift);
break;
+ case TYPE_ADDLSH_NC:
+ case TYPE_SUBLSH_NC:
+ case TYPE_RSBLSH_NC:
+ e->retval = CALLING_CONVENTIONS (function)
+ (e->d[0].p, e->s[0].p, e->s[1].p, size, shift, carry);
+ break;
+ case TYPE_ADDLSH1_NC:
+ case TYPE_ADDLSH2_NC:
+ case TYPE_SUBLSH1_NC:
+ case TYPE_SUBLSH2_NC:
+ case TYPE_RSBLSH1_NC:
+ case TYPE_RSBLSH2_NC:
case TYPE_ADD_NC:
case TYPE_SUB_NC:
+ case TYPE_ADDCND_N:
+ case TYPE_SUBCND_N:
e->retval = CALLING_CONVENTIONS (function)
(e->d[0].p, e->s[0].p, e->s[1].p, size, carry);
break;
+ case TYPE_ADD_ERR1_N:
+ case TYPE_SUB_ERR1_N:
+ e->retval = CALLING_CONVENTIONS (function)
+ (e->d[0].p, e->s[0].p, e->s[1].p, e->d[1].p, e->s[2].p, size, carry);
+ break;
+ case TYPE_ADD_ERR2_N:
+ case TYPE_SUB_ERR2_N:
+ e->retval = CALLING_CONVENTIONS (function)
+ (e->d[0].p, e->s[0].p, e->s[1].p, e->d[1].p, e->s[2].p, e->s[3].p, size, carry);
+ break;
+ case TYPE_ADD_ERR3_N:
+ case TYPE_SUB_ERR3_N:
+ e->retval = CALLING_CONVENTIONS (function)
+ (e->d[0].p, e->s[0].p, e->s[1].p, e->d[1].p, e->s[2].p, e->s[3].p, e->s[4].p, size, carry);
+ break;
case TYPE_MUL_1:
case TYPE_ADDMUL_1:
case TYPE_MUL_2:
case TYPE_MUL_3:
case TYPE_MUL_4:
+ case TYPE_MUL_5:
+ case TYPE_MUL_6:
if (size == 1)
abort ();
e->retval = CALLING_CONVENTIONS (function)
CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size);
break;
-
+ case TYPE_ADDLSH1_N_IP1:
+ case TYPE_ADDLSH2_N_IP1:
+ case TYPE_ADDLSH1_N_IP2:
+ case TYPE_ADDLSH2_N_IP2:
+ case TYPE_SUBLSH1_N_IP1:
+ case TYPE_SUBLSH2_N_IP1:
case TYPE_DIVEXACT_BY3:
e->retval = CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size);
break;
case TYPE_DIVMOD_1:
case TYPE_DIVEXACT_1:
+ case TYPE_BDIV_Q_1:
e->retval = CALLING_CONVENTIONS (function)
(e->d[0].p, e->s[0].p, size, divisor);
break;
}
break;
+ case TYPE_MPZ_LEGENDRE:
case TYPE_MPZ_JACOBI:
+ {
+ mpz_t a, b;
+ PTR(a) = e->s[0].p; SIZ(a) = (carry==0 ? size : -size);
+ PTR(b) = e->s[1].p; SIZ(b) = size2;
+ e->retval = CALLING_CONVENTIONS (function) (a, b);
+ }
+ break;
case TYPE_MPZ_KRONECKER:
{
mpz_t a, b;
break;
case TYPE_MUL_MN:
+ case TYPE_MULMID_MN:
CALLING_CONVENTIONS (function)
(e->d[0].p, e->s[0].p, size, e->s[1].p, size2);
break;
case TYPE_MULLO_N:
CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, e->s[1].p, size);
break;
+ case TYPE_MULMID_N:
+ CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, e->s[1].p,
+ (size + 1) / 2);
+ break;
case TYPE_SQR:
CALLING_CONVENTIONS (function) (e->d[0].p, e->s[0].p, size);
break;
(e->s[0].p[0], e->s[0].p[1], e->d[0].p);
break;
+ case TYPE_ADDLSH_N_IP1:
+ case TYPE_ADDLSH_N_IP2:
+ case TYPE_SUBLSH_N_IP1:
case TYPE_LSHIFT:
case TYPE_LSHIFTC:
case TYPE_RSHIFT:
case SIZE_3:
d[i].size = 3;
break;
+ case SIZE_4:
+ d[i].size = 4;
+ break;
+ case SIZE_6:
+ d[i].size = 6;
+ break;
case SIZE_PLUS_1:
d[i].size = size+1;
d[i].size = size - size2 + 1;
break;
+ case SIZE_DIFF_PLUS_3:
+ d[i].size = size - size2 + 3;
+ break;
+
case SIZE_CEIL_HALF:
d[i].size = (size+1)/2;
break;
s[i].p[0] |= 1;
break;
+ case DATA_SRC1_ODD_PRIME:
+ if (i == 1)
+ {
+ if (refmpn_zero_p (s[i].p+1, SRC_SIZE(i)-1)
+ && s[i].p[0] <=3)
+ s[i].p[0] = 3;
+ else
+ {
+ mpz_t p;
+ mpz_init (p);
+ for (;;)
+ {
+ _mpz_realloc (p, SRC_SIZE(i));
+ MPN_COPY (PTR(p), s[i].p, SRC_SIZE(i));
+ SIZ(p) = SRC_SIZE(i);
+ MPN_NORMALIZE (PTR(p), SIZ(p));
+ mpz_nextprime (p, p);
+ if (mpz_size (p) <= SRC_SIZE(i))
+ break;
+
+ t_random (s[i].p, SRC_SIZE(i));
+ }
+ MPN_COPY (s[i].p, PTR(p), SIZ(p));
+ if (SIZ(p) < SRC_SIZE(i))
+ MPN_ZERO (s[i].p + SIZ(p), SRC_SIZE(i) - SIZ(p));
+ mpz_clear (p);
+ }
+ }
+ break;
+
case DATA_SRC1_HIGHBIT:
if (i == 1)
{
case DATA_SRC0_HIGHBIT:
if (i == 0)
- {
- s[i].p[size-1] |= GMP_NUMB_HIGHBIT;
- }
+ {
+ s[i].p[size-1] |= GMP_NUMB_HIGHBIT;
+ }
break;
case DATA_UDIV_QRNND:
#define SIZE_ITERATION \
for (size = MAX3 (option_firstsize, \
choice->minsize, \
- (tr->size == SIZE_ALLOW_ZERO) ? 0 : 1); \
+ (tr->size == SIZE_ALLOW_ZERO) ? 0 : 1), \
+ size += (tr->size == SIZE_ODD) && !(size & 1); \
size <= option_lastsize; \
- size++)
+ size += (tr->size == SIZE_ODD) ? 2 : 1)
#define SIZE2_FIRST \
(tr->size2 == SIZE_2 ? 2 \
: tr->size2 == SIZE_FRACTION ? option_firstsize2 \
+ : tr->size2 == SIZE_CEIL_HALF ? ((size + 1) / 2) \
: tr->size2 ? \
MAX (choice->minsize, (option_firstsize2 != 0 \
? option_firstsize2 : 1)) \
#define SIZE2_LAST \
(tr->size2 == SIZE_2 ? 2 \
: tr->size2 == SIZE_FRACTION ? FRACTION_COUNT-1 \
+ : tr->size2 == SIZE_CEIL_HALF ? ((size + 1) / 2) \
: tr->size2 ? size \
: 0)
/* Copyright 1996, 2001 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
/* Memory allocation used during tests.
-Copyright 2001, 2002, 2007 Free Software Foundation, Inc.
+Copyright 2001, 2002, 2007, 2013 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h> /* for abort */
#include <string.h> /* for memcpy, memcmp */
#include "gmp.h"
#include "gmp-impl.h"
+#include "tests.h"
#if GMP_LIMB_BITS == 64
#define PATTERN1 CNST_LIMB(0xcafebabedeadbeef)
#define PATTERN2 CNST_LIMB(0xdeadbeef)
#endif
+/* Convert pointer p to an mp_limb_t, going through intptr_t when
+   HAVE_INTPTR_T is set and through size_t otherwise.  The argument is
+   parenthesized so that the cast applies to the whole expression passed in,
+   not just to its first operand.  */
+#if HAVE_INTPTR_T
+#define PTRLIMB(p) ((mp_limb_t) (intptr_t) (p))
+#else
+#define PTRLIMB(p) ((mp_limb_t) (size_t) (p))
+#endif
+
/* Each block allocated is a separate malloc, for the benefit of a redzoning
malloc debugger during development or when bug hunting.
ptr = (void *) ((gmp_intptr_t) rptr + sizeof (mp_limb_t));
*((mp_limb_t *) ((gmp_intptr_t) ptr - sizeof (mp_limb_t)))
- = PATTERN1 - ((mp_limb_t) ptr);
- PATTERN2_var = PATTERN2 - ((mp_limb_t) ptr);
+ = PATTERN1 - PTRLIMB (ptr);
+ PATTERN2_var = PATTERN2 - PTRLIMB (ptr);
memcpy ((void *) ((gmp_intptr_t) ptr + size), &PATTERN2_var, sizeof (mp_limb_t));
h->size = size;
}
if (*((mp_limb_t *) ((gmp_intptr_t) ptr - sizeof (mp_limb_t)))
- != PATTERN1 - ((mp_limb_t) ptr))
+ != PATTERN1 - PTRLIMB (ptr))
{
fprintf (stderr, "in realloc: redzone clobbered before block\n");
abort ();
}
- PATTERN2_var = PATTERN2 - ((mp_limb_t) ptr);
+ PATTERN2_var = PATTERN2 - PTRLIMB (ptr);
if (memcmp ((void *) ((gmp_intptr_t) ptr + h->size), &PATTERN2_var, sizeof (mp_limb_t)))
{
fprintf (stderr, "in realloc: redzone clobbered after block\n");
ptr = (void *) ((gmp_intptr_t) rptr + sizeof (mp_limb_t));
*((mp_limb_t *) ((gmp_intptr_t) ptr - sizeof (mp_limb_t)))
- = PATTERN1 - ((mp_limb_t) ptr);
- PATTERN2_var = PATTERN2 - ((mp_limb_t) ptr);
+ = PATTERN1 - PTRLIMB (ptr);
+ PATTERN2_var = PATTERN2 - PTRLIMB (ptr);
memcpy ((void *) ((gmp_intptr_t) ptr + new_size), &PATTERN2_var, sizeof (mp_limb_t));
h->size = new_size;
*hp = h->next; /* unlink */
if (*((mp_limb_t *) ((gmp_intptr_t) ptr - sizeof (mp_limb_t)))
- != PATTERN1 - ((mp_limb_t) ptr))
+ != PATTERN1 - PTRLIMB (ptr))
{
fprintf (stderr, "in free: redzone clobbered before block\n");
abort ();
}
- PATTERN2_var = PATTERN2 - ((mp_limb_t) ptr);
+ PATTERN2_var = PATTERN2 - PTRLIMB (ptr);
if (memcmp ((void *) ((gmp_intptr_t) ptr + h->size), &PATTERN2_var, sizeof (mp_limb_t)))
{
fprintf (stderr, "in free: redzone clobbered after block\n");
/* Miscellaneous test program support routines.
-Copyright 2000, 2001, 2002, 2003, 2005 Free Software Foundation, Inc.
+Copyright 2000, 2001, 2002, 2003, 2005, 2013 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include "config.h"
/* Only used if CPU calling conventions checking is available. */
-mp_limb_t (*calling_conventions_function) __GMP_PROTO ((ANYARGS));
+mp_limb_t (*calling_conventions_function) (ANYARGS);
/* Return p advanced to the next multiple of "align" bytes. "align" must be
size_t len;
char *t;
len = strlen (s);
- t = (*__gmp_allocate_func) (len+1);
+ t = (char *) (*__gmp_allocate_func) (len+1);
memcpy (t, s, len+1);
return t;
}
/* Call (*func)() with various random number generators. */
void
-call_rand_algs (void (*func) __GMP_PROTO ((const char *, gmp_randstate_ptr)))
+call_rand_algs (void (*func) (const char *, gmp_randstate_ptr))
{
gmp_randstate_t rstate;
mpz_t a;
int
tests_hardware_setround (int mode)
{
-#if HAVE_HOST_CPU_FAMILY_x86
+#if WANT_ASSEMBLY && HAVE_HOST_CPU_FAMILY_x86
int rc;
switch (mode) {
case 0: rc = 0; break; /* nearest */
int
tests_hardware_getround (void)
{
-#if HAVE_HOST_CPU_FAMILY_x86
+#if WANT_ASSEMBLY && HAVE_HOST_CPU_FAMILY_x86
switch ((x86_fstcw () & ~0xC00) >> 10) {
case 0: return 0; break; /* nearest */
case 1: return 3; break; /* down */
# Copyright 2001, 2002 Free Software Foundation, Inc.
#
-# This file is part of the GNU MP Library.
+# This file is part of the GNU MP Library test suite.
#
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 3 of the License, or (at your
-# option) any later version.
+# The GNU MP Library test suite is free software; you can redistribute it
+# and/or modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 3 of the License,
+# or (at your option) any later version.
#
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-# License for more details.
+# The GNU MP Library test suite is distributed in the hope that it will be
+# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+# Public License for more details.
#
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+# You should have received a copy of the GNU General Public License along with
+# the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/.
INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/tests
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.6 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
# Copyright 2001, 2002 Free Software Foundation, Inc.
#
-# This file is part of the GNU MP Library.
+# This file is part of the GNU MP Library test suite.
#
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 3 of the License, or (at your
-# option) any later version.
+# The GNU MP Library test suite is free software; you can redistribute it
+# and/or modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 3 of the License,
+# or (at your option) any later version.
#
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-# License for more details.
+# The GNU MP Library test suite is distributed in the hope that it will be
+# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+# Public License for more details.
#
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+# You should have received a copy of the GNU General Public License along with
+# the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/.
VPATH = @srcdir@
+am__make_dryrun = \
+ { \
+ am__dry=no; \
+ case $$MAKEFLAGS in \
+ *\\[\ \ ]*) \
+ echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \
+ | grep '^AM OK$$' >/dev/null || am__dry=yes;; \
+ *) \
+ for am__flg in $$MAKEFLAGS; do \
+ case $$am__flg in \
+ *=*|--*) ;; \
+ *n*) am__dry=yes; break;; \
+ esac; \
+ done;; \
+ esac; \
+ test $$am__dry = yes; \
+ }
pkgdatadir = $(datadir)/@PACKAGE@
pkgincludedir = $(includedir)/@PACKAGE@
pkglibdir = $(libdir)/@PACKAGE@
POST_UNINSTALL = :
build_triplet = @build@
host_triplet = @host@
-ANSI2KNR = $(top_builddir)/ansi2knr
check_PROGRAMS = t-printf$(EXEEXT) t-scanf$(EXEEXT) t-locale$(EXEEXT)
subdir = tests/misc
DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
- $(top_srcdir)/configure.in
+ $(top_srcdir)/configure.ac
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
$(ACLOCAL_M4)
mkinstalldirs = $(install_sh) -d
CONFIG_CLEAN_FILES =
CONFIG_CLEAN_VPATH_FILES =
t_locale_SOURCES = t-locale.c
-t_locale_OBJECTS = t-locale$U.$(OBJEXT)
+t_locale_OBJECTS = t-locale.$(OBJEXT)
t_locale_LDADD = $(LDADD)
t_locale_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_printf_SOURCES = t-printf.c
-t_printf_OBJECTS = t-printf$U.$(OBJEXT)
+t_printf_OBJECTS = t-printf.$(OBJEXT)
t_printf_LDADD = $(LDADD)
t_printf_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_scanf_SOURCES = t-scanf.c
-t_scanf_OBJECTS = t-scanf$U.$(OBJEXT)
+t_scanf_OBJECTS = t-scanf.$(OBJEXT)
t_scanf_LDADD = $(LDADD)
t_scanf_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
$(LDFLAGS) -o $@
SOURCES = t-locale.c t-printf.c t-scanf.c
DIST_SOURCES = t-locale.c t-printf.c t-scanf.c
+am__can_run_installinfo = \
+ case $$AM_UPDATE_INFO_DIR in \
+ n|no|NO) false;; \
+ *) (install-info --version) >/dev/null 2>&1;; \
+ esac
ETAGS = etags
CTAGS = ctags
am__tty_colors = \
SPEED_CYCLECOUNTER_OBJ = @SPEED_CYCLECOUNTER_OBJ@
STRIP = @STRIP@
TAL_OBJECT = @TAL_OBJECT@
+TUNE_LIBS = @TUNE_LIBS@
TUNE_SQR_OBJ = @TUNE_SQR_OBJ@
-U = @U@
U_FOR_BUILD = @U_FOR_BUILD@
VERSION = @VERSION@
WITH_READLINE_01 = @WITH_READLINE_01@
mkdir_p = @mkdir_p@
mpn_objects = @mpn_objects@
mpn_objs_in_libgmp = @mpn_objs_in_libgmp@
-mpn_objs_in_libmp = @mpn_objs_in_libmp@
oldincludedir = @oldincludedir@
pdfdir = @pdfdir@
prefix = @prefix@
list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
echo " rm -f" $$list; \
rm -f $$list
-t-locale$(EXEEXT): $(t_locale_OBJECTS) $(t_locale_DEPENDENCIES)
+t-locale$(EXEEXT): $(t_locale_OBJECTS) $(t_locale_DEPENDENCIES) $(EXTRA_t_locale_DEPENDENCIES)
@rm -f t-locale$(EXEEXT)
$(LINK) $(t_locale_OBJECTS) $(t_locale_LDADD) $(LIBS)
-t-printf$(EXEEXT): $(t_printf_OBJECTS) $(t_printf_DEPENDENCIES)
+t-printf$(EXEEXT): $(t_printf_OBJECTS) $(t_printf_DEPENDENCIES) $(EXTRA_t_printf_DEPENDENCIES)
@rm -f t-printf$(EXEEXT)
$(LINK) $(t_printf_OBJECTS) $(t_printf_LDADD) $(LIBS)
-t-scanf$(EXEEXT): $(t_scanf_OBJECTS) $(t_scanf_DEPENDENCIES)
+t-scanf$(EXEEXT): $(t_scanf_OBJECTS) $(t_scanf_DEPENDENCIES) $(EXTRA_t_scanf_DEPENDENCIES)
@rm -f t-scanf$(EXEEXT)
$(LINK) $(t_scanf_OBJECTS) $(t_scanf_LDADD) $(LIBS)
distclean-compile:
-rm -f *.tab.c
-$(top_builddir)/ansi2knr:
- $(am__cd) $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) ./ansi2knr
-
-mostlyclean-kr:
- -test "$U" = "" || rm -f *_.c
.c.o:
$(COMPILE) -c $<
.c.lo:
$(LTCOMPILE) -c -o $@ $<
-t-locale_.c: t-locale.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-locale.c; then echo $(srcdir)/t-locale.c; else echo t-locale.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-printf_.c: t-printf.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-printf.c; then echo $(srcdir)/t-printf.c; else echo t-printf.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-scanf_.c: t-scanf.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-scanf.c; then echo $(srcdir)/t-scanf.c; else echo t-scanf.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-locale_.$(OBJEXT) t-locale_.lo t-printf_.$(OBJEXT) t-printf_.lo \
-t-scanf_.$(OBJEXT) t-scanf_.lo : $(ANSI2KNR)
mostlyclean-libtool:
-rm -f *.lo
fi; \
dashes=`echo "$$dashes" | sed s/./=/g`; \
if test "$$failed" -eq 0; then \
- echo "$$grn$$dashes"; \
+ col="$$grn"; \
else \
- echo "$$red$$dashes"; \
+ col="$$red"; \
fi; \
- echo "$$banner"; \
- test -z "$$skipped" || echo "$$skipped"; \
- test -z "$$report" || echo "$$report"; \
- echo "$$dashes$$std"; \
+ echo "$${col}$$dashes$${std}"; \
+ echo "$${col}$$banner$${std}"; \
+ test -z "$$skipped" || echo "$${col}$$skipped$${std}"; \
+ test -z "$$report" || echo "$${col}$$report$${std}"; \
+ echo "$${col}$$dashes$${std}"; \
test "$$failed" -eq 0; \
else :; fi
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
mostlyclean: mostlyclean-am
-mostlyclean-am: mostlyclean-compile mostlyclean-generic mostlyclean-kr \
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
mostlyclean-libtool
pdf: pdf-am
uninstall-am:
-.MAKE: $(top_builddir)/ansi2knr check-am install-am install-strip
+.MAKE: check-am install-am install-strip
.PHONY: CTAGS GTAGS all all-am check check-TESTS check-am clean \
clean-checkPROGRAMS clean-generic clean-libtool ctags \
install-ps install-ps-am install-strip installcheck \
installcheck-am installdirs maintainer-clean \
maintainer-clean-generic mostlyclean mostlyclean-compile \
- mostlyclean-generic mostlyclean-kr mostlyclean-libtool pdf \
- pdf-am ps ps-am tags uninstall uninstall-am
+ mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
+ tags uninstall uninstall-am
$(top_builddir)/tests/libtests.la:
/* Test locale support, or attempt to do so.
-Copyright 2001, 2002 Free Software Foundation, Inc.
+Copyright 2001, 2002, 2011 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#define _GNU_SOURCE /* for DECIMAL_POINT in glibc langinfo.h */
}
#else
-char *decimal_point;
+const char *decimal_point;
/* Replace the libc localeconv with one we can manipulate. */
#if HAVE_LOCALECONV
localeconv (void)
{
static struct lconv l;
- l.decimal_point = decimal_point;
+ l.decimal_point = (char *) decimal_point;
return &l;
}
#endif
{
#if defined (DECIMAL_POINT)
if (n == DECIMAL_POINT)
- return decimal_point;
+ return (char *) decimal_point;
#endif
#if defined (RADIXCHAR)
if (n == RADIXCHAR)
- return decimal_point;
+ return (char *) decimal_point;
#endif
- return "";
+ return (char *) "";
}
#endif
void
check_input (void)
{
- static char *point[] = {
+ static const char *point[] = {
".", ",", "WU", "STR", "ZTV***"
};
for (i = 0; i < numberof (point); i++)
{
- decimal_point = point[i];
+ decimal_point = (const char *) point[i];
for (neg = 0; neg <= 1; neg++)
{
Copyright 2001, 2002, 2003 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
/* Usage: t-printf [-s]
return;
fmtsize = strlen (fmt_orig) + 1;
- fmt = (*__gmp_allocate_func) (fmtsize);
+ fmt = (char *) (*__gmp_allocate_func) (fmtsize);
for (p = fmt_orig, q = fmt; *p != '\0'; p++)
{
obstack_init (&ob);
got_len = gmp_obstack_vprintf (&ob, fmt, ap);
- got = obstack_base (&ob);
+ got = (char *) obstack_base (&ob);
ob_len = obstack_object_size (&ob);
if (got_len != want_len
Copyright 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
/* Usage: t-scanf [-s]
int option_libc_scanf = 0;
-typedef int (*fun_t) __GMP_PROTO ((const char *, const char *, void *, void *));
+typedef int (*fun_t) (const char *, const char *, void *, void *);
/* This problem was seen on powerpc7450-apple-darwin7.0.0, sscanf returns 0
int ret;
size = strlen (input) + 1;
- input_writable = (*__gmp_allocate_func) (size);
+ input_writable = (char *) (*__gmp_allocate_func) (size);
memcpy (input_writable, input, size);
if (a2 == NULL)
+++ /dev/null
-## Process this file with automake to generate Makefile.in
-
-# Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
-#
-# This file is part of the GNU MP Library.
-#
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 3 of the License, or (at your
-# option) any later version.
-#
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-# License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
-
-
-INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/tests
-LDADD = $(top_builddir)/tests/libtests.la $(top_builddir)/libmp.la
-
-if WANT_MPBSD
-MPBSD_check_OPTION = allfuns t-itom t-mtox
-endif
-
-check_PROGRAMS = $(MPBSD_check_OPTION)
-TESTS = $(check_PROGRAMS)
-
-# check linking only against libmp
-allfuns_LDADD = $(top_builddir)/libmp.la
-
-$(top_builddir)/tests/libtests.la:
- cd $(top_builddir)/tests; $(MAKE) $(AM_MAKEFLAGS) libtests.la
+++ /dev/null
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
-# @configure_input@
-
-# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
-# This Makefile.in is free software; the Free Software Foundation
-# gives unlimited permission to copy and/or distribute it,
-# with or without modifications, as long as this notice is preserved.
-
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
-# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
-# PARTICULAR PURPOSE.
-
-@SET_MAKE@
-
-# Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
-#
-# This file is part of the GNU MP Library.
-#
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 3 of the License, or (at your
-# option) any later version.
-#
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-# License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
-VPATH = @srcdir@
-pkgdatadir = $(datadir)/@PACKAGE@
-pkgincludedir = $(includedir)/@PACKAGE@
-pkglibdir = $(libdir)/@PACKAGE@
-pkglibexecdir = $(libexecdir)/@PACKAGE@
-am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
-install_sh_DATA = $(install_sh) -c -m 644
-install_sh_PROGRAM = $(install_sh) -c
-install_sh_SCRIPT = $(install_sh) -c
-INSTALL_HEADER = $(INSTALL_DATA)
-transform = $(program_transform_name)
-NORMAL_INSTALL = :
-PRE_INSTALL = :
-POST_INSTALL = :
-NORMAL_UNINSTALL = :
-PRE_UNINSTALL = :
-POST_UNINSTALL = :
-build_triplet = @build@
-host_triplet = @host@
-ANSI2KNR = $(top_builddir)/ansi2knr
-check_PROGRAMS = $(am__EXEEXT_1)
-subdir = tests/mpbsd
-DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
-ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
-am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
- $(top_srcdir)/configure.in
-am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
- $(ACLOCAL_M4)
-mkinstalldirs = $(install_sh) -d
-CONFIG_HEADER = $(top_builddir)/config.h
-CONFIG_CLEAN_FILES =
-CONFIG_CLEAN_VPATH_FILES =
-@WANT_MPBSD_TRUE@am__EXEEXT_1 = allfuns$(EXEEXT) t-itom$(EXEEXT) \
-@WANT_MPBSD_TRUE@ t-mtox$(EXEEXT)
-allfuns_SOURCES = allfuns.c
-allfuns_OBJECTS = allfuns$U.$(OBJEXT)
-allfuns_DEPENDENCIES = $(top_builddir)/libmp.la
-t_itom_SOURCES = t-itom.c
-t_itom_OBJECTS = t-itom$U.$(OBJEXT)
-t_itom_LDADD = $(LDADD)
-t_itom_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
- $(top_builddir)/libmp.la
-t_mtox_SOURCES = t-mtox.c
-t_mtox_OBJECTS = t-mtox$U.$(OBJEXT)
-t_mtox_LDADD = $(LDADD)
-t_mtox_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
- $(top_builddir)/libmp.la
-DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)
-depcomp =
-am__depfiles_maybe =
-COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
- $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
-LTCOMPILE = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
- --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \
- $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
-CCLD = $(CC)
-LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
- --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \
- $(LDFLAGS) -o $@
-SOURCES = allfuns.c t-itom.c t-mtox.c
-DIST_SOURCES = allfuns.c t-itom.c t-mtox.c
-ETAGS = etags
-CTAGS = ctags
-am__tty_colors = \
-red=; grn=; lgn=; blu=; std=
-DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
-ABI = @ABI@
-ACLOCAL = @ACLOCAL@
-AMTAR = @AMTAR@
-AR = @AR@
-AS = @AS@
-ASMFLAGS = @ASMFLAGS@
-AUTOCONF = @AUTOCONF@
-AUTOHEADER = @AUTOHEADER@
-AUTOMAKE = @AUTOMAKE@
-AWK = @AWK@
-CALLING_CONVENTIONS_OBJS = @CALLING_CONVENTIONS_OBJS@
-CC = @CC@
-CCAS = @CCAS@
-CC_FOR_BUILD = @CC_FOR_BUILD@
-CFLAGS = @CFLAGS@
-CPP = @CPP@
-CPPFLAGS = @CPPFLAGS@
-CPP_FOR_BUILD = @CPP_FOR_BUILD@
-CXX = @CXX@
-CXXCPP = @CXXCPP@
-CXXFLAGS = @CXXFLAGS@
-CYGPATH_W = @CYGPATH_W@
-DEFN_LONG_LONG_LIMB = @DEFN_LONG_LONG_LIMB@
-DEFS = @DEFS@
-DLLTOOL = @DLLTOOL@
-DSYMUTIL = @DSYMUTIL@
-DUMPBIN = @DUMPBIN@
-ECHO_C = @ECHO_C@
-ECHO_N = @ECHO_N@
-ECHO_T = @ECHO_T@
-EGREP = @EGREP@
-EXEEXT = @EXEEXT@
-EXEEXT_FOR_BUILD = @EXEEXT_FOR_BUILD@
-FGREP = @FGREP@
-GMP_LDFLAGS = @GMP_LDFLAGS@
-GMP_LIMB_BITS = @GMP_LIMB_BITS@
-GMP_NAIL_BITS = @GMP_NAIL_BITS@
-GREP = @GREP@
-HAVE_CLOCK_01 = @HAVE_CLOCK_01@
-HAVE_CPUTIME_01 = @HAVE_CPUTIME_01@
-HAVE_GETRUSAGE_01 = @HAVE_GETRUSAGE_01@
-HAVE_GETTIMEOFDAY_01 = @HAVE_GETTIMEOFDAY_01@
-HAVE_HOST_CPU_FAMILY_power = @HAVE_HOST_CPU_FAMILY_power@
-HAVE_HOST_CPU_FAMILY_powerpc = @HAVE_HOST_CPU_FAMILY_powerpc@
-HAVE_SIGACTION_01 = @HAVE_SIGACTION_01@
-HAVE_SIGALTSTACK_01 = @HAVE_SIGALTSTACK_01@
-HAVE_SIGSTACK_01 = @HAVE_SIGSTACK_01@
-HAVE_STACK_T_01 = @HAVE_STACK_T_01@
-HAVE_SYS_RESOURCE_H_01 = @HAVE_SYS_RESOURCE_H_01@
-INSTALL = @INSTALL@
-INSTALL_DATA = @INSTALL_DATA@
-INSTALL_PROGRAM = @INSTALL_PROGRAM@
-INSTALL_SCRIPT = @INSTALL_SCRIPT@
-INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
-LD = @LD@
-LDFLAGS = @LDFLAGS@
-LEX = @LEX@
-LEXLIB = @LEXLIB@
-LEX_OUTPUT_ROOT = @LEX_OUTPUT_ROOT@
-LIBCURSES = @LIBCURSES@
-LIBGMPXX_LDFLAGS = @LIBGMPXX_LDFLAGS@
-LIBGMP_DLL = @LIBGMP_DLL@
-LIBGMP_LDFLAGS = @LIBGMP_LDFLAGS@
-LIBM = @LIBM@
-LIBM_FOR_BUILD = @LIBM_FOR_BUILD@
-LIBOBJS = @LIBOBJS@
-LIBREADLINE = @LIBREADLINE@
-LIBS = @LIBS@
-LIBTOOL = @LIBTOOL@
-LIPO = @LIPO@
-LN_S = @LN_S@
-LTLIBOBJS = @LTLIBOBJS@
-M4 = @M4@
-MAINT = @MAINT@
-MAKEINFO = @MAKEINFO@
-MANIFEST_TOOL = @MANIFEST_TOOL@
-MKDIR_P = @MKDIR_P@
-NM = @NM@
-NMEDIT = @NMEDIT@
-OBJDUMP = @OBJDUMP@
-OBJEXT = @OBJEXT@
-OTOOL = @OTOOL@
-OTOOL64 = @OTOOL64@
-PACKAGE = @PACKAGE@
-PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
-PACKAGE_NAME = @PACKAGE_NAME@
-PACKAGE_STRING = @PACKAGE_STRING@
-PACKAGE_TARNAME = @PACKAGE_TARNAME@
-PACKAGE_URL = @PACKAGE_URL@
-PACKAGE_VERSION = @PACKAGE_VERSION@
-PATH_SEPARATOR = @PATH_SEPARATOR@
-RANLIB = @RANLIB@
-SED = @SED@
-SET_MAKE = @SET_MAKE@
-SHELL = @SHELL@
-SPEED_CYCLECOUNTER_OBJ = @SPEED_CYCLECOUNTER_OBJ@
-STRIP = @STRIP@
-TAL_OBJECT = @TAL_OBJECT@
-TUNE_SQR_OBJ = @TUNE_SQR_OBJ@
-U = @U@
-U_FOR_BUILD = @U_FOR_BUILD@
-VERSION = @VERSION@
-WITH_READLINE_01 = @WITH_READLINE_01@
-YACC = @YACC@
-YFLAGS = @YFLAGS@
-abs_builddir = @abs_builddir@
-abs_srcdir = @abs_srcdir@
-abs_top_builddir = @abs_top_builddir@
-abs_top_srcdir = @abs_top_srcdir@
-ac_ct_AR = @ac_ct_AR@
-ac_ct_CC = @ac_ct_CC@
-ac_ct_CXX = @ac_ct_CXX@
-ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
-am__leading_dot = @am__leading_dot@
-am__tar = @am__tar@
-am__untar = @am__untar@
-bindir = @bindir@
-build = @build@
-build_alias = @build_alias@
-build_cpu = @build_cpu@
-build_os = @build_os@
-build_vendor = @build_vendor@
-builddir = @builddir@
-datadir = @datadir@
-datarootdir = @datarootdir@
-docdir = @docdir@
-dvidir = @dvidir@
-exec_prefix = @exec_prefix@
-gmp_srclinks = @gmp_srclinks@
-host = @host@
-host_alias = @host_alias@
-host_cpu = @host_cpu@
-host_os = @host_os@
-host_vendor = @host_vendor@
-htmldir = @htmldir@
-includedir = @includedir@
-infodir = @infodir@
-install_sh = @install_sh@
-libdir = @libdir@
-libexecdir = @libexecdir@
-localedir = @localedir@
-localstatedir = @localstatedir@
-mandir = @mandir@
-mkdir_p = @mkdir_p@
-mpn_objects = @mpn_objects@
-mpn_objs_in_libgmp = @mpn_objs_in_libgmp@
-mpn_objs_in_libmp = @mpn_objs_in_libmp@
-oldincludedir = @oldincludedir@
-pdfdir = @pdfdir@
-prefix = @prefix@
-program_transform_name = @program_transform_name@
-psdir = @psdir@
-sbindir = @sbindir@
-sharedstatedir = @sharedstatedir@
-srcdir = @srcdir@
-sysconfdir = @sysconfdir@
-target_alias = @target_alias@
-top_build_prefix = @top_build_prefix@
-top_builddir = @top_builddir@
-top_srcdir = @top_srcdir@
-INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/tests
-LDADD = $(top_builddir)/tests/libtests.la $(top_builddir)/libmp.la
-@WANT_MPBSD_TRUE@MPBSD_check_OPTION = allfuns t-itom t-mtox
-TESTS = $(check_PROGRAMS)
-
-# check linking only against libmp
-allfuns_LDADD = $(top_builddir)/libmp.la
-all: all-am
-
-.SUFFIXES:
-.SUFFIXES: .c .lo .o .obj
-$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps)
- @for dep in $?; do \
- case '$(am__configure_deps)' in \
- *$$dep*) \
- ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
- && { if test -f $@; then exit 0; else break; fi; }; \
- exit 1;; \
- esac; \
- done; \
- echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu --ignore-deps tests/mpbsd/Makefile'; \
- $(am__cd) $(top_srcdir) && \
- $(AUTOMAKE) --gnu --ignore-deps tests/mpbsd/Makefile
-.PRECIOUS: Makefile
-Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
- @case '$?' in \
- *config.status*) \
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
- *) \
- echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
- cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
- esac;
-
-$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-
-$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
- cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
-$(am__aclocal_m4_deps):
-
-clean-checkPROGRAMS:
- @list='$(check_PROGRAMS)'; test -n "$$list" || exit 0; \
- echo " rm -f" $$list; \
- rm -f $$list || exit $$?; \
- test -n "$(EXEEXT)" || exit 0; \
- list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
- echo " rm -f" $$list; \
- rm -f $$list
-allfuns$(EXEEXT): $(allfuns_OBJECTS) $(allfuns_DEPENDENCIES)
- @rm -f allfuns$(EXEEXT)
- $(LINK) $(allfuns_OBJECTS) $(allfuns_LDADD) $(LIBS)
-t-itom$(EXEEXT): $(t_itom_OBJECTS) $(t_itom_DEPENDENCIES)
- @rm -f t-itom$(EXEEXT)
- $(LINK) $(t_itom_OBJECTS) $(t_itom_LDADD) $(LIBS)
-t-mtox$(EXEEXT): $(t_mtox_OBJECTS) $(t_mtox_DEPENDENCIES)
- @rm -f t-mtox$(EXEEXT)
- $(LINK) $(t_mtox_OBJECTS) $(t_mtox_LDADD) $(LIBS)
-
-mostlyclean-compile:
- -rm -f *.$(OBJEXT)
-
-distclean-compile:
- -rm -f *.tab.c
-$(top_builddir)/ansi2knr:
- $(am__cd) $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) ./ansi2knr
-
-mostlyclean-kr:
- -test "$U" = "" || rm -f *_.c
-
-.c.o:
- $(COMPILE) -c $<
-
-.c.obj:
- $(COMPILE) -c `$(CYGPATH_W) '$<'`
-
-.c.lo:
- $(LTCOMPILE) -c -o $@ $<
-allfuns_.c: allfuns.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/allfuns.c; then echo $(srcdir)/allfuns.c; else echo allfuns.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-itom_.c: t-itom.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-itom.c; then echo $(srcdir)/t-itom.c; else echo t-itom.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-mtox_.c: t-mtox.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-mtox.c; then echo $(srcdir)/t-mtox.c; else echo t-mtox.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-allfuns_.$(OBJEXT) allfuns_.lo t-itom_.$(OBJEXT) t-itom_.lo \
-t-mtox_.$(OBJEXT) t-mtox_.lo : $(ANSI2KNR)
-
-mostlyclean-libtool:
- -rm -f *.lo
-
-clean-libtool:
- -rm -rf .libs _libs
-
-ID: $(HEADERS) $(SOURCES) $(LISP) $(TAGS_FILES)
- list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
- unique=`for i in $$list; do \
- if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
- done | \
- $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
- END { if (nonempty) { for (i in files) print i; }; }'`; \
- mkid -fID $$unique
-tags: TAGS
-
-TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
- $(TAGS_FILES) $(LISP)
- set x; \
- here=`pwd`; \
- list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
- unique=`for i in $$list; do \
- if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
- done | \
- $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
- END { if (nonempty) { for (i in files) print i; }; }'`; \
- shift; \
- if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
- test -n "$$unique" || unique=$$empty_fix; \
- if test $$# -gt 0; then \
- $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
- "$$@" $$unique; \
- else \
- $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
- $$unique; \
- fi; \
- fi
-ctags: CTAGS
-CTAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) \
- $(TAGS_FILES) $(LISP)
- list='$(SOURCES) $(HEADERS) $(LISP) $(TAGS_FILES)'; \
- unique=`for i in $$list; do \
- if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
- done | \
- $(AWK) '{ files[$$0] = 1; nonempty = 1; } \
- END { if (nonempty) { for (i in files) print i; }; }'`; \
- test -z "$(CTAGS_ARGS)$$unique" \
- || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
- $$unique
-
-GTAGS:
- here=`$(am__cd) $(top_builddir) && pwd` \
- && $(am__cd) $(top_srcdir) \
- && gtags -i $(GTAGS_ARGS) "$$here"
-
-distclean-tags:
- -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
-
-check-TESTS: $(TESTS)
- @failed=0; all=0; xfail=0; xpass=0; skip=0; \
- srcdir=$(srcdir); export srcdir; \
- list=' $(TESTS) '; \
- $(am__tty_colors); \
- if test -n "$$list"; then \
- for tst in $$list; do \
- if test -f ./$$tst; then dir=./; \
- elif test -f $$tst; then dir=; \
- else dir="$(srcdir)/"; fi; \
- if $(TESTS_ENVIRONMENT) $${dir}$$tst; then \
- all=`expr $$all + 1`; \
- case " $(XFAIL_TESTS) " in \
- *[\ \ ]$$tst[\ \ ]*) \
- xpass=`expr $$xpass + 1`; \
- failed=`expr $$failed + 1`; \
- col=$$red; res=XPASS; \
- ;; \
- *) \
- col=$$grn; res=PASS; \
- ;; \
- esac; \
- elif test $$? -ne 77; then \
- all=`expr $$all + 1`; \
- case " $(XFAIL_TESTS) " in \
- *[\ \ ]$$tst[\ \ ]*) \
- xfail=`expr $$xfail + 1`; \
- col=$$lgn; res=XFAIL; \
- ;; \
- *) \
- failed=`expr $$failed + 1`; \
- col=$$red; res=FAIL; \
- ;; \
- esac; \
- else \
- skip=`expr $$skip + 1`; \
- col=$$blu; res=SKIP; \
- fi; \
- echo "$${col}$$res$${std}: $$tst"; \
- done; \
- if test "$$all" -eq 1; then \
- tests="test"; \
- All=""; \
- else \
- tests="tests"; \
- All="All "; \
- fi; \
- if test "$$failed" -eq 0; then \
- if test "$$xfail" -eq 0; then \
- banner="$$All$$all $$tests passed"; \
- else \
- if test "$$xfail" -eq 1; then failures=failure; else failures=failures; fi; \
- banner="$$All$$all $$tests behaved as expected ($$xfail expected $$failures)"; \
- fi; \
- else \
- if test "$$xpass" -eq 0; then \
- banner="$$failed of $$all $$tests failed"; \
- else \
- if test "$$xpass" -eq 1; then passes=pass; else passes=passes; fi; \
- banner="$$failed of $$all $$tests did not behave as expected ($$xpass unexpected $$passes)"; \
- fi; \
- fi; \
- dashes="$$banner"; \
- skipped=""; \
- if test "$$skip" -ne 0; then \
- if test "$$skip" -eq 1; then \
- skipped="($$skip test was not run)"; \
- else \
- skipped="($$skip tests were not run)"; \
- fi; \
- test `echo "$$skipped" | wc -c` -le `echo "$$banner" | wc -c` || \
- dashes="$$skipped"; \
- fi; \
- report=""; \
- if test "$$failed" -ne 0 && test -n "$(PACKAGE_BUGREPORT)"; then \
- report="Please report to $(PACKAGE_BUGREPORT)"; \
- test `echo "$$report" | wc -c` -le `echo "$$banner" | wc -c` || \
- dashes="$$report"; \
- fi; \
- dashes=`echo "$$dashes" | sed s/./=/g`; \
- if test "$$failed" -eq 0; then \
- echo "$$grn$$dashes"; \
- else \
- echo "$$red$$dashes"; \
- fi; \
- echo "$$banner"; \
- test -z "$$skipped" || echo "$$skipped"; \
- test -z "$$report" || echo "$$report"; \
- echo "$$dashes$$std"; \
- test "$$failed" -eq 0; \
- else :; fi
-
-distdir: $(DISTFILES)
- @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
- topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
- list='$(DISTFILES)'; \
- dist_files=`for file in $$list; do echo $$file; done | \
- sed -e "s|^$$srcdirstrip/||;t" \
- -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
- case $$dist_files in \
- */*) $(MKDIR_P) `echo "$$dist_files" | \
- sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
- sort -u` ;; \
- esac; \
- for file in $$dist_files; do \
- if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
- if test -d $$d/$$file; then \
- dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
- if test -d "$(distdir)/$$file"; then \
- find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
- fi; \
- if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
- cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
- find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
- fi; \
- cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
- else \
- test -f "$(distdir)/$$file" \
- || cp -p $$d/$$file "$(distdir)/$$file" \
- || exit 1; \
- fi; \
- done
-check-am: all-am
- $(MAKE) $(AM_MAKEFLAGS) $(check_PROGRAMS)
- $(MAKE) $(AM_MAKEFLAGS) check-TESTS
-check: check-am
-all-am: Makefile
-installdirs:
-install: install-am
-install-exec: install-exec-am
-install-data: install-data-am
-uninstall: uninstall-am
-
-install-am: all-am
- @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
-
-installcheck: installcheck-am
-install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
-mostlyclean-generic:
-
-clean-generic:
-
-distclean-generic:
- -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
- -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
-
-maintainer-clean-generic:
- @echo "This command is intended for maintainers to use"
- @echo "it deletes files that may require special tools to rebuild."
-clean: clean-am
-
-clean-am: clean-checkPROGRAMS clean-generic clean-libtool \
- mostlyclean-am
-
-distclean: distclean-am
- -rm -f Makefile
-distclean-am: clean-am distclean-compile distclean-generic \
- distclean-tags
-
-dvi: dvi-am
-
-dvi-am:
-
-html: html-am
-
-html-am:
-
-info: info-am
-
-info-am:
-
-install-data-am:
-
-install-dvi: install-dvi-am
-
-install-dvi-am:
-
-install-exec-am:
-
-install-html: install-html-am
-
-install-html-am:
-
-install-info: install-info-am
-
-install-info-am:
-
-install-man:
-
-install-pdf: install-pdf-am
-
-install-pdf-am:
-
-install-ps: install-ps-am
-
-install-ps-am:
-
-installcheck-am:
-
-maintainer-clean: maintainer-clean-am
- -rm -f Makefile
-maintainer-clean-am: distclean-am maintainer-clean-generic
-
-mostlyclean: mostlyclean-am
-
-mostlyclean-am: mostlyclean-compile mostlyclean-generic mostlyclean-kr \
- mostlyclean-libtool
-
-pdf: pdf-am
-
-pdf-am:
-
-ps: ps-am
-
-ps-am:
-
-uninstall-am:
-
-.MAKE: $(top_builddir)/ansi2knr check-am install-am install-strip
-
-.PHONY: CTAGS GTAGS all all-am check check-TESTS check-am clean \
- clean-checkPROGRAMS clean-generic clean-libtool ctags \
- distclean distclean-compile distclean-generic \
- distclean-libtool distclean-tags distdir dvi dvi-am html \
- html-am info info-am install install-am install-data \
- install-data-am install-dvi install-dvi-am install-exec \
- install-exec-am install-html install-html-am install-info \
- install-info-am install-man install-pdf install-pdf-am \
- install-ps install-ps-am install-strip installcheck \
- installcheck-am installdirs maintainer-clean \
- maintainer-clean-generic mostlyclean mostlyclean-compile \
- mostlyclean-generic mostlyclean-kr mostlyclean-libtool pdf \
- pdf-am ps ps-am tags uninstall uninstall-am
-
-
-$(top_builddir)/tests/libtests.la:
- cd $(top_builddir)/tests; $(MAKE) $(AM_MAKEFLAGS) libtests.la
-
-# Tell versions [3.59,3.63) of GNU make to not export all variables.
-# Otherwise a system limit (for SysV at least) may be exceeded.
-.NOEXPORT:
+++ /dev/null
-/* A test program doing nothing really, just linking to all the BSD MP
- functions that're supposed to exist.
-
-Copyright 2000, 2001 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include "mp.h"
-
-int
-main (int argc, char *argv[])
-{
- MINT *a, *b, *c, *d;
- short h;
-
- mp_set_memory_functions (NULL, NULL, NULL);
- a = itom (123);
- b = xtom ("DEADBEEF");
- c = itom (0);
- d = itom (0);
- move (a, b);
- madd (a, b, c);
- msub (a, b, c);
- mult (a, b, c);
- mdiv (b, a, c, d);
- sdiv (b, 2, c, &h);
- msqrt (a, c, d);
- pow (b, a, a, c);
- rpow (a, 3, c);
- gcd (a, b, c);
- mcmp (a, b);
- if (argc > 1)
- {
- min (c);
- mout (a);
- }
- mtox (b);
- mfree(a);
-
- exit (0);
-}
+++ /dev/null
-/* Test itom.
-
-Copyright 2000, 2001 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "mp.h"
-#include "tests.h"
-
-#define SGN(x) ((x) < 0 ? -1 : (x) == 0 ? 0 : 1)
-
-
-void
-check_data (void)
-{
- static const struct {
- short m;
- mp_size_t want_size;
- mp_limb_t want_limb;
- } data[] = {
-
- { 0L, 0 },
- { 1L, 1, 1 },
- { -1L, -1, 1 },
-
- { SHRT_MAX, 1, SHRT_MAX },
- { -SHRT_MAX, -1, SHRT_MAX },
- { SHRT_MIN, -1, -SHRT_MIN },
- };
-
- MINT *m;
- int i;
-
- for (i = 0; i < numberof (data); i++)
- {
- m = itom (data[i].m);
- if (m->_mp_size != data[i].want_size
- || (m->_mp_size != 0 && m->_mp_d[0] != data[i].want_limb))
- {
- printf ("itom wrong on data[%d]\n", i);
- abort();
- }
- mfree (m);
- }
-}
-
-
-int
-main (void)
-{
- tests_start ();
-
- check_data ();
-
- tests_end ();
- exit (0);
-}
+++ /dev/null
-/* Test mtox.
-
-Copyright 2002 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
-
-#include <string.h> /* for strcmp, strlen */
-#include <stdlib.h> /* for abort */
-#include <stdio.h>
-#include "gmp.h"
-#include "gmp-impl.h"
-#include "mp.h"
-#include "tests.h"
-
-
-void
-check_random (void)
-{
- mpz_t z;
- int i;
- char *got, *want;
- gmp_randstate_ptr rands = RANDS;
-
- mpz_init (z);
-
- for (i = 0; i < 1000; i++)
- {
- mpz_erandomb (z, rands, 6 * GMP_LIMB_BITS);
- got = mtox (z);
- want = mpz_get_str (NULL, 16, z);
- if (strcmp (got, want) != 0)
- {
- printf ("mtox wrong result\n");
- printf (" got \"%s\"\n", got);
- printf (" want \"%s\"\n", want);
- abort ();
- }
- (*__gmp_free_func) (got, strlen (got) + 1);
- (*__gmp_free_func) (want, strlen (want) + 1);
- }
-
- mpz_clear (z);
-}
-
-void
-check_mem (void)
-{
- MINT *m;
- char *s;
-
- m = itom (0);
- s = mtox (m);
- if (! tests_memory_valid (s))
- {
- printf ("Skipping t-mtox, cannot test libgmp and libmp memory together\n");
- exit (0);
- }
- mfree (m);
- (*__gmp_free_func) (s, strlen (s) + 1);
-}
-
-
-int
-main (void)
-{
- tests_start ();
-
- check_mem ();
- check_random ();
-
- tests_end ();
- exit (0);
-}
# Copyright 1996, 1999, 2000, 2001, 2002, 2003, 2004 Free Software
# Foundation, Inc.
#
-# This file is part of the GNU MP Library.
+# This file is part of the GNU MP Library test suite.
#
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 3 of the License, or (at your
-# option) any later version.
+# The GNU MP Library test suite is free software; you can redistribute it
+# and/or modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 3 of the License,
+# or (at your option) any later version.
#
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-# License for more details.
+# The GNU MP Library test suite is distributed in the hope that it will be
+# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+# Public License for more details.
#
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+# You should have received a copy of the GNU General Public License along with
+# the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/.
INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/tests
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.6 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
# Copyright 1996, 1999, 2000, 2001, 2002, 2003, 2004 Free Software
# Foundation, Inc.
#
-# This file is part of the GNU MP Library.
+# This file is part of the GNU MP Library test suite.
#
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 3 of the License, or (at your
-# option) any later version.
+# The GNU MP Library test suite is free software; you can redistribute it
+# and/or modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 3 of the License,
+# or (at your option) any later version.
#
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-# License for more details.
+# The GNU MP Library test suite is distributed in the hope that it will be
+# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+# Public License for more details.
#
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+# You should have received a copy of the GNU General Public License along with
+# the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/.
VPATH = @srcdir@
+am__make_dryrun = \
+ { \
+ am__dry=no; \
+ case $$MAKEFLAGS in \
+ *\\[\ \ ]*) \
+ echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \
+ | grep '^AM OK$$' >/dev/null || am__dry=yes;; \
+ *) \
+ for am__flg in $$MAKEFLAGS; do \
+ case $$am__flg in \
+ *=*|--*) ;; \
+ *n*) am__dry=yes; break;; \
+ esac; \
+ done;; \
+ esac; \
+ test $$am__dry = yes; \
+ }
pkgdatadir = $(datadir)/@PACKAGE@
pkgincludedir = $(includedir)/@PACKAGE@
pkglibdir = $(libdir)/@PACKAGE@
POST_UNINSTALL = :
build_triplet = @build@
host_triplet = @host@
-ANSI2KNR = $(top_builddir)/ansi2knr
check_PROGRAMS = t-add$(EXEEXT) t-sub$(EXEEXT) t-conv$(EXEEXT) \
t-sqrt$(EXEEXT) t-sqrt_ui$(EXEEXT) t-muldiv$(EXEEXT) \
t-dm2exp$(EXEEXT) reuse$(EXEEXT) t-cmp_d$(EXEEXT) \
DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
- $(top_srcdir)/configure.in
+ $(top_srcdir)/configure.ac
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
$(ACLOCAL_M4)
mkinstalldirs = $(install_sh) -d
CONFIG_CLEAN_FILES =
CONFIG_CLEAN_VPATH_FILES =
reuse_SOURCES = reuse.c
-reuse_OBJECTS = reuse$U.$(OBJEXT)
+reuse_OBJECTS = reuse.$(OBJEXT)
reuse_LDADD = $(LDADD)
reuse_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_add_SOURCES = t-add.c
-t_add_OBJECTS = t-add$U.$(OBJEXT)
+t_add_OBJECTS = t-add.$(OBJEXT)
t_add_LDADD = $(LDADD)
t_add_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_cmp_d_SOURCES = t-cmp_d.c
-t_cmp_d_OBJECTS = t-cmp_d$U.$(OBJEXT)
+t_cmp_d_OBJECTS = t-cmp_d.$(OBJEXT)
t_cmp_d_LDADD = $(LDADD)
t_cmp_d_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_cmp_si_SOURCES = t-cmp_si.c
-t_cmp_si_OBJECTS = t-cmp_si$U.$(OBJEXT)
+t_cmp_si_OBJECTS = t-cmp_si.$(OBJEXT)
t_cmp_si_LDADD = $(LDADD)
t_cmp_si_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_conv_SOURCES = t-conv.c
-t_conv_OBJECTS = t-conv$U.$(OBJEXT)
+t_conv_OBJECTS = t-conv.$(OBJEXT)
t_conv_LDADD = $(LDADD)
t_conv_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_div_SOURCES = t-div.c
-t_div_OBJECTS = t-div$U.$(OBJEXT)
+t_div_OBJECTS = t-div.$(OBJEXT)
t_div_LDADD = $(LDADD)
t_div_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_dm2exp_SOURCES = t-dm2exp.c
-t_dm2exp_OBJECTS = t-dm2exp$U.$(OBJEXT)
+t_dm2exp_OBJECTS = t-dm2exp.$(OBJEXT)
t_dm2exp_LDADD = $(LDADD)
t_dm2exp_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_eq_SOURCES = t-eq.c
-t_eq_OBJECTS = t-eq$U.$(OBJEXT)
+t_eq_OBJECTS = t-eq.$(OBJEXT)
t_eq_LDADD = $(LDADD)
t_eq_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_fits_SOURCES = t-fits.c
-t_fits_OBJECTS = t-fits$U.$(OBJEXT)
+t_fits_OBJECTS = t-fits.$(OBJEXT)
t_fits_LDADD = $(LDADD)
t_fits_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_get_d_SOURCES = t-get_d.c
-t_get_d_OBJECTS = t-get_d$U.$(OBJEXT)
+t_get_d_OBJECTS = t-get_d.$(OBJEXT)
t_get_d_LDADD = $(LDADD)
t_get_d_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_get_d_2exp_SOURCES = t-get_d_2exp.c
-t_get_d_2exp_OBJECTS = t-get_d_2exp$U.$(OBJEXT)
+t_get_d_2exp_OBJECTS = t-get_d_2exp.$(OBJEXT)
t_get_d_2exp_LDADD = $(LDADD)
t_get_d_2exp_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_get_si_SOURCES = t-get_si.c
-t_get_si_OBJECTS = t-get_si$U.$(OBJEXT)
+t_get_si_OBJECTS = t-get_si.$(OBJEXT)
t_get_si_LDADD = $(LDADD)
t_get_si_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_get_ui_SOURCES = t-get_ui.c
-t_get_ui_OBJECTS = t-get_ui$U.$(OBJEXT)
+t_get_ui_OBJECTS = t-get_ui.$(OBJEXT)
t_get_ui_LDADD = $(LDADD)
t_get_ui_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_gsprec_SOURCES = t-gsprec.c
-t_gsprec_OBJECTS = t-gsprec$U.$(OBJEXT)
+t_gsprec_OBJECTS = t-gsprec.$(OBJEXT)
t_gsprec_LDADD = $(LDADD)
t_gsprec_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_inp_str_SOURCES = t-inp_str.c
-t_inp_str_OBJECTS = t-inp_str$U.$(OBJEXT)
+t_inp_str_OBJECTS = t-inp_str.$(OBJEXT)
t_inp_str_LDADD = $(LDADD)
t_inp_str_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_int_p_SOURCES = t-int_p.c
-t_int_p_OBJECTS = t-int_p$U.$(OBJEXT)
+t_int_p_OBJECTS = t-int_p.$(OBJEXT)
t_int_p_LDADD = $(LDADD)
t_int_p_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_mul_ui_SOURCES = t-mul_ui.c
-t_mul_ui_OBJECTS = t-mul_ui$U.$(OBJEXT)
+t_mul_ui_OBJECTS = t-mul_ui.$(OBJEXT)
t_mul_ui_LDADD = $(LDADD)
t_mul_ui_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_muldiv_SOURCES = t-muldiv.c
-t_muldiv_OBJECTS = t-muldiv$U.$(OBJEXT)
+t_muldiv_OBJECTS = t-muldiv.$(OBJEXT)
t_muldiv_LDADD = $(LDADD)
t_muldiv_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_set_SOURCES = t-set.c
-t_set_OBJECTS = t-set$U.$(OBJEXT)
+t_set_OBJECTS = t-set.$(OBJEXT)
t_set_LDADD = $(LDADD)
t_set_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_set_q_SOURCES = t-set_q.c
-t_set_q_OBJECTS = t-set_q$U.$(OBJEXT)
+t_set_q_OBJECTS = t-set_q.$(OBJEXT)
t_set_q_LDADD = $(LDADD)
t_set_q_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_set_si_SOURCES = t-set_si.c
-t_set_si_OBJECTS = t-set_si$U.$(OBJEXT)
+t_set_si_OBJECTS = t-set_si.$(OBJEXT)
t_set_si_LDADD = $(LDADD)
t_set_si_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_set_ui_SOURCES = t-set_ui.c
-t_set_ui_OBJECTS = t-set_ui$U.$(OBJEXT)
+t_set_ui_OBJECTS = t-set_ui.$(OBJEXT)
t_set_ui_LDADD = $(LDADD)
t_set_ui_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_sqrt_SOURCES = t-sqrt.c
-t_sqrt_OBJECTS = t-sqrt$U.$(OBJEXT)
+t_sqrt_OBJECTS = t-sqrt.$(OBJEXT)
t_sqrt_LDADD = $(LDADD)
t_sqrt_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_sqrt_ui_SOURCES = t-sqrt_ui.c
-t_sqrt_ui_OBJECTS = t-sqrt_ui$U.$(OBJEXT)
+t_sqrt_ui_OBJECTS = t-sqrt_ui.$(OBJEXT)
t_sqrt_ui_LDADD = $(LDADD)
t_sqrt_ui_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_sub_SOURCES = t-sub.c
-t_sub_OBJECTS = t-sub$U.$(OBJEXT)
+t_sub_OBJECTS = t-sub.$(OBJEXT)
t_sub_LDADD = $(LDADD)
t_sub_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_trunc_SOURCES = t-trunc.c
-t_trunc_OBJECTS = t-trunc$U.$(OBJEXT)
+t_trunc_OBJECTS = t-trunc.$(OBJEXT)
t_trunc_LDADD = $(LDADD)
t_trunc_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_ui_div_SOURCES = t-ui_div.c
-t_ui_div_OBJECTS = t-ui_div$U.$(OBJEXT)
+t_ui_div_OBJECTS = t-ui_div.$(OBJEXT)
t_ui_div_LDADD = $(LDADD)
t_ui_div_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t-get_ui.c t-gsprec.c t-inp_str.c t-int_p.c t-mul_ui.c \
t-muldiv.c t-set.c t-set_q.c t-set_si.c t-set_ui.c t-sqrt.c \
t-sqrt_ui.c t-sub.c t-trunc.c t-ui_div.c
+am__can_run_installinfo = \
+ case $$AM_UPDATE_INFO_DIR in \
+ n|no|NO) false;; \
+ *) (install-info --version) >/dev/null 2>&1;; \
+ esac
ETAGS = etags
CTAGS = ctags
am__tty_colors = \
SPEED_CYCLECOUNTER_OBJ = @SPEED_CYCLECOUNTER_OBJ@
STRIP = @STRIP@
TAL_OBJECT = @TAL_OBJECT@
+TUNE_LIBS = @TUNE_LIBS@
TUNE_SQR_OBJ = @TUNE_SQR_OBJ@
-U = @U@
U_FOR_BUILD = @U_FOR_BUILD@
VERSION = @VERSION@
WITH_READLINE_01 = @WITH_READLINE_01@
mkdir_p = @mkdir_p@
mpn_objects = @mpn_objects@
mpn_objs_in_libgmp = @mpn_objs_in_libgmp@
-mpn_objs_in_libmp = @mpn_objs_in_libmp@
oldincludedir = @oldincludedir@
pdfdir = @pdfdir@
prefix = @prefix@
list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
echo " rm -f" $$list; \
rm -f $$list
-reuse$(EXEEXT): $(reuse_OBJECTS) $(reuse_DEPENDENCIES)
+reuse$(EXEEXT): $(reuse_OBJECTS) $(reuse_DEPENDENCIES) $(EXTRA_reuse_DEPENDENCIES)
@rm -f reuse$(EXEEXT)
$(LINK) $(reuse_OBJECTS) $(reuse_LDADD) $(LIBS)
-t-add$(EXEEXT): $(t_add_OBJECTS) $(t_add_DEPENDENCIES)
+t-add$(EXEEXT): $(t_add_OBJECTS) $(t_add_DEPENDENCIES) $(EXTRA_t_add_DEPENDENCIES)
@rm -f t-add$(EXEEXT)
$(LINK) $(t_add_OBJECTS) $(t_add_LDADD) $(LIBS)
-t-cmp_d$(EXEEXT): $(t_cmp_d_OBJECTS) $(t_cmp_d_DEPENDENCIES)
+t-cmp_d$(EXEEXT): $(t_cmp_d_OBJECTS) $(t_cmp_d_DEPENDENCIES) $(EXTRA_t_cmp_d_DEPENDENCIES)
@rm -f t-cmp_d$(EXEEXT)
$(LINK) $(t_cmp_d_OBJECTS) $(t_cmp_d_LDADD) $(LIBS)
-t-cmp_si$(EXEEXT): $(t_cmp_si_OBJECTS) $(t_cmp_si_DEPENDENCIES)
+t-cmp_si$(EXEEXT): $(t_cmp_si_OBJECTS) $(t_cmp_si_DEPENDENCIES) $(EXTRA_t_cmp_si_DEPENDENCIES)
@rm -f t-cmp_si$(EXEEXT)
$(LINK) $(t_cmp_si_OBJECTS) $(t_cmp_si_LDADD) $(LIBS)
-t-conv$(EXEEXT): $(t_conv_OBJECTS) $(t_conv_DEPENDENCIES)
+t-conv$(EXEEXT): $(t_conv_OBJECTS) $(t_conv_DEPENDENCIES) $(EXTRA_t_conv_DEPENDENCIES)
@rm -f t-conv$(EXEEXT)
$(LINK) $(t_conv_OBJECTS) $(t_conv_LDADD) $(LIBS)
-t-div$(EXEEXT): $(t_div_OBJECTS) $(t_div_DEPENDENCIES)
+t-div$(EXEEXT): $(t_div_OBJECTS) $(t_div_DEPENDENCIES) $(EXTRA_t_div_DEPENDENCIES)
@rm -f t-div$(EXEEXT)
$(LINK) $(t_div_OBJECTS) $(t_div_LDADD) $(LIBS)
-t-dm2exp$(EXEEXT): $(t_dm2exp_OBJECTS) $(t_dm2exp_DEPENDENCIES)
+t-dm2exp$(EXEEXT): $(t_dm2exp_OBJECTS) $(t_dm2exp_DEPENDENCIES) $(EXTRA_t_dm2exp_DEPENDENCIES)
@rm -f t-dm2exp$(EXEEXT)
$(LINK) $(t_dm2exp_OBJECTS) $(t_dm2exp_LDADD) $(LIBS)
-t-eq$(EXEEXT): $(t_eq_OBJECTS) $(t_eq_DEPENDENCIES)
+t-eq$(EXEEXT): $(t_eq_OBJECTS) $(t_eq_DEPENDENCIES) $(EXTRA_t_eq_DEPENDENCIES)
@rm -f t-eq$(EXEEXT)
$(LINK) $(t_eq_OBJECTS) $(t_eq_LDADD) $(LIBS)
-t-fits$(EXEEXT): $(t_fits_OBJECTS) $(t_fits_DEPENDENCIES)
+t-fits$(EXEEXT): $(t_fits_OBJECTS) $(t_fits_DEPENDENCIES) $(EXTRA_t_fits_DEPENDENCIES)
@rm -f t-fits$(EXEEXT)
$(LINK) $(t_fits_OBJECTS) $(t_fits_LDADD) $(LIBS)
-t-get_d$(EXEEXT): $(t_get_d_OBJECTS) $(t_get_d_DEPENDENCIES)
+t-get_d$(EXEEXT): $(t_get_d_OBJECTS) $(t_get_d_DEPENDENCIES) $(EXTRA_t_get_d_DEPENDENCIES)
@rm -f t-get_d$(EXEEXT)
$(LINK) $(t_get_d_OBJECTS) $(t_get_d_LDADD) $(LIBS)
-t-get_d_2exp$(EXEEXT): $(t_get_d_2exp_OBJECTS) $(t_get_d_2exp_DEPENDENCIES)
+t-get_d_2exp$(EXEEXT): $(t_get_d_2exp_OBJECTS) $(t_get_d_2exp_DEPENDENCIES) $(EXTRA_t_get_d_2exp_DEPENDENCIES)
@rm -f t-get_d_2exp$(EXEEXT)
$(LINK) $(t_get_d_2exp_OBJECTS) $(t_get_d_2exp_LDADD) $(LIBS)
-t-get_si$(EXEEXT): $(t_get_si_OBJECTS) $(t_get_si_DEPENDENCIES)
+t-get_si$(EXEEXT): $(t_get_si_OBJECTS) $(t_get_si_DEPENDENCIES) $(EXTRA_t_get_si_DEPENDENCIES)
@rm -f t-get_si$(EXEEXT)
$(LINK) $(t_get_si_OBJECTS) $(t_get_si_LDADD) $(LIBS)
-t-get_ui$(EXEEXT): $(t_get_ui_OBJECTS) $(t_get_ui_DEPENDENCIES)
+t-get_ui$(EXEEXT): $(t_get_ui_OBJECTS) $(t_get_ui_DEPENDENCIES) $(EXTRA_t_get_ui_DEPENDENCIES)
@rm -f t-get_ui$(EXEEXT)
$(LINK) $(t_get_ui_OBJECTS) $(t_get_ui_LDADD) $(LIBS)
-t-gsprec$(EXEEXT): $(t_gsprec_OBJECTS) $(t_gsprec_DEPENDENCIES)
+t-gsprec$(EXEEXT): $(t_gsprec_OBJECTS) $(t_gsprec_DEPENDENCIES) $(EXTRA_t_gsprec_DEPENDENCIES)
@rm -f t-gsprec$(EXEEXT)
$(LINK) $(t_gsprec_OBJECTS) $(t_gsprec_LDADD) $(LIBS)
-t-inp_str$(EXEEXT): $(t_inp_str_OBJECTS) $(t_inp_str_DEPENDENCIES)
+t-inp_str$(EXEEXT): $(t_inp_str_OBJECTS) $(t_inp_str_DEPENDENCIES) $(EXTRA_t_inp_str_DEPENDENCIES)
@rm -f t-inp_str$(EXEEXT)
$(LINK) $(t_inp_str_OBJECTS) $(t_inp_str_LDADD) $(LIBS)
-t-int_p$(EXEEXT): $(t_int_p_OBJECTS) $(t_int_p_DEPENDENCIES)
+t-int_p$(EXEEXT): $(t_int_p_OBJECTS) $(t_int_p_DEPENDENCIES) $(EXTRA_t_int_p_DEPENDENCIES)
@rm -f t-int_p$(EXEEXT)
$(LINK) $(t_int_p_OBJECTS) $(t_int_p_LDADD) $(LIBS)
-t-mul_ui$(EXEEXT): $(t_mul_ui_OBJECTS) $(t_mul_ui_DEPENDENCIES)
+t-mul_ui$(EXEEXT): $(t_mul_ui_OBJECTS) $(t_mul_ui_DEPENDENCIES) $(EXTRA_t_mul_ui_DEPENDENCIES)
@rm -f t-mul_ui$(EXEEXT)
$(LINK) $(t_mul_ui_OBJECTS) $(t_mul_ui_LDADD) $(LIBS)
-t-muldiv$(EXEEXT): $(t_muldiv_OBJECTS) $(t_muldiv_DEPENDENCIES)
+t-muldiv$(EXEEXT): $(t_muldiv_OBJECTS) $(t_muldiv_DEPENDENCIES) $(EXTRA_t_muldiv_DEPENDENCIES)
@rm -f t-muldiv$(EXEEXT)
$(LINK) $(t_muldiv_OBJECTS) $(t_muldiv_LDADD) $(LIBS)
-t-set$(EXEEXT): $(t_set_OBJECTS) $(t_set_DEPENDENCIES)
+t-set$(EXEEXT): $(t_set_OBJECTS) $(t_set_DEPENDENCIES) $(EXTRA_t_set_DEPENDENCIES)
@rm -f t-set$(EXEEXT)
$(LINK) $(t_set_OBJECTS) $(t_set_LDADD) $(LIBS)
-t-set_q$(EXEEXT): $(t_set_q_OBJECTS) $(t_set_q_DEPENDENCIES)
+t-set_q$(EXEEXT): $(t_set_q_OBJECTS) $(t_set_q_DEPENDENCIES) $(EXTRA_t_set_q_DEPENDENCIES)
@rm -f t-set_q$(EXEEXT)
$(LINK) $(t_set_q_OBJECTS) $(t_set_q_LDADD) $(LIBS)
-t-set_si$(EXEEXT): $(t_set_si_OBJECTS) $(t_set_si_DEPENDENCIES)
+t-set_si$(EXEEXT): $(t_set_si_OBJECTS) $(t_set_si_DEPENDENCIES) $(EXTRA_t_set_si_DEPENDENCIES)
@rm -f t-set_si$(EXEEXT)
$(LINK) $(t_set_si_OBJECTS) $(t_set_si_LDADD) $(LIBS)
-t-set_ui$(EXEEXT): $(t_set_ui_OBJECTS) $(t_set_ui_DEPENDENCIES)
+t-set_ui$(EXEEXT): $(t_set_ui_OBJECTS) $(t_set_ui_DEPENDENCIES) $(EXTRA_t_set_ui_DEPENDENCIES)
@rm -f t-set_ui$(EXEEXT)
$(LINK) $(t_set_ui_OBJECTS) $(t_set_ui_LDADD) $(LIBS)
-t-sqrt$(EXEEXT): $(t_sqrt_OBJECTS) $(t_sqrt_DEPENDENCIES)
+t-sqrt$(EXEEXT): $(t_sqrt_OBJECTS) $(t_sqrt_DEPENDENCIES) $(EXTRA_t_sqrt_DEPENDENCIES)
@rm -f t-sqrt$(EXEEXT)
$(LINK) $(t_sqrt_OBJECTS) $(t_sqrt_LDADD) $(LIBS)
-t-sqrt_ui$(EXEEXT): $(t_sqrt_ui_OBJECTS) $(t_sqrt_ui_DEPENDENCIES)
+t-sqrt_ui$(EXEEXT): $(t_sqrt_ui_OBJECTS) $(t_sqrt_ui_DEPENDENCIES) $(EXTRA_t_sqrt_ui_DEPENDENCIES)
@rm -f t-sqrt_ui$(EXEEXT)
$(LINK) $(t_sqrt_ui_OBJECTS) $(t_sqrt_ui_LDADD) $(LIBS)
-t-sub$(EXEEXT): $(t_sub_OBJECTS) $(t_sub_DEPENDENCIES)
+t-sub$(EXEEXT): $(t_sub_OBJECTS) $(t_sub_DEPENDENCIES) $(EXTRA_t_sub_DEPENDENCIES)
@rm -f t-sub$(EXEEXT)
$(LINK) $(t_sub_OBJECTS) $(t_sub_LDADD) $(LIBS)
-t-trunc$(EXEEXT): $(t_trunc_OBJECTS) $(t_trunc_DEPENDENCIES)
+t-trunc$(EXEEXT): $(t_trunc_OBJECTS) $(t_trunc_DEPENDENCIES) $(EXTRA_t_trunc_DEPENDENCIES)
@rm -f t-trunc$(EXEEXT)
$(LINK) $(t_trunc_OBJECTS) $(t_trunc_LDADD) $(LIBS)
-t-ui_div$(EXEEXT): $(t_ui_div_OBJECTS) $(t_ui_div_DEPENDENCIES)
+t-ui_div$(EXEEXT): $(t_ui_div_OBJECTS) $(t_ui_div_DEPENDENCIES) $(EXTRA_t_ui_div_DEPENDENCIES)
@rm -f t-ui_div$(EXEEXT)
$(LINK) $(t_ui_div_OBJECTS) $(t_ui_div_LDADD) $(LIBS)
distclean-compile:
-rm -f *.tab.c
-$(top_builddir)/ansi2knr:
- $(am__cd) $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) ./ansi2knr
-
-mostlyclean-kr:
- -test "$U" = "" || rm -f *_.c
.c.o:
$(COMPILE) -c $<
.c.lo:
$(LTCOMPILE) -c -o $@ $<
-reuse_.c: reuse.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/reuse.c; then echo $(srcdir)/reuse.c; else echo reuse.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-add_.c: t-add.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-add.c; then echo $(srcdir)/t-add.c; else echo t-add.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-cmp_d_.c: t-cmp_d.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-cmp_d.c; then echo $(srcdir)/t-cmp_d.c; else echo t-cmp_d.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-cmp_si_.c: t-cmp_si.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-cmp_si.c; then echo $(srcdir)/t-cmp_si.c; else echo t-cmp_si.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-conv_.c: t-conv.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-conv.c; then echo $(srcdir)/t-conv.c; else echo t-conv.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-div_.c: t-div.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-div.c; then echo $(srcdir)/t-div.c; else echo t-div.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-dm2exp_.c: t-dm2exp.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-dm2exp.c; then echo $(srcdir)/t-dm2exp.c; else echo t-dm2exp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-eq_.c: t-eq.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-eq.c; then echo $(srcdir)/t-eq.c; else echo t-eq.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-fits_.c: t-fits.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-fits.c; then echo $(srcdir)/t-fits.c; else echo t-fits.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-get_d_.c: t-get_d.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-get_d.c; then echo $(srcdir)/t-get_d.c; else echo t-get_d.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-get_d_2exp_.c: t-get_d_2exp.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-get_d_2exp.c; then echo $(srcdir)/t-get_d_2exp.c; else echo t-get_d_2exp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-get_si_.c: t-get_si.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-get_si.c; then echo $(srcdir)/t-get_si.c; else echo t-get_si.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-get_ui_.c: t-get_ui.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-get_ui.c; then echo $(srcdir)/t-get_ui.c; else echo t-get_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-gsprec_.c: t-gsprec.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-gsprec.c; then echo $(srcdir)/t-gsprec.c; else echo t-gsprec.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-inp_str_.c: t-inp_str.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-inp_str.c; then echo $(srcdir)/t-inp_str.c; else echo t-inp_str.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-int_p_.c: t-int_p.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-int_p.c; then echo $(srcdir)/t-int_p.c; else echo t-int_p.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-mul_ui_.c: t-mul_ui.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-mul_ui.c; then echo $(srcdir)/t-mul_ui.c; else echo t-mul_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-muldiv_.c: t-muldiv.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-muldiv.c; then echo $(srcdir)/t-muldiv.c; else echo t-muldiv.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-set_.c: t-set.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-set.c; then echo $(srcdir)/t-set.c; else echo t-set.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-set_q_.c: t-set_q.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-set_q.c; then echo $(srcdir)/t-set_q.c; else echo t-set_q.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-set_si_.c: t-set_si.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-set_si.c; then echo $(srcdir)/t-set_si.c; else echo t-set_si.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-set_ui_.c: t-set_ui.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-set_ui.c; then echo $(srcdir)/t-set_ui.c; else echo t-set_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-sqrt_.c: t-sqrt.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-sqrt.c; then echo $(srcdir)/t-sqrt.c; else echo t-sqrt.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-sqrt_ui_.c: t-sqrt_ui.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-sqrt_ui.c; then echo $(srcdir)/t-sqrt_ui.c; else echo t-sqrt_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-sub_.c: t-sub.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-sub.c; then echo $(srcdir)/t-sub.c; else echo t-sub.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-trunc_.c: t-trunc.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-trunc.c; then echo $(srcdir)/t-trunc.c; else echo t-trunc.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-ui_div_.c: t-ui_div.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-ui_div.c; then echo $(srcdir)/t-ui_div.c; else echo t-ui_div.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-reuse_.$(OBJEXT) reuse_.lo t-add_.$(OBJEXT) t-add_.lo \
-t-cmp_d_.$(OBJEXT) t-cmp_d_.lo t-cmp_si_.$(OBJEXT) t-cmp_si_.lo \
-t-conv_.$(OBJEXT) t-conv_.lo t-div_.$(OBJEXT) t-div_.lo \
-t-dm2exp_.$(OBJEXT) t-dm2exp_.lo t-eq_.$(OBJEXT) t-eq_.lo \
-t-fits_.$(OBJEXT) t-fits_.lo t-get_d_.$(OBJEXT) t-get_d_.lo \
-t-get_d_2exp_.$(OBJEXT) t-get_d_2exp_.lo t-get_si_.$(OBJEXT) \
-t-get_si_.lo t-get_ui_.$(OBJEXT) t-get_ui_.lo t-gsprec_.$(OBJEXT) \
-t-gsprec_.lo t-inp_str_.$(OBJEXT) t-inp_str_.lo t-int_p_.$(OBJEXT) \
-t-int_p_.lo t-mul_ui_.$(OBJEXT) t-mul_ui_.lo t-muldiv_.$(OBJEXT) \
-t-muldiv_.lo t-set_.$(OBJEXT) t-set_.lo t-set_q_.$(OBJEXT) t-set_q_.lo \
-t-set_si_.$(OBJEXT) t-set_si_.lo t-set_ui_.$(OBJEXT) t-set_ui_.lo \
-t-sqrt_.$(OBJEXT) t-sqrt_.lo t-sqrt_ui_.$(OBJEXT) t-sqrt_ui_.lo \
-t-sub_.$(OBJEXT) t-sub_.lo t-trunc_.$(OBJEXT) t-trunc_.lo \
-t-ui_div_.$(OBJEXT) t-ui_div_.lo : $(ANSI2KNR)
mostlyclean-libtool:
-rm -f *.lo
fi; \
dashes=`echo "$$dashes" | sed s/./=/g`; \
if test "$$failed" -eq 0; then \
- echo "$$grn$$dashes"; \
+ col="$$grn"; \
else \
- echo "$$red$$dashes"; \
+ col="$$red"; \
fi; \
- echo "$$banner"; \
- test -z "$$skipped" || echo "$$skipped"; \
- test -z "$$report" || echo "$$report"; \
- echo "$$dashes$$std"; \
+ echo "$${col}$$dashes$${std}"; \
+ echo "$${col}$$banner$${std}"; \
+ test -z "$$skipped" || echo "$${col}$$skipped$${std}"; \
+ test -z "$$report" || echo "$${col}$$report$${std}"; \
+ echo "$${col}$$dashes$${std}"; \
test "$$failed" -eq 0; \
else :; fi
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
mostlyclean: mostlyclean-am
-mostlyclean-am: mostlyclean-compile mostlyclean-generic mostlyclean-kr \
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
mostlyclean-libtool
pdf: pdf-am
uninstall-am:
-.MAKE: $(top_builddir)/ansi2knr check-am install-am install-strip
+.MAKE: check-am install-am install-strip
.PHONY: CTAGS GTAGS all all-am check check-TESTS check-am clean \
clean-checkPROGRAMS clean-generic clean-libtool ctags \
install-ps install-ps-am install-strip installcheck \
installcheck-am installdirs maintainer-clean \
maintainer-clean-generic mostlyclean mostlyclean-compile \
- mostlyclean-generic mostlyclean-kr mostlyclean-libtool pdf \
- pdf-am ps ps-am tags uninstall uninstall-am
+ mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
+ tags uninstall uninstall-am
$(top_builddir)/tests/libtests.la:
/* Test that routines allow reusing a source variable as destination.
-Copyright 1996, 2000, 2001, 2002 Free Software Foundation, Inc.
+Copyright 1996, 2000, 2001, 2002, 2012 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
#define EXPO 32
#endif
-void dump_abort __GMP_PROTO ((char *, mpf_t, mpf_t));
+void dump_abort (const char *, mpf_t, mpf_t);
-typedef void (*dss_func) __GMP_PROTO ((mpf_ptr, mpf_srcptr, mpf_srcptr));
+typedef void (*dss_func) (mpf_ptr, mpf_srcptr, mpf_srcptr);
dss_func dss_funcs[] =
{
mpf_div, mpf_add, mpf_mul, mpf_sub,
};
-char *dss_func_names[] =
+const char *dss_func_names[] =
{
"mpf_div", "mpf_add", "mpf_mul", "mpf_sub",
};
-typedef void (*dsi_func) __GMP_PROTO ((mpf_ptr, mpf_srcptr, unsigned long int));
+typedef void (*dsi_func) (mpf_ptr, mpf_srcptr, unsigned long int);
dsi_func dsi_funcs[] =
{
mpf_div_ui, mpf_add_ui, mpf_mul_ui, mpf_sub_ui,
- mpf_mul_2exp, mpf_div_2exp
+ mpf_mul_2exp, mpf_div_2exp, mpf_pow_ui
};
-char *dsi_func_names[] =
+const char *dsi_func_names[] =
{
"mpf_div_ui", "mpf_add_ui", "mpf_mul_ui", "mpf_sub_ui",
- "mpf_mul_2exp", "mpf_div_2exp"
+ "mpf_mul_2exp", "mpf_div_2exp", "mpf_pow_ui"
};
-typedef void (*dis_func) __GMP_PROTO ((mpf_ptr, unsigned long int, mpf_srcptr));
+typedef void (*dis_func) (mpf_ptr, unsigned long int, mpf_srcptr);
dis_func dis_funcs[] =
{
mpf_ui_div, mpf_ui_sub,
};
-char *dis_func_names[] =
+const char *dis_func_names[] =
{
"mpf_ui_div", "mpf_ui_sub",
};
}
void
-dump_abort (char *name, mpf_t res1, mpf_t res2)
+dump_abort (const char *name, mpf_t res1, mpf_t res2)
{
printf ("failure in %s:\n", name);
mpf_dump (res1);
}
#if 0
-void mpf_abs __GMP_PROTO ((mpf_ptr, mpf_srcptr));
-void mpf_sqrt __GMP_PROTO ((mpf_ptr, mpf_srcptr));
-void mpf_neg __GMP_PROTO ((mpf_ptr, mpf_srcptr));
+void mpf_abs (mpf_ptr, mpf_srcptr);
+void mpf_sqrt (mpf_ptr, mpf_srcptr);
+void mpf_neg (mpf_ptr, mpf_srcptr);
#endif
#endif /* ! DLL_EXPORT */
Copyright 1996, 2001 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
Copyright 2001, 2003, 2003, 2005 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
Copyright 2000, 2001, 2004 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
Copyright 1996, 2000, 2001, 2008 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
Copyright 2004 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
Copyright 1996, 2000, 2001 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
/* Test mpf_eq.
-Copyright 2009 Free Software Foundation, Inc.
+Copyright 2009, 2012 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
void dump_abort (mpf_t, mpf_t, int, int, int, int, int, long);
void hexdump (mpf_t);
-int
-main (int argc, char **argv)
+/* Check mpf_eq against a fixed table of hand-built operands.
+
+   Each table row holds two operands (exponent, signed size and up to 10
+   limbs each), the bit count handed to mpf_eq, and the result the test
+   expects (1 for "equal", 0 for "not equal").  The rows cover: both
+   operands zero; identical single-limb values at several bit counts;
+   7 versus 6, expected equal at 0 and 2 bits but different at 3 bits;
+   zero versus nonzero, opposite signs and differing exponents; 8 versus
+   4; and limbs {0, 3} (size 2) versus {3} (size 1), expected equal.
+
+   The mpf_t variables are never passed to mpf_init; their fields are
+   pointed directly at the static table data, so they must not be cleared
+   or written through.  Every comparison is made in both argument orders.
+   On any disagreement the operands and results are printed and the
+   program aborts.  */
+void
+check_data (void)
+{
+  static const struct
+  {
+    struct {
+      int exp, size;
+      mp_limb_t d[10];
+    } x, y;
+    mp_bitcnt_t bits;
+    int want;
+
+  } data[] = {
+    { { 0, 0, { 0 } }, { 0, 0, { 0 } }, 0, 1 },
+
+    { { 0, 1, { 7 } }, { 0, 1, { 7 } }, 0, 1 },
+    { { 0, 1, { 7 } }, { 0, 1, { 7 } }, 17, 1 },
+    { { 0, 1, { 7 } }, { 0, 1, { 7 } }, 4711, 1 },
+
+    { { 0, 1, { 7 } }, { 0, 1, { 6 } }, 0, 1 },
+    { { 0, 1, { 7 } }, { 0, 1, { 6 } }, 2, 1 },
+    { { 0, 1, { 7 } }, { 0, 1, { 6 } }, 3, 0 },
+
+    { { 0, 0, { 0 } }, { 0, 1, { 1 } }, 0, 0 },
+    { { 0, 1, { 1 } }, { 0,-1 ,{ 1 } }, 0, 0 },
+    { { 1, 1, { 1 } }, { 0, 1, { 1 } }, 0, 0 },
+
+    { { 0, 1, { 8 } }, { 0, 1, { 4 } }, 0, 0 },
+
+    { { 0, 2, { 0, 3 } }, { 0, 1, { 3 } }, 1000, 1 },
+  };
+
+  mpf_t x, y;
+  int got, got_swapped;
+  int i;
+  /* Print operands in hexadecimal if a failure is reported below.  */
+  mp_trace_base = 16;
+
+  for (i = 0; i < numberof (data); i++)
+    {
+      /* Alias x onto the table entry; the cast drops const only so the
+	 pointer fits the mpf_t field type.  */
+      PTR(x) = (mp_ptr) data[i].x.d;
+      SIZ(x) = data[i].x.size;
+      EXP(x) = data[i].x.exp;
+      PREC(x) = numberof (data[i].x.d);
+      MPF_CHECK_FORMAT (x);
+
+      /* Same aliasing for the second operand.  */
+      PTR(y) = (mp_ptr) data[i].y.d;
+      SIZ(y) = data[i].y.size;
+      EXP(y) = data[i].y.exp;
+      PREC(y) = numberof (data[i].y.d);
+      MPF_CHECK_FORMAT (y);
+
+      /* The answer must not depend on argument order.  */
+      got = mpf_eq (x, y, data[i].bits);
+      got_swapped = mpf_eq (y, x, data[i].bits);
+
+      if (got != got_swapped || got != data[i].want)
+	{
+	  printf ("check_data() wrong result at data[%d]\n", i);
+	  mpf_trace ("x   ", x);
+	  mpf_trace ("y   ", y);
+	  printf ("got         %d\n", got);
+	  printf ("got_swapped %d\n", got_swapped);
+	  printf ("want        %d\n", data[i].want);
+	  abort ();
+	}
+    }
+}
+
+void
+check_random (long reps)
{
- unsigned long test, reps = 10000;
+ unsigned long test;
+ gmp_randstate_ptr rands = RANDS;
mpf_t a, b, x;
- gmp_randstate_ptr rands;
mpz_t ds;
int hibits, lshift1, lshift2;
int xtra;
#define LSHIFT1 10
#define LSHIFT2 10
- if (argc > 1)
- reps = strtol (argv[1], 0, 0);
-
- tests_start ();
-
- rands = RANDS;
-
mpf_set_default_prec ((1 << HIBITS) + (1 << LSHIFT1) + (1 << LSHIFT2));
mpz_init (ds);
insert_random_low_zero_limbs (a, rands);
insert_random_low_zero_limbs (b, rands);
- if (mpf_eq (a, b, lshift1 + hibits) == 0)
+ if (mpf_eq (a, b, lshift1 + hibits) == 0 ||
+ mpf_eq (b, a, lshift1 + hibits) == 0)
{
dump_abort (a, b, lshift1 + hibits, lshift1, lshift2, hibits, 1, test);
}
for (xtra = 1; xtra < 100; xtra++)
- if (mpf_eq (a, b, lshift1 + hibits + xtra) != 0)
+ if (mpf_eq (a, b, lshift1 + hibits + xtra) != 0 ||
+ mpf_eq (b, a, lshift1 + hibits + xtra) != 0)
{
dump_abort (a, b, lshift1 + hibits + xtra, lshift1, lshift2, hibits, 0, test);
}
mpf_clears (a, b, x, NULL);
mpz_clear (ds);
- tests_end ();
- exit (0);
}
void
printf (" ");
}
}
+
+/* Test driver: run the table-driven checks, then the randomized checks.
+   When exactly one command-line argument is given it is parsed by strtol
+   (base 0) and replaces the default of 10000 repetitions passed to
+   check_random.  */
+int
+main (int argc, char *argv[])
+{
+  long reps = (argc == 2) ? strtol (argv[1], NULL, 0) : 10000;
+
+  tests_start ();
+  check_data ();
+  check_random (reps);
+  tests_end ();
+
+  exit (0);
+}
Copyright 2001, 2002 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
/* Test mpf_get_d and mpf_set_d.
- Copyright 1996, 1999, 2000, 2001, 2009 Free Software Foundation, Inc.
+Copyright 1996, 1999, 2000, 2001, 2009 Free Software Foundation, Inc.
- This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
- The GNU MP Library is free software; you can redistribute it and/or modify
- it under the terms of the GNU Lesser General Public License as published by
- the Free Software Foundation; either version 3 of the License, or (at your
- option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
- The GNU MP Library is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
- License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
- You should have received a copy of the GNU Lesser General Public License
- along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
#include "gmp.h"
#include "tests.h"
-#if defined (__vax__)
+#if defined (__vax) || defined (__vax__)
#define LOW_BOUND 1e-38
#define HIGH_BOUND 8e37
#endif
Copyright 2002, 2003 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
Copyright 2000, 2001 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
Copyright 2004 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
Copyright 2000, 2001 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
Copyright 2001, 2002 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include "config.h"
{ "125", 10, "125", 3 },
{ "125e1", 10, "1250", 5 },
+ { "12e+2", 10, "1200", 5 },
{ "125e-1", 10, "12.5", 6 },
{ "ff", 16, "255", 2 },
Copyright 2001 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
Copyright 2004 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
check_various (void)
{
mpf_t u, got, want;
- char *s;
+ const char *s;
mpf_init2 (u, 2*8*sizeof(long));
mpf_init2 (got, 2*8*sizeof(long));
Copyright 1996, 2000, 2001, 2003 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
-/* Test mpf_set.
+/* Test mpf_set, mpf_init_set.
-Copyright 2004 Free Software Foundation, Inc.
+Copyright 2004, 2012 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
mpf_clear (f);
}
+void
+check_random (long reps)
+{
+ unsigned long test;
+ gmp_randstate_ptr rands;
+ mpf_t a, b;
+ mpz_t z;
+ int precbits;
+
+#define PRECBITS 10
+
+ rands = RANDS;
+
+ mpz_init (z);
+ mpf_init2 (a, 1 << PRECBITS);
+
+ for (test = 0; test < reps; test++)
+ {
+ mpz_urandomb (z, rands, PRECBITS + 1);
+ precbits = mpz_get_ui (z) + 1;
+ mpz_urandomb (z, rands, precbits);
+ mpz_setbit (z, precbits - 1); /* make sure msb is set */
+ mpf_set_z (a, z);
+ if (precbits & 1)
+ mpf_neg (a, a);
+ mpz_urandomb (z, rands, PRECBITS);
+ mpf_div_2exp (a, a, mpz_get_ui (z) + 1);
+ mpz_urandomb (z, rands, PRECBITS);
+ precbits -= mpz_get_ui (z);
+ if (precbits <= 0)
+ precbits = 1 - precbits;
+ mpf_set_default_prec (precbits);
+
+ mpf_init_set (b, a);
+ MPF_CHECK_FORMAT (b);
+ if (!mpf_eq (a, b, precbits))
+ {
+ printf ("mpf_init_set wrong.\n");
+ abort();
+ }
+
+ mpf_set_ui (b, 0);
+ mpf_set (b, a);
+ MPF_CHECK_FORMAT (b);
+ if (!mpf_eq (a, b, precbits))
+ {
+ printf ("mpf_set wrong.\n");
+ abort();
+ }
+
+ mpf_clear (b);
+ }
+
+ mpf_clear (a);
+ mpz_clear (z);
+}
+
int
-main (void)
+main (int argc, char *argv[])
{
+ long reps = 10000;
+
tests_start ();
+ TESTS_REPS (reps, argv, argc);
check_reuse ();
+ check_random (reps);
tests_end ();
exit (0);
Copyright 2004 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
Copyright 2000, 2001, 2003 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
Copyright 2000, 2001, 2003 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
#else
{ ULONG_MAX, 2, { ULONG_MAX & GMP_NUMB_MASK,
ULONG_MAX >> GMP_NUMB_BITS } },
- { LONG_HIGHBIT, 2, { 0,
+ { ULONG_HIGHBIT, 2, { 0,
ULONG_HIGHBIT >> GMP_NUMB_BITS } },
#endif
};
Copyright 1996, 2001, 2004 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
Copyright 2004 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
Copyright 1996, 2001, 2004 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
Copyright 2001, 2002 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
Copyright 2004 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
## Process this file with automake to generate Makefile.in
-# Copyright 2001, 2002, 2003, 2009, 2010, 2012 Free Software Foundation, Inc.
+# Copyright 2001, 2002, 2003, 2009, 2010, 2011, 2012 Free Software Foundation,
+# Inc.
#
-# This file is part of the GNU MP Library.
+# This file is part of the GNU MP Library test suite.
#
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 3 of the License, or (at your
-# option) any later version.
+# The GNU MP Library test suite is free software; you can redistribute it
+# and/or modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 3 of the License,
+# or (at your option) any later version.
#
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-# License for more details.
+# The GNU MP Library test suite is distributed in the hope that it will be
+# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+# Public License for more details.
#
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+# You should have received a copy of the GNU General Public License along with
+# the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/.
INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/tests
check_PROGRAMS = t-asmtype t-aors_1 t-divrem_1 t-mod_1 t-fat t-get_d \
t-instrument t-iord_u t-mp_bases t-perfsqr t-scan logic \
t-toom22 t-toom32 t-toom33 t-toom42 t-toom43 t-toom44 \
- t-toom52 t-toom53 t-toom62 t-toom63 t-toom6h t-toom8h \
- t-mul t-mullo t-mulmod_bnm1 t-sqrmod_bnm1 \
- t-hgcd t-matrix22 t-invert t-div t-bdiv
+ t-toom52 t-toom53 t-toom54 t-toom62 t-toom63 t-toom6h t-toom8h \
+ t-toom2-sqr t-toom3-sqr t-toom4-sqr t-toom6-sqr t-toom8-sqr \
+ t-mul t-mullo t-mulmod_bnm1 t-sqrmod_bnm1 t-mulmid \
+ t-hgcd t-hgcd_appr t-matrix22 t-invert t-div t-bdiv \
+ t-broot t-brootinv
-EXTRA_DIST = toom-shared.h
+EXTRA_DIST = toom-shared.h toom-sqr-shared.h
TESTS = $(check_PROGRAMS)
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.6 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@SET_MAKE@
-# Copyright 2001, 2002, 2003, 2009, 2010, 2012 Free Software Foundation, Inc.
+# Copyright 2001, 2002, 2003, 2009, 2010, 2011, 2012 Free Software Foundation,
+# Inc.
#
-# This file is part of the GNU MP Library.
+# This file is part of the GNU MP Library test suite.
#
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 3 of the License, or (at your
-# option) any later version.
+# The GNU MP Library test suite is free software; you can redistribute it
+# and/or modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 3 of the License,
+# or (at your option) any later version.
#
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-# License for more details.
+# The GNU MP Library test suite is distributed in the hope that it will be
+# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+# Public License for more details.
#
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+# You should have received a copy of the GNU General Public License along with
+# the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/.
VPATH = @srcdir@
+am__make_dryrun = \
+ { \
+ am__dry=no; \
+ case $$MAKEFLAGS in \
+ *\\[\ \ ]*) \
+ echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \
+ | grep '^AM OK$$' >/dev/null || am__dry=yes;; \
+ *) \
+ for am__flg in $$MAKEFLAGS; do \
+ case $$am__flg in \
+ *=*|--*) ;; \
+ *n*) am__dry=yes; break;; \
+ esac; \
+ done;; \
+ esac; \
+ test $$am__dry = yes; \
+ }
pkgdatadir = $(datadir)/@PACKAGE@
pkgincludedir = $(includedir)/@PACKAGE@
pkglibdir = $(libdir)/@PACKAGE@
POST_UNINSTALL = :
build_triplet = @build@
host_triplet = @host@
-ANSI2KNR = $(top_builddir)/ansi2knr
check_PROGRAMS = t-asmtype$(EXEEXT) t-aors_1$(EXEEXT) \
t-divrem_1$(EXEEXT) t-mod_1$(EXEEXT) t-fat$(EXEEXT) \
t-get_d$(EXEEXT) t-instrument$(EXEEXT) t-iord_u$(EXEEXT) \
logic$(EXEEXT) t-toom22$(EXEEXT) t-toom32$(EXEEXT) \
t-toom33$(EXEEXT) t-toom42$(EXEEXT) t-toom43$(EXEEXT) \
t-toom44$(EXEEXT) t-toom52$(EXEEXT) t-toom53$(EXEEXT) \
- t-toom62$(EXEEXT) t-toom63$(EXEEXT) t-toom6h$(EXEEXT) \
- t-toom8h$(EXEEXT) t-mul$(EXEEXT) t-mullo$(EXEEXT) \
- t-mulmod_bnm1$(EXEEXT) t-sqrmod_bnm1$(EXEEXT) t-hgcd$(EXEEXT) \
+ t-toom54$(EXEEXT) t-toom62$(EXEEXT) t-toom63$(EXEEXT) \
+ t-toom6h$(EXEEXT) t-toom8h$(EXEEXT) t-toom2-sqr$(EXEEXT) \
+ t-toom3-sqr$(EXEEXT) t-toom4-sqr$(EXEEXT) t-toom6-sqr$(EXEEXT) \
+ t-toom8-sqr$(EXEEXT) t-mul$(EXEEXT) t-mullo$(EXEEXT) \
+ t-mulmod_bnm1$(EXEEXT) t-sqrmod_bnm1$(EXEEXT) \
+ t-mulmid$(EXEEXT) t-hgcd$(EXEEXT) t-hgcd_appr$(EXEEXT) \
t-matrix22$(EXEEXT) t-invert$(EXEEXT) t-div$(EXEEXT) \
- t-bdiv$(EXEEXT)
+ t-bdiv$(EXEEXT) t-broot$(EXEEXT) t-brootinv$(EXEEXT)
subdir = tests/mpn
DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
- $(top_srcdir)/configure.in
+ $(top_srcdir)/configure.ac
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
$(ACLOCAL_M4)
mkinstalldirs = $(install_sh) -d
CONFIG_CLEAN_FILES =
CONFIG_CLEAN_VPATH_FILES =
logic_SOURCES = logic.c
-logic_OBJECTS = logic$U.$(OBJEXT)
+logic_OBJECTS = logic.$(OBJEXT)
logic_LDADD = $(LDADD)
logic_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_aors_1_SOURCES = t-aors_1.c
-t_aors_1_OBJECTS = t-aors_1$U.$(OBJEXT)
+t_aors_1_OBJECTS = t-aors_1.$(OBJEXT)
t_aors_1_LDADD = $(LDADD)
t_aors_1_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_asmtype_SOURCES = t-asmtype.c
-t_asmtype_OBJECTS = t-asmtype$U.$(OBJEXT)
+t_asmtype_OBJECTS = t-asmtype.$(OBJEXT)
t_asmtype_LDADD = $(LDADD)
t_asmtype_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_bdiv_SOURCES = t-bdiv.c
-t_bdiv_OBJECTS = t-bdiv$U.$(OBJEXT)
+t_bdiv_OBJECTS = t-bdiv.$(OBJEXT)
t_bdiv_LDADD = $(LDADD)
t_bdiv_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
+t_broot_SOURCES = t-broot.c
+t_broot_OBJECTS = t-broot.$(OBJEXT)
+t_broot_LDADD = $(LDADD)
+t_broot_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+ $(top_builddir)/libgmp.la
+t_brootinv_SOURCES = t-brootinv.c
+t_brootinv_OBJECTS = t-brootinv.$(OBJEXT)
+t_brootinv_LDADD = $(LDADD)
+t_brootinv_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+ $(top_builddir)/libgmp.la
t_div_SOURCES = t-div.c
-t_div_OBJECTS = t-div$U.$(OBJEXT)
+t_div_OBJECTS = t-div.$(OBJEXT)
t_div_LDADD = $(LDADD)
t_div_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_divrem_1_SOURCES = t-divrem_1.c
-t_divrem_1_OBJECTS = t-divrem_1$U.$(OBJEXT)
+t_divrem_1_OBJECTS = t-divrem_1.$(OBJEXT)
t_divrem_1_LDADD = $(LDADD)
t_divrem_1_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_fat_SOURCES = t-fat.c
-t_fat_OBJECTS = t-fat$U.$(OBJEXT)
+t_fat_OBJECTS = t-fat.$(OBJEXT)
t_fat_LDADD = $(LDADD)
t_fat_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_get_d_SOURCES = t-get_d.c
-t_get_d_OBJECTS = t-get_d$U.$(OBJEXT)
+t_get_d_OBJECTS = t-get_d.$(OBJEXT)
t_get_d_LDADD = $(LDADD)
t_get_d_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_hgcd_SOURCES = t-hgcd.c
-t_hgcd_OBJECTS = t-hgcd$U.$(OBJEXT)
+t_hgcd_OBJECTS = t-hgcd.$(OBJEXT)
t_hgcd_LDADD = $(LDADD)
t_hgcd_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
+t_hgcd_appr_SOURCES = t-hgcd_appr.c
+t_hgcd_appr_OBJECTS = t-hgcd_appr.$(OBJEXT)
+t_hgcd_appr_LDADD = $(LDADD)
+t_hgcd_appr_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+ $(top_builddir)/libgmp.la
t_instrument_SOURCES = t-instrument.c
-t_instrument_OBJECTS = t-instrument$U.$(OBJEXT)
+t_instrument_OBJECTS = t-instrument.$(OBJEXT)
t_instrument_LDADD = $(LDADD)
t_instrument_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_invert_SOURCES = t-invert.c
-t_invert_OBJECTS = t-invert$U.$(OBJEXT)
+t_invert_OBJECTS = t-invert.$(OBJEXT)
t_invert_LDADD = $(LDADD)
t_invert_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_iord_u_SOURCES = t-iord_u.c
-t_iord_u_OBJECTS = t-iord_u$U.$(OBJEXT)
+t_iord_u_OBJECTS = t-iord_u.$(OBJEXT)
t_iord_u_LDADD = $(LDADD)
t_iord_u_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_matrix22_SOURCES = t-matrix22.c
-t_matrix22_OBJECTS = t-matrix22$U.$(OBJEXT)
+t_matrix22_OBJECTS = t-matrix22.$(OBJEXT)
t_matrix22_LDADD = $(LDADD)
t_matrix22_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_mod_1_SOURCES = t-mod_1.c
-t_mod_1_OBJECTS = t-mod_1$U.$(OBJEXT)
+t_mod_1_OBJECTS = t-mod_1.$(OBJEXT)
t_mod_1_LDADD = $(LDADD)
t_mod_1_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_mp_bases_SOURCES = t-mp_bases.c
-t_mp_bases_OBJECTS = t-mp_bases$U.$(OBJEXT)
+t_mp_bases_OBJECTS = t-mp_bases.$(OBJEXT)
t_mp_bases_LDADD = $(LDADD)
t_mp_bases_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_mul_SOURCES = t-mul.c
-t_mul_OBJECTS = t-mul$U.$(OBJEXT)
+t_mul_OBJECTS = t-mul.$(OBJEXT)
t_mul_LDADD = $(LDADD)
t_mul_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_mullo_SOURCES = t-mullo.c
-t_mullo_OBJECTS = t-mullo$U.$(OBJEXT)
+t_mullo_OBJECTS = t-mullo.$(OBJEXT)
t_mullo_LDADD = $(LDADD)
t_mullo_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
+t_mulmid_SOURCES = t-mulmid.c
+t_mulmid_OBJECTS = t-mulmid.$(OBJEXT)
+t_mulmid_LDADD = $(LDADD)
+t_mulmid_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+ $(top_builddir)/libgmp.la
t_mulmod_bnm1_SOURCES = t-mulmod_bnm1.c
-t_mulmod_bnm1_OBJECTS = t-mulmod_bnm1$U.$(OBJEXT)
+t_mulmod_bnm1_OBJECTS = t-mulmod_bnm1.$(OBJEXT)
t_mulmod_bnm1_LDADD = $(LDADD)
t_mulmod_bnm1_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_perfsqr_SOURCES = t-perfsqr.c
-t_perfsqr_OBJECTS = t-perfsqr$U.$(OBJEXT)
+t_perfsqr_OBJECTS = t-perfsqr.$(OBJEXT)
t_perfsqr_LDADD = $(LDADD)
t_perfsqr_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_scan_SOURCES = t-scan.c
-t_scan_OBJECTS = t-scan$U.$(OBJEXT)
+t_scan_OBJECTS = t-scan.$(OBJEXT)
t_scan_LDADD = $(LDADD)
t_scan_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_sqrmod_bnm1_SOURCES = t-sqrmod_bnm1.c
-t_sqrmod_bnm1_OBJECTS = t-sqrmod_bnm1$U.$(OBJEXT)
+t_sqrmod_bnm1_OBJECTS = t-sqrmod_bnm1.$(OBJEXT)
t_sqrmod_bnm1_LDADD = $(LDADD)
t_sqrmod_bnm1_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
+t_toom2_sqr_SOURCES = t-toom2-sqr.c
+t_toom2_sqr_OBJECTS = t-toom2-sqr.$(OBJEXT)
+t_toom2_sqr_LDADD = $(LDADD)
+t_toom2_sqr_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+ $(top_builddir)/libgmp.la
t_toom22_SOURCES = t-toom22.c
-t_toom22_OBJECTS = t-toom22$U.$(OBJEXT)
+t_toom22_OBJECTS = t-toom22.$(OBJEXT)
t_toom22_LDADD = $(LDADD)
t_toom22_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
+t_toom3_sqr_SOURCES = t-toom3-sqr.c
+t_toom3_sqr_OBJECTS = t-toom3-sqr.$(OBJEXT)
+t_toom3_sqr_LDADD = $(LDADD)
+t_toom3_sqr_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+ $(top_builddir)/libgmp.la
t_toom32_SOURCES = t-toom32.c
-t_toom32_OBJECTS = t-toom32$U.$(OBJEXT)
+t_toom32_OBJECTS = t-toom32.$(OBJEXT)
t_toom32_LDADD = $(LDADD)
t_toom32_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_toom33_SOURCES = t-toom33.c
-t_toom33_OBJECTS = t-toom33$U.$(OBJEXT)
+t_toom33_OBJECTS = t-toom33.$(OBJEXT)
t_toom33_LDADD = $(LDADD)
t_toom33_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
+t_toom4_sqr_SOURCES = t-toom4-sqr.c
+t_toom4_sqr_OBJECTS = t-toom4-sqr.$(OBJEXT)
+t_toom4_sqr_LDADD = $(LDADD)
+t_toom4_sqr_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+ $(top_builddir)/libgmp.la
t_toom42_SOURCES = t-toom42.c
-t_toom42_OBJECTS = t-toom42$U.$(OBJEXT)
+t_toom42_OBJECTS = t-toom42.$(OBJEXT)
t_toom42_LDADD = $(LDADD)
t_toom42_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_toom43_SOURCES = t-toom43.c
-t_toom43_OBJECTS = t-toom43$U.$(OBJEXT)
+t_toom43_OBJECTS = t-toom43.$(OBJEXT)
t_toom43_LDADD = $(LDADD)
t_toom43_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_toom44_SOURCES = t-toom44.c
-t_toom44_OBJECTS = t-toom44$U.$(OBJEXT)
+t_toom44_OBJECTS = t-toom44.$(OBJEXT)
t_toom44_LDADD = $(LDADD)
t_toom44_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_toom52_SOURCES = t-toom52.c
-t_toom52_OBJECTS = t-toom52$U.$(OBJEXT)
+t_toom52_OBJECTS = t-toom52.$(OBJEXT)
t_toom52_LDADD = $(LDADD)
t_toom52_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_toom53_SOURCES = t-toom53.c
-t_toom53_OBJECTS = t-toom53$U.$(OBJEXT)
+t_toom53_OBJECTS = t-toom53.$(OBJEXT)
t_toom53_LDADD = $(LDADD)
t_toom53_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
+t_toom54_SOURCES = t-toom54.c
+t_toom54_OBJECTS = t-toom54.$(OBJEXT)
+t_toom54_LDADD = $(LDADD)
+t_toom54_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+ $(top_builddir)/libgmp.la
+t_toom6_sqr_SOURCES = t-toom6-sqr.c
+t_toom6_sqr_OBJECTS = t-toom6-sqr.$(OBJEXT)
+t_toom6_sqr_LDADD = $(LDADD)
+t_toom6_sqr_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+ $(top_builddir)/libgmp.la
t_toom62_SOURCES = t-toom62.c
-t_toom62_OBJECTS = t-toom62$U.$(OBJEXT)
+t_toom62_OBJECTS = t-toom62.$(OBJEXT)
t_toom62_LDADD = $(LDADD)
t_toom62_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_toom63_SOURCES = t-toom63.c
-t_toom63_OBJECTS = t-toom63$U.$(OBJEXT)
+t_toom63_OBJECTS = t-toom63.$(OBJEXT)
t_toom63_LDADD = $(LDADD)
t_toom63_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_toom6h_SOURCES = t-toom6h.c
-t_toom6h_OBJECTS = t-toom6h$U.$(OBJEXT)
+t_toom6h_OBJECTS = t-toom6h.$(OBJEXT)
t_toom6h_LDADD = $(LDADD)
t_toom6h_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
+t_toom8_sqr_SOURCES = t-toom8-sqr.c
+t_toom8_sqr_OBJECTS = t-toom8-sqr.$(OBJEXT)
+t_toom8_sqr_LDADD = $(LDADD)
+t_toom8_sqr_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+ $(top_builddir)/libgmp.la
t_toom8h_SOURCES = t-toom8h.c
-t_toom8h_OBJECTS = t-toom8h$U.$(OBJEXT)
+t_toom8h_OBJECTS = t-toom8h.$(OBJEXT)
t_toom8h_LDADD = $(LDADD)
t_toom8h_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
--mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \
$(LDFLAGS) -o $@
-SOURCES = logic.c t-aors_1.c t-asmtype.c t-bdiv.c t-div.c t-divrem_1.c \
- t-fat.c t-get_d.c t-hgcd.c t-instrument.c t-invert.c \
- t-iord_u.c t-matrix22.c t-mod_1.c t-mp_bases.c t-mul.c \
- t-mullo.c t-mulmod_bnm1.c t-perfsqr.c t-scan.c t-sqrmod_bnm1.c \
- t-toom22.c t-toom32.c t-toom33.c t-toom42.c t-toom43.c \
- t-toom44.c t-toom52.c t-toom53.c t-toom62.c t-toom63.c \
- t-toom6h.c t-toom8h.c
-DIST_SOURCES = logic.c t-aors_1.c t-asmtype.c t-bdiv.c t-div.c \
- t-divrem_1.c t-fat.c t-get_d.c t-hgcd.c t-instrument.c \
- t-invert.c t-iord_u.c t-matrix22.c t-mod_1.c t-mp_bases.c \
- t-mul.c t-mullo.c t-mulmod_bnm1.c t-perfsqr.c t-scan.c \
- t-sqrmod_bnm1.c t-toom22.c t-toom32.c t-toom33.c t-toom42.c \
- t-toom43.c t-toom44.c t-toom52.c t-toom53.c t-toom62.c \
- t-toom63.c t-toom6h.c t-toom8h.c
+SOURCES = logic.c t-aors_1.c t-asmtype.c t-bdiv.c t-broot.c \
+ t-brootinv.c t-div.c t-divrem_1.c t-fat.c t-get_d.c t-hgcd.c \
+ t-hgcd_appr.c t-instrument.c t-invert.c t-iord_u.c \
+ t-matrix22.c t-mod_1.c t-mp_bases.c t-mul.c t-mullo.c \
+ t-mulmid.c t-mulmod_bnm1.c t-perfsqr.c t-scan.c \
+ t-sqrmod_bnm1.c t-toom2-sqr.c t-toom22.c t-toom3-sqr.c \
+ t-toom32.c t-toom33.c t-toom4-sqr.c t-toom42.c t-toom43.c \
+ t-toom44.c t-toom52.c t-toom53.c t-toom54.c t-toom6-sqr.c \
+ t-toom62.c t-toom63.c t-toom6h.c t-toom8-sqr.c t-toom8h.c
+DIST_SOURCES = logic.c t-aors_1.c t-asmtype.c t-bdiv.c t-broot.c \
+ t-brootinv.c t-div.c t-divrem_1.c t-fat.c t-get_d.c t-hgcd.c \
+ t-hgcd_appr.c t-instrument.c t-invert.c t-iord_u.c \
+ t-matrix22.c t-mod_1.c t-mp_bases.c t-mul.c t-mullo.c \
+ t-mulmid.c t-mulmod_bnm1.c t-perfsqr.c t-scan.c \
+ t-sqrmod_bnm1.c t-toom2-sqr.c t-toom22.c t-toom3-sqr.c \
+ t-toom32.c t-toom33.c t-toom4-sqr.c t-toom42.c t-toom43.c \
+ t-toom44.c t-toom52.c t-toom53.c t-toom54.c t-toom6-sqr.c \
+ t-toom62.c t-toom63.c t-toom6h.c t-toom8-sqr.c t-toom8h.c
+am__can_run_installinfo = \
+ case $$AM_UPDATE_INFO_DIR in \
+ n|no|NO) false;; \
+ *) (install-info --version) >/dev/null 2>&1;; \
+ esac
ETAGS = etags
CTAGS = ctags
am__tty_colors = \
SPEED_CYCLECOUNTER_OBJ = @SPEED_CYCLECOUNTER_OBJ@
STRIP = @STRIP@
TAL_OBJECT = @TAL_OBJECT@
+TUNE_LIBS = @TUNE_LIBS@
TUNE_SQR_OBJ = @TUNE_SQR_OBJ@
-U = @U@
U_FOR_BUILD = @U_FOR_BUILD@
VERSION = @VERSION@
WITH_READLINE_01 = @WITH_READLINE_01@
mkdir_p = @mkdir_p@
mpn_objects = @mpn_objects@
mpn_objs_in_libgmp = @mpn_objs_in_libgmp@
-mpn_objs_in_libmp = @mpn_objs_in_libmp@
oldincludedir = @oldincludedir@
pdfdir = @pdfdir@
prefix = @prefix@
top_srcdir = @top_srcdir@
INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/tests
LDADD = $(top_builddir)/tests/libtests.la $(top_builddir)/libgmp.la
-EXTRA_DIST = toom-shared.h
+EXTRA_DIST = toom-shared.h toom-sqr-shared.h
TESTS = $(check_PROGRAMS)
all: all-am
list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
echo " rm -f" $$list; \
rm -f $$list
-logic$(EXEEXT): $(logic_OBJECTS) $(logic_DEPENDENCIES)
+logic$(EXEEXT): $(logic_OBJECTS) $(logic_DEPENDENCIES) $(EXTRA_logic_DEPENDENCIES)
@rm -f logic$(EXEEXT)
$(LINK) $(logic_OBJECTS) $(logic_LDADD) $(LIBS)
-t-aors_1$(EXEEXT): $(t_aors_1_OBJECTS) $(t_aors_1_DEPENDENCIES)
+t-aors_1$(EXEEXT): $(t_aors_1_OBJECTS) $(t_aors_1_DEPENDENCIES) $(EXTRA_t_aors_1_DEPENDENCIES)
@rm -f t-aors_1$(EXEEXT)
$(LINK) $(t_aors_1_OBJECTS) $(t_aors_1_LDADD) $(LIBS)
-t-asmtype$(EXEEXT): $(t_asmtype_OBJECTS) $(t_asmtype_DEPENDENCIES)
+t-asmtype$(EXEEXT): $(t_asmtype_OBJECTS) $(t_asmtype_DEPENDENCIES) $(EXTRA_t_asmtype_DEPENDENCIES)
@rm -f t-asmtype$(EXEEXT)
$(LINK) $(t_asmtype_OBJECTS) $(t_asmtype_LDADD) $(LIBS)
-t-bdiv$(EXEEXT): $(t_bdiv_OBJECTS) $(t_bdiv_DEPENDENCIES)
+t-bdiv$(EXEEXT): $(t_bdiv_OBJECTS) $(t_bdiv_DEPENDENCIES) $(EXTRA_t_bdiv_DEPENDENCIES)
@rm -f t-bdiv$(EXEEXT)
$(LINK) $(t_bdiv_OBJECTS) $(t_bdiv_LDADD) $(LIBS)
-t-div$(EXEEXT): $(t_div_OBJECTS) $(t_div_DEPENDENCIES)
+t-broot$(EXEEXT): $(t_broot_OBJECTS) $(t_broot_DEPENDENCIES) $(EXTRA_t_broot_DEPENDENCIES)
+ @rm -f t-broot$(EXEEXT)
+ $(LINK) $(t_broot_OBJECTS) $(t_broot_LDADD) $(LIBS)
+t-brootinv$(EXEEXT): $(t_brootinv_OBJECTS) $(t_brootinv_DEPENDENCIES) $(EXTRA_t_brootinv_DEPENDENCIES)
+ @rm -f t-brootinv$(EXEEXT)
+ $(LINK) $(t_brootinv_OBJECTS) $(t_brootinv_LDADD) $(LIBS)
+t-div$(EXEEXT): $(t_div_OBJECTS) $(t_div_DEPENDENCIES) $(EXTRA_t_div_DEPENDENCIES)
@rm -f t-div$(EXEEXT)
$(LINK) $(t_div_OBJECTS) $(t_div_LDADD) $(LIBS)
-t-divrem_1$(EXEEXT): $(t_divrem_1_OBJECTS) $(t_divrem_1_DEPENDENCIES)
+t-divrem_1$(EXEEXT): $(t_divrem_1_OBJECTS) $(t_divrem_1_DEPENDENCIES) $(EXTRA_t_divrem_1_DEPENDENCIES)
@rm -f t-divrem_1$(EXEEXT)
$(LINK) $(t_divrem_1_OBJECTS) $(t_divrem_1_LDADD) $(LIBS)
-t-fat$(EXEEXT): $(t_fat_OBJECTS) $(t_fat_DEPENDENCIES)
+t-fat$(EXEEXT): $(t_fat_OBJECTS) $(t_fat_DEPENDENCIES) $(EXTRA_t_fat_DEPENDENCIES)
@rm -f t-fat$(EXEEXT)
$(LINK) $(t_fat_OBJECTS) $(t_fat_LDADD) $(LIBS)
-t-get_d$(EXEEXT): $(t_get_d_OBJECTS) $(t_get_d_DEPENDENCIES)
+t-get_d$(EXEEXT): $(t_get_d_OBJECTS) $(t_get_d_DEPENDENCIES) $(EXTRA_t_get_d_DEPENDENCIES)
@rm -f t-get_d$(EXEEXT)
$(LINK) $(t_get_d_OBJECTS) $(t_get_d_LDADD) $(LIBS)
-t-hgcd$(EXEEXT): $(t_hgcd_OBJECTS) $(t_hgcd_DEPENDENCIES)
+t-hgcd$(EXEEXT): $(t_hgcd_OBJECTS) $(t_hgcd_DEPENDENCIES) $(EXTRA_t_hgcd_DEPENDENCIES)
@rm -f t-hgcd$(EXEEXT)
$(LINK) $(t_hgcd_OBJECTS) $(t_hgcd_LDADD) $(LIBS)
-t-instrument$(EXEEXT): $(t_instrument_OBJECTS) $(t_instrument_DEPENDENCIES)
+t-hgcd_appr$(EXEEXT): $(t_hgcd_appr_OBJECTS) $(t_hgcd_appr_DEPENDENCIES) $(EXTRA_t_hgcd_appr_DEPENDENCIES)
+ @rm -f t-hgcd_appr$(EXEEXT)
+ $(LINK) $(t_hgcd_appr_OBJECTS) $(t_hgcd_appr_LDADD) $(LIBS)
+t-instrument$(EXEEXT): $(t_instrument_OBJECTS) $(t_instrument_DEPENDENCIES) $(EXTRA_t_instrument_DEPENDENCIES)
@rm -f t-instrument$(EXEEXT)
$(LINK) $(t_instrument_OBJECTS) $(t_instrument_LDADD) $(LIBS)
-t-invert$(EXEEXT): $(t_invert_OBJECTS) $(t_invert_DEPENDENCIES)
+t-invert$(EXEEXT): $(t_invert_OBJECTS) $(t_invert_DEPENDENCIES) $(EXTRA_t_invert_DEPENDENCIES)
@rm -f t-invert$(EXEEXT)
$(LINK) $(t_invert_OBJECTS) $(t_invert_LDADD) $(LIBS)
-t-iord_u$(EXEEXT): $(t_iord_u_OBJECTS) $(t_iord_u_DEPENDENCIES)
+t-iord_u$(EXEEXT): $(t_iord_u_OBJECTS) $(t_iord_u_DEPENDENCIES) $(EXTRA_t_iord_u_DEPENDENCIES)
@rm -f t-iord_u$(EXEEXT)
$(LINK) $(t_iord_u_OBJECTS) $(t_iord_u_LDADD) $(LIBS)
-t-matrix22$(EXEEXT): $(t_matrix22_OBJECTS) $(t_matrix22_DEPENDENCIES)
+t-matrix22$(EXEEXT): $(t_matrix22_OBJECTS) $(t_matrix22_DEPENDENCIES) $(EXTRA_t_matrix22_DEPENDENCIES)
@rm -f t-matrix22$(EXEEXT)
$(LINK) $(t_matrix22_OBJECTS) $(t_matrix22_LDADD) $(LIBS)
-t-mod_1$(EXEEXT): $(t_mod_1_OBJECTS) $(t_mod_1_DEPENDENCIES)
+t-mod_1$(EXEEXT): $(t_mod_1_OBJECTS) $(t_mod_1_DEPENDENCIES) $(EXTRA_t_mod_1_DEPENDENCIES)
@rm -f t-mod_1$(EXEEXT)
$(LINK) $(t_mod_1_OBJECTS) $(t_mod_1_LDADD) $(LIBS)
-t-mp_bases$(EXEEXT): $(t_mp_bases_OBJECTS) $(t_mp_bases_DEPENDENCIES)
+t-mp_bases$(EXEEXT): $(t_mp_bases_OBJECTS) $(t_mp_bases_DEPENDENCIES) $(EXTRA_t_mp_bases_DEPENDENCIES)
@rm -f t-mp_bases$(EXEEXT)
$(LINK) $(t_mp_bases_OBJECTS) $(t_mp_bases_LDADD) $(LIBS)
-t-mul$(EXEEXT): $(t_mul_OBJECTS) $(t_mul_DEPENDENCIES)
+t-mul$(EXEEXT): $(t_mul_OBJECTS) $(t_mul_DEPENDENCIES) $(EXTRA_t_mul_DEPENDENCIES)
@rm -f t-mul$(EXEEXT)
$(LINK) $(t_mul_OBJECTS) $(t_mul_LDADD) $(LIBS)
-t-mullo$(EXEEXT): $(t_mullo_OBJECTS) $(t_mullo_DEPENDENCIES)
+t-mullo$(EXEEXT): $(t_mullo_OBJECTS) $(t_mullo_DEPENDENCIES) $(EXTRA_t_mullo_DEPENDENCIES)
@rm -f t-mullo$(EXEEXT)
$(LINK) $(t_mullo_OBJECTS) $(t_mullo_LDADD) $(LIBS)
-t-mulmod_bnm1$(EXEEXT): $(t_mulmod_bnm1_OBJECTS) $(t_mulmod_bnm1_DEPENDENCIES)
+t-mulmid$(EXEEXT): $(t_mulmid_OBJECTS) $(t_mulmid_DEPENDENCIES) $(EXTRA_t_mulmid_DEPENDENCIES)
+ @rm -f t-mulmid$(EXEEXT)
+ $(LINK) $(t_mulmid_OBJECTS) $(t_mulmid_LDADD) $(LIBS)
+t-mulmod_bnm1$(EXEEXT): $(t_mulmod_bnm1_OBJECTS) $(t_mulmod_bnm1_DEPENDENCIES) $(EXTRA_t_mulmod_bnm1_DEPENDENCIES)
@rm -f t-mulmod_bnm1$(EXEEXT)
$(LINK) $(t_mulmod_bnm1_OBJECTS) $(t_mulmod_bnm1_LDADD) $(LIBS)
-t-perfsqr$(EXEEXT): $(t_perfsqr_OBJECTS) $(t_perfsqr_DEPENDENCIES)
+t-perfsqr$(EXEEXT): $(t_perfsqr_OBJECTS) $(t_perfsqr_DEPENDENCIES) $(EXTRA_t_perfsqr_DEPENDENCIES)
@rm -f t-perfsqr$(EXEEXT)
$(LINK) $(t_perfsqr_OBJECTS) $(t_perfsqr_LDADD) $(LIBS)
-t-scan$(EXEEXT): $(t_scan_OBJECTS) $(t_scan_DEPENDENCIES)
+t-scan$(EXEEXT): $(t_scan_OBJECTS) $(t_scan_DEPENDENCIES) $(EXTRA_t_scan_DEPENDENCIES)
@rm -f t-scan$(EXEEXT)
$(LINK) $(t_scan_OBJECTS) $(t_scan_LDADD) $(LIBS)
-t-sqrmod_bnm1$(EXEEXT): $(t_sqrmod_bnm1_OBJECTS) $(t_sqrmod_bnm1_DEPENDENCIES)
+t-sqrmod_bnm1$(EXEEXT): $(t_sqrmod_bnm1_OBJECTS) $(t_sqrmod_bnm1_DEPENDENCIES) $(EXTRA_t_sqrmod_bnm1_DEPENDENCIES)
@rm -f t-sqrmod_bnm1$(EXEEXT)
$(LINK) $(t_sqrmod_bnm1_OBJECTS) $(t_sqrmod_bnm1_LDADD) $(LIBS)
-t-toom22$(EXEEXT): $(t_toom22_OBJECTS) $(t_toom22_DEPENDENCIES)
+t-toom2-sqr$(EXEEXT): $(t_toom2_sqr_OBJECTS) $(t_toom2_sqr_DEPENDENCIES) $(EXTRA_t_toom2_sqr_DEPENDENCIES)
+ @rm -f t-toom2-sqr$(EXEEXT)
+ $(LINK) $(t_toom2_sqr_OBJECTS) $(t_toom2_sqr_LDADD) $(LIBS)
+t-toom22$(EXEEXT): $(t_toom22_OBJECTS) $(t_toom22_DEPENDENCIES) $(EXTRA_t_toom22_DEPENDENCIES)
@rm -f t-toom22$(EXEEXT)
$(LINK) $(t_toom22_OBJECTS) $(t_toom22_LDADD) $(LIBS)
-t-toom32$(EXEEXT): $(t_toom32_OBJECTS) $(t_toom32_DEPENDENCIES)
+t-toom3-sqr$(EXEEXT): $(t_toom3_sqr_OBJECTS) $(t_toom3_sqr_DEPENDENCIES) $(EXTRA_t_toom3_sqr_DEPENDENCIES)
+ @rm -f t-toom3-sqr$(EXEEXT)
+ $(LINK) $(t_toom3_sqr_OBJECTS) $(t_toom3_sqr_LDADD) $(LIBS)
+t-toom32$(EXEEXT): $(t_toom32_OBJECTS) $(t_toom32_DEPENDENCIES) $(EXTRA_t_toom32_DEPENDENCIES)
@rm -f t-toom32$(EXEEXT)
$(LINK) $(t_toom32_OBJECTS) $(t_toom32_LDADD) $(LIBS)
-t-toom33$(EXEEXT): $(t_toom33_OBJECTS) $(t_toom33_DEPENDENCIES)
+t-toom33$(EXEEXT): $(t_toom33_OBJECTS) $(t_toom33_DEPENDENCIES) $(EXTRA_t_toom33_DEPENDENCIES)
@rm -f t-toom33$(EXEEXT)
$(LINK) $(t_toom33_OBJECTS) $(t_toom33_LDADD) $(LIBS)
-t-toom42$(EXEEXT): $(t_toom42_OBJECTS) $(t_toom42_DEPENDENCIES)
+t-toom4-sqr$(EXEEXT): $(t_toom4_sqr_OBJECTS) $(t_toom4_sqr_DEPENDENCIES) $(EXTRA_t_toom4_sqr_DEPENDENCIES)
+ @rm -f t-toom4-sqr$(EXEEXT)
+ $(LINK) $(t_toom4_sqr_OBJECTS) $(t_toom4_sqr_LDADD) $(LIBS)
+t-toom42$(EXEEXT): $(t_toom42_OBJECTS) $(t_toom42_DEPENDENCIES) $(EXTRA_t_toom42_DEPENDENCIES)
@rm -f t-toom42$(EXEEXT)
$(LINK) $(t_toom42_OBJECTS) $(t_toom42_LDADD) $(LIBS)
-t-toom43$(EXEEXT): $(t_toom43_OBJECTS) $(t_toom43_DEPENDENCIES)
+t-toom43$(EXEEXT): $(t_toom43_OBJECTS) $(t_toom43_DEPENDENCIES) $(EXTRA_t_toom43_DEPENDENCIES)
@rm -f t-toom43$(EXEEXT)
$(LINK) $(t_toom43_OBJECTS) $(t_toom43_LDADD) $(LIBS)
-t-toom44$(EXEEXT): $(t_toom44_OBJECTS) $(t_toom44_DEPENDENCIES)
+t-toom44$(EXEEXT): $(t_toom44_OBJECTS) $(t_toom44_DEPENDENCIES) $(EXTRA_t_toom44_DEPENDENCIES)
@rm -f t-toom44$(EXEEXT)
$(LINK) $(t_toom44_OBJECTS) $(t_toom44_LDADD) $(LIBS)
-t-toom52$(EXEEXT): $(t_toom52_OBJECTS) $(t_toom52_DEPENDENCIES)
+t-toom52$(EXEEXT): $(t_toom52_OBJECTS) $(t_toom52_DEPENDENCIES) $(EXTRA_t_toom52_DEPENDENCIES)
@rm -f t-toom52$(EXEEXT)
$(LINK) $(t_toom52_OBJECTS) $(t_toom52_LDADD) $(LIBS)
-t-toom53$(EXEEXT): $(t_toom53_OBJECTS) $(t_toom53_DEPENDENCIES)
+t-toom53$(EXEEXT): $(t_toom53_OBJECTS) $(t_toom53_DEPENDENCIES) $(EXTRA_t_toom53_DEPENDENCIES)
@rm -f t-toom53$(EXEEXT)
$(LINK) $(t_toom53_OBJECTS) $(t_toom53_LDADD) $(LIBS)
-t-toom62$(EXEEXT): $(t_toom62_OBJECTS) $(t_toom62_DEPENDENCIES)
+t-toom54$(EXEEXT): $(t_toom54_OBJECTS) $(t_toom54_DEPENDENCIES) $(EXTRA_t_toom54_DEPENDENCIES)
+ @rm -f t-toom54$(EXEEXT)
+ $(LINK) $(t_toom54_OBJECTS) $(t_toom54_LDADD) $(LIBS)
+t-toom6-sqr$(EXEEXT): $(t_toom6_sqr_OBJECTS) $(t_toom6_sqr_DEPENDENCIES) $(EXTRA_t_toom6_sqr_DEPENDENCIES)
+ @rm -f t-toom6-sqr$(EXEEXT)
+ $(LINK) $(t_toom6_sqr_OBJECTS) $(t_toom6_sqr_LDADD) $(LIBS)
+t-toom62$(EXEEXT): $(t_toom62_OBJECTS) $(t_toom62_DEPENDENCIES) $(EXTRA_t_toom62_DEPENDENCIES)
@rm -f t-toom62$(EXEEXT)
$(LINK) $(t_toom62_OBJECTS) $(t_toom62_LDADD) $(LIBS)
-t-toom63$(EXEEXT): $(t_toom63_OBJECTS) $(t_toom63_DEPENDENCIES)
+t-toom63$(EXEEXT): $(t_toom63_OBJECTS) $(t_toom63_DEPENDENCIES) $(EXTRA_t_toom63_DEPENDENCIES)
@rm -f t-toom63$(EXEEXT)
$(LINK) $(t_toom63_OBJECTS) $(t_toom63_LDADD) $(LIBS)
-t-toom6h$(EXEEXT): $(t_toom6h_OBJECTS) $(t_toom6h_DEPENDENCIES)
+t-toom6h$(EXEEXT): $(t_toom6h_OBJECTS) $(t_toom6h_DEPENDENCIES) $(EXTRA_t_toom6h_DEPENDENCIES)
@rm -f t-toom6h$(EXEEXT)
$(LINK) $(t_toom6h_OBJECTS) $(t_toom6h_LDADD) $(LIBS)
-t-toom8h$(EXEEXT): $(t_toom8h_OBJECTS) $(t_toom8h_DEPENDENCIES)
+t-toom8-sqr$(EXEEXT): $(t_toom8_sqr_OBJECTS) $(t_toom8_sqr_DEPENDENCIES) $(EXTRA_t_toom8_sqr_DEPENDENCIES)
+ @rm -f t-toom8-sqr$(EXEEXT)
+ $(LINK) $(t_toom8_sqr_OBJECTS) $(t_toom8_sqr_LDADD) $(LIBS)
+t-toom8h$(EXEEXT): $(t_toom8h_OBJECTS) $(t_toom8h_DEPENDENCIES) $(EXTRA_t_toom8h_DEPENDENCIES)
@rm -f t-toom8h$(EXEEXT)
$(LINK) $(t_toom8h_OBJECTS) $(t_toom8h_LDADD) $(LIBS)
distclean-compile:
-rm -f *.tab.c
-$(top_builddir)/ansi2knr:
- $(am__cd) $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) ./ansi2knr
-
-mostlyclean-kr:
- -test "$U" = "" || rm -f *_.c
.c.o:
$(COMPILE) -c $<
.c.lo:
$(LTCOMPILE) -c -o $@ $<
-logic_.c: logic.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/logic.c; then echo $(srcdir)/logic.c; else echo logic.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-aors_1_.c: t-aors_1.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-aors_1.c; then echo $(srcdir)/t-aors_1.c; else echo t-aors_1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-asmtype_.c: t-asmtype.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-asmtype.c; then echo $(srcdir)/t-asmtype.c; else echo t-asmtype.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-bdiv_.c: t-bdiv.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-bdiv.c; then echo $(srcdir)/t-bdiv.c; else echo t-bdiv.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-div_.c: t-div.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-div.c; then echo $(srcdir)/t-div.c; else echo t-div.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-divrem_1_.c: t-divrem_1.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-divrem_1.c; then echo $(srcdir)/t-divrem_1.c; else echo t-divrem_1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-fat_.c: t-fat.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-fat.c; then echo $(srcdir)/t-fat.c; else echo t-fat.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-get_d_.c: t-get_d.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-get_d.c; then echo $(srcdir)/t-get_d.c; else echo t-get_d.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-hgcd_.c: t-hgcd.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-hgcd.c; then echo $(srcdir)/t-hgcd.c; else echo t-hgcd.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-instrument_.c: t-instrument.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-instrument.c; then echo $(srcdir)/t-instrument.c; else echo t-instrument.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-invert_.c: t-invert.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-invert.c; then echo $(srcdir)/t-invert.c; else echo t-invert.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-iord_u_.c: t-iord_u.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-iord_u.c; then echo $(srcdir)/t-iord_u.c; else echo t-iord_u.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-matrix22_.c: t-matrix22.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-matrix22.c; then echo $(srcdir)/t-matrix22.c; else echo t-matrix22.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-mod_1_.c: t-mod_1.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-mod_1.c; then echo $(srcdir)/t-mod_1.c; else echo t-mod_1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-mp_bases_.c: t-mp_bases.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-mp_bases.c; then echo $(srcdir)/t-mp_bases.c; else echo t-mp_bases.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-mul_.c: t-mul.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-mul.c; then echo $(srcdir)/t-mul.c; else echo t-mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-mullo_.c: t-mullo.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-mullo.c; then echo $(srcdir)/t-mullo.c; else echo t-mullo.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-mulmod_bnm1_.c: t-mulmod_bnm1.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-mulmod_bnm1.c; then echo $(srcdir)/t-mulmod_bnm1.c; else echo t-mulmod_bnm1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-perfsqr_.c: t-perfsqr.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-perfsqr.c; then echo $(srcdir)/t-perfsqr.c; else echo t-perfsqr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-scan_.c: t-scan.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-scan.c; then echo $(srcdir)/t-scan.c; else echo t-scan.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-sqrmod_bnm1_.c: t-sqrmod_bnm1.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-sqrmod_bnm1.c; then echo $(srcdir)/t-sqrmod_bnm1.c; else echo t-sqrmod_bnm1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-toom22_.c: t-toom22.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-toom22.c; then echo $(srcdir)/t-toom22.c; else echo t-toom22.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-toom32_.c: t-toom32.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-toom32.c; then echo $(srcdir)/t-toom32.c; else echo t-toom32.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-toom33_.c: t-toom33.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-toom33.c; then echo $(srcdir)/t-toom33.c; else echo t-toom33.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-toom42_.c: t-toom42.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-toom42.c; then echo $(srcdir)/t-toom42.c; else echo t-toom42.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-toom43_.c: t-toom43.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-toom43.c; then echo $(srcdir)/t-toom43.c; else echo t-toom43.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-toom44_.c: t-toom44.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-toom44.c; then echo $(srcdir)/t-toom44.c; else echo t-toom44.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-toom52_.c: t-toom52.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-toom52.c; then echo $(srcdir)/t-toom52.c; else echo t-toom52.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-toom53_.c: t-toom53.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-toom53.c; then echo $(srcdir)/t-toom53.c; else echo t-toom53.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-toom62_.c: t-toom62.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-toom62.c; then echo $(srcdir)/t-toom62.c; else echo t-toom62.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-toom63_.c: t-toom63.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-toom63.c; then echo $(srcdir)/t-toom63.c; else echo t-toom63.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-toom6h_.c: t-toom6h.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-toom6h.c; then echo $(srcdir)/t-toom6h.c; else echo t-toom6h.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-toom8h_.c: t-toom8h.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-toom8h.c; then echo $(srcdir)/t-toom8h.c; else echo t-toom8h.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-logic_.$(OBJEXT) logic_.lo t-aors_1_.$(OBJEXT) t-aors_1_.lo \
-t-asmtype_.$(OBJEXT) t-asmtype_.lo t-bdiv_.$(OBJEXT) t-bdiv_.lo \
-t-div_.$(OBJEXT) t-div_.lo t-divrem_1_.$(OBJEXT) t-divrem_1_.lo \
-t-fat_.$(OBJEXT) t-fat_.lo t-get_d_.$(OBJEXT) t-get_d_.lo \
-t-hgcd_.$(OBJEXT) t-hgcd_.lo t-instrument_.$(OBJEXT) t-instrument_.lo \
-t-invert_.$(OBJEXT) t-invert_.lo t-iord_u_.$(OBJEXT) t-iord_u_.lo \
-t-matrix22_.$(OBJEXT) t-matrix22_.lo t-mod_1_.$(OBJEXT) t-mod_1_.lo \
-t-mp_bases_.$(OBJEXT) t-mp_bases_.lo t-mul_.$(OBJEXT) t-mul_.lo \
-t-mullo_.$(OBJEXT) t-mullo_.lo t-mulmod_bnm1_.$(OBJEXT) \
-t-mulmod_bnm1_.lo t-perfsqr_.$(OBJEXT) t-perfsqr_.lo t-scan_.$(OBJEXT) \
-t-scan_.lo t-sqrmod_bnm1_.$(OBJEXT) t-sqrmod_bnm1_.lo \
-t-toom22_.$(OBJEXT) t-toom22_.lo t-toom32_.$(OBJEXT) t-toom32_.lo \
-t-toom33_.$(OBJEXT) t-toom33_.lo t-toom42_.$(OBJEXT) t-toom42_.lo \
-t-toom43_.$(OBJEXT) t-toom43_.lo t-toom44_.$(OBJEXT) t-toom44_.lo \
-t-toom52_.$(OBJEXT) t-toom52_.lo t-toom53_.$(OBJEXT) t-toom53_.lo \
-t-toom62_.$(OBJEXT) t-toom62_.lo t-toom63_.$(OBJEXT) t-toom63_.lo \
-t-toom6h_.$(OBJEXT) t-toom6h_.lo t-toom8h_.$(OBJEXT) t-toom8h_.lo : \
-$(ANSI2KNR)
mostlyclean-libtool:
-rm -f *.lo
fi; \
dashes=`echo "$$dashes" | sed s/./=/g`; \
if test "$$failed" -eq 0; then \
- echo "$$grn$$dashes"; \
+ col="$$grn"; \
else \
- echo "$$red$$dashes"; \
+ col="$$red"; \
fi; \
- echo "$$banner"; \
- test -z "$$skipped" || echo "$$skipped"; \
- test -z "$$report" || echo "$$report"; \
- echo "$$dashes$$std"; \
+ echo "$${col}$$dashes$${std}"; \
+ echo "$${col}$$banner$${std}"; \
+ test -z "$$skipped" || echo "$${col}$$skipped$${std}"; \
+ test -z "$$report" || echo "$${col}$$report$${std}"; \
+ echo "$${col}$$dashes$${std}"; \
test "$$failed" -eq 0; \
else :; fi
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
mostlyclean: mostlyclean-am
-mostlyclean-am: mostlyclean-compile mostlyclean-generic mostlyclean-kr \
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
mostlyclean-libtool
pdf: pdf-am
uninstall-am:
-.MAKE: $(top_builddir)/ansi2knr check-am install-am install-strip
+.MAKE: check-am install-am install-strip
.PHONY: CTAGS GTAGS all all-am check check-TESTS check-am clean \
clean-checkPROGRAMS clean-generic clean-libtool ctags \
install-ps install-ps-am install-strip installcheck \
installcheck-am installdirs maintainer-clean \
maintainer-clean-generic mostlyclean mostlyclean-compile \
- mostlyclean-generic mostlyclean-kr mostlyclean-libtool pdf \
- pdf-am ps ps-am tags uninstall uninstall-am
+ mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
+ tags uninstall uninstall-am
$(top_builddir)/tests/libtests.la:
/* Test mpn_and, mpn_ior, mpn_xor, mpn_andn, mpn_iorn, mpn_xnor, mpn_nand, and
mpn_nior.
-Copyright 2011, 2012 Free Software Foundation, Inc.
+Copyright 2011, 2012, 2013 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdlib.h>
#include <stdio.h>
+/* Fake native prevalence of the tested operations, so that we actually test
+ the compiled functions, i.e., the ones which users will reach. The inlined
+ variants will be tested through tests/mpz/logic.c. */
+#define HAVE_NATIVE_mpn_com 1
+#define HAVE_NATIVE_mpn_and_n 1
+#define HAVE_NATIVE_mpn_andn_n 1
+#define HAVE_NATIVE_mpn_nand_n 1
+#define HAVE_NATIVE_mpn_ior_n 1
+#define HAVE_NATIVE_mpn_iorn_n 1
+#define HAVE_NATIVE_mpn_nior_n 1
+#define HAVE_NATIVE_mpn_xor_n 1
+#define HAVE_NATIVE_mpn_xnor_n 1
+
#include "gmp.h"
#include "gmp-impl.h"
#include "tests.h"
+
void
check_one (mp_srcptr refp, mp_srcptr rp, mp_srcptr ap, mp_srcptr bp, mp_size_t n, char *funcname)
{
if (mpn_cmp (refp, rp, n))
{
- printf ("ERROR in mpn_%s_n\n", funcname);
+ printf ("ERROR in mpn_%s\n", funcname);
printf ("a: "); mpn_dump (ap, n);
printf ("b: "); mpn_dump (bp, n);
printf ("r: "); mpn_dump (rp, n);
refmpn_and_n (refp, ap, bp, n);
mpn_and_n (rp, ap, bp, n);
- check_one (refp, rp, ap, bp, n, "and");
+ check_one (refp, rp, ap, bp, n, "and_n");
refmpn_ior_n (refp, ap, bp, n);
mpn_ior_n (rp, ap, bp, n);
- check_one (refp, rp, ap, bp, n, "ior");
+ check_one (refp, rp, ap, bp, n, "ior_n");
refmpn_xor_n (refp, ap, bp, n);
mpn_xor_n (rp, ap, bp, n);
- check_one (refp, rp, ap, bp, n, "xor");
+ check_one (refp, rp, ap, bp, n, "xor_n");
refmpn_andn_n (refp, ap, bp, n);
mpn_andn_n (rp, ap, bp, n);
- check_one (refp, rp, ap, bp, n, "andn");
+ check_one (refp, rp, ap, bp, n, "andn_n");
refmpn_iorn_n (refp, ap, bp, n);
mpn_iorn_n (rp, ap, bp, n);
- check_one (refp, rp, ap, bp, n, "iorn");
+ check_one (refp, rp, ap, bp, n, "iorn_n");
refmpn_nand_n (refp, ap, bp, n);
mpn_nand_n (rp, ap, bp, n);
- check_one (refp, rp, ap, bp, n, "nand");
+ check_one (refp, rp, ap, bp, n, "nand_n");
refmpn_nior_n (refp, ap, bp, n);
mpn_nior_n (rp, ap, bp, n);
- check_one (refp, rp, ap, bp, n, "nior");
+ check_one (refp, rp, ap, bp, n, "nior_n");
refmpn_xnor_n (refp, ap, bp, n);
mpn_xnor_n (rp, ap, bp, n);
- check_one (refp, rp, ap, bp, n, "xnor");
+ check_one (refp, rp, ap, bp, n, "xnor_n");
+
+ refmpn_com (refp, ap, n);
+ mpn_com (rp, ap, n);
+ check_one (refp, rp, ap, bp, n, "com");
}
}
Copyright 2001, 2002 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
got, data[i].want, data[i].size); \
} while (0)
-typedef mp_limb_t (*mpn_aors_1_t)
- __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_limb_t));
-mpn_aors_1_t fudge __GMP_PROTO ((mpn_aors_1_t));
+typedef mp_limb_t (*mpn_aors_1_t) (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+mpn_aors_1_t fudge (mpn_aors_1_t);
void
Copyright 2001 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
/* Copyright 2006, 2007, 2009, 2010 Free Software Foundation, Inc.
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; either version 3 of the License, or (at your option) any later
-version.
+This file is part of the GNU MP Library test suite.
-This program is distributed in the hope that it will be useful, but WITHOUT ANY
-WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
-PARTICULAR PURPOSE. See the GNU General Public License for more details.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program. If not, see http://www.gnu.org/licenses/. */
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdlib.h> /* for strtol */
void
check_one (mp_ptr qp, mp_srcptr rp, mp_limb_t rh,
- mp_srcptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn, char *fname)
+ mp_srcptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn, const char *fname)
{
mp_size_t qn;
int cmp;
check_one (qp, NULL, 0, np, nn, dp, dn, "mpn_dcpi1_bdiv_q");
}
+ if (nn > dn)
+ {
+ /* Test mpn_bdiv_qr */
+ itch = mpn_bdiv_qr_itch (nn, dn);
+ if (itch + 1 > alloc)
+ {
+ scratch = __GMP_REALLOCATE_FUNC_LIMBS (scratch, alloc, itch + 1);
+ alloc = itch + 1;
+ }
+ scratch[itch] = ran;
+ MPN_ZERO (qp, nn - dn);
+ MPN_ZERO (rp, dn);
+ rp[dn] = rran1;
+ rh = mpn_bdiv_qr (qp, rp, np, nn, dp, dn, scratch);
+ ASSERT_ALWAYS (ran == scratch[itch]);
+ ASSERT_ALWAYS (qp[-1] == qran0); ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
+ ASSERT_ALWAYS (rp[-1] == rran0); ASSERT_ALWAYS (rp[dn] == rran1);
+
+ check_one (qp, rp, rh, np, nn, dp, dn, "mpn_bdiv_qr");
+ }
+
if (nn - dn < 2 || dn < 2)
continue;
--- /dev/null
+/* Copyright 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
+
+
+#include <stdlib.h> /* for strtol */
+#include <stdio.h> /* for printf */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+#include "tests/tests.h"
+
+#define MAX_LIMBS 150
+#define COUNT 500
+
+/* Randomized test of mpn_broot.
+
+   An optional first command-line argument overrides the number of
+   iterations (default COUNT); it must parse completely as a positive
+   integer, otherwise a message is printed and 1 is returned.
+
+   Each iteration picks a size n in [1, MAX_LIMBS], a random odd operand a
+   of n limbs and an odd exponent k >= 3, calls mpn_broot, feeds the result
+   to mpn_powlo with exponent k, and requires the outcome to equal a in all
+   n limbs.  On a mismatch the operands are dumped to stderr and the
+   program aborts.  Returns 0 when every iteration passes.  */
+int
+main (int argc, char **argv)
+{
+  gmp_randstate_ptr rands;
+
+  mp_ptr ap, rp, pp, scratch;
+  /* long, not int: strtol returns long, so nothing is silently truncated.  */
+  long count = COUNT;
+  long i;
+  TMP_DECL;
+
+  TMP_MARK;
+
+  if (argc > 1)
+    {
+      char *end;
+      count = strtol (argv[1], &end, 0);
+      if (*end || count <= 0)
+        {
+          fprintf (stderr, "Invalid test count: %s.\n", argv[1]);
+          return 1;
+        }
+    }
+
+  tests_start ();
+  rands = RANDS;
+
+  ap = TMP_ALLOC_LIMBS (MAX_LIMBS);
+  rp = TMP_ALLOC_LIMBS (MAX_LIMBS);
+  pp = TMP_ALLOC_LIMBS (MAX_LIMBS);
+  scratch = TMP_ALLOC_LIMBS (3*MAX_LIMBS); /* For mpn_powlo */
+
+  for (i = 0; i < count; i++)
+    {
+      mp_size_t n;
+      mp_limb_t k;
+      int c;
+
+      n = 1 + gmp_urandomm_ui (rands, MAX_LIMBS);
+
+      /* Alternate between the two random operand generators.  */
+      if (i & 1)
+        mpn_random2 (ap, n);
+      else
+        mpn_random (ap, n);
+
+      /* The operand is always made odd.  */
+      ap[0] |= 1;
+
+      /* Exponent: 3, 5, ..., 201 for the first 100 iterations, then a
+         random limb forced to be odd and at least 3.  */
+      if (i < 100)
+        k = 3 + 2*i;
+      else
+        {
+          mpn_random (&k, 1);
+          if (k < 3)
+            k = 3;
+          else
+            k |= 1;
+        }
+      mpn_broot (rp, ap, n, k);
+      /* NOTE(review): presumably pp = low n limbs of rp^k -- confirm
+         against the mpn_powlo documentation.  */
+      mpn_powlo (pp, rp, &k, 1, n, scratch);
+
+      MPN_CMP (c, ap, pp, n);
+      if (c != 0)
+        {
+          gmp_fprintf (stderr,
+                       "mpn_broot returned bad result: %u limbs\n",
+                       (unsigned) n);
+          gmp_fprintf (stderr, "k = %Mx\n", k);
+          gmp_fprintf (stderr, "a = %Nx\n", ap, n);
+          gmp_fprintf (stderr, "r = %Nx\n", rp, n);
+          /* The exponent is k; n is only the limb count.  */
+          gmp_fprintf (stderr, "r^k = %Nx\n", pp, n);
+          abort ();
+        }
+    }
+  TMP_FREE;
+  tests_end ();
+  return 0;
+}
--- /dev/null
+/* Copyright 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
+
+
+#include <stdlib.h> /* for strtol */
+#include <stdio.h> /* for printf */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "longlong.h"
+#include "tests/tests.h"
+
+#define MAX_LIMBS 150
+#define COUNT 500
+
+/* Randomized test of mpn_brootinv.
+
+   An optional first command-line argument overrides the number of
+   iterations (default COUNT); it must parse completely as a positive
+   integer, otherwise a message is printed and 1 is returned.
+
+   Each iteration picks a size n in [1, MAX_LIMBS], a random odd operand a
+   of n limbs and an odd exponent k >= 3, calls mpn_brootinv, feeds the
+   result to mpn_powlo with exponent k, multiplies that by a with
+   mpn_mullo_n, and requires the n-limb product to be exactly 1.  On a
+   mismatch the operands are dumped to stderr and the program aborts.
+   Returns 0 when every iteration passes.  */
+int
+main (int argc, char **argv)
+{
+  gmp_randstate_ptr rands;
+
+  mp_ptr ap, rp, pp, app, scratch;
+  /* long, not int: strtol returns long, so nothing is silently truncated.  */
+  long count = COUNT;
+  long i;
+  TMP_DECL;
+
+  TMP_MARK;
+
+  if (argc > 1)
+    {
+      char *end;
+      count = strtol (argv[1], &end, 0);
+      if (*end || count <= 0)
+        {
+          fprintf (stderr, "Invalid test count: %s.\n", argv[1]);
+          return 1;
+        }
+    }
+
+  tests_start ();
+  rands = RANDS;
+
+  ap = TMP_ALLOC_LIMBS (MAX_LIMBS);
+  rp = TMP_ALLOC_LIMBS (MAX_LIMBS);
+  pp = TMP_ALLOC_LIMBS (MAX_LIMBS);
+  app = TMP_ALLOC_LIMBS (MAX_LIMBS);
+  /* Shared by mpn_brootinv and mpn_powlo below.  */
+  scratch = TMP_ALLOC_LIMBS (5*MAX_LIMBS);
+
+  for (i = 0; i < count; i++)
+    {
+      mp_size_t n;
+      mp_limb_t k;
+      int c;
+
+      n = 1 + gmp_urandomm_ui (rands, MAX_LIMBS);
+
+      /* Alternate between the two random operand generators.  */
+      if (i & 1)
+        mpn_random2 (ap, n);
+      else
+        mpn_random (ap, n);
+
+      /* The operand is always made odd.  */
+      ap[0] |= 1;
+
+      /* Exponent: 3, 5, ..., 201 for the first 100 iterations, then a
+         random limb forced to be odd and at least 3.  */
+      if (i < 100)
+        k = 3 + 2*i;
+      else
+        {
+          mpn_random (&k, 1);
+          if (k < 3)
+            k = 3;
+          else
+            k |= 1;
+        }
+      mpn_brootinv (rp, ap, n, k, scratch);
+      /* NOTE(review): presumably pp = low n limbs of rp^k and
+         app = low n limbs of a * pp -- confirm against the mpn_powlo and
+         mpn_mullo_n documentation.  */
+      mpn_powlo (pp, rp, &k, 1, n, scratch);
+      mpn_mullo_n (app, ap, pp, n);
+
+      /* Pass only if the n-limb product is 1: low limb 1, the rest zero.  */
+      if (app[0] != 1 || !mpn_zero_p (app+1, n-1))
+        {
+          gmp_fprintf (stderr,
+                       "mpn_brootinv returned bad result: %u limbs\n",
+                       (unsigned) n);
+          gmp_fprintf (stderr, "k = %Mx\n", k);
+          gmp_fprintf (stderr, "a = %Nx\n", ap, n);
+          gmp_fprintf (stderr, "r = %Nx\n", rp, n);
+          /* The exponent is k; n is only the limb count.  */
+          gmp_fprintf (stderr, "r^k = %Nx\n", pp, n);
+          gmp_fprintf (stderr, "a r^k = %Nx\n", app, n);
+          abort ();
+        }
+    }
+  TMP_FREE;
+  tests_end ();
+  return 0;
+}
-/* Copyright 2006, 2007, 2009, 2010 Free Software Foundation, Inc.
+/* Copyright 2006, 2007, 2009, 2010, 2013 Free Software Foundation, Inc.
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; either version 3 of the License, or (at your option) any later
-version.
+This file is part of the GNU MP Library test suite.
-This program is distributed in the hope that it will be useful, but WITHOUT ANY
-WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
-PARTICULAR PURPOSE. See the GNU General Public License for more details.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
You should have received a copy of the GNU General Public License along with
-this program. If not, see http://www.gnu.org/licenses/. */
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdlib.h> /* for strtol */
puts ("");
}
-static unsigned long test;
+static signed long test;
static void
check_one (mp_ptr qp, mp_srcptr rp,
mp_srcptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn,
- char *fname, mp_limb_t q_allowed_err)
+ const char *fname, mp_limb_t q_allowed_err)
{
mp_size_t qn = nn - dn + 1;
mp_ptr tp;
tvalue = "Q*D";
error:
printf ("\r*******************************************************************************\n");
- printf ("%s failed test %lu: %s\n", fname, test, msg);
+ printf ("%s failed test %ld: %s\n", fname, test, msg);
printf ("N= "); dumpy (np, nn);
printf ("D= "); dumpy (dp, dn);
printf ("Q= "); dumpy (qp, qn);
{
gmp_randstate_ptr rands;
unsigned long maxnbits, maxdbits, nbits, dbits;
- mpz_t n, d, q, r, tz;
+ mpz_t n, d, q, r, tz, junk;
mp_size_t maxnn, maxdn, nn, dn, clearn, i;
- mp_ptr np, dp, qp, rp;
+ mp_ptr np, dup, dnp, qp, rp, junkp;
mp_limb_t t;
gmp_pi1_t dinv;
- int count = COUNT;
+ long count = COUNT;
mp_ptr scratch;
mp_limb_t ran;
mp_size_t alloc, itch;
}
}
-
maxdbits = MAX_DN;
maxnbits = MAX_NN;
mpz_init (q);
mpz_init (r);
mpz_init (tz);
+ mpz_init (junk);
maxnn = maxnbits / GMP_NUMB_BITS + 1;
maxdn = maxdbits / GMP_NUMB_BITS + 1;
qp = TMP_ALLOC_LIMBS (maxnn + 2) + 1;
rp = TMP_ALLOC_LIMBS (maxnn + 2) + 1;
+ dnp = TMP_ALLOC_LIMBS (maxdn);
alloc = 1;
scratch = __GMP_ALLOCATE_FUNC_LIMBS (alloc);
- for (test = 0; test < count;)
+ for (test = -300; test < count; test++)
{
- do
- {
- nbits = random_word (rands) % (maxnbits - GMP_NUMB_BITS) + 2 * GMP_NUMB_BITS;
- if (maxdbits > nbits)
- dbits = random_word (rands) % nbits + 1;
- else
- dbits = random_word (rands) % maxdbits + 1;
- }
- while (nbits < dbits);
+ nbits = random_word (rands) % (maxnbits - GMP_NUMB_BITS) + 2 * GMP_NUMB_BITS;
+
+ if (test < 0)
+ dbits = (test + 300) % (nbits - 1) + 1;
+ else
+ dbits = random_word (rands) % (nbits - 1) % maxdbits + 1;
#if RAND_UNIFORM
#define RANDFUNC mpz_urandomb
RANDFUNC (d, rands, dbits);
while (mpz_sgn (d) == 0);
dn = SIZ (d);
- dp = PTR (d);
- dp[dn - 1] |= GMP_NUMB_HIGHBIT;
+ dup = PTR (d);
+ MPN_COPY (dnp, dup, dn);
+ dnp[dn - 1] |= GMP_NUMB_HIGHBIT;
if (test % 2 == 0)
{
ASSERT_ALWAYS (nn <= maxnn);
ASSERT_ALWAYS (dn <= maxdn);
+ mpz_urandomb (junk, rands, nbits);
+ junkp = PTR (junk);
+
np = PTR (n);
mpz_urandomb (tz, rands, 32);
t = mpz_get_ui (tz);
if (t % 17 == 0)
- dp[dn - 1] = GMP_NUMB_MAX;
+ {
+ dnp[dn - 1] = GMP_NUMB_MAX;
+ dup[dn - 1] = GMP_NUMB_MAX;
+ }
switch ((int) t % 16)
{
np[i] = 0;
break;
case 1:
- mpn_sub_1 (np + nn - dn, dp, dn, random_word (rands));
+ mpn_sub_1 (np + nn - dn, dnp, dn, random_word (rands));
break;
case 2:
- mpn_add_1 (np + nn - dn, dp, dn, random_word (rands));
+ mpn_add_1 (np + nn - dn, dnp, dn, random_word (rands));
break;
}
- test++;
-
- invert_pi1 (dinv, dp[dn - 1], dp[dn - 2]);
+ if (dn >= 2)
+ invert_pi1 (dinv, dnp[dn - 1], dnp[dn - 2]);
rran0 = random_word (rands);
rran1 = random_word (rands);
{
MPN_COPY (rp, np, nn);
if (nn > dn)
- MPN_ZERO (qp, nn - dn);
- qp[nn - dn] = mpn_sbpi1_div_qr (qp, rp, nn, dp, dn, dinv.inv32);
- check_one (qp, rp, np, nn, dp, dn, "mpn_sbpi1_div_qr", 0);
+ MPN_COPY (qp, junkp, nn - dn);
+ qp[nn - dn] = mpn_sbpi1_div_qr (qp, rp, nn, dnp, dn, dinv.inv32);
+ check_one (qp, rp, np, nn, dnp, dn, "mpn_sbpi1_div_qr", 0);
}
/* Test mpn_sbpi1_divappr_q */
{
MPN_COPY (rp, np, nn);
if (nn > dn)
- MPN_ZERO (qp, nn - dn);
- qp[nn - dn] = mpn_sbpi1_divappr_q (qp, rp, nn, dp, dn, dinv.inv32);
- check_one (qp, NULL, np, nn, dp, dn, "mpn_sbpi1_divappr_q", 1);
+ MPN_COPY (qp, junkp, nn - dn);
+ qp[nn - dn] = mpn_sbpi1_divappr_q (qp, rp, nn, dnp, dn, dinv.inv32);
+ check_one (qp, NULL, np, nn, dnp, dn, "mpn_sbpi1_divappr_q", 1);
}
/* Test mpn_sbpi1_div_q */
{
MPN_COPY (rp, np, nn);
if (nn > dn)
- MPN_ZERO (qp, nn - dn);
- qp[nn - dn] = mpn_sbpi1_div_q (qp, rp, nn, dp, dn, dinv.inv32);
- check_one (qp, NULL, np, nn, dp, dn, "mpn_sbpi1_div_q", 0);
+ MPN_COPY (qp, junkp, nn - dn);
+ qp[nn - dn] = mpn_sbpi1_div_q (qp, rp, nn, dnp, dn, dinv.inv32);
+ check_one (qp, NULL, np, nn, dnp, dn, "mpn_sbpi1_div_q", 0);
}
+
+ /* Test mpn_sb_div_qr_sec */
+ itch = 3 * nn + 4;
+ if (itch + 1 > alloc)
+ {
+ scratch = __GMP_REALLOCATE_FUNC_LIMBS (scratch, alloc, itch + 1);
+ alloc = itch + 1;
+ }
+ scratch[itch] = ran;
+ MPN_COPY (rp, np, nn);
+ if (nn >= dn)
+ MPN_COPY (qp, junkp, nn - dn + 1);
+ mpn_sb_div_qr_sec (qp, rp, nn, dup, dn, scratch);
+ ASSERT_ALWAYS (ran == scratch[itch]);
+ check_one (qp, rp, np, nn, dup, dn, "mpn_sb_div_qr_sec", 0);
+
+ /* Test mpn_sb_div_r_sec */
+ itch = nn + 2 * dn + 2;
+ if (itch + 1 > alloc)
+ {
+ scratch = __GMP_REALLOCATE_FUNC_LIMBS (scratch, alloc, itch + 1);
+ alloc = itch + 1;
+ }
+ scratch[itch] = ran;
+ MPN_COPY (rp, np, nn);
+ mpn_sb_div_r_sec (rp, nn, dup, dn, scratch);
+ ASSERT_ALWAYS (ran == scratch[itch]);
+ /* Note: Since check_one cannot cope with remainder-only functions, we
+ pass qp[] from the previous function, mpn_sb_div_qr_sec. */
+ check_one (qp, rp, np, nn, dup, dn, "mpn_sb_div_r_sec", 0);
}
/* Test mpn_dcpi1_div_qr */
{
MPN_COPY (rp, np, nn);
if (nn > dn)
- MPN_ZERO (qp, nn - dn);
- qp[nn - dn] = mpn_dcpi1_div_qr (qp, rp, nn, dp, dn, &dinv);
+ MPN_COPY (qp, junkp, nn - dn);
+ qp[nn - dn] = mpn_dcpi1_div_qr (qp, rp, nn, dnp, dn, &dinv);
ASSERT_ALWAYS (qp[-1] == qran0); ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
ASSERT_ALWAYS (rp[-1] == rran0);
- check_one (qp, rp, np, nn, dp, dn, "mpn_dcpi1_div_qr", 0);
+ check_one (qp, rp, np, nn, dnp, dn, "mpn_dcpi1_div_qr", 0);
}
/* Test mpn_dcpi1_divappr_q */
{
MPN_COPY (rp, np, nn);
if (nn > dn)
- MPN_ZERO (qp, nn - dn);
- qp[nn - dn] = mpn_dcpi1_divappr_q (qp, rp, nn, dp, dn, &dinv);
+ MPN_COPY (qp, junkp, nn - dn);
+ qp[nn - dn] = mpn_dcpi1_divappr_q (qp, rp, nn, dnp, dn, &dinv);
ASSERT_ALWAYS (qp[-1] == qran0); ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
ASSERT_ALWAYS (rp[-1] == rran0);
- check_one (qp, NULL, np, nn, dp, dn, "mpn_dcpi1_divappr_q", 1);
+ check_one (qp, NULL, np, nn, dnp, dn, "mpn_dcpi1_divappr_q", 1);
}
/* Test mpn_dcpi1_div_q */
{
MPN_COPY (rp, np, nn);
if (nn > dn)
- MPN_ZERO (qp, nn - dn);
- qp[nn - dn] = mpn_dcpi1_div_q (qp, rp, nn, dp, dn, &dinv);
+ MPN_COPY (qp, junkp, nn - dn);
+ qp[nn - dn] = mpn_dcpi1_div_q (qp, rp, nn, dnp, dn, &dinv);
ASSERT_ALWAYS (qp[-1] == qran0); ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
ASSERT_ALWAYS (rp[-1] == rran0);
- check_one (qp, NULL, np, nn, dp, dn, "mpn_dcpi1_div_q", 0);
+ check_one (qp, NULL, np, nn, dnp, dn, "mpn_dcpi1_div_q", 0);
}
/* Test mpn_mu_div_qr */
alloc = itch + 1;
}
scratch[itch] = ran;
- MPN_ZERO (qp, nn - dn);
+ MPN_COPY (qp, junkp, nn - dn);
MPN_ZERO (rp, dn);
rp[dn] = rran1;
- qp[nn - dn] = mpn_mu_div_qr (qp, rp, np, nn, dp, dn, scratch);
+ qp[nn - dn] = mpn_mu_div_qr (qp, rp, np, nn, dnp, dn, scratch);
ASSERT_ALWAYS (ran == scratch[itch]);
ASSERT_ALWAYS (qp[-1] == qran0); ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
ASSERT_ALWAYS (rp[-1] == rran0); ASSERT_ALWAYS (rp[dn] == rran1);
- check_one (qp, rp, np, nn, dp, dn, "mpn_mu_div_qr", 0);
+ check_one (qp, rp, np, nn, dnp, dn, "mpn_mu_div_qr", 0);
}
/* Test mpn_mu_divappr_q */
alloc = itch + 1;
}
scratch[itch] = ran;
- MPN_ZERO (qp, nn - dn);
- qp[nn - dn] = mpn_mu_divappr_q (qp, np, nn, dp, dn, scratch);
+ MPN_COPY (qp, junkp, nn - dn);
+ qp[nn - dn] = mpn_mu_divappr_q (qp, np, nn, dnp, dn, scratch);
ASSERT_ALWAYS (ran == scratch[itch]);
ASSERT_ALWAYS (qp[-1] == qran0); ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
- check_one (qp, NULL, np, nn, dp, dn, "mpn_mu_divappr_q", 4);
+ check_one (qp, NULL, np, nn, dnp, dn, "mpn_mu_divappr_q", 4);
}
/* Test mpn_mu_div_q */
alloc = itch + 1;
}
scratch[itch] = ran;
- MPN_ZERO (qp, nn - dn);
- qp[nn - dn] = mpn_mu_div_q (qp, np, nn, dp, dn, scratch);
+ MPN_COPY (qp, junkp, nn - dn);
+ qp[nn - dn] = mpn_mu_div_q (qp, np, nn, dnp, dn, scratch);
ASSERT_ALWAYS (ran == scratch[itch]);
ASSERT_ALWAYS (qp[-1] == qran0); ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
- check_one (qp, NULL, np, nn, dp, dn, "mpn_mu_div_q", 0);
+ check_one (qp, NULL, np, nn, dnp, dn, "mpn_mu_div_q", 0);
}
-
if (1)
{
itch = nn + 1;
alloc = itch + 1;
}
scratch[itch] = ran;
- mpn_div_q (qp, np, nn, dp, dn, scratch);
+ mpn_div_q (qp, np, nn, dup, dn, scratch);
ASSERT_ALWAYS (ran == scratch[itch]);
ASSERT_ALWAYS (qp[-1] == qran0); ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
- check_one (qp, NULL, np, nn, dp, dn, "mpn_div_q", 0);
+ check_one (qp, NULL, np, nn, dup, dn, "mpn_div_q", 0);
}
- /* Finally, test mpn_div_q without msb set. */
- dp[dn - 1] &= ~GMP_NUMB_HIGHBIT;
- if (dp[dn - 1] == 0)
- continue;
-
- itch = nn + 1;
- if (itch + 1> alloc)
+ if (dn >= 2 && nn >= 2)
{
- scratch = __GMP_REALLOCATE_FUNC_LIMBS (scratch, alloc, itch + 1);
- alloc = itch + 1;
+ mp_limb_t qh;
+
+ /* mpn_divrem_2 */
+ MPN_COPY (rp, np, nn);
+ qp[nn - 2] = qp[nn-1] = qran1;
+
+ qh = mpn_divrem_2 (qp, 0, rp, nn, dnp + dn - 2);
+ ASSERT_ALWAYS (qp[nn - 2] == qran1);
+ ASSERT_ALWAYS (qp[-1] == qran0); ASSERT_ALWAYS (qp[nn - 1] == qran1);
+ qp[nn - 2] = qh;
+ check_one (qp, rp, np, nn, dnp + dn - 2, 2, "mpn_divrem_2", 0);
+
+ /* Missing: divrem_2 with fraction limbs. */
+
+ /* mpn_div_qr_2 */
+ qp[nn - 2] = qran1;
+
+ qh = mpn_div_qr_2 (qp, rp, np, nn, dup + dn - 2);
+ ASSERT_ALWAYS (qp[nn - 2] == qran1);
+ ASSERT_ALWAYS (qp[-1] == qran0); ASSERT_ALWAYS (qp[nn - 1] == qran1);
+ qp[nn - 2] = qh;
+ check_one (qp, rp, np, nn, dup + dn - 2, 2, "mpn_div_qr_2", 0);
}
- scratch[itch] = ran;
- mpn_div_q (qp, np, nn, dp, dn, scratch);
- ASSERT_ALWAYS (ran == scratch[itch]);
- ASSERT_ALWAYS (qp[-1] == qran0); ASSERT_ALWAYS (qp[nn - dn + 1] == qran1);
- check_one (qp, NULL, np, nn, dp, dn, "mpn_div_q", 0);
}
__GMP_FREE_FUNC_LIMBS (scratch, alloc);
mpz_clear (q);
mpz_clear (r);
mpz_clear (tz);
+ mpz_clear (junk);
tests_end ();
return 0;
Copyright 2003 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
{ { 5 }, 1, 2, 0,
{ 2 }, 1},
+ /* Exercises the q update in the nl == constant 0 case of
+ udiv_qrnnd_preinv3. Test case copied from t-fat.c. */
+ { { 287 }, 1, 7, 1,
+ { 0, 41 }, 0 },
+
#if GMP_NUMB_BITS == 32
{ { 0x3C }, 1, 0xF2, 1,
{ 0x3F789854, 0 }, 0x98 },
/* Test fat binary setups.
-Copyright 2003 Free Software Foundation, Inc.
+Copyright 2003, 2012 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
/* dummies when not a fat binary */
#if ! WANT_FAT_BINARY
struct cpuvec_t {
- int initialized;
+ int dummy;
};
struct cpuvec_t __gmpn_cpuvec;
#define ITERATE_FAT_THRESHOLDS() do { } while (0)
ASSERT_ALWAYS (wp[1] == 0);
}
+ memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
+ for (i = 0; i < 2; i++)
+ {
+ xp[0] = 5;
+ yp[0] = 7;
+ mpn_mullo_basecase (wp, xp, yp, (mp_size_t) 1);
+ ASSERT_ALWAYS (wp[0] == 35);
+ }
+
#if HAVE_NATIVE_mpn_preinv_divrem_1 && GMP_NAIL_BITS == 0
memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec));
for (i = 0; i < 2; i++)
}
}
-/* Expect the first use of a each fat threshold to invoke the necessary
+/* Expect the first use of each fat threshold to invoke the necessary
initialization. */
void
check_thresholds (void)
{
#define ITERATE(name,field) \
do { \
+ __gmpn_cpuvec_initialized = 0; \
memcpy (&__gmpn_cpuvec, &initial_cpuvec, sizeof (__gmpn_cpuvec)); \
ASSERT_ALWAYS (name != 0); \
ASSERT_ALWAYS (name == __gmpn_cpuvec.field); \
- ASSERT_ALWAYS (__gmpn_cpuvec.initialized); \
+ ASSERT_ALWAYS (__gmpn_cpuvec_initialized); \
} while (0)
ITERATE_FAT_THRESHOLDS ();
Copyright 2002, 2003, 2004 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
/* Note that we don't use <limits.h> for LONG_MIN, but instead our own
definition in gmp-impl.h. In gcc 2.95.4 (debian 3.0) under
};
/* FIXME: It'd be better to base this on the float format. */
-#ifdef __vax
+#if defined (__vax) || defined (__vax__)
int limit = 127; /* vax fp numbers have limited range */
#else
int limit = 511;
exp = exp_table[exp_i];
want_bit = bit + exp;
- if (want_bit > limit || want_bit < -limit)
+ if (want_bit >= limit || want_bit <= -limit)
continue;
want = 1.0;
check_ieee_denorm ();
check_ieee_overflow ();
check_0x81c25113 ();
+#if ! (defined (__vax) || defined (__vax__))
check_rand ();
+#endif
tests_end ();
exit (0);
-/* Test mpz_gcd, mpz_gcdext, and mpz_gcd_ui.
+/* Test mpn_hgcd.
Copyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2002, 2003, 2004 Free
Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
#include "gmp-impl.h"
#include "tests.h"
-static mp_size_t one_test __GMP_PROTO ((mpz_t, mpz_t, int));
-static void debug_mp __GMP_PROTO ((mpz_t, int));
+static mp_size_t one_test (mpz_t, mpz_t, int);
+static void debug_mp (mpz_t, int);
#define MIN_OPERAND_SIZE 2
mpz_t m[2][2];
};
-static void hgcd_ref_init __GMP_PROTO ((struct hgcd_ref *hgcd));
-static void hgcd_ref_clear __GMP_PROTO ((struct hgcd_ref *hgcd));
-static int hgcd_ref __GMP_PROTO ((struct hgcd_ref *hgcd, mpz_t a, mpz_t b));
-static int hgcd_ref_equal __GMP_PROTO ((const struct hgcd_matrix *hgcd, const struct hgcd_ref *ref));
+static void hgcd_ref_init (struct hgcd_ref *);
+static void hgcd_ref_clear (struct hgcd_ref *);
+static int hgcd_ref (struct hgcd_ref *, mpz_t, mpz_t);
+static int hgcd_ref_equal (const struct hgcd_matrix *, const struct hgcd_ref *);
int
main (int argc, char **argv)
{
/* Generate plain operands with unknown gcd. These types of operands
have proven to trigger certain bugs in development versions of the
- gcd code. The "hgcd->row[3].rsize > M" ASSERT is not triggered by
- the division chain code below, but that is most likely just a result
- of that other ASSERTs are triggered before it. */
+ gcd code. */
mpz_urandomb (bs, rands, 32);
size_range = mpz_get_ui (bs) % 13 + 2;
--- /dev/null
+/* Test mpn_hgcd_appr.
+
+Copyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2002, 2003, 2004, 2011 Free
+Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+static mp_size_t one_test (mpz_t, mpz_t, int);
+static void debug_mp (mpz_t, int);
+
+#define MIN_OPERAND_SIZE 2
+
+/* Reference transformation matrix: a 2x2 array of mpz_t entries, kept
+   alongside the limb-based struct hgcd_matrix filled in by the code under
+   test. */
+struct hgcd_ref
+{
+ mpz_t m[2][2];
+};
+
+static void hgcd_ref_init (struct hgcd_ref *hgcd);
+static void hgcd_ref_clear (struct hgcd_ref *hgcd);
+static int hgcd_ref (struct hgcd_ref *hgcd, mpz_t a, mpz_t b);
+static int hgcd_ref_equal (const struct hgcd_ref *, const struct hgcd_ref *);
+static int hgcd_appr_valid_p (mpz_t, mpz_t, mp_size_t, struct hgcd_ref *,
+ mpz_t, mpz_t, mp_size_t, struct hgcd_matrix *);
+
+static int verbose_flag = 0;
+
+/* Test driver for mpn_hgcd_appr.
+
+   An optional first argument "-v" sets verbose_flag; any other argument
+   prints "Invalid argument." and returns 1.
+
+   Runs 15 rounds.  Each round calls one_test twice (each time only if the
+   larger operand is nonzero): once on a pair of plain random operands, and
+   once on a pair built up backwards as a division chain. */
+int
+main (int argc, char **argv)
+{
+ mpz_t op1, op2, temp1, temp2;
+ int i, j, chain_len;
+ gmp_randstate_ptr rands;
+ mpz_t bs;
+ unsigned long size_range;
+
+ if (argc > 1)
+ {
+ if (strcmp (argv[1], "-v") == 0)
+ verbose_flag = 1;
+ else
+ {
+ fprintf (stderr, "Invalid argument.\n");
+ return 1;
+ }
+ }
+
+ tests_start ();
+ rands = RANDS;
+
+ mpz_init (bs);
+ mpz_init (op1);
+ mpz_init (op2);
+ mpz_init (temp1);
+ mpz_init (temp2);
+
+ for (i = 0; i < 15; i++)
+ {
+ /* Generate plain operands with unknown gcd. These types of operands
+ have proven to trigger certain bugs in development versions of the
+ gcd code. */
+
+ /* size_range is in 2..14; it bounds the bit length of the random
+ values that in turn choose each operand's bit count. */
+ mpz_urandomb (bs, rands, 32);
+ size_range = mpz_get_ui (bs) % 13 + 2;
+
+ mpz_urandomb (bs, rands, size_range);
+ mpz_urandomb (op1, rands, mpz_get_ui (bs) + MIN_OPERAND_SIZE);
+ mpz_urandomb (bs, rands, size_range);
+ mpz_urandomb (op2, rands, mpz_get_ui (bs) + MIN_OPERAND_SIZE);
+
+ /* one_test asserts asize >= bsize, so put the larger operand first. */
+ if (mpz_cmp (op1, op2) < 0)
+ mpz_swap (op1, op2);
+
+ if (mpz_size (op1) > 0)
+ one_test (op1, op2, i);
+
+ /* Generate a division chain backwards, allowing otherwise
+ unlikely huge quotients. */
+
+ /* Start from op1 = 0 and a random op2 >= 1. */
+ mpz_set_ui (op1, 0);
+ mpz_urandomb (bs, rands, 32);
+ mpz_urandomb (bs, rands, mpz_get_ui (bs) % 16 + 1);
+ mpz_rrandomb (op2, rands, mpz_get_ui (bs));
+ mpz_add_ui (op2, op2, 1);
+
+#if 0
+ chain_len = 1000000;
+#else
+ mpz_urandomb (bs, rands, 32);
+ chain_len = mpz_get_ui (bs) % (GMP_NUMB_BITS * GCD_DC_THRESHOLD / 256);
+#endif
+
+ /* Each pass adds a random multiple (>= 1) of op2 to op1, then a random
+ multiple (>= 1) of op1 to op2. */
+ for (j = 0; j < chain_len; j++)
+ {
+ mpz_urandomb (bs, rands, 32);
+ mpz_urandomb (bs, rands, mpz_get_ui (bs) % 12 + 1);
+ mpz_rrandomb (temp2, rands, mpz_get_ui (bs) + 1);
+ mpz_add_ui (temp2, temp2, 1);
+ mpz_mul (temp1, op2, temp2);
+ mpz_add (op1, op1, temp1);
+
+ /* Don't generate overly huge operands. */
+ if (SIZ (op1) > 3 * GCD_DC_THRESHOLD)
+ break;
+
+ mpz_urandomb (bs, rands, 32);
+ mpz_urandomb (bs, rands, mpz_get_ui (bs) % 12 + 1);
+ mpz_rrandomb (temp2, rands, mpz_get_ui (bs) + 1);
+ mpz_add_ui (temp2, temp2, 1);
+ mpz_mul (temp1, op1, temp2);
+ mpz_add (op2, op2, temp1);
+
+ /* Don't generate overly huge operands. */
+ if (SIZ (op2) > 3 * GCD_DC_THRESHOLD)
+ break;
+ }
+ /* As above: larger operand first before calling one_test. */
+ if (mpz_cmp (op1, op2) < 0)
+ mpz_swap (op1, op2);
+
+ if (mpz_size (op1) > 0)
+ one_test (op1, op2, i);
+ }
+
+ mpz_clear (bs);
+ mpz_clear (op1);
+ mpz_clear (op2);
+ mpz_clear (temp1);
+ mpz_clear (temp2);
+
+ tests_end ();
+ exit (0);
+}
+
+/* Write x to stderr in the given base, followed by a newline. */
+static void
+debug_mp (mpz_t x, int base)
+{
+  mpz_out_str (stderr, base, x);
+  fputc ('\n', stderr);
+}
+
+static int
+mpz_mpn_equal (const mpz_t a, mp_srcptr bp, mp_size_t bsize);
+
+/* Run one comparison of mpn_hgcd_appr against hgcd_ref on the operands a
+   and b (the caller is expected to pass them with a's limb count >= b's).
+
+   Both computations work on copies, so a and b are left unchanged.  The
+   two scratch areas are allocated with one extra limb on each side holding
+   a random marker; after the call the markers are checked, and then the
+   results are validated with hgcd_appr_valid_p.  Any failure prints a
+   diagnostic mentioning the test index i and aborts.
+
+   Returns res[0], the value returned by hgcd_ref. */
+static mp_size_t
+one_test (mpz_t a, mpz_t b, int i)
+{
+ struct hgcd_matrix hgcd;
+ struct hgcd_ref ref;
+
+ mpz_t ref_r0;
+ mpz_t ref_r1;
+ mpz_t hgcd_r0;
+ mpz_t hgcd_r1;
+
+ int res[2];
+ mp_size_t asize;
+ mp_size_t bsize;
+
+ mp_size_t hgcd_init_scratch;
+ mp_size_t hgcd_scratch;
+
+ mp_ptr hgcd_init_tp;
+ mp_ptr hgcd_tp;
+ mp_limb_t marker[4];
+
+ asize = a->_mp_size;
+ bsize = b->_mp_size;
+
+ ASSERT (asize >= bsize);
+
+ /* Allocate two limbs more than requested and advance the pointer by one,
+ leaving room for a guard limb just before and just after each area. */
+ hgcd_init_scratch = MPN_HGCD_MATRIX_INIT_ITCH (asize);
+ hgcd_init_tp = refmpn_malloc_limbs (hgcd_init_scratch + 2) + 1;
+ mpn_hgcd_matrix_init (&hgcd, asize, hgcd_init_tp);
+
+ hgcd_scratch = mpn_hgcd_appr_itch (asize);
+ hgcd_tp = refmpn_malloc_limbs (hgcd_scratch + 2) + 1;
+
+ mpn_random (marker, 4);
+
+ /* Plant the random markers in the guard limbs. */
+ hgcd_init_tp[-1] = marker[0];
+ hgcd_init_tp[hgcd_init_scratch] = marker[1];
+ hgcd_tp[-1] = marker[2];
+ hgcd_tp[hgcd_scratch] = marker[3];
+
+#if 0
+ fprintf (stderr,
+ "one_test: i = %d asize = %d, bsize = %d\n",
+ i, a->_mp_size, b->_mp_size);
+
+ gmp_fprintf (stderr,
+ "one_test: i = %d\n"
+ " a = %Zx\n"
+ " b = %Zx\n",
+ i, a, b);
+#endif
+ hgcd_ref_init (&ref);
+
+ /* Reference computation on copies; hgcd_ref modifies its operands. */
+ mpz_init_set (ref_r0, a);
+ mpz_init_set (ref_r1, b);
+ res[0] = hgcd_ref (&ref, ref_r0, ref_r1);
+
+ mpz_init_set (hgcd_r0, a);
+ mpz_init_set (hgcd_r1, b);
+ /* mpn_hgcd_appr is given both operands with the same limb count asize,
+ so zero-pad the copy of b up to asize limbs. */
+ if (bsize < asize)
+ {
+ _mpz_realloc (hgcd_r1, asize);
+ MPN_ZERO (hgcd_r1->_mp_d + bsize, asize - bsize);
+ }
+ res[1] = mpn_hgcd_appr (hgcd_r0->_mp_d,
+ hgcd_r1->_mp_d,
+ asize,
+ &hgcd, hgcd_tp);
+
+ /* Any changed guard limb means a write outside the scratch area. */
+ if (hgcd_init_tp[-1] != marker[0]
+ || hgcd_init_tp[hgcd_init_scratch] != marker[1]
+ || hgcd_tp[-1] != marker[2]
+ || hgcd_tp[hgcd_scratch] != marker[3])
+ {
+ fprintf (stderr, "ERROR in test %d\n", i);
+ fprintf (stderr, "scratch space overwritten!\n");
+
+ if (hgcd_init_tp[-1] != marker[0])
+ gmp_fprintf (stderr,
+ "before init_tp: %Mx\n"
+ "expected: %Mx\n",
+ hgcd_init_tp[-1], marker[0]);
+ if (hgcd_init_tp[hgcd_init_scratch] != marker[1])
+ gmp_fprintf (stderr,
+ "after init_tp: %Mx\n"
+ "expected: %Mx\n",
+ hgcd_init_tp[hgcd_init_scratch], marker[1]);
+ if (hgcd_tp[-1] != marker[2])
+ gmp_fprintf (stderr,
+ "before tp: %Mx\n"
+ "expected: %Mx\n",
+ hgcd_tp[-1], marker[2]);
+ if (hgcd_tp[hgcd_scratch] != marker[3])
+ gmp_fprintf (stderr,
+ "after tp: %Mx\n"
+ "expected: %Mx\n",
+ hgcd_tp[hgcd_scratch], marker[3]);
+
+ abort ();
+ }
+
+ if (!hgcd_appr_valid_p (a, b, res[0], &ref, ref_r0, ref_r1,
+ res[1], &hgcd))
+ {
+ fprintf (stderr, "ERROR in test %d\n", i);
+ fprintf (stderr, "Invalid results for hgcd and hgcd_ref\n");
+ fprintf (stderr, "op1="); debug_mp (a, -16);
+ fprintf (stderr, "op2="); debug_mp (b, -16);
+ fprintf (stderr, "hgcd_ref: %ld\n", (long) res[0]);
+ fprintf (stderr, "mpn_hgcd_appr: %ld\n", (long) res[1]);
+ abort ();
+ }
+
+ /* The pointers were advanced by one limb after allocation; undo that
+ before freeing. */
+ refmpn_free_limbs (hgcd_init_tp - 1);
+ refmpn_free_limbs (hgcd_tp - 1);
+ hgcd_ref_clear (&ref);
+ mpz_clear (ref_r0);
+ mpz_clear (ref_r1);
+ mpz_clear (hgcd_r0);
+ mpz_clear (hgcd_r1);
+
+ return res[0];
+}
+
+/* Initialize all four entries of the reference matrix. */
+static void
+hgcd_ref_init (struct hgcd_ref *hgcd)
+{
+  unsigned row, col;
+
+  for (row = 0; row < 2; row++)
+    for (col = 0; col < 2; col++)
+      mpz_init (hgcd->m[row][col]);
+}
+
+/* Release all four entries of the reference matrix. */
+static void
+hgcd_ref_clear (struct hgcd_ref *hgcd)
+{
+  unsigned row, col;
+
+  for (row = 0; row < 2; row++)
+    for (col = 0; col < 2; col++)
+      mpz_clear (hgcd->m[row][col]);
+}
+
+/* Floor-divide a by b, storing quotient in q and remainder in r.  If the
+   remainder occupies at most s limbs, step back once: add b to r and
+   subtract 1 from q.  Returns nonzero iff the resulting q is positive.
+   Callers in this file pass the same variable for r and a. */
+static int
+sdiv_qr (mpz_t q, mpz_t r, mp_size_t s, const mpz_t a, const mpz_t b)
+{
+  mpz_fdiv_qr (q, r, a, b);
+
+  if (mpz_size (r) > s)
+    return mpz_sgn (q) > 0;
+
+  /* Remainder too small: undo one multiple of b. */
+  mpz_add (r, r, b);
+  mpz_sub_ui (q, q, 1);
+
+  return mpz_sgn (q) > 0;
+}
+
+/* Reference half-gcd on mpz operands.
+
+   With n the larger limb count of a and b, and s = n/2 + 1, reduce a and b
+   in place by subtraction/division steps that keep both values larger
+   than s limbs, accumulating the corresponding matrix in hgcd->m.
+
+   Returns 0 if no reduction is done: either operand already has at most s
+   limbs, the operands are equal, or the first subtraction leaves a value
+   of at most s limbs.  In that last case the subtraction has already been
+   applied to a or b, and hgcd->m is not written on any 0 return.
+   Returns 1 otherwise. */
+static int
+hgcd_ref (struct hgcd_ref *hgcd, mpz_t a, mpz_t b)
+{
+ mp_size_t n = MAX (mpz_size (a), mpz_size (b));
+ mp_size_t s = n/2 + 1;
+ mpz_t q;
+ int res;
+
+ if (mpz_size (a) <= s || mpz_size (b) <= s)
+ return 0;
+
+ /* First step: a single subtraction of the smaller operand from the
+ larger one, with the matrix set to record it. */
+ res = mpz_cmp (a, b);
+ if (res < 0)
+ {
+ mpz_sub (b, b, a);
+ if (mpz_size (b) <= s)
+ return 0;
+
+ mpz_set_ui (hgcd->m[0][0], 1); mpz_set_ui (hgcd->m[0][1], 0);
+ mpz_set_ui (hgcd->m[1][0], 1); mpz_set_ui (hgcd->m[1][1], 1);
+ }
+ else if (res > 0)
+ {
+ mpz_sub (a, a, b);
+ if (mpz_size (a) <= s)
+ return 0;
+
+ mpz_set_ui (hgcd->m[0][0], 1); mpz_set_ui (hgcd->m[0][1], 1);
+ mpz_set_ui (hgcd->m[1][0], 0); mpz_set_ui (hgcd->m[1][1], 1);
+ }
+ else
+ return 0;
+
+ mpz_init (q);
+
+ /* Keep dividing the larger value by the smaller one; stop as soon as
+ sdiv_qr reports a non-positive quotient. */
+ for (;;)
+ {
+ ASSERT (mpz_size (a) > s);
+ ASSERT (mpz_size (b) > s);
+
+ if (mpz_cmp (a, b) > 0)
+ {
+ if (!sdiv_qr (q, a, s, a, b))
+ break;
+ mpz_addmul (hgcd->m[0][1], q, hgcd->m[0][0]);
+ mpz_addmul (hgcd->m[1][1], q, hgcd->m[1][0]);
+ }
+ else
+ {
+ if (!sdiv_qr (q, b, s, b, a))
+ break;
+ mpz_addmul (hgcd->m[0][0], q, hgcd->m[0][1]);
+ mpz_addmul (hgcd->m[1][0], q, hgcd->m[1][1]);
+ }
+ }
+
+ mpz_clear (q);
+
+ return 1;
+}
+
+/* Return nonzero iff the limb vector {bp, bsize}, after stripping high
+   zero limbs, has the same size field and limbs as a. */
+static int
+mpz_mpn_equal (const mpz_t a, mp_srcptr bp, mp_size_t bsize)
+{
+  mp_size_t an = a->_mp_size;
+
+  MPN_NORMALIZE (bp, bsize);
+
+  if (an != bsize)
+    return 0;
+
+  return mpn_cmp (a->_mp_d, bp, an) == 0;
+}
+
+/* Return 1 if the two reference matrices agree entry by entry, else 0.
+   Comparison stops at the first differing entry. */
+static int
+hgcd_ref_equal (const struct hgcd_ref *A, const struct hgcd_ref *B)
+{
+  unsigned row, col;
+  int same = 1;
+
+  for (row = 0; same && row < 2; row++)
+    for (col = 0; same && col < 2; col++)
+      same = (mpz_cmp (A->m[row][col], B->m[row][col]) == 0);
+
+  return same;
+}
+
+/* Decide whether the outcome of mpn_hgcd_appr is acceptable, given the
+   outcome of the reference computation.
+
+   a, b            the original operands (not modified here)
+   res0, ref,      return value of hgcd_ref, its matrix, and the reduced
+   ref_r0, ref_r1  values it produced
+   res1, hgcd      return value of mpn_hgcd_appr and its matrix
+
+   With s = n/2 + 1 limbs (n the larger limb count of a and b), the checks
+   are: the values obtained by applying the hgcd matrix to (a, b) must be
+   positive and larger than s limbs; the bit size of their difference must
+   be at least that of the reference difference and at most
+   s*GMP_NUMB_BITS plus a margin; and continuing the reduction from the
+   approximate result must arrive at exactly the reference values and
+   matrix.
+
+   Returns 1 if the result is acceptable.  Otherwise prints a diagnostic
+   to stderr and returns 0. */
+static int
+hgcd_appr_valid_p (mpz_t a, mpz_t b, mp_size_t res0,
+                   struct hgcd_ref *ref, mpz_t ref_r0, mpz_t ref_r1,
+                   mp_size_t res1, struct hgcd_matrix *hgcd)
+{
+  mp_size_t n = MAX (mpz_size (a), mpz_size (b));
+  mp_size_t s = n/2 + 1;
+
+  mp_bitcnt_t dbits, abits, margin;
+  mpz_t appr_r0, appr_r1, t, q;
+  struct hgcd_ref appr;
+
+  if (!res0)
+    {
+      if (!res1)
+        return 1;
+
+      fprintf (stderr, "mpn_hgcd_appr returned 1 when no reduction possible.\n");
+      return 0;
+    }
+
+  /* NOTE: No *_clear calls on error return, since we're going to
+     abort anyway. */
+  mpz_init (t);
+  mpz_init (q);
+  hgcd_ref_init (&appr);
+  mpz_init (appr_r0);
+  mpz_init (appr_r1);
+
+  /* Sanity checks on the reference result itself. */
+  if (mpz_size (ref_r0) <= s)
+    {
+      fprintf (stderr, "ref_r0 too small!!!: "); debug_mp (ref_r0, 16);
+      return 0;
+    }
+  if (mpz_size (ref_r1) <= s)
+    {
+      fprintf (stderr, "ref_r1 too small!!!: "); debug_mp (ref_r1, 16);
+      return 0;
+    }
+
+  mpz_sub (t, ref_r0, ref_r1);
+  dbits = mpz_sizeinbase (t, 2);
+  if (dbits > s*GMP_NUMB_BITS)
+    {
+      fprintf (stderr, "ref |r0 - r1| too large!!!: "); debug_mp (t, 16);
+      return 0;
+    }
+
+  if (!res1)
+    {
+      /* No reduction reported: the approximate result is the input. */
+      mpz_set (appr_r0, a);
+      mpz_set (appr_r1, b);
+    }
+  else
+    {
+      unsigned i;
+
+      /* Copy the limb-based matrix into mpz form. */
+      for (i = 0; i<2; i++)
+        {
+          unsigned j;
+
+          for (j = 0; j<2; j++)
+            {
+              mp_size_t mn = hgcd->n;
+              MPN_NORMALIZE (hgcd->p[i][j], mn);
+              mpz_realloc (appr.m[i][j], mn);
+              MPN_COPY (PTR (appr.m[i][j]), hgcd->p[i][j], mn);
+              SIZ (appr.m[i][j]) = mn;
+            }
+        }
+
+      /* appr_r0 = m[1][1] * a - m[0][1] * b */
+      mpz_mul (appr_r0, appr.m[1][1], a);
+      mpz_mul (t, appr.m[0][1], b);
+      mpz_sub (appr_r0, appr_r0, t);
+      if (mpz_sgn (appr_r0) <= 0
+          || mpz_size (appr_r0) <= s)
+        {
+          fprintf (stderr, "appr_r0 too small: "); debug_mp (appr_r0, 16);
+          return 0;
+        }
+
+      /* appr_r1 = m[0][0] * b - m[1][0] * a */
+      mpz_mul (appr_r1, appr.m[1][0], a);
+      mpz_mul (t, appr.m[0][0], b);
+      mpz_sub (appr_r1, t, appr_r1);
+      if (mpz_sgn (appr_r1) <= 0
+          || mpz_size (appr_r1) <= s)
+        {
+          fprintf (stderr, "appr_r1 too small: "); debug_mp (appr_r1, 16);
+          return 0;
+        }
+    }
+
+  mpz_sub (t, appr_r0, appr_r1);
+  abits = mpz_sizeinbase (t, 2);
+  if (abits < dbits)
+    {
+      fprintf (stderr, "|r0 - r1| too small: "); debug_mp (t, 16);
+      return 0;
+    }
+
+  /* We lose one bit each time we discard the least significant limbs.
+     For the lehmer code, that can happen at most s * (GMP_NUMB_BITS)
+     / (GMP_NUMB_BITS - 1) times. For the dc code, we lose an entire
+     limb (or more?) for each level of recursion. */
+
+  margin = (n/2+1) * GMP_NUMB_BITS / (GMP_NUMB_BITS - 1);
+  {
+    mp_size_t rn;
+    for (rn = n; ABOVE_THRESHOLD (rn, HGCD_APPR_THRESHOLD); rn = (rn + 1)/2)
+      margin += GMP_NUMB_BITS;
+  }
+
+  /* The excess may be negative, so it is printed with %d.  Cast the whole
+     difference: casting only abits would leave an mp_size_t-typed argument
+     that does not match %d. */
+  if (verbose_flag && abits > dbits)
+    fprintf (stderr, "n = %u: sbits = %u: ref #(r0-r1): %u, appr #(r0-r1): %u excess: %d, margin: %u\n",
+             (unsigned) n, (unsigned) s*GMP_NUMB_BITS,
+             (unsigned) dbits, (unsigned) abits,
+             (int) ((long) abits - (long) (s * GMP_NUMB_BITS)), (unsigned) margin);
+
+  if (abits > s*GMP_NUMB_BITS + margin)
+    {
+      fprintf (stderr, "appr |r0 - r1| much larger than minimal (by %u bits, margin = %u bits)\n",
+               (unsigned) (abits - s*GMP_NUMB_BITS), (unsigned) margin);
+      return 0;
+    }
+
+  /* Continue the reduction from the approximate result, in the same way
+     hgcd_ref does, until it is no larger than the reference result. */
+  while (mpz_cmp (appr_r0, ref_r0) > 0 || mpz_cmp (appr_r1, ref_r1) > 0)
+    {
+      ASSERT (mpz_size (appr_r0) > s);
+      ASSERT (mpz_size (appr_r1) > s);
+
+      if (mpz_cmp (appr_r0, appr_r1) > 0)
+        {
+          if (!sdiv_qr (q, appr_r0, s, appr_r0, appr_r1))
+            break;
+          mpz_addmul (appr.m[0][1], q, appr.m[0][0]);
+          mpz_addmul (appr.m[1][1], q, appr.m[1][0]);
+        }
+      else
+        {
+          if (!sdiv_qr (q, appr_r1, s, appr_r1, appr_r0))
+            break;
+          mpz_addmul (appr.m[0][0], q, appr.m[0][1]);
+          mpz_addmul (appr.m[1][0], q, appr.m[1][1]);
+        }
+    }
+
+  /* The continued reduction must land exactly on the reference result. */
+  if (mpz_cmp (appr_r0, ref_r0) != 0
+      || mpz_cmp (appr_r1, ref_r1) != 0
+      || !hgcd_ref_equal (ref, &appr))
+    {
+      fprintf (stderr, "appr_r0: "); debug_mp (appr_r0, 16);
+      fprintf (stderr, "ref_r0: "); debug_mp (ref_r0, 16);
+
+      fprintf (stderr, "appr_r1: "); debug_mp (appr_r1, 16);
+      fprintf (stderr, "ref_r1: "); debug_mp (ref_r1, 16);
+
+      return 0;
+    }
+  mpz_clear (t);
+  mpz_clear (q);
+  hgcd_ref_clear (&appr);
+  mpz_clear (appr_r0);
+  mpz_clear (appr_r1);
+
+  return 1;
+}
Copyright 2002, 2003 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
int ncall;
-void __cyg_profile_func_enter __GMP_PROTO ((void *this_fn, void *call_site))
- __attribute__ ((no_instrument_function));
+void __cyg_profile_func_enter (void *, void *)
+ __attribute__ ((no_instrument_function));
void
__cyg_profile_func_enter (void *this_fn, void *call_site)
ncall++;
}
-void __cyg_profile_func_exit __GMP_PROTO ((void *this_fn, void *call_site))
- __attribute__ ((no_instrument_function));
+void __cyg_profile_func_exit (void *, void *)
+ __attribute__ ((no_instrument_function));
void
__cyg_profile_func_exit (void *this_fn, void *call_site)
Copyright 2009 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
+#include <stdlib.h>
+#include <stdio.h>
+
#include "gmp.h"
#include "gmp-impl.h"
#include "tests.h"
-#include <stdlib.h>
-#include <stdio.h>
-
/* Sizes are up to 2^SIZE_LOG limbs */
#ifndef SIZE_LOG
#define SIZE_LOG 12
Copyright 2001, 2002 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
Copyright 2008 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
/* Test mpn_mod_1 variants.
-Copyright 2010 Free Software Foundation, Inc.
+Copyright 2010, 2013 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
goto fail;
}
}
+ if (b <= GMP_NUMB_MASK / 3)
+ {
+ mp_limb_t pre[6];
+ mpn_mod_1s_3p_cps (pre, b);
+ r = mpn_mod_1s_3p (ap, n, b << pre[1], pre);
+ if (r != r_ref)
+ {
+ printf ("mpn_mod_1s_3p failed\n");
+ goto fail;
+ }
+ }
if (b <= GMP_NUMB_MASK / 4)
{
mp_limb_t pre[7];
Copyright 2002 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
Copyright 2011, 2012 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdlib.h>
Copyright 2009 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
+#include <stdlib.h>
+#include <stdio.h>
+
#include "gmp.h"
#include "gmp-impl.h"
#include "tests.h"
-#include <stdlib.h>
-#include <stdio.h>
-
/* Sizes are up to 2^SIZE_LOG limbs */
#ifndef SIZE_LOG
#define SIZE_LOG 10
--- /dev/null
+/* Test for mulmid function.
+
+Copyright 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+/* Sizes are up to 2^SIZE_LOG limbs */
+#ifndef SIZE_LOG
+#define SIZE_LOG 9
+#endif
+
+/* Number of random test cases run by main; may be overridden at build time. */
+#ifndef COUNT
+#define COUNT 5000
+#endif
+
+/* Limb count used by main when sizing the operand buffers. */
+#define MAX_N (1L << SIZE_LOG)
+
+/* Compare mpn_mulmid against refmpn_mulmid on COUNT random operand pairs.
+   Operand sizes are chosen so that an >= bn.  On the first mismatch the
+   operands and both results are dumped and the program aborts.
+   Returns 0 when every case agrees.  */
+int
+main (int argc, char **argv)
+{
+  mp_ptr ap, bp, rp, refp;
+  gmp_randstate_ptr rands;
+  int test;
+  TMP_DECL;
+  TMP_MARK;
+
+  tests_start ();
+  rands = RANDS;
+
+  ap = TMP_ALLOC_LIMBS (MAX_N);
+  bp = TMP_ALLOC_LIMBS (MAX_N);
+  /* The compared result has an + 3 - bn limbs (see rn below), hence the
+     two extra limbs beyond MAX_N.  */
+  rp = TMP_ALLOC_LIMBS (MAX_N + 2);
+  refp = TMP_ALLOC_LIMBS (MAX_N + 2);
+
+  for (test = 0; test < COUNT; test++)
+    {
+      mp_size_t an, bn, rn;
+      unsigned size_log;
+
+      /* Pick a random magnitude first, then a size within it, so that
+         small operands are tested as often as large ones.  */
+      size_log = 1 + gmp_urandomm_ui (rands, SIZE_LOG);
+      an = 1 + gmp_urandomm_ui (rands, 1L << size_log);
+
+      size_log = 1 + gmp_urandomm_ui (rands, SIZE_LOG);
+      bn = 1 + gmp_urandomm_ui (rands, 1L << size_log);
+
+      /* Make sure an >= bn */
+      if (an < bn)
+        MP_SIZE_T_SWAP (an, bn);
+
+      mpn_random2 (ap, an);
+      mpn_random2 (bp, bn);
+
+      refmpn_mulmid (refp, ap, an, bp, bn);
+      mpn_mulmid (rp, ap, an, bp, bn);
+
+      rn = an + 3 - bn;
+      if (mpn_cmp (refp, rp, rn))
+        {
+          /* mp_size_t need not be int, so convert explicitly to match
+             the conversion specifiers.  */
+          printf ("ERROR in test %d, an = %ld, bn = %ld, rn = %ld\n",
+                  test, (long) an, (long) bn, (long) rn);
+          printf("a: "); mpn_dump (ap, an);
+          printf("b: "); mpn_dump (bp, bn);
+          printf("r: "); mpn_dump (rp, rn);
+          printf("ref: "); mpn_dump (refp, rn);
+
+          abort();
+        }
+    }
+  TMP_FREE;
+  tests_end ();
+  return 0;
+}
Copyright 2009 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
+#include <stdlib.h>
+#include <stdio.h>
+
#include "gmp.h"
#include "gmp-impl.h"
#include "tests.h"
-#include <stdlib.h>
-#include <stdio.h>
-
/* Sizes are up to 2^SIZE_LOG limbs */
#ifndef SIZE_LOG
#define SIZE_LOG 11
Copyright 2002 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
Copyright 2002 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
Copyright 2009 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
+#include <stdlib.h>
+#include <stdio.h>
+
#include "gmp.h"
#include "gmp-impl.h"
#include "tests.h"
-#include <stdlib.h>
-#include <stdio.h>
-
/* Sizes are up to 2^SIZE_LOG limbs */
#ifndef SIZE_LOG
#define SIZE_LOG 12
--- /dev/null
+/* Instantiate the shared Toom squaring test (toom-sqr-shared.h) for
+   mpn_toom2_sqr; MIN_AN and MAX_AN bound the operand sizes it tries. */
+#define mpn_toomN_sqr mpn_toom2_sqr
+#define mpn_toomN_sqr_itch mpn_toom2_sqr_itch
+#define MIN_AN MPN_TOOM2_SQR_MINSIZE
+#define MAX_AN SQR_TOOM3_THRESHOLD
+
+#include "toom-sqr-shared.h"
--- /dev/null
+/* Instantiate the shared Toom squaring test (toom-sqr-shared.h) for
+   mpn_toom3_sqr; MIN_AN and MAX_AN bound the operand sizes it tries. */
+#define mpn_toomN_sqr mpn_toom3_sqr
+#define mpn_toomN_sqr_itch mpn_toom3_sqr_itch
+#define MIN_AN MAX(SQR_TOOM3_THRESHOLD,MPN_TOOM3_SQR_MINSIZE)
+#define MAX_AN SQR_TOOM4_THRESHOLD
+
+#include "toom-sqr-shared.h"
--- /dev/null
+/* Instantiate the shared Toom squaring test (toom-sqr-shared.h) for
+   mpn_toom4_sqr; MIN_AN and MAX_AN bound the operand sizes it tries. */
+#define mpn_toomN_sqr mpn_toom4_sqr
+#define mpn_toomN_sqr_itch mpn_toom4_sqr_itch
+#define MIN_AN MAX(SQR_TOOM3_THRESHOLD,MAX(SQR_TOOM4_THRESHOLD,MPN_TOOM4_SQR_MINSIZE))
+#define MAX_AN SQR_TOOM6_THRESHOLD
+
+#include "toom-sqr-shared.h"
--- /dev/null
+/* Instantiate the shared Toom multiplication test (toom-shared.h) for
+   mpn_toom54_mul.  MIN_AN, MIN_BN(an) and MAX_BN(an) are the size limits
+   the shared file expects its includer to define. */
+#define mpn_toomMN_mul mpn_toom54_mul
+#define mpn_toomMN_mul_itch mpn_toom54_mul_itch
+
+#define MIN_AN 31
+#define MIN_BN(an) ((3*(an) + 32) / (size_t) 5) /* 3/5 */
+#define MAX_BN(an) ((an) - 6) /* 1/1 */
+
+#include "toom-shared.h"
--- /dev/null
+/* Instantiate the shared Toom squaring test (toom-sqr-shared.h) for
+   mpn_toom6_sqr; MIN_AN and MAX_AN bound the operand sizes it tries. */
+#define mpn_toomN_sqr mpn_toom6_sqr
+#define mpn_toomN_sqr_itch mpn_toom6_sqr_itch
+#define MIN_AN MAX(SQR_TOOM3_THRESHOLD,MAX(SQR_TOOM4_THRESHOLD,MAX(SQR_TOOM6_THRESHOLD,MPN_TOOM6_SQR_MINSIZE)))
+#define MAX_AN SQR_TOOM8_THRESHOLD
+
+#include "toom-sqr-shared.h"
/* Smaller sizes not supported; may lead to recursive calls to
toom22_mul, toom33_mul, or toom44_mul with invalid input size. */
-#define MIN_AN MUL_TOOM6H_THRESHOLD
+#define MIN_AN MUL_TOOM6H_MIN
#define MIN_BN(an) (MAX ((an*3)>>3, 46))
#define COUNT 1000
--- /dev/null
+/* Instantiate the shared Toom squaring test (toom-sqr-shared.h) for
+   mpn_toom8_sqr; MIN_AN and MAX_AN bound the operand sizes it tries. */
+#define mpn_toomN_sqr mpn_toom8_sqr
+#define mpn_toomN_sqr_itch mpn_toom8_sqr_itch
+#define MIN_AN MAX(SQR_TOOM3_THRESHOLD,MAX(SQR_TOOM4_THRESHOLD,MAX(SQR_TOOM6_THRESHOLD,MAX(SQR_TOOM8_THRESHOLD,MPN_TOOM8_SQR_MINSIZE))))
+#define MAX_AN SQR_FFT_THRESHOLD
+
+#include "toom-sqr-shared.h"
/* Smaller sizes not supported; may lead to recursive calls to
toom{22,33,44,6h}_mul with invalid input size. */
-#define MIN_AN MUL_TOOM8H_THRESHOLD
+#define MIN_AN MUL_TOOM8H_MIN
-#if GMP_NUMB_BITS <= 10*3
-#define MIN_BN(an) (MAX ((an*6)/10, 86) )
-#else
-#if GMP_NUMB_BITS <= 11*3
-#define MIN_BN(an) (MAX ((an*5)/11, 86) )
-#else
-#if GMP_NUMB_BITS <= 12*3
-#define MIN_BN(an) (MAX ((an*4)/12, 86) )
-#else
-#define MIN_BN(an) (MAX ((an*4)/13, 86) )
-#endif
-#endif
-#endif
+#define MIN_BN(an) \
+(MAX(GMP_NUMB_BITS <= 10*3 ? (an*6)/10 : \
+ GMP_NUMB_BITS <= 11*3 ? (an*5)/11 : \
+ GMP_NUMB_BITS <= 12*3 ? (an*4)/12 : \
+ (an*4)/13, 86) )
#define COUNT 1000
Copyright 2009 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
+#include <stdlib.h>
+#include <stdio.h>
+
#include "gmp.h"
#include "gmp-impl.h"
#include "tests.h"
-#include <stdlib.h>
-#include <stdio.h>
-
/* Main file is expected to define mpn_toomMN_mul,
* mpn_toomMN_mul_itch, MIN_AN, MIN_BN(an), MAX_BN(an) and then
* include this file. */
--- /dev/null
+/* Test for various Toom squaring functions.
+
+Copyright 2009, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
+
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+/* Main file is expected to define mpn_toomN_sqr, mpn_toomN_sqr_itch,
+ * MIN_AN, MAX_AN and then include this file. */
+
+/* Default number of test iterations; main lets argv[1] override it. */
+#ifndef COUNT
+#define COUNT 500
+#endif
+
+/* Shared driver for the Toom squaring tests.
+
+   Runs COUNT iterations, or argv[1] iterations when given.  Each one
+   squares a random operand with mpn_toomN_sqr and compares the product
+   with refmpn_mul.  One guard limb is kept on each side of both the
+   product area and the scratch area; a changed guard limb is reported as
+   a failure, as is a wrong product.
+
+   Returns 1 if argv[1] is not a positive integer, 0 when all tests pass,
+   and aborts on the first failing test.  Nothing is tested when
+   MAX_AN <= MIN_AN.  */
+int
+main (int argc, char **argv)
+{
+  mp_ptr ap, refp, pp, scratch;
+  int count = COUNT;
+  int test;
+  gmp_randstate_ptr rands;
+  TMP_DECL;
+  TMP_MARK;
+
+  if (argc > 1)
+    {
+      char *end;
+      count = strtol (argv[1], &end, 0);
+      if (*end || count <= 0)
+        {
+          fprintf (stderr, "Invalid test count: %s.\n", argv[1]);
+          return 1;
+        }
+    }
+
+  tests_start ();
+
+  if (MAX_AN > MIN_AN) {
+    rands = RANDS;
+
+    ap = TMP_ALLOC_LIMBS (MAX_AN);
+    refp = TMP_ALLOC_LIMBS (MAX_AN * 2);
+    /* pp and scratch point one limb into their allocations, so index -1
+       and the limb just past the used area can serve as guard limbs.  */
+    pp = 1 + TMP_ALLOC_LIMBS (MAX_AN * 2 + 2);
+    scratch
+      = 1+TMP_ALLOC_LIMBS (mpn_toomN_sqr_itch (MAX_AN) + 2);
+
+    for (test = 0; test < count; test++)
+      {
+        mp_size_t an;
+        mp_size_t itch;
+        mp_limb_t p_before, p_after, s_before, s_after;
+
+        an = MIN_AN
+          + gmp_urandomm_ui (rands, MAX_AN - MIN_AN);
+
+        /* Fill operand, product area and guards with random data, then
+           remember the guard limbs around the product area.  */
+        mpn_random2 (ap, an);
+        mpn_random2 (pp-1, an * 2 + 2);
+        p_before = pp[-1];
+        p_after = pp[an * 2];
+
+        /* Same for the scratch area, sized for this particular an.  */
+        itch = mpn_toomN_sqr_itch (an);
+        ASSERT_ALWAYS (itch <= mpn_toomN_sqr_itch (MAX_AN));
+        mpn_random2 (scratch-1, itch+2);
+        s_before = scratch[-1];
+        s_after = scratch[itch];
+
+        mpn_toomN_sqr (pp, ap, an, scratch);
+        refmpn_mul (refp, ap, an, ap, an);
+        if (pp[-1] != p_before || pp[an * 2] != p_after
+            || scratch[-1] != s_before || scratch[itch] != s_after
+            || mpn_cmp (refp, pp, an * 2) != 0)
+          {
+            printf ("ERROR in test %d, an = %d\n",
+                    test, (int) an);
+            if (pp[-1] != p_before)
+              {
+                printf ("before pp:"); mpn_dump (pp -1, 1);
+                printf ("keep: "); mpn_dump (&p_before, 1);
+              }
+            if (pp[an * 2] != p_after)
+              {
+                printf ("after pp:"); mpn_dump (pp + an * 2, 1);
+                printf ("keep: "); mpn_dump (&p_after, 1);
+              }
+            if (scratch[-1] != s_before)
+              {
+                printf ("before scratch:"); mpn_dump (scratch-1, 1);
+                printf ("keep: "); mpn_dump (&s_before, 1);
+              }
+            if (scratch[itch] != s_after)
+              {
+                printf ("after scratch:"); mpn_dump (scratch + itch, 1);
+                printf ("keep: "); mpn_dump (&s_after, 1);
+              }
+            /* Dump operand, computed square and reference square.  */
+            mpn_dump (ap, an);
+            mpn_dump (pp, an * 2);
+            mpn_dump (refp, an * 2);
+
+            abort();
+          }
+      }
+    TMP_FREE;
+  }
+
+  tests_end ();
+  return 0;
+}
## Process this file with automake to generate Makefile.in
-# Copyright 1996, 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
+# Copyright 1996, 1999, 2000, 2001, 2002, 2012 Free Software Foundation, Inc.
#
-# This file is part of the GNU MP Library.
+# This file is part of the GNU MP Library test suite.
#
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 3 of the License, or (at your
-# option) any later version.
+# The GNU MP Library test suite is free software; you can redistribute it
+# and/or modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 3 of the License,
+# or (at your option) any later version.
#
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-# License for more details.
+# The GNU MP Library test suite is distributed in the hope that it will be
+# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+# Public License for more details.
#
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+# You should have received a copy of the GNU General Public License along with
+# the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/.
INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/tests
LDADD = $(top_builddir)/tests/libtests.la $(top_builddir)/libgmp.la
check_PROGRAMS = t-aors t-cmp t-cmp_ui t-cmp_si t-equal t-get_d t-get_str \
- t-inp_str t-md_2exp t-set_f t-set_str
+ t-inp_str t-inv t-md_2exp t-set_f t-set_str io reuse
TESTS = $(check_PROGRAMS)
# Temporary files used by the tests. Removed automatically if the tests
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.6 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@SET_MAKE@
-# Copyright 1996, 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
+# Copyright 1996, 1999, 2000, 2001, 2002, 2012 Free Software Foundation, Inc.
#
-# This file is part of the GNU MP Library.
+# This file is part of the GNU MP Library test suite.
#
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 3 of the License, or (at your
-# option) any later version.
+# The GNU MP Library test suite is free software; you can redistribute it
+# and/or modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 3 of the License,
+# or (at your option) any later version.
#
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-# License for more details.
+# The GNU MP Library test suite is distributed in the hope that it will be
+# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+# Public License for more details.
#
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+# You should have received a copy of the GNU General Public License along with
+# the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/.
VPATH = @srcdir@
+am__make_dryrun = \
+ { \
+ am__dry=no; \
+ case $$MAKEFLAGS in \
+ *\\[\ \ ]*) \
+ echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \
+ | grep '^AM OK$$' >/dev/null || am__dry=yes;; \
+ *) \
+ for am__flg in $$MAKEFLAGS; do \
+ case $$am__flg in \
+ *=*|--*) ;; \
+ *n*) am__dry=yes; break;; \
+ esac; \
+ done;; \
+ esac; \
+ test $$am__dry = yes; \
+ }
pkgdatadir = $(datadir)/@PACKAGE@
pkgincludedir = $(includedir)/@PACKAGE@
pkglibdir = $(libdir)/@PACKAGE@
POST_UNINSTALL = :
build_triplet = @build@
host_triplet = @host@
-ANSI2KNR = $(top_builddir)/ansi2knr
check_PROGRAMS = t-aors$(EXEEXT) t-cmp$(EXEEXT) t-cmp_ui$(EXEEXT) \
t-cmp_si$(EXEEXT) t-equal$(EXEEXT) t-get_d$(EXEEXT) \
- t-get_str$(EXEEXT) t-inp_str$(EXEEXT) t-md_2exp$(EXEEXT) \
- t-set_f$(EXEEXT) t-set_str$(EXEEXT)
+ t-get_str$(EXEEXT) t-inp_str$(EXEEXT) t-inv$(EXEEXT) \
+ t-md_2exp$(EXEEXT) t-set_f$(EXEEXT) t-set_str$(EXEEXT) \
+ io$(EXEEXT) reuse$(EXEEXT)
subdir = tests/mpq
DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
- $(top_srcdir)/configure.in
+ $(top_srcdir)/configure.ac
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
$(ACLOCAL_M4)
mkinstalldirs = $(install_sh) -d
CONFIG_HEADER = $(top_builddir)/config.h
CONFIG_CLEAN_FILES =
CONFIG_CLEAN_VPATH_FILES =
+io_SOURCES = io.c
+io_OBJECTS = io.$(OBJEXT)
+io_LDADD = $(LDADD)
+io_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+ $(top_builddir)/libgmp.la
+reuse_SOURCES = reuse.c
+reuse_OBJECTS = reuse.$(OBJEXT)
+reuse_LDADD = $(LDADD)
+reuse_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+ $(top_builddir)/libgmp.la
t_aors_SOURCES = t-aors.c
-t_aors_OBJECTS = t-aors$U.$(OBJEXT)
+t_aors_OBJECTS = t-aors.$(OBJEXT)
t_aors_LDADD = $(LDADD)
t_aors_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_cmp_SOURCES = t-cmp.c
-t_cmp_OBJECTS = t-cmp$U.$(OBJEXT)
+t_cmp_OBJECTS = t-cmp.$(OBJEXT)
t_cmp_LDADD = $(LDADD)
t_cmp_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_cmp_si_SOURCES = t-cmp_si.c
-t_cmp_si_OBJECTS = t-cmp_si$U.$(OBJEXT)
+t_cmp_si_OBJECTS = t-cmp_si.$(OBJEXT)
t_cmp_si_LDADD = $(LDADD)
t_cmp_si_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_cmp_ui_SOURCES = t-cmp_ui.c
-t_cmp_ui_OBJECTS = t-cmp_ui$U.$(OBJEXT)
+t_cmp_ui_OBJECTS = t-cmp_ui.$(OBJEXT)
t_cmp_ui_LDADD = $(LDADD)
t_cmp_ui_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_equal_SOURCES = t-equal.c
-t_equal_OBJECTS = t-equal$U.$(OBJEXT)
+t_equal_OBJECTS = t-equal.$(OBJEXT)
t_equal_LDADD = $(LDADD)
t_equal_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_get_d_SOURCES = t-get_d.c
-t_get_d_OBJECTS = t-get_d$U.$(OBJEXT)
+t_get_d_OBJECTS = t-get_d.$(OBJEXT)
t_get_d_LDADD = $(LDADD)
t_get_d_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_get_str_SOURCES = t-get_str.c
-t_get_str_OBJECTS = t-get_str$U.$(OBJEXT)
+t_get_str_OBJECTS = t-get_str.$(OBJEXT)
t_get_str_LDADD = $(LDADD)
t_get_str_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_inp_str_SOURCES = t-inp_str.c
-t_inp_str_OBJECTS = t-inp_str$U.$(OBJEXT)
+t_inp_str_OBJECTS = t-inp_str.$(OBJEXT)
t_inp_str_LDADD = $(LDADD)
t_inp_str_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
+t_inv_SOURCES = t-inv.c
+t_inv_OBJECTS = t-inv.$(OBJEXT)
+t_inv_LDADD = $(LDADD)
+t_inv_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+ $(top_builddir)/libgmp.la
t_md_2exp_SOURCES = t-md_2exp.c
-t_md_2exp_OBJECTS = t-md_2exp$U.$(OBJEXT)
+t_md_2exp_OBJECTS = t-md_2exp.$(OBJEXT)
t_md_2exp_LDADD = $(LDADD)
t_md_2exp_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_set_f_SOURCES = t-set_f.c
-t_set_f_OBJECTS = t-set_f$U.$(OBJEXT)
+t_set_f_OBJECTS = t-set_f.$(OBJEXT)
t_set_f_LDADD = $(LDADD)
t_set_f_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_set_str_SOURCES = t-set_str.c
-t_set_str_OBJECTS = t-set_str$U.$(OBJEXT)
+t_set_str_OBJECTS = t-set_str.$(OBJEXT)
t_set_str_LDADD = $(LDADD)
t_set_str_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
--mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) \
$(LDFLAGS) -o $@
-SOURCES = t-aors.c t-cmp.c t-cmp_si.c t-cmp_ui.c t-equal.c t-get_d.c \
- t-get_str.c t-inp_str.c t-md_2exp.c t-set_f.c t-set_str.c
-DIST_SOURCES = t-aors.c t-cmp.c t-cmp_si.c t-cmp_ui.c t-equal.c \
- t-get_d.c t-get_str.c t-inp_str.c t-md_2exp.c t-set_f.c \
- t-set_str.c
+SOURCES = io.c reuse.c t-aors.c t-cmp.c t-cmp_si.c t-cmp_ui.c \
+ t-equal.c t-get_d.c t-get_str.c t-inp_str.c t-inv.c \
+ t-md_2exp.c t-set_f.c t-set_str.c
+DIST_SOURCES = io.c reuse.c t-aors.c t-cmp.c t-cmp_si.c t-cmp_ui.c \
+ t-equal.c t-get_d.c t-get_str.c t-inp_str.c t-inv.c \
+ t-md_2exp.c t-set_f.c t-set_str.c
+am__can_run_installinfo = \
+ case $$AM_UPDATE_INFO_DIR in \
+ n|no|NO) false;; \
+ *) (install-info --version) >/dev/null 2>&1;; \
+ esac
ETAGS = etags
CTAGS = ctags
am__tty_colors = \
SPEED_CYCLECOUNTER_OBJ = @SPEED_CYCLECOUNTER_OBJ@
STRIP = @STRIP@
TAL_OBJECT = @TAL_OBJECT@
+TUNE_LIBS = @TUNE_LIBS@
TUNE_SQR_OBJ = @TUNE_SQR_OBJ@
-U = @U@
U_FOR_BUILD = @U_FOR_BUILD@
VERSION = @VERSION@
WITH_READLINE_01 = @WITH_READLINE_01@
mkdir_p = @mkdir_p@
mpn_objects = @mpn_objects@
mpn_objs_in_libgmp = @mpn_objs_in_libgmp@
-mpn_objs_in_libmp = @mpn_objs_in_libmp@
oldincludedir = @oldincludedir@
pdfdir = @pdfdir@
prefix = @prefix@
list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
echo " rm -f" $$list; \
rm -f $$list
-t-aors$(EXEEXT): $(t_aors_OBJECTS) $(t_aors_DEPENDENCIES)
+io$(EXEEXT): $(io_OBJECTS) $(io_DEPENDENCIES) $(EXTRA_io_DEPENDENCIES)
+ @rm -f io$(EXEEXT)
+ $(LINK) $(io_OBJECTS) $(io_LDADD) $(LIBS)
+reuse$(EXEEXT): $(reuse_OBJECTS) $(reuse_DEPENDENCIES) $(EXTRA_reuse_DEPENDENCIES)
+ @rm -f reuse$(EXEEXT)
+ $(LINK) $(reuse_OBJECTS) $(reuse_LDADD) $(LIBS)
+t-aors$(EXEEXT): $(t_aors_OBJECTS) $(t_aors_DEPENDENCIES) $(EXTRA_t_aors_DEPENDENCIES)
@rm -f t-aors$(EXEEXT)
$(LINK) $(t_aors_OBJECTS) $(t_aors_LDADD) $(LIBS)
-t-cmp$(EXEEXT): $(t_cmp_OBJECTS) $(t_cmp_DEPENDENCIES)
+t-cmp$(EXEEXT): $(t_cmp_OBJECTS) $(t_cmp_DEPENDENCIES) $(EXTRA_t_cmp_DEPENDENCIES)
@rm -f t-cmp$(EXEEXT)
$(LINK) $(t_cmp_OBJECTS) $(t_cmp_LDADD) $(LIBS)
-t-cmp_si$(EXEEXT): $(t_cmp_si_OBJECTS) $(t_cmp_si_DEPENDENCIES)
+t-cmp_si$(EXEEXT): $(t_cmp_si_OBJECTS) $(t_cmp_si_DEPENDENCIES) $(EXTRA_t_cmp_si_DEPENDENCIES)
@rm -f t-cmp_si$(EXEEXT)
$(LINK) $(t_cmp_si_OBJECTS) $(t_cmp_si_LDADD) $(LIBS)
-t-cmp_ui$(EXEEXT): $(t_cmp_ui_OBJECTS) $(t_cmp_ui_DEPENDENCIES)
+t-cmp_ui$(EXEEXT): $(t_cmp_ui_OBJECTS) $(t_cmp_ui_DEPENDENCIES) $(EXTRA_t_cmp_ui_DEPENDENCIES)
@rm -f t-cmp_ui$(EXEEXT)
$(LINK) $(t_cmp_ui_OBJECTS) $(t_cmp_ui_LDADD) $(LIBS)
-t-equal$(EXEEXT): $(t_equal_OBJECTS) $(t_equal_DEPENDENCIES)
+t-equal$(EXEEXT): $(t_equal_OBJECTS) $(t_equal_DEPENDENCIES) $(EXTRA_t_equal_DEPENDENCIES)
@rm -f t-equal$(EXEEXT)
$(LINK) $(t_equal_OBJECTS) $(t_equal_LDADD) $(LIBS)
-t-get_d$(EXEEXT): $(t_get_d_OBJECTS) $(t_get_d_DEPENDENCIES)
+t-get_d$(EXEEXT): $(t_get_d_OBJECTS) $(t_get_d_DEPENDENCIES) $(EXTRA_t_get_d_DEPENDENCIES)
@rm -f t-get_d$(EXEEXT)
$(LINK) $(t_get_d_OBJECTS) $(t_get_d_LDADD) $(LIBS)
-t-get_str$(EXEEXT): $(t_get_str_OBJECTS) $(t_get_str_DEPENDENCIES)
+t-get_str$(EXEEXT): $(t_get_str_OBJECTS) $(t_get_str_DEPENDENCIES) $(EXTRA_t_get_str_DEPENDENCIES)
@rm -f t-get_str$(EXEEXT)
$(LINK) $(t_get_str_OBJECTS) $(t_get_str_LDADD) $(LIBS)
-t-inp_str$(EXEEXT): $(t_inp_str_OBJECTS) $(t_inp_str_DEPENDENCIES)
+t-inp_str$(EXEEXT): $(t_inp_str_OBJECTS) $(t_inp_str_DEPENDENCIES) $(EXTRA_t_inp_str_DEPENDENCIES)
@rm -f t-inp_str$(EXEEXT)
$(LINK) $(t_inp_str_OBJECTS) $(t_inp_str_LDADD) $(LIBS)
-t-md_2exp$(EXEEXT): $(t_md_2exp_OBJECTS) $(t_md_2exp_DEPENDENCIES)
+t-inv$(EXEEXT): $(t_inv_OBJECTS) $(t_inv_DEPENDENCIES) $(EXTRA_t_inv_DEPENDENCIES)
+ @rm -f t-inv$(EXEEXT)
+ $(LINK) $(t_inv_OBJECTS) $(t_inv_LDADD) $(LIBS)
+t-md_2exp$(EXEEXT): $(t_md_2exp_OBJECTS) $(t_md_2exp_DEPENDENCIES) $(EXTRA_t_md_2exp_DEPENDENCIES)
@rm -f t-md_2exp$(EXEEXT)
$(LINK) $(t_md_2exp_OBJECTS) $(t_md_2exp_LDADD) $(LIBS)
-t-set_f$(EXEEXT): $(t_set_f_OBJECTS) $(t_set_f_DEPENDENCIES)
+t-set_f$(EXEEXT): $(t_set_f_OBJECTS) $(t_set_f_DEPENDENCIES) $(EXTRA_t_set_f_DEPENDENCIES)
@rm -f t-set_f$(EXEEXT)
$(LINK) $(t_set_f_OBJECTS) $(t_set_f_LDADD) $(LIBS)
-t-set_str$(EXEEXT): $(t_set_str_OBJECTS) $(t_set_str_DEPENDENCIES)
+t-set_str$(EXEEXT): $(t_set_str_OBJECTS) $(t_set_str_DEPENDENCIES) $(EXTRA_t_set_str_DEPENDENCIES)
@rm -f t-set_str$(EXEEXT)
$(LINK) $(t_set_str_OBJECTS) $(t_set_str_LDADD) $(LIBS)
distclean-compile:
-rm -f *.tab.c
-$(top_builddir)/ansi2knr:
- $(am__cd) $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) ./ansi2knr
-
-mostlyclean-kr:
- -test "$U" = "" || rm -f *_.c
.c.o:
$(COMPILE) -c $<
.c.lo:
$(LTCOMPILE) -c -o $@ $<
-t-aors_.c: t-aors.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-aors.c; then echo $(srcdir)/t-aors.c; else echo t-aors.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-cmp_.c: t-cmp.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-cmp.c; then echo $(srcdir)/t-cmp.c; else echo t-cmp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-cmp_si_.c: t-cmp_si.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-cmp_si.c; then echo $(srcdir)/t-cmp_si.c; else echo t-cmp_si.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-cmp_ui_.c: t-cmp_ui.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-cmp_ui.c; then echo $(srcdir)/t-cmp_ui.c; else echo t-cmp_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-equal_.c: t-equal.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-equal.c; then echo $(srcdir)/t-equal.c; else echo t-equal.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-get_d_.c: t-get_d.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-get_d.c; then echo $(srcdir)/t-get_d.c; else echo t-get_d.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-get_str_.c: t-get_str.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-get_str.c; then echo $(srcdir)/t-get_str.c; else echo t-get_str.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-inp_str_.c: t-inp_str.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-inp_str.c; then echo $(srcdir)/t-inp_str.c; else echo t-inp_str.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-md_2exp_.c: t-md_2exp.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-md_2exp.c; then echo $(srcdir)/t-md_2exp.c; else echo t-md_2exp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-set_f_.c: t-set_f.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-set_f.c; then echo $(srcdir)/t-set_f.c; else echo t-set_f.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-set_str_.c: t-set_str.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-set_str.c; then echo $(srcdir)/t-set_str.c; else echo t-set_str.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-aors_.$(OBJEXT) t-aors_.lo t-cmp_.$(OBJEXT) t-cmp_.lo \
-t-cmp_si_.$(OBJEXT) t-cmp_si_.lo t-cmp_ui_.$(OBJEXT) t-cmp_ui_.lo \
-t-equal_.$(OBJEXT) t-equal_.lo t-get_d_.$(OBJEXT) t-get_d_.lo \
-t-get_str_.$(OBJEXT) t-get_str_.lo t-inp_str_.$(OBJEXT) t-inp_str_.lo \
-t-md_2exp_.$(OBJEXT) t-md_2exp_.lo t-set_f_.$(OBJEXT) t-set_f_.lo \
-t-set_str_.$(OBJEXT) t-set_str_.lo : $(ANSI2KNR)
mostlyclean-libtool:
-rm -f *.lo
fi; \
dashes=`echo "$$dashes" | sed s/./=/g`; \
if test "$$failed" -eq 0; then \
- echo "$$grn$$dashes"; \
+ col="$$grn"; \
else \
- echo "$$red$$dashes"; \
+ col="$$red"; \
fi; \
- echo "$$banner"; \
- test -z "$$skipped" || echo "$$skipped"; \
- test -z "$$report" || echo "$$report"; \
- echo "$$dashes$$std"; \
+ echo "$${col}$$dashes$${std}"; \
+ echo "$${col}$$banner$${std}"; \
+ test -z "$$skipped" || echo "$${col}$$skipped$${std}"; \
+ test -z "$$report" || echo "$${col}$$report$${std}"; \
+ echo "$${col}$$dashes$${std}"; \
test "$$failed" -eq 0; \
else :; fi
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
mostlyclean: mostlyclean-am
-mostlyclean-am: mostlyclean-compile mostlyclean-generic mostlyclean-kr \
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
mostlyclean-libtool
pdf: pdf-am
uninstall-am:
-.MAKE: $(top_builddir)/ansi2knr check-am install-am install-strip
+.MAKE: check-am install-am install-strip
.PHONY: CTAGS GTAGS all all-am check check-TESTS check-am clean \
clean-checkPROGRAMS clean-generic clean-libtool ctags \
install-ps install-ps-am install-strip installcheck \
installcheck-am installdirs maintainer-clean \
maintainer-clean-generic mostlyclean mostlyclean-compile \
- mostlyclean-generic mostlyclean-kr mostlyclean-libtool pdf \
- pdf-am ps ps-am tags uninstall uninstall-am
+ mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
+ tags uninstall uninstall-am
$(top_builddir)/tests/libtests.la:
--- /dev/null
+/* Test conversion and I/O using mpq_out_str and mpq_inp_str.
+
+Copyright 1993, 1994, 1996, 2000, 2001, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
+
+#include "config.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#if HAVE_UNISTD_H
+#include <unistd.h> /* for unlink */
+#endif
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+#define FILENAME "io.tmp"
+
+/* Write X to stdout in base BASE, followed by a newline.  */
+void
+debug_mp (mpq_t x, int base)
+{
+  mpq_out_str (stdout, base, x);
+  putc ('\n', stdout);
+}
+
+/* Round-trip random rationals through mpq_out_str and mpq_inp_str.
+
+   Each iteration writes a random canonical rational to FILENAME in a
+   randomly chosen base, reads it back with the same base, and aborts if
+   the write fails, the read fails, the count returned by mpq_inp_str
+   differs from ftell, or the value read back differs from the one written.
+   When exactly one argument is given it replaces the default of 10000
+   iterations.  */
+int
+main (int argc, char **argv)
+{
+  mpq_t op1, op2;
+  int i;
+  int reps = 10000;
+  FILE *fp;
+  int base;
+  gmp_randstate_ptr rands;
+  mpz_t bs;
+  unsigned long bsi, size_range;
+  size_t nread;
+
+  tests_start ();
+  rands = RANDS;
+
+  mpz_init (bs);
+
+  if (argc == 2)
+    reps = atoi (argv[1]);
+
+  mpq_init (op1);
+  mpq_init (op2);
+
+  fp = fopen (FILENAME, "w+");
+  if (fp == NULL)
+    {
+      /* Report the failure here rather than passing a null stream to
+         rewind and mpq_out_str below.  */
+      printf ("cannot create %s\n", FILENAME);
+      abort ();
+    }
+
+  for (i = 0; i < reps; i++)
+    {
+      mpz_urandomb (bs, rands, 32);
+      size_range = mpz_get_ui (bs) % 10 + 2;
+
+      /* The value drawn here is not used; the draw itself is kept so that
+         the generator state seen by the calls below is unchanged.  */
+      mpz_urandomb (bs, rands, size_range);
+
+      mpz_errandomb (mpq_numref(op1), rands, 512L);
+      mpz_errandomb_nonzero (mpq_denref(op1), rands, 512L);
+      mpq_canonicalize (op1);
+
+      /* One random bit decides the sign.  */
+      mpz_urandomb (bs, rands, 1);
+      bsi = mpz_get_ui (bs);
+      if ((bsi & 1) != 0)
+        mpq_neg (op1, op1);
+
+      /* Pick a base in 1..36, then replace 1 by 0.  */
+      mpz_urandomb (bs, rands, 16);
+      bsi = mpz_get_ui (bs);
+      base = bsi % 36 + 1;
+      if (base == 1)
+        base = 0;
+
+      /* Write the value followed by a space at the start of the file.  */
+      rewind (fp);
+      if (mpq_out_str (fp, base, op1) == 0
+          || putc (' ', fp) == EOF
+          || fflush (fp) != 0)
+        {
+          printf ("mpq_out_str write error\n");
+          abort ();
+        }
+
+      /* Read it back from the start of the file.  */
+      rewind (fp);
+      nread = mpq_inp_str (op2, fp, base);
+      if (nread == 0)
+        {
+          if (ferror (fp))
+            printf ("mpq_inp_str stream read error\n");
+          else
+            printf ("mpq_inp_str data conversion error\n");
+          abort ();
+        }
+
+      if (nread != ftell(fp))
+        {
+          printf ("mpq_inp_str nread doesn't match ftell\n");
+          printf (" nread %lu\n", (unsigned long) nread);
+          printf (" ftell %ld\n", ftell(fp));
+          abort ();
+        }
+
+      if (mpq_cmp (op1, op2))
+        {
+          printf ("ERROR\n");
+          printf ("op1 = "); debug_mp (op1, -16);
+          printf ("op2 = "); debug_mp (op2, -16);
+          printf ("base = %d\n", base);
+          abort ();
+        }
+    }
+
+  fclose (fp);
+
+  unlink (FILENAME);
+
+  mpz_clear (bs);
+  mpq_clear (op1);
+  mpq_clear (op2);
+
+  tests_end ();
+  exit (0);
+}
--- /dev/null
+/* Test that routines allow reusing a source variable as destination.
+
+Copyright 1996, 2000, 2001, 2002, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+#if __GMP_LIBGMP_DLL
+
+/* FIXME: When linking to a DLL libgmp, mpq_add etc can't be used as
+ initializers for global variables because they're effectively global
+ variables (function pointers) themselves. Perhaps calling a test
+ function successively with mpq_add etc would be better. */
+
+/* Stand-in for the real test when libgmp is a DLL: print a notice that the
+   test is suppressed and finish successfully.  */
+int
+main (void)
+{
+  printf ("Test suppressed for windows DLL\n");
+  return 0;
+}
+
+
+#else /* ! __GMP_LIBGMP_DLL */
+
+#ifndef SIZE
+#define SIZE 16
+#endif
+
+void dump_abort (const char *, mpq_t, mpq_t);
+
+/* Functions taking a destination and two rational sources.  dss_funcs and
+   dss_func_names are indexed together, so their entries must stay in the
+   same order.  mpq_div sits at index 0 because main skips index 0 when the
+   second operand is zero.  */
+typedef void (*dss_func) (mpq_ptr, mpq_srcptr, mpq_srcptr);
+
+dss_func dss_funcs[] =
+{
+  mpq_div,
+  mpq_add,
+  mpq_mul,
+  mpq_sub,
+};
+
+const char *dss_func_names[] =
+{
+  "mpq_div",
+  "mpq_add",
+  "mpq_mul",
+  "mpq_sub",
+};
+
+/* Functions taking a destination and one rational source; names in the
+   same order as the functions.  */
+typedef void (*ds_func) (mpq_ptr, mpq_srcptr);
+
+ds_func ds_funcs[] =
+{
+  mpq_abs,
+  mpq_neg,
+};
+
+const char *ds_func_names[] =
+{
+  "mpq_abs",
+  "mpq_neg",
+};
+
+/* Functions taking a destination, one rational source and an unsigned long
+   operand; names in the same order as the functions.  */
+typedef void (*dsi_func) (mpq_ptr, mpq_srcptr, unsigned long int);
+
+dsi_func dsi_funcs[] =
+{
+  mpq_mul_2exp,
+  mpq_div_2exp
+};
+
+const char *dsi_func_names[] =
+{
+  "mpq_mul_2exp",
+  "mpq_div_2exp"
+};
+
+/* If the low bit of *BITS is set, clear and re-initialize X.  In either
+   case shift that bit out of *BITS afterwards.  */
+static void
+maybe_reinit (mpq_t x, unsigned long int *bits)
+{
+  if (*bits & 1)
+    {
+      mpq_clear (x);
+      mpq_init (x);
+    }
+  *bits >>= 1;
+}
+
+/* Store a random canonical rational in X.  Two bits of *BITS are consumed:
+   the first decides whether X is re-initialized beforehand, the second
+   whether the numerator is negated.  */
+static void
+random_operand (mpq_t x, gmp_randstate_ptr rands, unsigned long int *bits)
+{
+  maybe_reinit (x, bits);
+  mpz_errandomb (mpq_numref(x), rands, 512L);
+  mpz_errandomb_nonzero (mpq_denref(x), rands, 512L);
+  if (*bits & 1)
+    mpz_neg (mpq_numref(x),mpq_numref(x));
+  *bits >>= 1;
+  mpq_canonicalize (x);
+}
+
+/* For every function in the dss, ds and dsi tables, compare the result
+   computed into a separate destination with the result computed when the
+   destination is also one of the sources, and call dump_abort on any
+   difference.  The first argument, if present, replaces the default of
+   100 passes.  */
+int
+main (int argc, char **argv)
+{
+  int idx;
+  int round, reps = 100;
+  mpq_t in1, in2, out1;
+  mpq_t res1, res2, res3;
+  unsigned long int bits, shift;
+  gmp_randstate_ptr rands;
+
+  tests_start ();
+
+  if (argc > 1)
+    reps = strtol (argv[1], 0, 0);
+
+  rands = RANDS;
+
+  mpq_init (in1);
+  mpq_init (in2);
+  mpq_init (out1);
+  mpq_init (res1);
+  mpq_init (res2);
+  mpq_init (res3);
+
+  for (round = 1; round <= reps; round++)
+    {
+      /* One draw supplies the decision bits for the whole pass.  */
+      bits = urandom ();
+
+      random_operand (in1, rands, &bits);
+      random_operand (in2, rands, &bits);
+
+      /* Two-source functions: destination aliased with either source.  */
+      for (idx = 0; idx < sizeof (dss_funcs) / sizeof (dss_func); idx++)
+        {
+          /* Don't divide by 0. */
+          if (idx == 0 && mpq_cmp_ui (in2, 0, 1) == 0)
+            continue;
+
+          maybe_reinit (res1, &bits);
+
+          /* Reference result, no aliasing.  */
+          (dss_funcs[idx]) (res1, in1, in2);
+
+          /* Destination is the first source.  */
+          mpq_set (out1, in1);
+          (dss_funcs[idx]) (out1, out1, in2);
+          mpq_set (res2, out1);
+
+          /* Destination is the second source.  */
+          mpq_set (out1, in2);
+          (dss_funcs[idx]) (out1, in1, out1);
+          mpq_set (res3, out1);
+
+          if (mpq_cmp (res1, res2) != 0)
+            dump_abort (dss_func_names[idx], res1, res2);
+          if (mpq_cmp (res1, res3) != 0)
+            dump_abort (dss_func_names[idx], res1, res3);
+        }
+
+      /* One-source functions: destination aliased with the source.  */
+      for (idx = 0; idx < sizeof (ds_funcs) / sizeof (ds_func); idx++)
+        {
+          maybe_reinit (res1, &bits);
+
+          (ds_funcs[idx]) (res1, in1);
+
+          mpq_set (out1, in1);
+          (ds_funcs[idx]) (out1, out1);
+          mpq_set (res2, out1);
+
+          if (mpq_cmp (res1, res2) != 0)
+            dump_abort (ds_func_names[idx], res1, res2);
+        }
+
+      /* Functions with an unsigned long operand below 65536.  */
+      shift = urandom () % 65536;
+      for (idx = 0; idx < sizeof (dsi_funcs) / sizeof (dsi_func); idx++)
+        {
+          maybe_reinit (res1, &bits);
+
+          (dsi_funcs[idx]) (res1, in1, shift);
+
+          mpq_set (out1, in1);
+          (dsi_funcs[idx]) (out1, out1, shift);
+          mpq_set (res2, out1);
+
+          if (mpq_cmp (res1, res2) != 0)
+            dump_abort (dsi_func_names[idx], res1, res2);
+        }
+    }
+
+  mpq_clear (in1);
+  mpq_clear (in2);
+  mpq_clear (out1);
+  mpq_clear (res1);
+  mpq_clear (res2);
+  mpq_clear (res3);
+
+  tests_end ();
+  exit (0);
+}
+
+/* Print a failure message naming the function NAME, trace the two
+   differing results RES1 and RES2, and abort.  */
+void
+dump_abort (const char *name, mpq_t res1, mpq_t res2)
+{
+  fprintf (stdout, "failure in %s:\n", name);
+  mpq_trace (" res1 ", res1);
+  mpq_trace (" res2 ", res2);
+  abort ();
+}
+
+#endif /* ! __GMP_LIBGMP_DLL */
Copyright 2001 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include "config.h"
Copyright 1996, 2001 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
#include "gmp-impl.h"
#include "tests.h"
-#define NUM(x) (&((x)->_mp_num))
-#define DEN(x) (&((x)->_mp_den))
-
#define SGN(x) ((x) < 0 ? -1 : (x) > 0 ? 1 : 0)
int
Copyright 2001 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
Copyright 1996, 1997, 2001, 2002 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
#include "gmp-impl.h"
#include "tests.h"
-#define NUM(x) (&((x)->_mp_num))
-#define DEN(x) (&((x)->_mp_den))
-
#define SGN(x) ((x) < 0 ? -1 : (x) > 0 ? 1 : 0)
int
Copyright 2001 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
SET4 (x, 1,1,2,3,4, 3,88,5,6,7);
SET4 (y, 4,1,2,3,4, 3,99,5,6,7);
check_all (x, y, 0);
+ SET4 (x, 4,1,2,3,4, 3,88,5,6,7);
+ SET4 (y, 4,1,2,3,4, 2,99,5,6,7);
+ check_all (x, y, 0);
mpq_clear (x);
mpq_clear (y);
/* Test mpq_get_d and mpq_set_d
-Copyright 1991, 1993, 1994, 1996, 2000, 2001, 2002, 2003 Free Software
-Foundation, Inc.
+Copyright 1991, 1993, 1994, 1996, 2000, 2001, 2002, 2003, 2012, 2013 Free
+Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
/* VAX D floats only have an 8 bit signed exponent, so anything 2^128 or
bigger will overflow, that being 4 limbs. */
-#if defined (__vax__) && SIZE > 4
+#if (defined (__vax) || defined (__vax__)) && SIZE > 4
#undef SIZE
#define SIZE 4
#define EPSIZE 3
#define EPSIZE SIZE
#endif
-void dump __GMP_PROTO ((mpq_t));
+void dump (mpq_t);
void
check_monotonic (int argc, char **argv)
}
}
+/* Half-width of the range of binary exponents used by check_random: each
+ test scales its random mantissa by 2^exp with -MAXEXP <= exp < MAXEXP. */
+#define MAXEXP 500
+
+/* VAX D floats only have an 8 bit signed exponent, so use a much smaller
+ exponent range there. */
+#if defined (__vax) || defined (__vax__)
+#undef MAXEXP
+#define MAXEXP 30
+#endif
+
+/* Check mpq_set_d on random doubles. Each test builds d = a * 2^exp from a
+ random 53-bit integer a and a random exponent exp, converts d with
+ mpq_set_d, and verifies with exact integer arithmetic that the resulting
+ numerator equals denominator * a * 2^exp. Runs 100000 tests by default,
+ or 100 * atoi (argv[1]) when exactly one argument is given. Prints a
+ diagnostic and aborts on the first mismatch. */
void
check_random (int argc, char **argv)
{
- double d, d2, nd, dd;
+ gmp_randstate_ptr rands = RANDS;
+
+ double d;
mpq_t q;
- mp_limb_t rp[LIMBS_PER_DOUBLE + 1];
+ mpz_t a, t;
+ int exp;
+
int test, reps = 100000;
- int i;
if (argc == 2)
reps = 100 * atoi (argv[1]);
mpq_init (q);
+ mpz_init (a);
+ mpz_init (t);
for (test = 0; test < reps; test++)
{
- mpn_random2 (rp, LIMBS_PER_DOUBLE + 1);
- d = 0.0;
- for (i = LIMBS_PER_DOUBLE - 1; i >= 0; i--)
- d = d * MP_BASE_AS_DOUBLE + rp[i];
- d = my_ldexp (d, (int) (rp[LIMBS_PER_DOUBLE] % 1000) - 500);
+ mpz_rrandomb (a, rands, 53);
+ mpz_urandomb (t, rands, 32);
+ /* Reduce in unsigned arithmetic, then convert to int before subtracting,
+ so negative exponents come from signed arithmetic rather than from
+ unsigned wrap-around followed by a narrowing conversion. */
+ exp = (int) (mpz_get_ui (t) % (2*MAXEXP)) - MAXEXP;
+
+ d = my_ldexp (mpz_get_d (a), exp);
mpq_set_d (q, d);
- nd = mpz_get_d (mpq_numref (q));
- dd = mpz_get_d (mpq_denref (q));
- d2 = nd / dd;
- if (d != d2)
+ /* Check that num/den = a * 2^exp, i.e. that
+ den * a * 2^exp = num, using integers only. */
+ mpz_mul (t, a, mpq_denref (q));
+ if (exp > 0)
+ mpz_mul_2exp (t, t, exp);
+ else
+ {
+ /* For exp <= 0 the product must be an exact multiple of 2^-exp,
+ otherwise the equality cannot hold. */
+ if (!mpz_divisible_2exp_p (t, -exp))
+ goto fail;
+ mpz_div_2exp (t, t, -exp);
+ }
+ if (mpz_cmp (t, mpq_numref (q)) != 0)
{
+ fail:
printf ("ERROR (check_random test %d): bad mpq_set_d results\n", test);
printf ("%.16g\n", d);
- printf ("%.16g\n", d2);
+ gmp_printf ("%Qd\n", q);
abort ();
}
}
mpq_clear (q);
+ mpz_clear (t);
+ mpz_clear (a);
}
void
Copyright 2001 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
Copyright 2001, 2002 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include "config.h"
--- /dev/null
+/* Test mpq_inv (and set/get_num/den).
+
+Copyright 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+/* Exercise mpq_inv together with mpq_set_num/mpq_set_den and
+ mpq_get_num/mpq_get_den on the single value -420000000000000000000000/13,
+ using both the separate-destination and the in-place form of mpq_inv.
+ Any failed check aborts through ASSERT_ALWAYS or MPQ_CHECK_FORMAT;
+ returns 0 when all checks pass. */
+int
+main (int argc, char **argv)
+{
+ mpq_t a, b;
+ mpz_t m, n;
+ const char *s = "-420000000000000000000000";
+
+ tests_start ();
+
+ mpq_inits (a, b, (mpq_ptr)0);
+ mpz_inits (m, n, (mpz_ptr)0);
+
+ /* Build a = s/13 field by field. */
+ mpz_set_ui (m, 13);
+ mpq_set_den (a, m);
+ /* Make sure the literal really was parsed, otherwise the checks below
+ would run against some other value of m. */
+ ASSERT_ALWAYS (mpz_set_str (m, s, 0) == 0);
+ mpq_set_num (a, m);
+ MPQ_CHECK_FORMAT (a);
+
+ /* b = 1/a: a is negative, and the numerator of the inverse is expected
+ to be -13. */
+ mpq_inv (b, a);
+ MPQ_CHECK_FORMAT (b);
+ mpq_get_num (n, b);
+ ASSERT_ALWAYS (mpz_cmp_si (n, -13) == 0);
+
+ /* Negate b, then invert it once into a and once in place. */
+ mpq_neg (b, b);
+ mpq_inv (a, b);
+ MPQ_CHECK_FORMAT (a);
+ mpq_inv (b, b);
+ MPQ_CHECK_FORMAT (b);
+ mpq_get_den (n, b);
+ ASSERT_ALWAYS (mpz_cmp_ui (n, 13) == 0);
+
+ /* a is now the negation of the starting value, so its numerator plus m
+ (which still holds the starting numerator) must be zero. */
+ mpq_get_num (n, a);
+ mpz_add (n, n, m);
+ ASSERT_ALWAYS (mpz_sgn (n) == 0);
+
+ mpq_clears (a, b, (mpq_ptr)0);
+ mpz_clears (m, n, (mpz_ptr)0);
+
+ tests_end ();
+ return 0;
+}
Copyright 2000, 2001 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
const char *den;
};
+/* Check mpq_mul_2exp and mpq_div_2exp on random operands. For each of
+ 10000 random rationals q and random shift counts, q * 2^shift must not
+ compare below q, and dividing that product by 2^shift again must give a
+ value that compares equal to q. Prints a message and aborts on the
+ first failure. */
+void
+check_random (void)
+{
+ gmp_randstate_ptr rands;
+ mpz_t bs;
+ unsigned long arg_size, size_range;
+ mpq_t q, r;
+ int i;
+ mp_bitcnt_t shift;
+ int reps = 10000;
+
+ rands = RANDS;
+
+ mpz_init (bs);
+ mpq_init (q);
+ mpq_init (r);
+
+ for (i = 0; i < reps; i++)
+ {
+ mpz_urandomb (bs, rands, 32);
+ size_range = mpz_get_ui (bs) % 11 + 2; /* 2..12, so 0..4095 bit operands */
+
+ mpz_urandomb (bs, rands, size_range);
+ arg_size = mpz_get_ui (bs);
+ mpz_rrandomb (mpq_numref (q), rands, arg_size);
+ /* Redraw the denominator until it is non-zero. */
+ do
+ {
+ mpz_urandomb (bs, rands, size_range);
+ arg_size = mpz_get_ui (bs);
+ mpz_rrandomb (mpq_denref (q), rands, arg_size);
+ }
+ while (mpz_sgn (mpq_denref (q)) == 0);
+
+ /* We now have a random rational in q, albeit an unnormalised one. The
+ lack of normalisation should not matter here, so let's save the time a
+ gcd would require. */
+
+ mpz_urandomb (bs, rands, 32);
+ shift = mpz_get_ui (bs) % 4096; /* 0..4095 */
+
+ mpq_mul_2exp (r, q, shift);
+
+ /* NOTE(review): this relies on q being non-negative, which presumably
+ holds because both parts come from mpz_rrandomb -- confirm. */
+ if (mpq_cmp (r, q) < 0)
+ {
+ printf ("mpq_mul_2exp wrong on random\n");
+ abort ();
+ }
+
+ /* Undo the shift in place; the value must compare equal to q again. */
+ mpq_div_2exp (r, r, shift);
+
+ if (mpq_cmp (r, q) != 0)
+ {
+ printf ("mpq_mul_2exp or mpq_div_2exp wrong on random\n");
+ abort ();
+ }
+ }
+ mpq_clear (q);
+ mpq_clear (r);
+ mpz_clear (bs);
+}
+
int
-main (void)
+main (int argc, char **argv)
{
static const struct {
struct pair_t left;
{ {"1","0x10000000000000000"}, 3, {"1","0x2000000000000000"} },
};
- void (*fun) __GMP_PROTO ((mpq_ptr, mpq_srcptr, unsigned long));
+ void (*fun) (mpq_ptr, mpq_srcptr, unsigned long);
const struct pair_t *p_start, *p_want;
const char *name;
mpq_t sep, got, want;
}
}
+ check_random ();
+
mpq_clear (sep);
mpq_clear (got);
mpq_clear (want);
Copyright 2000, 2001 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
Copyright 2001 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
# Copyright 1996, 1997, 1999, 2000, 2001, 2002, 2003, 2009, 2012 Free Software
# Foundation, Inc.
#
-# This file is part of the GNU MP Library.
+# This file is part of the GNU MP Library test suite.
#
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 3 of the License, or (at your
-# option) any later version.
+# The GNU MP Library test suite is free software; you can redistribute it
+# and/or modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 3 of the License,
+# or (at your option) any later version.
#
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-# License for more details.
+# The GNU MP Library test suite is distributed in the hope that it will be
+# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+# Public License for more details.
#
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+# You should have received a copy of the GNU General Public License along with
+# the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/.
INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/tests
convert io t-inp_str logic bit t-powm t-powm_ui t-pow t-div_2exp reuse \
t-root t-perfsqr t-perfpow t-jac t-bin t-get_d t-get_d_2exp t-get_si \
t-set_d t-set_si \
- t-fac_ui t-fib_ui t-lucnum_ui t-scan t-fits \
+ t-fac_ui t-mfac_uiui t-primorial_ui t-fib_ui t-lucnum_ui t-scan t-fits \
t-divis t-divis_2exp t-cong t-cong_2exp t-sizeinbase t-set_str \
t-aorsmul t-cmp_d t-cmp_si t-hamdist t-oddeven t-popcount t-set_f \
- t-io_raw t-import t-export t-pprime_p t-nextprime
+ t-io_raw t-import t-export t-pprime_p t-nextprime t-remove
TESTS = $(check_PROGRAMS)
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.6 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
# Copyright 1996, 1997, 1999, 2000, 2001, 2002, 2003, 2009, 2012 Free Software
# Foundation, Inc.
#
-# This file is part of the GNU MP Library.
+# This file is part of the GNU MP Library test suite.
#
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 3 of the License, or (at your
-# option) any later version.
+# The GNU MP Library test suite is free software; you can redistribute it
+# and/or modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 3 of the License,
+# or (at your option) any later version.
#
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-# License for more details.
+# The GNU MP Library test suite is distributed in the hope that it will be
+# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+# Public License for more details.
#
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+# You should have received a copy of the GNU General Public License along with
+# the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/.
VPATH = @srcdir@
+am__make_dryrun = \
+ { \
+ am__dry=no; \
+ case $$MAKEFLAGS in \
+ *\\[\ \ ]*) \
+ echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \
+ | grep '^AM OK$$' >/dev/null || am__dry=yes;; \
+ *) \
+ for am__flg in $$MAKEFLAGS; do \
+ case $$am__flg in \
+ *=*|--*) ;; \
+ *n*) am__dry=yes; break;; \
+ esac; \
+ done;; \
+ esac; \
+ test $$am__dry = yes; \
+ }
pkgdatadir = $(datadir)/@PACKAGE@
pkgincludedir = $(includedir)/@PACKAGE@
pkglibdir = $(libdir)/@PACKAGE@
POST_UNINSTALL = :
build_triplet = @build@
host_triplet = @host@
-ANSI2KNR = $(top_builddir)/ansi2knr
check_PROGRAMS = t-addsub$(EXEEXT) t-cmp$(EXEEXT) t-mul$(EXEEXT) \
t-mul_i$(EXEEXT) t-tdiv$(EXEEXT) t-tdiv_ui$(EXEEXT) \
t-fdiv$(EXEEXT) t-fdiv_ui$(EXEEXT) t-cdiv_ui$(EXEEXT) \
t-perfpow$(EXEEXT) t-jac$(EXEEXT) t-bin$(EXEEXT) \
t-get_d$(EXEEXT) t-get_d_2exp$(EXEEXT) t-get_si$(EXEEXT) \
t-set_d$(EXEEXT) t-set_si$(EXEEXT) t-fac_ui$(EXEEXT) \
- t-fib_ui$(EXEEXT) t-lucnum_ui$(EXEEXT) t-scan$(EXEEXT) \
- t-fits$(EXEEXT) t-divis$(EXEEXT) t-divis_2exp$(EXEEXT) \
- t-cong$(EXEEXT) t-cong_2exp$(EXEEXT) t-sizeinbase$(EXEEXT) \
- t-set_str$(EXEEXT) t-aorsmul$(EXEEXT) t-cmp_d$(EXEEXT) \
- t-cmp_si$(EXEEXT) t-hamdist$(EXEEXT) t-oddeven$(EXEEXT) \
- t-popcount$(EXEEXT) t-set_f$(EXEEXT) t-io_raw$(EXEEXT) \
- t-import$(EXEEXT) t-export$(EXEEXT) t-pprime_p$(EXEEXT) \
- t-nextprime$(EXEEXT)
+ t-mfac_uiui$(EXEEXT) t-primorial_ui$(EXEEXT) t-fib_ui$(EXEEXT) \
+ t-lucnum_ui$(EXEEXT) t-scan$(EXEEXT) t-fits$(EXEEXT) \
+ t-divis$(EXEEXT) t-divis_2exp$(EXEEXT) t-cong$(EXEEXT) \
+ t-cong_2exp$(EXEEXT) t-sizeinbase$(EXEEXT) t-set_str$(EXEEXT) \
+ t-aorsmul$(EXEEXT) t-cmp_d$(EXEEXT) t-cmp_si$(EXEEXT) \
+ t-hamdist$(EXEEXT) t-oddeven$(EXEEXT) t-popcount$(EXEEXT) \
+ t-set_f$(EXEEXT) t-io_raw$(EXEEXT) t-import$(EXEEXT) \
+ t-export$(EXEEXT) t-pprime_p$(EXEEXT) t-nextprime$(EXEEXT) \
+ t-remove$(EXEEXT)
subdir = tests/mpz
DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
- $(top_srcdir)/configure.in
+ $(top_srcdir)/configure.ac
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
$(ACLOCAL_M4)
mkinstalldirs = $(install_sh) -d
CONFIG_CLEAN_FILES =
CONFIG_CLEAN_VPATH_FILES =
bit_SOURCES = bit.c
-bit_OBJECTS = bit$U.$(OBJEXT)
+bit_OBJECTS = bit.$(OBJEXT)
bit_LDADD = $(LDADD)
bit_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
convert_SOURCES = convert.c
-convert_OBJECTS = convert$U.$(OBJEXT)
+convert_OBJECTS = convert.$(OBJEXT)
convert_LDADD = $(LDADD)
convert_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
dive_SOURCES = dive.c
-dive_OBJECTS = dive$U.$(OBJEXT)
+dive_OBJECTS = dive.$(OBJEXT)
dive_LDADD = $(LDADD)
dive_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
dive_ui_SOURCES = dive_ui.c
-dive_ui_OBJECTS = dive_ui$U.$(OBJEXT)
+dive_ui_OBJECTS = dive_ui.$(OBJEXT)
dive_ui_LDADD = $(LDADD)
dive_ui_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
io_SOURCES = io.c
-io_OBJECTS = io$U.$(OBJEXT)
+io_OBJECTS = io.$(OBJEXT)
io_LDADD = $(LDADD)
io_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
logic_SOURCES = logic.c
-logic_OBJECTS = logic$U.$(OBJEXT)
+logic_OBJECTS = logic.$(OBJEXT)
logic_LDADD = $(LDADD)
logic_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
reuse_SOURCES = reuse.c
-reuse_OBJECTS = reuse$U.$(OBJEXT)
+reuse_OBJECTS = reuse.$(OBJEXT)
reuse_LDADD = $(LDADD)
reuse_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_addsub_SOURCES = t-addsub.c
-t_addsub_OBJECTS = t-addsub$U.$(OBJEXT)
+t_addsub_OBJECTS = t-addsub.$(OBJEXT)
t_addsub_LDADD = $(LDADD)
t_addsub_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_aorsmul_SOURCES = t-aorsmul.c
-t_aorsmul_OBJECTS = t-aorsmul$U.$(OBJEXT)
+t_aorsmul_OBJECTS = t-aorsmul.$(OBJEXT)
t_aorsmul_LDADD = $(LDADD)
t_aorsmul_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_bin_SOURCES = t-bin.c
-t_bin_OBJECTS = t-bin$U.$(OBJEXT)
+t_bin_OBJECTS = t-bin.$(OBJEXT)
t_bin_LDADD = $(LDADD)
t_bin_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_cdiv_ui_SOURCES = t-cdiv_ui.c
-t_cdiv_ui_OBJECTS = t-cdiv_ui$U.$(OBJEXT)
+t_cdiv_ui_OBJECTS = t-cdiv_ui.$(OBJEXT)
t_cdiv_ui_LDADD = $(LDADD)
t_cdiv_ui_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_cmp_SOURCES = t-cmp.c
-t_cmp_OBJECTS = t-cmp$U.$(OBJEXT)
+t_cmp_OBJECTS = t-cmp.$(OBJEXT)
t_cmp_LDADD = $(LDADD)
t_cmp_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_cmp_d_SOURCES = t-cmp_d.c
-t_cmp_d_OBJECTS = t-cmp_d$U.$(OBJEXT)
+t_cmp_d_OBJECTS = t-cmp_d.$(OBJEXT)
t_cmp_d_LDADD = $(LDADD)
t_cmp_d_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_cmp_si_SOURCES = t-cmp_si.c
-t_cmp_si_OBJECTS = t-cmp_si$U.$(OBJEXT)
+t_cmp_si_OBJECTS = t-cmp_si.$(OBJEXT)
t_cmp_si_LDADD = $(LDADD)
t_cmp_si_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_cong_SOURCES = t-cong.c
-t_cong_OBJECTS = t-cong$U.$(OBJEXT)
+t_cong_OBJECTS = t-cong.$(OBJEXT)
t_cong_LDADD = $(LDADD)
t_cong_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_cong_2exp_SOURCES = t-cong_2exp.c
-t_cong_2exp_OBJECTS = t-cong_2exp$U.$(OBJEXT)
+t_cong_2exp_OBJECTS = t-cong_2exp.$(OBJEXT)
t_cong_2exp_LDADD = $(LDADD)
t_cong_2exp_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_div_2exp_SOURCES = t-div_2exp.c
-t_div_2exp_OBJECTS = t-div_2exp$U.$(OBJEXT)
+t_div_2exp_OBJECTS = t-div_2exp.$(OBJEXT)
t_div_2exp_LDADD = $(LDADD)
t_div_2exp_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_divis_SOURCES = t-divis.c
-t_divis_OBJECTS = t-divis$U.$(OBJEXT)
+t_divis_OBJECTS = t-divis.$(OBJEXT)
t_divis_LDADD = $(LDADD)
t_divis_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_divis_2exp_SOURCES = t-divis_2exp.c
-t_divis_2exp_OBJECTS = t-divis_2exp$U.$(OBJEXT)
+t_divis_2exp_OBJECTS = t-divis_2exp.$(OBJEXT)
t_divis_2exp_LDADD = $(LDADD)
t_divis_2exp_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_export_SOURCES = t-export.c
-t_export_OBJECTS = t-export$U.$(OBJEXT)
+t_export_OBJECTS = t-export.$(OBJEXT)
t_export_LDADD = $(LDADD)
t_export_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_fac_ui_SOURCES = t-fac_ui.c
-t_fac_ui_OBJECTS = t-fac_ui$U.$(OBJEXT)
+t_fac_ui_OBJECTS = t-fac_ui.$(OBJEXT)
t_fac_ui_LDADD = $(LDADD)
t_fac_ui_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_fdiv_SOURCES = t-fdiv.c
-t_fdiv_OBJECTS = t-fdiv$U.$(OBJEXT)
+t_fdiv_OBJECTS = t-fdiv.$(OBJEXT)
t_fdiv_LDADD = $(LDADD)
t_fdiv_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_fdiv_ui_SOURCES = t-fdiv_ui.c
-t_fdiv_ui_OBJECTS = t-fdiv_ui$U.$(OBJEXT)
+t_fdiv_ui_OBJECTS = t-fdiv_ui.$(OBJEXT)
t_fdiv_ui_LDADD = $(LDADD)
t_fdiv_ui_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_fib_ui_SOURCES = t-fib_ui.c
-t_fib_ui_OBJECTS = t-fib_ui$U.$(OBJEXT)
+t_fib_ui_OBJECTS = t-fib_ui.$(OBJEXT)
t_fib_ui_LDADD = $(LDADD)
t_fib_ui_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_fits_SOURCES = t-fits.c
-t_fits_OBJECTS = t-fits$U.$(OBJEXT)
+t_fits_OBJECTS = t-fits.$(OBJEXT)
t_fits_LDADD = $(LDADD)
t_fits_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_gcd_SOURCES = t-gcd.c
-t_gcd_OBJECTS = t-gcd$U.$(OBJEXT)
+t_gcd_OBJECTS = t-gcd.$(OBJEXT)
t_gcd_LDADD = $(LDADD)
t_gcd_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_gcd_ui_SOURCES = t-gcd_ui.c
-t_gcd_ui_OBJECTS = t-gcd_ui$U.$(OBJEXT)
+t_gcd_ui_OBJECTS = t-gcd_ui.$(OBJEXT)
t_gcd_ui_LDADD = $(LDADD)
t_gcd_ui_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_get_d_SOURCES = t-get_d.c
-t_get_d_OBJECTS = t-get_d$U.$(OBJEXT)
+t_get_d_OBJECTS = t-get_d.$(OBJEXT)
t_get_d_LDADD = $(LDADD)
t_get_d_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_get_d_2exp_SOURCES = t-get_d_2exp.c
-t_get_d_2exp_OBJECTS = t-get_d_2exp$U.$(OBJEXT)
+t_get_d_2exp_OBJECTS = t-get_d_2exp.$(OBJEXT)
t_get_d_2exp_LDADD = $(LDADD)
t_get_d_2exp_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_get_si_SOURCES = t-get_si.c
-t_get_si_OBJECTS = t-get_si$U.$(OBJEXT)
+t_get_si_OBJECTS = t-get_si.$(OBJEXT)
t_get_si_LDADD = $(LDADD)
t_get_si_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_hamdist_SOURCES = t-hamdist.c
-t_hamdist_OBJECTS = t-hamdist$U.$(OBJEXT)
+t_hamdist_OBJECTS = t-hamdist.$(OBJEXT)
t_hamdist_LDADD = $(LDADD)
t_hamdist_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_import_SOURCES = t-import.c
-t_import_OBJECTS = t-import$U.$(OBJEXT)
+t_import_OBJECTS = t-import.$(OBJEXT)
t_import_LDADD = $(LDADD)
t_import_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_inp_str_SOURCES = t-inp_str.c
-t_inp_str_OBJECTS = t-inp_str$U.$(OBJEXT)
+t_inp_str_OBJECTS = t-inp_str.$(OBJEXT)
t_inp_str_LDADD = $(LDADD)
t_inp_str_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_invert_SOURCES = t-invert.c
-t_invert_OBJECTS = t-invert$U.$(OBJEXT)
+t_invert_OBJECTS = t-invert.$(OBJEXT)
t_invert_LDADD = $(LDADD)
t_invert_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_io_raw_SOURCES = t-io_raw.c
-t_io_raw_OBJECTS = t-io_raw$U.$(OBJEXT)
+t_io_raw_OBJECTS = t-io_raw.$(OBJEXT)
t_io_raw_LDADD = $(LDADD)
t_io_raw_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_jac_SOURCES = t-jac.c
-t_jac_OBJECTS = t-jac$U.$(OBJEXT)
+t_jac_OBJECTS = t-jac.$(OBJEXT)
t_jac_LDADD = $(LDADD)
t_jac_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_lcm_SOURCES = t-lcm.c
-t_lcm_OBJECTS = t-lcm$U.$(OBJEXT)
+t_lcm_OBJECTS = t-lcm.$(OBJEXT)
t_lcm_LDADD = $(LDADD)
t_lcm_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_lucnum_ui_SOURCES = t-lucnum_ui.c
-t_lucnum_ui_OBJECTS = t-lucnum_ui$U.$(OBJEXT)
+t_lucnum_ui_OBJECTS = t-lucnum_ui.$(OBJEXT)
t_lucnum_ui_LDADD = $(LDADD)
t_lucnum_ui_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
+t_mfac_uiui_SOURCES = t-mfac_uiui.c
+t_mfac_uiui_OBJECTS = t-mfac_uiui.$(OBJEXT)
+t_mfac_uiui_LDADD = $(LDADD)
+t_mfac_uiui_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+ $(top_builddir)/libgmp.la
t_mul_SOURCES = t-mul.c
-t_mul_OBJECTS = t-mul$U.$(OBJEXT)
+t_mul_OBJECTS = t-mul.$(OBJEXT)
t_mul_LDADD = $(LDADD)
t_mul_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_mul_i_SOURCES = t-mul_i.c
-t_mul_i_OBJECTS = t-mul_i$U.$(OBJEXT)
+t_mul_i_OBJECTS = t-mul_i.$(OBJEXT)
t_mul_i_LDADD = $(LDADD)
t_mul_i_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_nextprime_SOURCES = t-nextprime.c
-t_nextprime_OBJECTS = t-nextprime$U.$(OBJEXT)
+t_nextprime_OBJECTS = t-nextprime.$(OBJEXT)
t_nextprime_LDADD = $(LDADD)
t_nextprime_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_oddeven_SOURCES = t-oddeven.c
-t_oddeven_OBJECTS = t-oddeven$U.$(OBJEXT)
+t_oddeven_OBJECTS = t-oddeven.$(OBJEXT)
t_oddeven_LDADD = $(LDADD)
t_oddeven_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_perfpow_SOURCES = t-perfpow.c
-t_perfpow_OBJECTS = t-perfpow$U.$(OBJEXT)
+t_perfpow_OBJECTS = t-perfpow.$(OBJEXT)
t_perfpow_LDADD = $(LDADD)
t_perfpow_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_perfsqr_SOURCES = t-perfsqr.c
-t_perfsqr_OBJECTS = t-perfsqr$U.$(OBJEXT)
+t_perfsqr_OBJECTS = t-perfsqr.$(OBJEXT)
t_perfsqr_LDADD = $(LDADD)
t_perfsqr_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_popcount_SOURCES = t-popcount.c
-t_popcount_OBJECTS = t-popcount$U.$(OBJEXT)
+t_popcount_OBJECTS = t-popcount.$(OBJEXT)
t_popcount_LDADD = $(LDADD)
t_popcount_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_pow_SOURCES = t-pow.c
-t_pow_OBJECTS = t-pow$U.$(OBJEXT)
+t_pow_OBJECTS = t-pow.$(OBJEXT)
t_pow_LDADD = $(LDADD)
t_pow_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_powm_SOURCES = t-powm.c
-t_powm_OBJECTS = t-powm$U.$(OBJEXT)
+t_powm_OBJECTS = t-powm.$(OBJEXT)
t_powm_LDADD = $(LDADD)
t_powm_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_powm_ui_SOURCES = t-powm_ui.c
-t_powm_ui_OBJECTS = t-powm_ui$U.$(OBJEXT)
+t_powm_ui_OBJECTS = t-powm_ui.$(OBJEXT)
t_powm_ui_LDADD = $(LDADD)
t_powm_ui_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_pprime_p_SOURCES = t-pprime_p.c
-t_pprime_p_OBJECTS = t-pprime_p$U.$(OBJEXT)
+t_pprime_p_OBJECTS = t-pprime_p.$(OBJEXT)
t_pprime_p_LDADD = $(LDADD)
t_pprime_p_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
+t_primorial_ui_SOURCES = t-primorial_ui.c
+t_primorial_ui_OBJECTS = t-primorial_ui.$(OBJEXT)
+t_primorial_ui_LDADD = $(LDADD)
+t_primorial_ui_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+ $(top_builddir)/libgmp.la
+t_remove_SOURCES = t-remove.c
+t_remove_OBJECTS = t-remove.$(OBJEXT)
+t_remove_LDADD = $(LDADD)
+t_remove_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
+ $(top_builddir)/libgmp.la
t_root_SOURCES = t-root.c
-t_root_OBJECTS = t-root$U.$(OBJEXT)
+t_root_OBJECTS = t-root.$(OBJEXT)
t_root_LDADD = $(LDADD)
t_root_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_scan_SOURCES = t-scan.c
-t_scan_OBJECTS = t-scan$U.$(OBJEXT)
+t_scan_OBJECTS = t-scan.$(OBJEXT)
t_scan_LDADD = $(LDADD)
t_scan_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_set_d_SOURCES = t-set_d.c
-t_set_d_OBJECTS = t-set_d$U.$(OBJEXT)
+t_set_d_OBJECTS = t-set_d.$(OBJEXT)
t_set_d_LDADD = $(LDADD)
t_set_d_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_set_f_SOURCES = t-set_f.c
-t_set_f_OBJECTS = t-set_f$U.$(OBJEXT)
+t_set_f_OBJECTS = t-set_f.$(OBJEXT)
t_set_f_LDADD = $(LDADD)
t_set_f_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_set_si_SOURCES = t-set_si.c
-t_set_si_OBJECTS = t-set_si$U.$(OBJEXT)
+t_set_si_OBJECTS = t-set_si.$(OBJEXT)
t_set_si_LDADD = $(LDADD)
t_set_si_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_set_str_SOURCES = t-set_str.c
-t_set_str_OBJECTS = t-set_str$U.$(OBJEXT)
+t_set_str_OBJECTS = t-set_str.$(OBJEXT)
t_set_str_LDADD = $(LDADD)
t_set_str_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_sizeinbase_SOURCES = t-sizeinbase.c
-t_sizeinbase_OBJECTS = t-sizeinbase$U.$(OBJEXT)
+t_sizeinbase_OBJECTS = t-sizeinbase.$(OBJEXT)
t_sizeinbase_LDADD = $(LDADD)
t_sizeinbase_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_sqrtrem_SOURCES = t-sqrtrem.c
-t_sqrtrem_OBJECTS = t-sqrtrem$U.$(OBJEXT)
+t_sqrtrem_OBJECTS = t-sqrtrem.$(OBJEXT)
t_sqrtrem_LDADD = $(LDADD)
t_sqrtrem_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_tdiv_SOURCES = t-tdiv.c
-t_tdiv_OBJECTS = t-tdiv$U.$(OBJEXT)
+t_tdiv_OBJECTS = t-tdiv.$(OBJEXT)
t_tdiv_LDADD = $(LDADD)
t_tdiv_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_tdiv_ui_SOURCES = t-tdiv_ui.c
-t_tdiv_ui_OBJECTS = t-tdiv_ui$U.$(OBJEXT)
+t_tdiv_ui_OBJECTS = t-tdiv_ui.$(OBJEXT)
t_tdiv_ui_LDADD = $(LDADD)
t_tdiv_ui_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t-divis_2exp.c t-export.c t-fac_ui.c t-fdiv.c t-fdiv_ui.c \
t-fib_ui.c t-fits.c t-gcd.c t-gcd_ui.c t-get_d.c \
t-get_d_2exp.c t-get_si.c t-hamdist.c t-import.c t-inp_str.c \
- t-invert.c t-io_raw.c t-jac.c t-lcm.c t-lucnum_ui.c t-mul.c \
- t-mul_i.c t-nextprime.c t-oddeven.c t-perfpow.c t-perfsqr.c \
- t-popcount.c t-pow.c t-powm.c t-powm_ui.c t-pprime_p.c \
- t-root.c t-scan.c t-set_d.c t-set_f.c t-set_si.c t-set_str.c \
+ t-invert.c t-io_raw.c t-jac.c t-lcm.c t-lucnum_ui.c \
+ t-mfac_uiui.c t-mul.c t-mul_i.c t-nextprime.c t-oddeven.c \
+ t-perfpow.c t-perfsqr.c t-popcount.c t-pow.c t-powm.c \
+ t-powm_ui.c t-pprime_p.c t-primorial_ui.c t-remove.c t-root.c \
+ t-scan.c t-set_d.c t-set_f.c t-set_si.c t-set_str.c \
t-sizeinbase.c t-sqrtrem.c t-tdiv.c t-tdiv_ui.c
DIST_SOURCES = bit.c convert.c dive.c dive_ui.c io.c logic.c reuse.c \
t-addsub.c t-aorsmul.c t-bin.c t-cdiv_ui.c t-cmp.c t-cmp_d.c \
t-divis_2exp.c t-export.c t-fac_ui.c t-fdiv.c t-fdiv_ui.c \
t-fib_ui.c t-fits.c t-gcd.c t-gcd_ui.c t-get_d.c \
t-get_d_2exp.c t-get_si.c t-hamdist.c t-import.c t-inp_str.c \
- t-invert.c t-io_raw.c t-jac.c t-lcm.c t-lucnum_ui.c t-mul.c \
- t-mul_i.c t-nextprime.c t-oddeven.c t-perfpow.c t-perfsqr.c \
- t-popcount.c t-pow.c t-powm.c t-powm_ui.c t-pprime_p.c \
- t-root.c t-scan.c t-set_d.c t-set_f.c t-set_si.c t-set_str.c \
+ t-invert.c t-io_raw.c t-jac.c t-lcm.c t-lucnum_ui.c \
+ t-mfac_uiui.c t-mul.c t-mul_i.c t-nextprime.c t-oddeven.c \
+ t-perfpow.c t-perfsqr.c t-popcount.c t-pow.c t-powm.c \
+ t-powm_ui.c t-pprime_p.c t-primorial_ui.c t-remove.c t-root.c \
+ t-scan.c t-set_d.c t-set_f.c t-set_si.c t-set_str.c \
t-sizeinbase.c t-sqrtrem.c t-tdiv.c t-tdiv_ui.c
+am__can_run_installinfo = \
+ case $$AM_UPDATE_INFO_DIR in \
+ n|no|NO) false;; \
+ *) (install-info --version) >/dev/null 2>&1;; \
+ esac
ETAGS = etags
CTAGS = ctags
am__tty_colors = \
SPEED_CYCLECOUNTER_OBJ = @SPEED_CYCLECOUNTER_OBJ@
STRIP = @STRIP@
TAL_OBJECT = @TAL_OBJECT@
+TUNE_LIBS = @TUNE_LIBS@
TUNE_SQR_OBJ = @TUNE_SQR_OBJ@
-U = @U@
U_FOR_BUILD = @U_FOR_BUILD@
VERSION = @VERSION@
WITH_READLINE_01 = @WITH_READLINE_01@
mkdir_p = @mkdir_p@
mpn_objects = @mpn_objects@
mpn_objs_in_libgmp = @mpn_objs_in_libgmp@
-mpn_objs_in_libmp = @mpn_objs_in_libmp@
oldincludedir = @oldincludedir@
pdfdir = @pdfdir@
prefix = @prefix@
list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
echo " rm -f" $$list; \
rm -f $$list
-bit$(EXEEXT): $(bit_OBJECTS) $(bit_DEPENDENCIES)
+bit$(EXEEXT): $(bit_OBJECTS) $(bit_DEPENDENCIES) $(EXTRA_bit_DEPENDENCIES)
@rm -f bit$(EXEEXT)
$(LINK) $(bit_OBJECTS) $(bit_LDADD) $(LIBS)
-convert$(EXEEXT): $(convert_OBJECTS) $(convert_DEPENDENCIES)
+convert$(EXEEXT): $(convert_OBJECTS) $(convert_DEPENDENCIES) $(EXTRA_convert_DEPENDENCIES)
@rm -f convert$(EXEEXT)
$(LINK) $(convert_OBJECTS) $(convert_LDADD) $(LIBS)
-dive$(EXEEXT): $(dive_OBJECTS) $(dive_DEPENDENCIES)
+dive$(EXEEXT): $(dive_OBJECTS) $(dive_DEPENDENCIES) $(EXTRA_dive_DEPENDENCIES)
@rm -f dive$(EXEEXT)
$(LINK) $(dive_OBJECTS) $(dive_LDADD) $(LIBS)
-dive_ui$(EXEEXT): $(dive_ui_OBJECTS) $(dive_ui_DEPENDENCIES)
+dive_ui$(EXEEXT): $(dive_ui_OBJECTS) $(dive_ui_DEPENDENCIES) $(EXTRA_dive_ui_DEPENDENCIES)
@rm -f dive_ui$(EXEEXT)
$(LINK) $(dive_ui_OBJECTS) $(dive_ui_LDADD) $(LIBS)
-io$(EXEEXT): $(io_OBJECTS) $(io_DEPENDENCIES)
+io$(EXEEXT): $(io_OBJECTS) $(io_DEPENDENCIES) $(EXTRA_io_DEPENDENCIES)
@rm -f io$(EXEEXT)
$(LINK) $(io_OBJECTS) $(io_LDADD) $(LIBS)
-logic$(EXEEXT): $(logic_OBJECTS) $(logic_DEPENDENCIES)
+logic$(EXEEXT): $(logic_OBJECTS) $(logic_DEPENDENCIES) $(EXTRA_logic_DEPENDENCIES)
@rm -f logic$(EXEEXT)
$(LINK) $(logic_OBJECTS) $(logic_LDADD) $(LIBS)
-reuse$(EXEEXT): $(reuse_OBJECTS) $(reuse_DEPENDENCIES)
+reuse$(EXEEXT): $(reuse_OBJECTS) $(reuse_DEPENDENCIES) $(EXTRA_reuse_DEPENDENCIES)
@rm -f reuse$(EXEEXT)
$(LINK) $(reuse_OBJECTS) $(reuse_LDADD) $(LIBS)
-t-addsub$(EXEEXT): $(t_addsub_OBJECTS) $(t_addsub_DEPENDENCIES)
+t-addsub$(EXEEXT): $(t_addsub_OBJECTS) $(t_addsub_DEPENDENCIES) $(EXTRA_t_addsub_DEPENDENCIES)
@rm -f t-addsub$(EXEEXT)
$(LINK) $(t_addsub_OBJECTS) $(t_addsub_LDADD) $(LIBS)
-t-aorsmul$(EXEEXT): $(t_aorsmul_OBJECTS) $(t_aorsmul_DEPENDENCIES)
+t-aorsmul$(EXEEXT): $(t_aorsmul_OBJECTS) $(t_aorsmul_DEPENDENCIES) $(EXTRA_t_aorsmul_DEPENDENCIES)
@rm -f t-aorsmul$(EXEEXT)
$(LINK) $(t_aorsmul_OBJECTS) $(t_aorsmul_LDADD) $(LIBS)
-t-bin$(EXEEXT): $(t_bin_OBJECTS) $(t_bin_DEPENDENCIES)
+t-bin$(EXEEXT): $(t_bin_OBJECTS) $(t_bin_DEPENDENCIES) $(EXTRA_t_bin_DEPENDENCIES)
@rm -f t-bin$(EXEEXT)
$(LINK) $(t_bin_OBJECTS) $(t_bin_LDADD) $(LIBS)
-t-cdiv_ui$(EXEEXT): $(t_cdiv_ui_OBJECTS) $(t_cdiv_ui_DEPENDENCIES)
+t-cdiv_ui$(EXEEXT): $(t_cdiv_ui_OBJECTS) $(t_cdiv_ui_DEPENDENCIES) $(EXTRA_t_cdiv_ui_DEPENDENCIES)
@rm -f t-cdiv_ui$(EXEEXT)
$(LINK) $(t_cdiv_ui_OBJECTS) $(t_cdiv_ui_LDADD) $(LIBS)
-t-cmp$(EXEEXT): $(t_cmp_OBJECTS) $(t_cmp_DEPENDENCIES)
+t-cmp$(EXEEXT): $(t_cmp_OBJECTS) $(t_cmp_DEPENDENCIES) $(EXTRA_t_cmp_DEPENDENCIES)
@rm -f t-cmp$(EXEEXT)
$(LINK) $(t_cmp_OBJECTS) $(t_cmp_LDADD) $(LIBS)
-t-cmp_d$(EXEEXT): $(t_cmp_d_OBJECTS) $(t_cmp_d_DEPENDENCIES)
+t-cmp_d$(EXEEXT): $(t_cmp_d_OBJECTS) $(t_cmp_d_DEPENDENCIES) $(EXTRA_t_cmp_d_DEPENDENCIES)
@rm -f t-cmp_d$(EXEEXT)
$(LINK) $(t_cmp_d_OBJECTS) $(t_cmp_d_LDADD) $(LIBS)
-t-cmp_si$(EXEEXT): $(t_cmp_si_OBJECTS) $(t_cmp_si_DEPENDENCIES)
+t-cmp_si$(EXEEXT): $(t_cmp_si_OBJECTS) $(t_cmp_si_DEPENDENCIES) $(EXTRA_t_cmp_si_DEPENDENCIES)
@rm -f t-cmp_si$(EXEEXT)
$(LINK) $(t_cmp_si_OBJECTS) $(t_cmp_si_LDADD) $(LIBS)
-t-cong$(EXEEXT): $(t_cong_OBJECTS) $(t_cong_DEPENDENCIES)
+t-cong$(EXEEXT): $(t_cong_OBJECTS) $(t_cong_DEPENDENCIES) $(EXTRA_t_cong_DEPENDENCIES)
@rm -f t-cong$(EXEEXT)
$(LINK) $(t_cong_OBJECTS) $(t_cong_LDADD) $(LIBS)
-t-cong_2exp$(EXEEXT): $(t_cong_2exp_OBJECTS) $(t_cong_2exp_DEPENDENCIES)
+t-cong_2exp$(EXEEXT): $(t_cong_2exp_OBJECTS) $(t_cong_2exp_DEPENDENCIES) $(EXTRA_t_cong_2exp_DEPENDENCIES)
@rm -f t-cong_2exp$(EXEEXT)
$(LINK) $(t_cong_2exp_OBJECTS) $(t_cong_2exp_LDADD) $(LIBS)
-t-div_2exp$(EXEEXT): $(t_div_2exp_OBJECTS) $(t_div_2exp_DEPENDENCIES)
+t-div_2exp$(EXEEXT): $(t_div_2exp_OBJECTS) $(t_div_2exp_DEPENDENCIES) $(EXTRA_t_div_2exp_DEPENDENCIES)
@rm -f t-div_2exp$(EXEEXT)
$(LINK) $(t_div_2exp_OBJECTS) $(t_div_2exp_LDADD) $(LIBS)
-t-divis$(EXEEXT): $(t_divis_OBJECTS) $(t_divis_DEPENDENCIES)
+t-divis$(EXEEXT): $(t_divis_OBJECTS) $(t_divis_DEPENDENCIES) $(EXTRA_t_divis_DEPENDENCIES)
@rm -f t-divis$(EXEEXT)
$(LINK) $(t_divis_OBJECTS) $(t_divis_LDADD) $(LIBS)
-t-divis_2exp$(EXEEXT): $(t_divis_2exp_OBJECTS) $(t_divis_2exp_DEPENDENCIES)
+t-divis_2exp$(EXEEXT): $(t_divis_2exp_OBJECTS) $(t_divis_2exp_DEPENDENCIES) $(EXTRA_t_divis_2exp_DEPENDENCIES)
@rm -f t-divis_2exp$(EXEEXT)
$(LINK) $(t_divis_2exp_OBJECTS) $(t_divis_2exp_LDADD) $(LIBS)
-t-export$(EXEEXT): $(t_export_OBJECTS) $(t_export_DEPENDENCIES)
+t-export$(EXEEXT): $(t_export_OBJECTS) $(t_export_DEPENDENCIES) $(EXTRA_t_export_DEPENDENCIES)
@rm -f t-export$(EXEEXT)
$(LINK) $(t_export_OBJECTS) $(t_export_LDADD) $(LIBS)
-t-fac_ui$(EXEEXT): $(t_fac_ui_OBJECTS) $(t_fac_ui_DEPENDENCIES)
+t-fac_ui$(EXEEXT): $(t_fac_ui_OBJECTS) $(t_fac_ui_DEPENDENCIES) $(EXTRA_t_fac_ui_DEPENDENCIES)
@rm -f t-fac_ui$(EXEEXT)
$(LINK) $(t_fac_ui_OBJECTS) $(t_fac_ui_LDADD) $(LIBS)
-t-fdiv$(EXEEXT): $(t_fdiv_OBJECTS) $(t_fdiv_DEPENDENCIES)
+t-fdiv$(EXEEXT): $(t_fdiv_OBJECTS) $(t_fdiv_DEPENDENCIES) $(EXTRA_t_fdiv_DEPENDENCIES)
@rm -f t-fdiv$(EXEEXT)
$(LINK) $(t_fdiv_OBJECTS) $(t_fdiv_LDADD) $(LIBS)
-t-fdiv_ui$(EXEEXT): $(t_fdiv_ui_OBJECTS) $(t_fdiv_ui_DEPENDENCIES)
+t-fdiv_ui$(EXEEXT): $(t_fdiv_ui_OBJECTS) $(t_fdiv_ui_DEPENDENCIES) $(EXTRA_t_fdiv_ui_DEPENDENCIES)
@rm -f t-fdiv_ui$(EXEEXT)
$(LINK) $(t_fdiv_ui_OBJECTS) $(t_fdiv_ui_LDADD) $(LIBS)
-t-fib_ui$(EXEEXT): $(t_fib_ui_OBJECTS) $(t_fib_ui_DEPENDENCIES)
+t-fib_ui$(EXEEXT): $(t_fib_ui_OBJECTS) $(t_fib_ui_DEPENDENCIES) $(EXTRA_t_fib_ui_DEPENDENCIES)
@rm -f t-fib_ui$(EXEEXT)
$(LINK) $(t_fib_ui_OBJECTS) $(t_fib_ui_LDADD) $(LIBS)
-t-fits$(EXEEXT): $(t_fits_OBJECTS) $(t_fits_DEPENDENCIES)
+t-fits$(EXEEXT): $(t_fits_OBJECTS) $(t_fits_DEPENDENCIES) $(EXTRA_t_fits_DEPENDENCIES)
@rm -f t-fits$(EXEEXT)
$(LINK) $(t_fits_OBJECTS) $(t_fits_LDADD) $(LIBS)
-t-gcd$(EXEEXT): $(t_gcd_OBJECTS) $(t_gcd_DEPENDENCIES)
+t-gcd$(EXEEXT): $(t_gcd_OBJECTS) $(t_gcd_DEPENDENCIES) $(EXTRA_t_gcd_DEPENDENCIES)
@rm -f t-gcd$(EXEEXT)
$(LINK) $(t_gcd_OBJECTS) $(t_gcd_LDADD) $(LIBS)
-t-gcd_ui$(EXEEXT): $(t_gcd_ui_OBJECTS) $(t_gcd_ui_DEPENDENCIES)
+t-gcd_ui$(EXEEXT): $(t_gcd_ui_OBJECTS) $(t_gcd_ui_DEPENDENCIES) $(EXTRA_t_gcd_ui_DEPENDENCIES)
@rm -f t-gcd_ui$(EXEEXT)
$(LINK) $(t_gcd_ui_OBJECTS) $(t_gcd_ui_LDADD) $(LIBS)
-t-get_d$(EXEEXT): $(t_get_d_OBJECTS) $(t_get_d_DEPENDENCIES)
+t-get_d$(EXEEXT): $(t_get_d_OBJECTS) $(t_get_d_DEPENDENCIES) $(EXTRA_t_get_d_DEPENDENCIES)
@rm -f t-get_d$(EXEEXT)
$(LINK) $(t_get_d_OBJECTS) $(t_get_d_LDADD) $(LIBS)
-t-get_d_2exp$(EXEEXT): $(t_get_d_2exp_OBJECTS) $(t_get_d_2exp_DEPENDENCIES)
+t-get_d_2exp$(EXEEXT): $(t_get_d_2exp_OBJECTS) $(t_get_d_2exp_DEPENDENCIES) $(EXTRA_t_get_d_2exp_DEPENDENCIES)
@rm -f t-get_d_2exp$(EXEEXT)
$(LINK) $(t_get_d_2exp_OBJECTS) $(t_get_d_2exp_LDADD) $(LIBS)
-t-get_si$(EXEEXT): $(t_get_si_OBJECTS) $(t_get_si_DEPENDENCIES)
+t-get_si$(EXEEXT): $(t_get_si_OBJECTS) $(t_get_si_DEPENDENCIES) $(EXTRA_t_get_si_DEPENDENCIES)
@rm -f t-get_si$(EXEEXT)
$(LINK) $(t_get_si_OBJECTS) $(t_get_si_LDADD) $(LIBS)
-t-hamdist$(EXEEXT): $(t_hamdist_OBJECTS) $(t_hamdist_DEPENDENCIES)
+t-hamdist$(EXEEXT): $(t_hamdist_OBJECTS) $(t_hamdist_DEPENDENCIES) $(EXTRA_t_hamdist_DEPENDENCIES)
@rm -f t-hamdist$(EXEEXT)
$(LINK) $(t_hamdist_OBJECTS) $(t_hamdist_LDADD) $(LIBS)
-t-import$(EXEEXT): $(t_import_OBJECTS) $(t_import_DEPENDENCIES)
+t-import$(EXEEXT): $(t_import_OBJECTS) $(t_import_DEPENDENCIES) $(EXTRA_t_import_DEPENDENCIES)
@rm -f t-import$(EXEEXT)
$(LINK) $(t_import_OBJECTS) $(t_import_LDADD) $(LIBS)
-t-inp_str$(EXEEXT): $(t_inp_str_OBJECTS) $(t_inp_str_DEPENDENCIES)
+t-inp_str$(EXEEXT): $(t_inp_str_OBJECTS) $(t_inp_str_DEPENDENCIES) $(EXTRA_t_inp_str_DEPENDENCIES)
@rm -f t-inp_str$(EXEEXT)
$(LINK) $(t_inp_str_OBJECTS) $(t_inp_str_LDADD) $(LIBS)
-t-invert$(EXEEXT): $(t_invert_OBJECTS) $(t_invert_DEPENDENCIES)
+t-invert$(EXEEXT): $(t_invert_OBJECTS) $(t_invert_DEPENDENCIES) $(EXTRA_t_invert_DEPENDENCIES)
@rm -f t-invert$(EXEEXT)
$(LINK) $(t_invert_OBJECTS) $(t_invert_LDADD) $(LIBS)
-t-io_raw$(EXEEXT): $(t_io_raw_OBJECTS) $(t_io_raw_DEPENDENCIES)
+t-io_raw$(EXEEXT): $(t_io_raw_OBJECTS) $(t_io_raw_DEPENDENCIES) $(EXTRA_t_io_raw_DEPENDENCIES)
@rm -f t-io_raw$(EXEEXT)
$(LINK) $(t_io_raw_OBJECTS) $(t_io_raw_LDADD) $(LIBS)
-t-jac$(EXEEXT): $(t_jac_OBJECTS) $(t_jac_DEPENDENCIES)
+t-jac$(EXEEXT): $(t_jac_OBJECTS) $(t_jac_DEPENDENCIES) $(EXTRA_t_jac_DEPENDENCIES)
@rm -f t-jac$(EXEEXT)
$(LINK) $(t_jac_OBJECTS) $(t_jac_LDADD) $(LIBS)
-t-lcm$(EXEEXT): $(t_lcm_OBJECTS) $(t_lcm_DEPENDENCIES)
+t-lcm$(EXEEXT): $(t_lcm_OBJECTS) $(t_lcm_DEPENDENCIES) $(EXTRA_t_lcm_DEPENDENCIES)
@rm -f t-lcm$(EXEEXT)
$(LINK) $(t_lcm_OBJECTS) $(t_lcm_LDADD) $(LIBS)
-t-lucnum_ui$(EXEEXT): $(t_lucnum_ui_OBJECTS) $(t_lucnum_ui_DEPENDENCIES)
+t-lucnum_ui$(EXEEXT): $(t_lucnum_ui_OBJECTS) $(t_lucnum_ui_DEPENDENCIES) $(EXTRA_t_lucnum_ui_DEPENDENCIES)
@rm -f t-lucnum_ui$(EXEEXT)
$(LINK) $(t_lucnum_ui_OBJECTS) $(t_lucnum_ui_LDADD) $(LIBS)
-t-mul$(EXEEXT): $(t_mul_OBJECTS) $(t_mul_DEPENDENCIES)
+t-mfac_uiui$(EXEEXT): $(t_mfac_uiui_OBJECTS) $(t_mfac_uiui_DEPENDENCIES) $(EXTRA_t_mfac_uiui_DEPENDENCIES)
+ @rm -f t-mfac_uiui$(EXEEXT)
+ $(LINK) $(t_mfac_uiui_OBJECTS) $(t_mfac_uiui_LDADD) $(LIBS)
+t-mul$(EXEEXT): $(t_mul_OBJECTS) $(t_mul_DEPENDENCIES) $(EXTRA_t_mul_DEPENDENCIES)
@rm -f t-mul$(EXEEXT)
$(LINK) $(t_mul_OBJECTS) $(t_mul_LDADD) $(LIBS)
-t-mul_i$(EXEEXT): $(t_mul_i_OBJECTS) $(t_mul_i_DEPENDENCIES)
+t-mul_i$(EXEEXT): $(t_mul_i_OBJECTS) $(t_mul_i_DEPENDENCIES) $(EXTRA_t_mul_i_DEPENDENCIES)
@rm -f t-mul_i$(EXEEXT)
$(LINK) $(t_mul_i_OBJECTS) $(t_mul_i_LDADD) $(LIBS)
-t-nextprime$(EXEEXT): $(t_nextprime_OBJECTS) $(t_nextprime_DEPENDENCIES)
+t-nextprime$(EXEEXT): $(t_nextprime_OBJECTS) $(t_nextprime_DEPENDENCIES) $(EXTRA_t_nextprime_DEPENDENCIES)
@rm -f t-nextprime$(EXEEXT)
$(LINK) $(t_nextprime_OBJECTS) $(t_nextprime_LDADD) $(LIBS)
-t-oddeven$(EXEEXT): $(t_oddeven_OBJECTS) $(t_oddeven_DEPENDENCIES)
+t-oddeven$(EXEEXT): $(t_oddeven_OBJECTS) $(t_oddeven_DEPENDENCIES) $(EXTRA_t_oddeven_DEPENDENCIES)
@rm -f t-oddeven$(EXEEXT)
$(LINK) $(t_oddeven_OBJECTS) $(t_oddeven_LDADD) $(LIBS)
-t-perfpow$(EXEEXT): $(t_perfpow_OBJECTS) $(t_perfpow_DEPENDENCIES)
+t-perfpow$(EXEEXT): $(t_perfpow_OBJECTS) $(t_perfpow_DEPENDENCIES) $(EXTRA_t_perfpow_DEPENDENCIES)
@rm -f t-perfpow$(EXEEXT)
$(LINK) $(t_perfpow_OBJECTS) $(t_perfpow_LDADD) $(LIBS)
-t-perfsqr$(EXEEXT): $(t_perfsqr_OBJECTS) $(t_perfsqr_DEPENDENCIES)
+t-perfsqr$(EXEEXT): $(t_perfsqr_OBJECTS) $(t_perfsqr_DEPENDENCIES) $(EXTRA_t_perfsqr_DEPENDENCIES)
@rm -f t-perfsqr$(EXEEXT)
$(LINK) $(t_perfsqr_OBJECTS) $(t_perfsqr_LDADD) $(LIBS)
-t-popcount$(EXEEXT): $(t_popcount_OBJECTS) $(t_popcount_DEPENDENCIES)
+t-popcount$(EXEEXT): $(t_popcount_OBJECTS) $(t_popcount_DEPENDENCIES) $(EXTRA_t_popcount_DEPENDENCIES)
@rm -f t-popcount$(EXEEXT)
$(LINK) $(t_popcount_OBJECTS) $(t_popcount_LDADD) $(LIBS)
-t-pow$(EXEEXT): $(t_pow_OBJECTS) $(t_pow_DEPENDENCIES)
+t-pow$(EXEEXT): $(t_pow_OBJECTS) $(t_pow_DEPENDENCIES) $(EXTRA_t_pow_DEPENDENCIES)
@rm -f t-pow$(EXEEXT)
$(LINK) $(t_pow_OBJECTS) $(t_pow_LDADD) $(LIBS)
-t-powm$(EXEEXT): $(t_powm_OBJECTS) $(t_powm_DEPENDENCIES)
+t-powm$(EXEEXT): $(t_powm_OBJECTS) $(t_powm_DEPENDENCIES) $(EXTRA_t_powm_DEPENDENCIES)
@rm -f t-powm$(EXEEXT)
$(LINK) $(t_powm_OBJECTS) $(t_powm_LDADD) $(LIBS)
-t-powm_ui$(EXEEXT): $(t_powm_ui_OBJECTS) $(t_powm_ui_DEPENDENCIES)
+t-powm_ui$(EXEEXT): $(t_powm_ui_OBJECTS) $(t_powm_ui_DEPENDENCIES) $(EXTRA_t_powm_ui_DEPENDENCIES)
@rm -f t-powm_ui$(EXEEXT)
$(LINK) $(t_powm_ui_OBJECTS) $(t_powm_ui_LDADD) $(LIBS)
-t-pprime_p$(EXEEXT): $(t_pprime_p_OBJECTS) $(t_pprime_p_DEPENDENCIES)
+t-pprime_p$(EXEEXT): $(t_pprime_p_OBJECTS) $(t_pprime_p_DEPENDENCIES) $(EXTRA_t_pprime_p_DEPENDENCIES)
@rm -f t-pprime_p$(EXEEXT)
$(LINK) $(t_pprime_p_OBJECTS) $(t_pprime_p_LDADD) $(LIBS)
-t-root$(EXEEXT): $(t_root_OBJECTS) $(t_root_DEPENDENCIES)
+t-primorial_ui$(EXEEXT): $(t_primorial_ui_OBJECTS) $(t_primorial_ui_DEPENDENCIES) $(EXTRA_t_primorial_ui_DEPENDENCIES)
+ @rm -f t-primorial_ui$(EXEEXT)
+ $(LINK) $(t_primorial_ui_OBJECTS) $(t_primorial_ui_LDADD) $(LIBS)
+t-remove$(EXEEXT): $(t_remove_OBJECTS) $(t_remove_DEPENDENCIES) $(EXTRA_t_remove_DEPENDENCIES)
+ @rm -f t-remove$(EXEEXT)
+ $(LINK) $(t_remove_OBJECTS) $(t_remove_LDADD) $(LIBS)
+t-root$(EXEEXT): $(t_root_OBJECTS) $(t_root_DEPENDENCIES) $(EXTRA_t_root_DEPENDENCIES)
@rm -f t-root$(EXEEXT)
$(LINK) $(t_root_OBJECTS) $(t_root_LDADD) $(LIBS)
-t-scan$(EXEEXT): $(t_scan_OBJECTS) $(t_scan_DEPENDENCIES)
+t-scan$(EXEEXT): $(t_scan_OBJECTS) $(t_scan_DEPENDENCIES) $(EXTRA_t_scan_DEPENDENCIES)
@rm -f t-scan$(EXEEXT)
$(LINK) $(t_scan_OBJECTS) $(t_scan_LDADD) $(LIBS)
-t-set_d$(EXEEXT): $(t_set_d_OBJECTS) $(t_set_d_DEPENDENCIES)
+t-set_d$(EXEEXT): $(t_set_d_OBJECTS) $(t_set_d_DEPENDENCIES) $(EXTRA_t_set_d_DEPENDENCIES)
@rm -f t-set_d$(EXEEXT)
$(LINK) $(t_set_d_OBJECTS) $(t_set_d_LDADD) $(LIBS)
-t-set_f$(EXEEXT): $(t_set_f_OBJECTS) $(t_set_f_DEPENDENCIES)
+t-set_f$(EXEEXT): $(t_set_f_OBJECTS) $(t_set_f_DEPENDENCIES) $(EXTRA_t_set_f_DEPENDENCIES)
@rm -f t-set_f$(EXEEXT)
$(LINK) $(t_set_f_OBJECTS) $(t_set_f_LDADD) $(LIBS)
-t-set_si$(EXEEXT): $(t_set_si_OBJECTS) $(t_set_si_DEPENDENCIES)
+t-set_si$(EXEEXT): $(t_set_si_OBJECTS) $(t_set_si_DEPENDENCIES) $(EXTRA_t_set_si_DEPENDENCIES)
@rm -f t-set_si$(EXEEXT)
$(LINK) $(t_set_si_OBJECTS) $(t_set_si_LDADD) $(LIBS)
-t-set_str$(EXEEXT): $(t_set_str_OBJECTS) $(t_set_str_DEPENDENCIES)
+t-set_str$(EXEEXT): $(t_set_str_OBJECTS) $(t_set_str_DEPENDENCIES) $(EXTRA_t_set_str_DEPENDENCIES)
@rm -f t-set_str$(EXEEXT)
$(LINK) $(t_set_str_OBJECTS) $(t_set_str_LDADD) $(LIBS)
-t-sizeinbase$(EXEEXT): $(t_sizeinbase_OBJECTS) $(t_sizeinbase_DEPENDENCIES)
+t-sizeinbase$(EXEEXT): $(t_sizeinbase_OBJECTS) $(t_sizeinbase_DEPENDENCIES) $(EXTRA_t_sizeinbase_DEPENDENCIES)
@rm -f t-sizeinbase$(EXEEXT)
$(LINK) $(t_sizeinbase_OBJECTS) $(t_sizeinbase_LDADD) $(LIBS)
-t-sqrtrem$(EXEEXT): $(t_sqrtrem_OBJECTS) $(t_sqrtrem_DEPENDENCIES)
+t-sqrtrem$(EXEEXT): $(t_sqrtrem_OBJECTS) $(t_sqrtrem_DEPENDENCIES) $(EXTRA_t_sqrtrem_DEPENDENCIES)
@rm -f t-sqrtrem$(EXEEXT)
$(LINK) $(t_sqrtrem_OBJECTS) $(t_sqrtrem_LDADD) $(LIBS)
-t-tdiv$(EXEEXT): $(t_tdiv_OBJECTS) $(t_tdiv_DEPENDENCIES)
+t-tdiv$(EXEEXT): $(t_tdiv_OBJECTS) $(t_tdiv_DEPENDENCIES) $(EXTRA_t_tdiv_DEPENDENCIES)
@rm -f t-tdiv$(EXEEXT)
$(LINK) $(t_tdiv_OBJECTS) $(t_tdiv_LDADD) $(LIBS)
-t-tdiv_ui$(EXEEXT): $(t_tdiv_ui_OBJECTS) $(t_tdiv_ui_DEPENDENCIES)
+t-tdiv_ui$(EXEEXT): $(t_tdiv_ui_OBJECTS) $(t_tdiv_ui_DEPENDENCIES) $(EXTRA_t_tdiv_ui_DEPENDENCIES)
@rm -f t-tdiv_ui$(EXEEXT)
$(LINK) $(t_tdiv_ui_OBJECTS) $(t_tdiv_ui_LDADD) $(LIBS)
distclean-compile:
-rm -f *.tab.c
-$(top_builddir)/ansi2knr:
- $(am__cd) $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) ./ansi2knr
-
-mostlyclean-kr:
- -test "$U" = "" || rm -f *_.c
.c.o:
$(COMPILE) -c $<
.c.lo:
$(LTCOMPILE) -c -o $@ $<
-bit_.c: bit.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/bit.c; then echo $(srcdir)/bit.c; else echo bit.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-convert_.c: convert.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/convert.c; then echo $(srcdir)/convert.c; else echo convert.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-dive_.c: dive.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/dive.c; then echo $(srcdir)/dive.c; else echo dive.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-dive_ui_.c: dive_ui.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/dive_ui.c; then echo $(srcdir)/dive_ui.c; else echo dive_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-io_.c: io.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/io.c; then echo $(srcdir)/io.c; else echo io.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-logic_.c: logic.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/logic.c; then echo $(srcdir)/logic.c; else echo logic.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-reuse_.c: reuse.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/reuse.c; then echo $(srcdir)/reuse.c; else echo reuse.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-addsub_.c: t-addsub.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-addsub.c; then echo $(srcdir)/t-addsub.c; else echo t-addsub.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-aorsmul_.c: t-aorsmul.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-aorsmul.c; then echo $(srcdir)/t-aorsmul.c; else echo t-aorsmul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-bin_.c: t-bin.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-bin.c; then echo $(srcdir)/t-bin.c; else echo t-bin.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-cdiv_ui_.c: t-cdiv_ui.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-cdiv_ui.c; then echo $(srcdir)/t-cdiv_ui.c; else echo t-cdiv_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-cmp_.c: t-cmp.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-cmp.c; then echo $(srcdir)/t-cmp.c; else echo t-cmp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-cmp_d_.c: t-cmp_d.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-cmp_d.c; then echo $(srcdir)/t-cmp_d.c; else echo t-cmp_d.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-cmp_si_.c: t-cmp_si.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-cmp_si.c; then echo $(srcdir)/t-cmp_si.c; else echo t-cmp_si.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-cong_.c: t-cong.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-cong.c; then echo $(srcdir)/t-cong.c; else echo t-cong.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-cong_2exp_.c: t-cong_2exp.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-cong_2exp.c; then echo $(srcdir)/t-cong_2exp.c; else echo t-cong_2exp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-div_2exp_.c: t-div_2exp.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-div_2exp.c; then echo $(srcdir)/t-div_2exp.c; else echo t-div_2exp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-divis_.c: t-divis.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-divis.c; then echo $(srcdir)/t-divis.c; else echo t-divis.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-divis_2exp_.c: t-divis_2exp.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-divis_2exp.c; then echo $(srcdir)/t-divis_2exp.c; else echo t-divis_2exp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-export_.c: t-export.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-export.c; then echo $(srcdir)/t-export.c; else echo t-export.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-fac_ui_.c: t-fac_ui.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-fac_ui.c; then echo $(srcdir)/t-fac_ui.c; else echo t-fac_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-fdiv_.c: t-fdiv.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-fdiv.c; then echo $(srcdir)/t-fdiv.c; else echo t-fdiv.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-fdiv_ui_.c: t-fdiv_ui.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-fdiv_ui.c; then echo $(srcdir)/t-fdiv_ui.c; else echo t-fdiv_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-fib_ui_.c: t-fib_ui.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-fib_ui.c; then echo $(srcdir)/t-fib_ui.c; else echo t-fib_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-fits_.c: t-fits.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-fits.c; then echo $(srcdir)/t-fits.c; else echo t-fits.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-gcd_.c: t-gcd.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-gcd.c; then echo $(srcdir)/t-gcd.c; else echo t-gcd.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-gcd_ui_.c: t-gcd_ui.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-gcd_ui.c; then echo $(srcdir)/t-gcd_ui.c; else echo t-gcd_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-get_d_.c: t-get_d.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-get_d.c; then echo $(srcdir)/t-get_d.c; else echo t-get_d.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-get_d_2exp_.c: t-get_d_2exp.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-get_d_2exp.c; then echo $(srcdir)/t-get_d_2exp.c; else echo t-get_d_2exp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-get_si_.c: t-get_si.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-get_si.c; then echo $(srcdir)/t-get_si.c; else echo t-get_si.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-hamdist_.c: t-hamdist.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-hamdist.c; then echo $(srcdir)/t-hamdist.c; else echo t-hamdist.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-import_.c: t-import.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-import.c; then echo $(srcdir)/t-import.c; else echo t-import.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-inp_str_.c: t-inp_str.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-inp_str.c; then echo $(srcdir)/t-inp_str.c; else echo t-inp_str.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-invert_.c: t-invert.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-invert.c; then echo $(srcdir)/t-invert.c; else echo t-invert.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-io_raw_.c: t-io_raw.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-io_raw.c; then echo $(srcdir)/t-io_raw.c; else echo t-io_raw.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-jac_.c: t-jac.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-jac.c; then echo $(srcdir)/t-jac.c; else echo t-jac.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-lcm_.c: t-lcm.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-lcm.c; then echo $(srcdir)/t-lcm.c; else echo t-lcm.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-lucnum_ui_.c: t-lucnum_ui.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-lucnum_ui.c; then echo $(srcdir)/t-lucnum_ui.c; else echo t-lucnum_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-mul_.c: t-mul.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-mul.c; then echo $(srcdir)/t-mul.c; else echo t-mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-mul_i_.c: t-mul_i.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-mul_i.c; then echo $(srcdir)/t-mul_i.c; else echo t-mul_i.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-nextprime_.c: t-nextprime.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-nextprime.c; then echo $(srcdir)/t-nextprime.c; else echo t-nextprime.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-oddeven_.c: t-oddeven.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-oddeven.c; then echo $(srcdir)/t-oddeven.c; else echo t-oddeven.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-perfpow_.c: t-perfpow.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-perfpow.c; then echo $(srcdir)/t-perfpow.c; else echo t-perfpow.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-perfsqr_.c: t-perfsqr.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-perfsqr.c; then echo $(srcdir)/t-perfsqr.c; else echo t-perfsqr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-popcount_.c: t-popcount.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-popcount.c; then echo $(srcdir)/t-popcount.c; else echo t-popcount.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-pow_.c: t-pow.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-pow.c; then echo $(srcdir)/t-pow.c; else echo t-pow.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-powm_.c: t-powm.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-powm.c; then echo $(srcdir)/t-powm.c; else echo t-powm.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-powm_ui_.c: t-powm_ui.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-powm_ui.c; then echo $(srcdir)/t-powm_ui.c; else echo t-powm_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-pprime_p_.c: t-pprime_p.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-pprime_p.c; then echo $(srcdir)/t-pprime_p.c; else echo t-pprime_p.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-root_.c: t-root.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-root.c; then echo $(srcdir)/t-root.c; else echo t-root.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-scan_.c: t-scan.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-scan.c; then echo $(srcdir)/t-scan.c; else echo t-scan.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-set_d_.c: t-set_d.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-set_d.c; then echo $(srcdir)/t-set_d.c; else echo t-set_d.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-set_f_.c: t-set_f.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-set_f.c; then echo $(srcdir)/t-set_f.c; else echo t-set_f.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-set_si_.c: t-set_si.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-set_si.c; then echo $(srcdir)/t-set_si.c; else echo t-set_si.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-set_str_.c: t-set_str.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-set_str.c; then echo $(srcdir)/t-set_str.c; else echo t-set_str.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-sizeinbase_.c: t-sizeinbase.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-sizeinbase.c; then echo $(srcdir)/t-sizeinbase.c; else echo t-sizeinbase.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-sqrtrem_.c: t-sqrtrem.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-sqrtrem.c; then echo $(srcdir)/t-sqrtrem.c; else echo t-sqrtrem.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-tdiv_.c: t-tdiv.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-tdiv.c; then echo $(srcdir)/t-tdiv.c; else echo t-tdiv.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-tdiv_ui_.c: t-tdiv_ui.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-tdiv_ui.c; then echo $(srcdir)/t-tdiv_ui.c; else echo t-tdiv_ui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-bit_.$(OBJEXT) bit_.lo convert_.$(OBJEXT) convert_.lo dive_.$(OBJEXT) \
-dive_.lo dive_ui_.$(OBJEXT) dive_ui_.lo io_.$(OBJEXT) io_.lo \
-logic_.$(OBJEXT) logic_.lo reuse_.$(OBJEXT) reuse_.lo \
-t-addsub_.$(OBJEXT) t-addsub_.lo t-aorsmul_.$(OBJEXT) t-aorsmul_.lo \
-t-bin_.$(OBJEXT) t-bin_.lo t-cdiv_ui_.$(OBJEXT) t-cdiv_ui_.lo \
-t-cmp_.$(OBJEXT) t-cmp_.lo t-cmp_d_.$(OBJEXT) t-cmp_d_.lo \
-t-cmp_si_.$(OBJEXT) t-cmp_si_.lo t-cong_.$(OBJEXT) t-cong_.lo \
-t-cong_2exp_.$(OBJEXT) t-cong_2exp_.lo t-div_2exp_.$(OBJEXT) \
-t-div_2exp_.lo t-divis_.$(OBJEXT) t-divis_.lo t-divis_2exp_.$(OBJEXT) \
-t-divis_2exp_.lo t-export_.$(OBJEXT) t-export_.lo t-fac_ui_.$(OBJEXT) \
-t-fac_ui_.lo t-fdiv_.$(OBJEXT) t-fdiv_.lo t-fdiv_ui_.$(OBJEXT) \
-t-fdiv_ui_.lo t-fib_ui_.$(OBJEXT) t-fib_ui_.lo t-fits_.$(OBJEXT) \
-t-fits_.lo t-gcd_.$(OBJEXT) t-gcd_.lo t-gcd_ui_.$(OBJEXT) t-gcd_ui_.lo \
-t-get_d_.$(OBJEXT) t-get_d_.lo t-get_d_2exp_.$(OBJEXT) \
-t-get_d_2exp_.lo t-get_si_.$(OBJEXT) t-get_si_.lo t-hamdist_.$(OBJEXT) \
-t-hamdist_.lo t-import_.$(OBJEXT) t-import_.lo t-inp_str_.$(OBJEXT) \
-t-inp_str_.lo t-invert_.$(OBJEXT) t-invert_.lo t-io_raw_.$(OBJEXT) \
-t-io_raw_.lo t-jac_.$(OBJEXT) t-jac_.lo t-lcm_.$(OBJEXT) t-lcm_.lo \
-t-lucnum_ui_.$(OBJEXT) t-lucnum_ui_.lo t-mul_.$(OBJEXT) t-mul_.lo \
-t-mul_i_.$(OBJEXT) t-mul_i_.lo t-nextprime_.$(OBJEXT) t-nextprime_.lo \
-t-oddeven_.$(OBJEXT) t-oddeven_.lo t-perfpow_.$(OBJEXT) t-perfpow_.lo \
-t-perfsqr_.$(OBJEXT) t-perfsqr_.lo t-popcount_.$(OBJEXT) \
-t-popcount_.lo t-pow_.$(OBJEXT) t-pow_.lo t-powm_.$(OBJEXT) t-powm_.lo \
-t-powm_ui_.$(OBJEXT) t-powm_ui_.lo t-pprime_p_.$(OBJEXT) \
-t-pprime_p_.lo t-root_.$(OBJEXT) t-root_.lo t-scan_.$(OBJEXT) \
-t-scan_.lo t-set_d_.$(OBJEXT) t-set_d_.lo t-set_f_.$(OBJEXT) \
-t-set_f_.lo t-set_si_.$(OBJEXT) t-set_si_.lo t-set_str_.$(OBJEXT) \
-t-set_str_.lo t-sizeinbase_.$(OBJEXT) t-sizeinbase_.lo \
-t-sqrtrem_.$(OBJEXT) t-sqrtrem_.lo t-tdiv_.$(OBJEXT) t-tdiv_.lo \
-t-tdiv_ui_.$(OBJEXT) t-tdiv_ui_.lo : $(ANSI2KNR)
mostlyclean-libtool:
-rm -f *.lo
fi; \
dashes=`echo "$$dashes" | sed s/./=/g`; \
if test "$$failed" -eq 0; then \
- echo "$$grn$$dashes"; \
+ col="$$grn"; \
else \
- echo "$$red$$dashes"; \
+ col="$$red"; \
fi; \
- echo "$$banner"; \
- test -z "$$skipped" || echo "$$skipped"; \
- test -z "$$report" || echo "$$report"; \
- echo "$$dashes$$std"; \
+ echo "$${col}$$dashes$${std}"; \
+ echo "$${col}$$banner$${std}"; \
+ test -z "$$skipped" || echo "$${col}$$skipped$${std}"; \
+ test -z "$$report" || echo "$${col}$$report$${std}"; \
+ echo "$${col}$$dashes$${std}"; \
test "$$failed" -eq 0; \
else :; fi
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
mostlyclean: mostlyclean-am
-mostlyclean-am: mostlyclean-compile mostlyclean-generic mostlyclean-kr \
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
mostlyclean-libtool
pdf: pdf-am
uninstall-am:
-.MAKE: $(top_builddir)/ansi2knr check-am install-am install-strip
+.MAKE: check-am install-am install-strip
.PHONY: CTAGS GTAGS all all-am check check-TESTS check-am clean \
clean-checkPROGRAMS clean-generic clean-libtool ctags \
install-ps install-ps-am install-strip installcheck \
installcheck-am installdirs maintainer-clean \
maintainer-clean-generic mostlyclean mostlyclean-compile \
- mostlyclean-generic mostlyclean-kr mostlyclean-libtool pdf \
- pdf-am ps ps-am tags uninstall uninstall-am
+ mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
+ tags uninstall uninstall-am
$(top_builddir)/tests/libtests.la:
/* Test mpz_setbit, mpz_clrbit, mpz_tstbit.
-Copyright 1997, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
+Copyright 1997, 2000, 2001, 2002, 2003, 2012, 2013 Free Software
+Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
/* exercise the case where mpz_clrbit or mpz_combit ends up extending a
value like -2^(k*GMP_NUMB_BITS-1) when clearing bit k*GMP_NUMB_BITS-1. */
+/* And vice-versa. */
void
check_clr_extend (void)
{
for (f = 0; f <= 1; f++)
{
/* lots of 1 bits in _mp_d */
- mpz_set_ui (got, 1L);
+ mpz_set_si (got, 1L);
mpz_mul_2exp (got, got, 10*GMP_NUMB_BITS);
mpz_sub_ui (got, got, 1L);
mpz_trace ("want", want);
abort ();
}
+
+ /* complement bit n, going back to ..11100..00 which is -2^(n-1) */
+ if (f == 0)
+ mpz_setbit (got, i*GMP_NUMB_BITS-1);
+ else
+ mpz_combit (got, i*GMP_NUMB_BITS-1);
+ MPZ_CHECK_FORMAT (got);
+
+ mpz_set_si (want, -1L);
+ mpz_mul_2exp (want, want, i*GMP_NUMB_BITS - 1);
+
+ if (mpz_cmp (got, want) != 0)
+ {
+ if (f == 0)
+ printf ("mpz_setbit: ");
+ else
+ printf ("mpz_combit: ");
+ printf ("wrong after shrinking\n");
+ mpz_trace ("got ", got);
+ mpz_trace ("want", want);
+ abort ();
+ }
}
}
{
for (offset = (limb==0 ? 0 : -2); offset <= 2; offset++)
{
- for (initial = 0; initial >= -1; initial--)
+ for (initial = 1; initial >= -1; initial--)
{
mpz_set_si (x, (long) initial);
mpz_set (s2, x);
bit2 = mpz_tstbit (x, bitindex);
- mpz_setbit (x, bitindex);
+ mpz_combit (x, bitindex);
MPZ_CHECK_FORMAT (x);
mpz_set (s3, x);
if (mpz_cmp (s2, s3) == 0)
abort ();
+ mpz_combit (x, bitindex);
+ MPZ_CHECK_FORMAT (x);
+ if (mpz_cmp (s2, x) != 0)
+ abort ();
+
+ mpz_clrbit (x, bitindex);
+ MPZ_CHECK_FORMAT (x);
+ if (mpz_cmp (s2, x) != 0)
+ abort ();
+
mpz_ui_pow_ui (m, 2L, bitindex);
MPZ_CHECK_FORMAT (m);
- mpz_ior (x, s2, m);
+ mpz_ior (x, s0, m);
MPZ_CHECK_FORMAT (x);
if (mpz_cmp (x, s3) != 0)
abort ();
mpz_com (m, m);
MPZ_CHECK_FORMAT (m);
- mpz_and (x, s1, m);
+ mpz_and (x, s0, m);
MPZ_CHECK_FORMAT (x);
if (mpz_cmp (x, s2) != 0)
abort ();
Copyright 1993, 1994, 1996, 1999, 2000, 2001, 2002, 2006, 2007 Free Software
Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
#include "gmp-impl.h"
#include "tests.h"
-void debug_mp __GMP_PROTO ((mpz_t, int));
+void debug_mp (mpz_t, int);
void
mpz_t bs;
unsigned long bsi;
int d, l;
- char *collseq = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
+ const char *collseq = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
mpz_init (bs);
size_range = mpz_get_ui (bs) % 16 + 1; /* 1..16 */
mpz_urandomb (bs, rands, size_range); /* 1..65536 digits */
len = mpz_get_ui (bs) + 1;
- buf = (*__gmp_allocate_func) (len + 1);
+ buf = (char *) (*__gmp_allocate_func) (len + 1);
if (base == 0)
base = 10;
string_urandomb (buf, len, base, rands);
Copyright 1996, 2001, 2002 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
Copyright 1996, 2001 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
/* Test conversion and I/O using mpz_out_str and mpz_inp_str.
-Copyright 1993, 1994, 1996, 2000, 2001 Free Software Foundation, Inc.
+Copyright 1993, 1994, 1996, 2000, 2001, 2012 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include "config.h"
int i;
int reps = 10000;
FILE *fp;
- int base;
+ int base, base_out;
gmp_randstate_ptr rands;
mpz_t bs;
unsigned long bsi, size_range;
mpz_urandomb (bs, rands, 16);
bsi = mpz_get_ui (bs);
- base = bsi % 36 + 1;
+ base = bsi % 62 + 1;
if (base == 1)
base = 0;
+ if (i % 2 == 0 && base <= 36)
+ base_out = -base;
+ else
+ base_out = base;
+
rewind (fp);
- if (mpz_out_str (fp, base, op1) == 0
+ if (mpz_out_str (fp, base_out, op1) == 0
|| putc (' ', fp) == EOF
|| fflush (fp) != 0)
{
/* Test mpz_com, mpz_and, mpz_ior, and mpz_xor.
-Copyright 1993, 1994, 1996, 1997, 2001 Free Software Foundation, Inc.
+Copyright 1993, 1994, 1996, 1997, 2001, 2013 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
#include "gmp-impl.h"
#include "tests.h"
-void dump_abort __GMP_PROTO (());
-void debug_mp __GMP_PROTO ((mpz_t, int));
+void dump_abort (void);
+void debug_mp (mpz_t, int);
int
main (int argc, char **argv)
mpz_init (t2);
mpz_init (t3);
+ mpz_set_si (x, -1);
+ mpz_set_ui (y, 0);
+ for (i = 0; i < 300; i++)
+ {
+ mpz_mul_2exp (x, x, 1);
+
+ mpz_and (r1, x, x);
+ MPZ_CHECK_FORMAT (r1);
+ if (mpz_cmp (r1, x) != 0)
+ dump_abort ();
+
+ mpz_ior (r2, x, x);
+ MPZ_CHECK_FORMAT (r2);
+ if (mpz_cmp (r2, x) != 0)
+ dump_abort ();
+
+ mpz_xor (t1, x, x);
+ MPZ_CHECK_FORMAT (t1);
+ if (mpz_cmp_si (t1, 0) != 0)
+ dump_abort ();
+
+ mpz_ior (t1, x, y);
+ MPZ_CHECK_FORMAT (t1);
+ if (mpz_cmp (t1, x) != 0)
+ dump_abort ();
+
+ mpz_xor (t2, x, y);
+ MPZ_CHECK_FORMAT (t2);
+ if (mpz_cmp (t2, x) != 0)
+ dump_abort ();
+
+ mpz_com (t2, x);
+ MPZ_CHECK_FORMAT (t2);
+ mpz_xor (t3, t2, x);
+ MPZ_CHECK_FORMAT (t3);
+ if (mpz_cmp_si (t3, -1) != 0)
+ dump_abort ();
+ }
+
for (i = 0; i < reps; i++)
{
mpz_urandomb (bs, rands, 32);
mpz_mul_si
mpz_addmul_ui (should this really allow a+=a*c?)
-Copyright 1996, 1999, 2000, 2001, 2002, 2009 Free Software Foundation, Inc.
+Copyright 1996, 1999, 2000, 2001, 2002, 2009, 2012, 2013 Free Software
+Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <unistd.h>
#include "gmp.h"
#include "gmp-impl.h"
#else /* ! DLL_EXPORT */
-void dump __GMP_PROTO ((char *, mpz_t, mpz_t, mpz_t));
+void dump (const char *, mpz_t, mpz_t, mpz_t);
-typedef void (*dss_func) __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr));
-typedef void (*dsi_func) __GMP_PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
-typedef unsigned long int (*dsi_div_func) __GMP_PROTO ((mpz_ptr, mpz_srcptr, unsigned long int));
-typedef unsigned long int (*ddsi_div_func) __GMP_PROTO ((mpz_ptr, mpz_ptr, mpz_srcptr, unsigned long int));
-typedef void (*ddss_div_func) __GMP_PROTO ((mpz_ptr, mpz_ptr, mpz_srcptr, mpz_srcptr));
-typedef void (*ds_func) __GMP_PROTO ((mpz_ptr, mpz_srcptr));
+typedef void (*dss_func) (mpz_ptr, mpz_srcptr, mpz_srcptr);
+typedef void (*dsi_func) (mpz_ptr, mpz_srcptr, unsigned long int);
+typedef unsigned long int (*dsi_div_func) (mpz_ptr, mpz_srcptr, unsigned long int);
+typedef unsigned long int (*ddsi_div_func) (mpz_ptr, mpz_ptr, mpz_srcptr, unsigned long int);
+typedef void (*ddss_div_func) (mpz_ptr, mpz_ptr, mpz_srcptr, mpz_srcptr);
+typedef void (*ds_func) (mpz_ptr, mpz_srcptr);
void
mpz_set_ui (r, 0);
}
-dss_func dss_funcs[] =
-{
- mpz_add, mpz_sub, mpz_mul,
- mpz_cdiv_q, mpz_cdiv_r, mpz_fdiv_q, mpz_fdiv_r, mpz_tdiv_q, mpz_tdiv_r,
- mpz_xinvert,
- mpz_gcd, mpz_lcm, mpz_and, mpz_ior, mpz_xor
-};
-char *dss_func_names[] =
-{
- "mpz_add", "mpz_sub", "mpz_mul",
- "mpz_cdiv_q", "mpz_cdiv_r", "mpz_fdiv_q", "mpz_fdiv_r", "mpz_tdiv_q", "mpz_tdiv_r",
- "mpz_xinvert",
- "mpz_gcd", "mpz_lcm", "mpz_and", "mpz_ior", "mpz_xor"
-};
-char dss_func_division[] = {0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0};
-
-dsi_func dsi_funcs[] =
+/* Functions called as f (result, src1, src2), each paired with the name
+   that FAIL hands to dump when a reuse check fails.  */
+struct {
+ dss_func fptr;
+ const char *fname;
+ int isdivision; /* nonzero: main skips the entry when in2 is zero */
+ int isslow; /* nonzero: main skips the entry when size_range > 19 */
+} dss[] =
+ { { mpz_add, "mpz_add", 0, 0 },
+ { mpz_sub, "mpz_sub", 0, 0 },
+ { mpz_mul, "mpz_mul", 0, 0 },
+ { mpz_cdiv_q, "mpz_cdiv_q", 1, 0 },
+ { mpz_cdiv_r, "mpz_cdiv_r", 1, 0 },
+ { mpz_fdiv_q, "mpz_fdiv_q", 1, 0 },
+ { mpz_fdiv_r, "mpz_fdiv_r", 1, 0 },
+ { mpz_tdiv_q, "mpz_tdiv_q", 1, 0 },
+ { mpz_tdiv_r, "mpz_tdiv_r", 1, 0 },
+ { mpz_mod, "mpz_mod", 1, 0 },
+ { mpz_xinvert, "mpz_xinvert", 1, 1 },
+ { mpz_gcd, "mpz_gcd", 0, 1 },
+ { mpz_lcm, "mpz_lcm", 0, 1 },
+ { mpz_and, "mpz_and", 0, 0 },
+ { mpz_ior, "mpz_ior", 0, 0 },
+ { mpz_xor, "mpz_xor", 0, 0 }
+ };
+
+
+/* Functions called as f (result, src, unsigned long), with their names.
+   When mod is nonzero, main sets the integer operand to
+   mpz_get_ui (in2) % mod before the call.  Entries with mod == 0 use in2i
+   as the previous iteration left it (initially mpz_get_ui (in2)), which is
+   presumably why the zero-mod entries are listed first.  */
+struct {
+ dsi_func fptr;
+ const char *fname;
+ int mod;
+} dsi[] =
{
/* Don't change order here without changing the code in main(). */
- mpz_add_ui, mpz_mul_ui, mpz_sub_ui,
- mpz_fdiv_q_2exp, mpz_fdiv_r_2exp,
- mpz_cdiv_q_2exp, mpz_cdiv_r_2exp,
- mpz_tdiv_q_2exp, mpz_tdiv_r_2exp,
- mpz_mul_2exp,
- mpz_pow_ui
-};
-char *dsi_func_names[] =
-{
- "mpz_add_ui", "mpz_mul_ui", "mpz_sub_ui",
- "mpz_fdiv_q_2exp", "mpz_fdiv_r_2exp",
- "mpz_cdiv_q_2exp", "mpz_cdiv_r_2exp",
- "mpz_tdiv_q_2exp", "mpz_tdiv_r_2exp",
- "mpz_mul_2exp",
- "mpz_pow_ui"
+ { mpz_add_ui, "mpz_add_ui", 0 },
+ { mpz_mul_ui, "mpz_mul_ui", 0 },
+ { mpz_sub_ui, "mpz_sub_ui", 0 },
+ { mpz_fdiv_q_2exp, "mpz_fdiv_q_2exp", 0x1000 },
+ { mpz_fdiv_r_2exp, "mpz_fdiv_r_2exp", 0x1000 },
+ { mpz_cdiv_q_2exp, "mpz_cdiv_q_2exp", 0x1000 },
+ { mpz_cdiv_r_2exp, "mpz_cdiv_r_2exp", 0x1000 },
+ { mpz_tdiv_q_2exp, "mpz_tdiv_q_2exp", 0x1000 },
+ { mpz_tdiv_r_2exp, "mpz_tdiv_r_2exp", 0x1000 },
+ { mpz_mul_2exp, "mpz_mul_2exp", 0x100 },
+ { mpz_pow_ui, "mpz_pow_ui", 0x10 }
};
-dsi_div_func dsi_div_funcs[] =
+/* Division functions called as f (result, src, unsigned long) that also
+   return an unsigned long; main compares both the result and the return
+   value between the plain and the operand-reusing call.  */
+struct {
+ dsi_div_func fptr;
+ const char *fname;
+} dsi_div[] =
{
- mpz_cdiv_q_ui, mpz_cdiv_r_ui,
- mpz_fdiv_q_ui, mpz_fdiv_r_ui,
- mpz_tdiv_q_ui, mpz_tdiv_r_ui
-};
-char *dsi_div_func_names[] =
-{
- "mpz_cdiv_q_ui", "mpz_cdiv_r_ui",
- "mpz_fdiv_q_ui", "mpz_fdiv_r_ui",
- "mpz_tdiv_q_ui", "mpz_tdiv_r_ui"
+ { mpz_cdiv_q_ui, "mpz_cdiv_q_ui" },
+ { mpz_cdiv_r_ui, "mpz_cdiv_r_ui" },
+ { mpz_fdiv_q_ui, "mpz_fdiv_q_ui" },
+ { mpz_fdiv_r_ui, "mpz_fdiv_r_ui" },
+ { mpz_tdiv_q_ui, "mpz_tdiv_q_ui" },
+ { mpz_tdiv_r_ui, "mpz_tdiv_r_ui" }
};
-ddsi_div_func ddsi_div_funcs[] =
-{
- mpz_cdiv_qr_ui,
- mpz_fdiv_qr_ui,
- mpz_tdiv_qr_ui
-};
-char *ddsi_div_func_names[] =
+/* Division functions called as f (quot, rem, src, unsigned long), with
+   their names.  isslow is 0 for every entry.
+   NOTE(review): the loop over this table shown in main does not test
+   isslow -- confirm whether the field is still needed.  */
+struct {
+ ddsi_div_func fptr;
+ const char *fname;
+ int isslow;
+} ddsi_div[] =
{
- "mpz_cdiv_qr_ui",
- "mpz_fdiv_qr_ui",
- "mpz_tdiv_qr_ui"
+ { mpz_cdiv_qr_ui, "mpz_cdiv_qr_ui", 0 },
+ { mpz_fdiv_qr_ui, "mpz_fdiv_qr_ui", 0 },
+ { mpz_tdiv_qr_ui, "mpz_tdiv_qr_ui", 0 },
};
-ddss_div_func ddss_div_funcs[] =
-{
- mpz_cdiv_qr,
- mpz_fdiv_qr,
- mpz_tdiv_qr
-};
-char *ddss_div_func_names[] =
-{
- "mpz_cdiv_qr",
- "mpz_fdiv_qr",
- "mpz_tdiv_qr"
-};
-ds_func ds_funcs[] =
+/* Division functions called as f (quot, rem, src1, src2), with their
+   names.  isslow is 0 for every entry.
+   NOTE(review): the loop over this table shown in main does not test
+   isslow -- confirm whether the field is still needed.  */
+struct {
+ ddss_div_func fptr;
+ const char *fname;
+ int isslow;
+} ddss_div[] =
{
- mpz_abs, mpz_com, mpz_neg, mpz_sqrt
+ { mpz_cdiv_qr, "mpz_cdiv_qr", 0 },
+ { mpz_fdiv_qr, "mpz_fdiv_qr", 0 },
+ { mpz_tdiv_qr, "mpz_tdiv_qr", 0 },
};
-char *ds_func_names[] =
+
+/* Functions called as f (result, src), with their names.  */
+struct {
+ ds_func fptr;
+ const char *fname;
+ int nonneg; /* nonzero: main skips the entry when in1 is negative */
+} ds[] =
{
- "mpz_abs", "mpz_com", "mpz_neg", "mpz_sqrt"
+ { mpz_abs, "mpz_abs", 0 },
+ { mpz_com, "mpz_com", 0 },
+ { mpz_neg, "mpz_neg", 0 },
+ { mpz_sqrt, "mpz_sqrt", 1 },
};
+/* FAIL: report a mismatch for table entry class[indx] by passing its fname
+   and the operands to dump, then terminate with exit status 1.  */
+#define FAIL(class,indx,op1,op2,op3) \
+ do { \
+ dump (class[indx].fname, op1, op2, op3); \
+ exit (1); \
+ } while (0)
+
+/* FAIL2: same as FAIL for a function that is not in a table; the name is
+   the stringified fname argument.  */
+#define FAIL2(fname,op1,op2,op3) \
+ do { \
+ dump (#fname, op1, op2, op3); \
+ exit (1); \
+ } while (0)
+
-/* Really use `defined (__STDC__)' here; we want it to be true for Sun C */
-#if defined (__STDC__) || defined (__cplusplus)
-#define FAIL(class,indx,op1,op2,op3) \
+/* INVOKE_RRS: call (desc).fptr (r1, r2, i1).  Reads the variable `pass'
+   from the expansion site: bit 0 triggers _mpz_realloc on r1 and bit 1 on
+   r2, each to ABSIZ of the operand, before the call.
+   NOTE(review): presumably this trims the allocation to the current size
+   so the callee has to grow it -- confirm against gmp-impl.h.  */
+#define INVOKE_RRS(desc,r1,r2,i1) \
do { \
- class##_funcs[indx] = 0; \
- dump (class##_func_names[indx], op1, op2, op3); \
- failures++; \
+ if (pass & 1) _mpz_realloc (r1, ABSIZ(r1)); \
+ if (pass & 2) _mpz_realloc (r2, ABSIZ(r2)); \
+ (desc).fptr (r1, r2, i1); \
} while (0)
-#define FAIL2(fname,op1,op2,op3) \
+/* INVOKE_RS: call (desc).fptr (r1, i1).  Reads the variable `pass' from
+   the expansion site; when bit 0 is set, r1 is first passed to
+   _mpz_realloc with ABSIZ (r1) as the new allocation.  */
+#define INVOKE_RS(desc,r1,i1) \
do { \
- dump (#fname, op1, op2, op3); \
- failures++; \
+ if (pass & 1) _mpz_realloc (r1, ABSIZ(r1)); \
+ (desc).fptr (r1, i1); \
} while (0)
-#else
-#define FAIL(class,indx,op1,op2,op3) \
+/* INVOKE_RRSS: call (desc).fptr (r1, r2, i1, i2).  Reads the variable
+   `pass' from the expansion site: bit 0 triggers _mpz_realloc on r1 and
+   bit 1 on r2, each to ABSIZ of the operand, before the call.  */
+#define INVOKE_RRSS(desc,r1,r2,i1,i2) \
do { \
- class/**/_funcs[indx] = 0; \
- dump (class/**/_func_names[indx], op1, op2, op3); \
- failures++; \
+ if (pass & 1) _mpz_realloc (r1, ABSIZ(r1)); \
+ if (pass & 2) _mpz_realloc (r2, ABSIZ(r2)); \
+ (desc).fptr (r1, r2, i1, i2); \
} while (0)
-#define FAIL2(fname,op1,op2,op3) \
+/* INVOKE_RSS: call (desc).fptr (r1, i1, i2).  Reads the variable `pass'
+   from the expansion site; when bit 0 is set, r1 is first passed to
+   _mpz_realloc with ABSIZ (r1) as the new allocation.  */
+#define INVOKE_RSS(desc,r1,i1,i2) \
do { \
- dump ("fname", op1, op2, op3); \
- failures++; \
+ if (pass & 1) _mpz_realloc (r1, ABSIZ(r1)); \
+ (desc).fptr (r1, i1, i2); \
} while (0)
-#endif
-
-
int
main (int argc, char **argv)
{
int i;
- int pass, reps = 100;
+ int pass, reps = 400;
mpz_t in1, in2, in3;
unsigned long int in2i;
mp_size_t size;
mpz_t ref1, ref2, ref3;
mpz_t t;
unsigned long int r1, r2;
- long failures = 0;
gmp_randstate_ptr rands;
mpz_t bs;
unsigned long bsi, size_range;
for (pass = 1; pass <= reps; pass++)
{
- mpz_urandomb (bs, rands, 32);
- size_range = mpz_get_ui (bs) % 17 + 2;
-
- mpz_urandomb (bs, rands, size_range);
- size = mpz_get_ui (bs);
- mpz_rrandomb (in1, rands, size);
+ if (isatty (fileno (stdout)))
+ {
+ printf ("\r%d/%d passes", pass, reps);
+ fflush (stdout);
+ }
- mpz_urandomb (bs, rands, size_range);
- size = mpz_get_ui (bs);
- mpz_rrandomb (in2, rands, size);
+ mpz_urandomb (bs, rands, 32);
+ size_range = mpz_get_ui (bs) % 21 + 2;
- mpz_urandomb (bs, rands, size_range);
- size = mpz_get_ui (bs);
- mpz_rrandomb (in3, rands, size);
+ if ((pass & 1) == 0)
+ {
+ /* Make all input operands have quite different sizes */
+ mpz_urandomb (bs, rands, 32);
+ size = mpz_get_ui (bs) % size_range;
+ mpz_rrandomb (in1, rands, size);
+
+ mpz_urandomb (bs, rands, 32);
+ size = mpz_get_ui (bs) % size_range;
+ mpz_rrandomb (in2, rands, size);
+
+ mpz_urandomb (bs, rands, 32);
+ size = mpz_get_ui (bs) % size_range;
+ mpz_rrandomb (in3, rands, size);
+ }
+ else
+ {
+ /* Make all input operands have about the same size */
+ mpz_urandomb (bs, rands, size_range);
+ size = mpz_get_ui (bs);
+ mpz_rrandomb (in1, rands, size);
+
+ mpz_urandomb (bs, rands, size_range);
+ size = mpz_get_ui (bs);
+ mpz_rrandomb (in2, rands, size);
+
+ mpz_urandomb (bs, rands, size_range);
+ size = mpz_get_ui (bs);
+ mpz_rrandomb (in3, rands, size);
+ }
mpz_urandomb (bs, rands, 3);
bsi = mpz_get_ui (bs);
if ((bsi & 1) != 0)
mpz_neg (in1, in1);
- if ((bsi & 1) != 0)
+ if ((bsi & 2) != 0)
mpz_neg (in2, in2);
- if ((bsi & 1) != 0)
+ if ((bsi & 4) != 0)
mpz_neg (in3, in3);
- for (i = 0; i < sizeof (dss_funcs) / sizeof (dss_func); i++)
+ for (i = 0; i < numberof (dss); i++)
{
- if (dss_funcs[i] == 0)
+ if (dss[i].isdivision && mpz_sgn (in2) == 0)
continue;
- if (dss_func_division[i] && mpz_sgn (in2) == 0)
+ if (dss[i].isslow && size_range > 19)
continue;
- (dss_funcs[i]) (ref1, in1, in2);
+ (dss[i].fptr) (ref1, in1, in2);
MPZ_CHECK_FORMAT (ref1);
mpz_set (res1, in1);
- (dss_funcs[i]) (res1, res1, in2);
+ INVOKE_RSS (dss[i], res1, res1, in2);
MPZ_CHECK_FORMAT (res1);
if (mpz_cmp (ref1, res1) != 0)
FAIL (dss, i, in1, in2, NULL);
mpz_set (res1, in2);
- (dss_funcs[i]) (res1, in1, res1);
+ INVOKE_RSS (dss[i], res1, in1, res1);
MPZ_CHECK_FORMAT (res1);
if (mpz_cmp (ref1, res1) != 0)
FAIL (dss, i, in1, in2, NULL);
}
- for (i = 0; i < sizeof (ddss_div_funcs) / sizeof (ddss_div_func); i++)
+ for (i = 0; i < numberof (ddss_div); i++)
{
- if (ddss_div_funcs[i] == 0)
- continue;
if (mpz_sgn (in2) == 0)
continue;
- (ddss_div_funcs[i]) (ref1, ref2, in1, in2);
+ (ddss_div[i].fptr) (ref1, ref2, in1, in2);
MPZ_CHECK_FORMAT (ref1);
MPZ_CHECK_FORMAT (ref2);
mpz_set (res1, in1);
- (ddss_div_funcs[i]) (res1, res2, res1, in2);
+ INVOKE_RRSS (ddss_div[i], res1, res2, res1, in2);
MPZ_CHECK_FORMAT (res1);
MPZ_CHECK_FORMAT (res2);
if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0)
FAIL (ddss_div, i, in1, in2, NULL);
mpz_set (res2, in1);
- (ddss_div_funcs[i]) (res1, res2, res2, in2);
+ INVOKE_RRSS (ddss_div[i], res1, res2, res2, in2);
MPZ_CHECK_FORMAT (res1);
MPZ_CHECK_FORMAT (res2);
if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0)
FAIL (ddss_div, i, in1, in2, NULL);
mpz_set (res1, in2);
- (ddss_div_funcs[i]) (res1, res2, in1, res1);
+ INVOKE_RRSS (ddss_div[i], res1, res2, in1, res1);
MPZ_CHECK_FORMAT (res1);
MPZ_CHECK_FORMAT (res2);
if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0)
FAIL (ddss_div, i, in1, in2, NULL);
mpz_set (res2, in2);
- (ddss_div_funcs[i]) (res1, res2, in1, res2);
+ INVOKE_RRSS (ddss_div[i], res1, res2, in1, res2);
MPZ_CHECK_FORMAT (res1);
MPZ_CHECK_FORMAT (res2);
if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0)
FAIL (ddss_div, i, in1, in2, NULL);
}
- for (i = 0; i < sizeof (ds_funcs) / sizeof (ds_func); i++)
+ for (i = 0; i < numberof (ds); i++)
{
- if (ds_funcs[i] == 0)
- continue;
- if (strcmp (ds_func_names[i], "mpz_sqrt") == 0
- && mpz_sgn (in1) < 0)
+ if (ds[i].nonneg && mpz_sgn (in1) < 0)
continue;
- (ds_funcs[i]) (ref1, in1);
+ (ds[i].fptr) (ref1, in1);
MPZ_CHECK_FORMAT (ref1);
mpz_set (res1, in1);
- (ds_funcs[i]) (res1, res1);
+ INVOKE_RS (ds[i], res1, res1);
MPZ_CHECK_FORMAT (res1);
if (mpz_cmp (ref1, res1) != 0)
FAIL (ds, i, in1, in2, NULL);
in2i = mpz_get_ui (in2);
- for (i = 0; i < sizeof (dsi_funcs) / sizeof (dsi_func); i++)
+ for (i = 0; i < numberof (dsi); i++)
{
- if (dsi_funcs[i] == 0)
- continue;
- if (strcmp (dsi_func_names[i], "mpz_fdiv_q_2exp") == 0)
- /* Limit exponent to something reasonable for the division
- functions. Without this, we'd normally shift things off
- the end and just generate the trivial values 1, 0, -1. */
- in2i %= 0x1000;
- if (strcmp (dsi_func_names[i], "mpz_mul_2exp") == 0)
- /* Limit exponent more for mpz_mul_2exp to save time. */
- in2i %= 0x100;
- if (strcmp (dsi_func_names[i], "mpz_pow_ui") == 0)
- /* Limit exponent yet more for mpz_pow_ui to save time. */
- in2i %= 0x10;
-
- (dsi_funcs[i]) (ref1, in1, in2i);
+ if (dsi[i].mod != 0)
+ in2i = mpz_get_ui (in2) % dsi[i].mod;
+
+ (dsi[i].fptr) (ref1, in1, in2i);
MPZ_CHECK_FORMAT (ref1);
mpz_set (res1, in1);
- (dsi_funcs[i]) (res1, res1, in2i);
+ INVOKE_RRS (dsi[i], res1, res1, in2i);
MPZ_CHECK_FORMAT (res1);
if (mpz_cmp (ref1, res1) != 0)
FAIL (dsi, i, in1, in2, NULL);
if (in2i != 0) /* Don't divide by 0. */
{
- for (i = 0; i < sizeof (dsi_div_funcs) / sizeof (dsi_div_funcs); i++)
+ for (i = 0; i < numberof (dsi_div); i++)
{
- r1 = (dsi_div_funcs[i]) (ref1, in1, in2i);
+ r1 = (dsi_div[i].fptr) (ref1, in1, in2i);
MPZ_CHECK_FORMAT (ref1);
mpz_set (res1, in1);
- r2 = (dsi_div_funcs[i]) (res1, res1, in2i);
+ r2 = (dsi_div[i].fptr) (res1, res1, in2i);
MPZ_CHECK_FORMAT (res1);
if (mpz_cmp (ref1, res1) != 0 || r1 != r2)
FAIL (dsi_div, i, in1, in2, NULL);
}
- for (i = 0; i < sizeof (ddsi_div_funcs) / sizeof (ddsi_div_funcs); i++)
+ for (i = 0; i < numberof (ddsi_div); i++)
{
- r1 = (ddsi_div_funcs[i]) (ref1, ref2, in1, in2i);
+ r1 = (ddsi_div[i].fptr) (ref1, ref2, in1, in2i);
MPZ_CHECK_FORMAT (ref1);
mpz_set (res1, in1);
- r2 = (ddsi_div_funcs[i]) (res1, res2, res1, in2i);
+ r2 = (ddsi_div[i].fptr) (res1, res2, res1, in2i);
MPZ_CHECK_FORMAT (res1);
if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0 || r1 != r2)
FAIL (ddsi_div, i, in1, in2, NULL);
mpz_set (res2, in1);
- (ddsi_div_funcs[i]) (res1, res2, res2, in2i);
+ (ddsi_div[i].fptr) (res1, res2, res2, in2i);
MPZ_CHECK_FORMAT (res1);
if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0 || r1 != r2)
FAIL (ddsi_div, i, in1, in2, NULL);
MPZ_CHECK_FORMAT (res2);
if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0)
FAIL2 (mpz_sqrtrem, in1, NULL, NULL);
+
+ mpz_set (res1, in1);
+ mpz_sqrtrem (res1, res1, res1);
+ MPZ_CHECK_FORMAT (res1);
+ if (mpz_cmp (ref2, res1) != 0)
+ FAIL2 (mpz_sqrtrem, in1, NULL, NULL);
}
if (mpz_sgn (in1) >= 0)
{
- mpz_root (ref1, in1, in2i % 0x1000 + 1);
+ mpz_root (ref1, in1, in2i % 0x100 + 1);
MPZ_CHECK_FORMAT (ref1);
mpz_set (res1, in1);
- mpz_root (res1, res1, in2i % 0x1000 + 1);
+ mpz_root (res1, res1, in2i % 0x100 + 1);
MPZ_CHECK_FORMAT (res1);
if (mpz_cmp (ref1, res1) != 0)
FAIL2 (mpz_root, in1, in2, NULL);
if (mpz_sgn (in1) >= 0)
{
- mpz_rootrem (ref1, ref2, in1, in2i % 0x1000 + 1);
+ mpz_rootrem (ref1, ref2, in1, in2i % 0x100 + 1);
MPZ_CHECK_FORMAT (ref1);
MPZ_CHECK_FORMAT (ref2);
mpz_set (res1, in1);
- mpz_rootrem (res1, res2, res1, in2i % 0x1000 + 1);
+ mpz_rootrem (res1, res2, res1, in2i % 0x100 + 1);
MPZ_CHECK_FORMAT (res1);
MPZ_CHECK_FORMAT (res2);
if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0)
FAIL2 (mpz_rootrem, in1, in2, NULL);
mpz_set (res2, in1);
- mpz_rootrem (res1, res2, res2, in2i % 0x1000 + 1);
+ mpz_rootrem (res1, res2, res2, in2i % 0x100 + 1);
MPZ_CHECK_FORMAT (res1);
MPZ_CHECK_FORMAT (res2);
if (mpz_cmp (ref1, res1) != 0 || mpz_cmp (ref2, res2) != 0)
FAIL2 (mpz_rootrem, in1, in2, NULL);
}
- if (pass < reps / 2) /* run fewer tests since gcdext lots of time */
+ if (size_range < 18) /* run fewer tests since gcdext lots of time */
{
mpz_gcdext (ref1, ref2, ref3, in1, in2);
MPZ_CHECK_FORMAT (ref1);
}
/* Don't run mpz_powm for huge exponents or when undefined. */
- if (mpz_sizeinbase (in2, 2) < 250 && mpz_sgn (in3) != 0
+ if (size_range < 17 && mpz_sizeinbase (in2, 2) < 250 && mpz_sgn (in3) != 0
&& (mpz_sgn (in2) >= 0 || mpz_invert (t, in1, in3)))
{
mpz_powm (ref1, in1, in2, in3);
}
/* Don't run mpz_powm_ui when undefined. */
- if (mpz_sgn (in3) != 0)
+ if (size_range < 17 && mpz_sgn (in3) != 0)
{
mpz_powm_ui (ref1, in1, in2i, in3);
MPZ_CHECK_FORMAT (ref1);
FAIL2 (mpz_gcd_ui, in1, in2, NULL);
}
- if (mpz_cmp_ui (in2, 1L) > 0 && mpz_sgn (in1) != 0)
+ if (mpz_sgn (in2) != 0)
{
/* Test mpz_remove */
- mpz_remove (ref1, in1, in2);
+ mp_bitcnt_t refretval, retval;
+ refretval = mpz_remove (ref1, in1, in2);
MPZ_CHECK_FORMAT (ref1);
mpz_set (res1, in1);
- mpz_remove (res1, res1, in2);
+ retval = mpz_remove (res1, res1, in2);
MPZ_CHECK_FORMAT (res1);
- if (mpz_cmp (ref1, res1) != 0)
+ if (mpz_cmp (ref1, res1) != 0 || refretval != retval)
FAIL2 (mpz_remove, in1, in2, NULL);
mpz_set (res1, in2);
- mpz_remove (res1, in1, res1);
+ retval = mpz_remove (res1, in1, res1);
MPZ_CHECK_FORMAT (res1);
- if (mpz_cmp (ref1, res1) != 0)
+ if (mpz_cmp (ref1, res1) != 0 || refretval != retval)
FAIL2 (mpz_remove, in1, in2, NULL);
}
}
}
- if (failures != 0)
- {
- fprintf (stderr, "mpz/reuse: %ld error%s\n", failures, "s" + (failures == 1));
- exit (1);
- }
+ if (isatty (fileno (stdout)))
+ printf ("\r%20s", "");
mpz_clear (bs);
mpz_clear (in1);
mpz_clear (res3);
mpz_clear (t);
+ if (isatty (fileno (stdout)))
+ printf ("\r");
+
tests_end ();
exit (0);
}
+/* Print "failure in NAME (...)" to stdout for a failed check.  in2 and in3
+   may be NULL, in which case their slot is omitted.
+   NOTE(review): every mpz_out_str call is prefixed with `0 &&', which
+   short-circuits it, so the operand values are never written -- only the
+   separating spaces are.  Confirm this is intentional before relying on
+   the output for debugging.  */
void
-dump (char *name, mpz_t in1, mpz_t in2, mpz_t in3)
+dump (const char *name, mpz_t in1, mpz_t in2, mpz_t in3)
{
printf ("failure in %s (", name);
- mpz_out_str (stdout, -16, in1);
+ 0 && mpz_out_str (stdout, -16, in1);
if (in2 != NULL)
{
printf (" ");
- mpz_out_str (stdout, -16, in2);
+ 0 && mpz_out_str (stdout, -16, in2);
}
if (in3 != NULL)
{
printf (" ");
- mpz_out_str (stdout, -16, in3);
+ 0 && mpz_out_str (stdout, -16, in3);
}
printf (")\n");
}
Copyright 2002 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
#include "longlong.h"
#include "tests.h"
-void debug_mp __GMP_PROTO ((mpz_t, int));
-void dump_abort __GMP_PROTO ((int, char *, mpz_t, mpz_t));
+void debug_mp (mpz_t, int);
+void dump_abort (int, const char *, mpz_t, mpz_t);
int
main (int argc, char **argv)
}
void
-dump_abort (int i, char *s, mpz_t op1, mpz_t op2)
+dump_abort (int i, const char *s, mpz_t op1, mpz_t op2)
{
fprintf (stderr, "ERROR: %s in test %d\n", s, i);
fprintf (stderr, "op1 = "); debug_mp (op1, -16);
Copyright 2001, 2002 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
/* Exercise mpz_bin_ui and mpz_bin_uiui.
-Copyright 2000, 2001 Free Software Foundation, Inc.
+Copyright 2000, 2001, 2010, 2012 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
#include "gmp-impl.h"
#include "tests.h"
+/* Default number of generated tests. */
+#define COUNT 700
void
try_mpz_bin_ui (mpz_srcptr want, mpz_srcptr n, unsigned long k)
const char *want;
} data[] = {
- { "0", 0, "1" },
- { "0", 1, "0" },
- { "0", 2, "0" },
- { "0", 3, "0" },
- { "0", 4, "0" },
{ "0", 123456, "0" },
-
- { "1", 0, "1" },
- { "1", 1, "1" },
- { "1", 2, "0" },
- { "1", 3, "0" },
- { "1", 4, "0" },
- { "1", 123456, "0" },
-
- { "2", 0, "1" },
- { "2", 1, "2" },
- { "2", 2, "1" },
- { "2", 3, "0" },
- { "2", 4, "0" },
- { "2", 123456, "0" },
-
- { "3", 0, "1" },
- { "3", 1, "3" },
- { "3", 2, "3" },
- { "3", 3, "1" },
- { "3", 4, "0" },
- { "3", 5, "0" },
- { "3", 123456, "0" },
-
- { "4", 0, "1" },
- { "4", 1, "4" },
- { "4", 2, "6" },
- { "4", 3, "4" },
- { "4", 4, "1" },
- { "4", 5, "0" },
- { "4", 6, "0" },
- { "4", 123456, "0" },
-
- { "10", 0, "1" },
- { "10", 1, "10" },
- { "10", 2, "45" },
- { "10", 3, "120" },
- { "10", 4, "210" },
- { "10", 5, "252" },
- { "10", 6, "210" },
- { "10", 7, "120" },
- { "10", 8, "45" },
- { "10", 9, "10" },
- { "10", 10, "1" },
- { "10", 11, "0" },
- { "10", 12, "0" },
- { "10", 123456, "0" },
+ { "1", 543210, "0" },
+ { "2", 123321, "0" },
+ { "3", 234567, "0" },
+ { "10", 23456, "0" },
/* negatives, using bin(-n,k)=bin(n+k-1,k) */
{ "-1", 0, "1" },
{ "-3", 5, "-21" },
{ "-3", 6, "28" },
- { "40", 20, "137846528820" },
- { "60", 30, "118264581564861424" },
+ /* A few random values */
+ { "41", 20, "269128937220" },
+ { "62", 37, "147405545359541742" },
+ { "50", 18, "18053528883775" },
+ { "149", 21, "19332950844468483467894649" },
};
mpz_t n, want;
/* Test some bin(2k,k) cases. This produces some biggish numbers to
exercise the limb accumulating code. */
void
-twos (void)
+twos (int count)
{
mpz_t n, want;
unsigned long k;
mpz_init (want);
mpz_set_ui (want, (unsigned long) 2);
- for (k = 1; k < 200; k++)
+ for (k = 1; k < count; k++)
{
mpz_set_ui (n, 2*k);
try_mpz_bin_ui (want, n, k);
mpz_clear (want);
}
+/* Test some random bin(n,k) cases. This produces some biggish
+ numbers to exercise the limb accumulating code.
+
+ Starts from binomial(12,3) and runs count-1 iterations. Each iteration
+ draws r, moves (n,k) by r & 7 steps that increase both n and k and then
+ by r >> 3 steps that increase n only, updating the expected value
+ incrementally, and finally passes it to try_mpz_bin_ui and
+ try_mpz_bin_uiui. */
+void
+randomwalk (int count)
+{
+ mpz_t n_z, want;
+ unsigned long n, k, i, r;
+ int tests;
+ gmp_randstate_ptr rands;
+
+ rands = RANDS;
+ mpz_init (n_z);
+ mpz_init (want);
+
+ k = 3;
+ n = 12;
+ mpz_set_ui (want, (unsigned long) 220); /* binomial(12,3) = 220 */
+
+ for (tests = 1; tests < count; tests++)
+ {
+ r = gmp_urandomm_ui (rands, 62) + 1;
+ /* binomial(n+1,k+1) = binomial(n,k) * (n+1) / (k+1); n and k are
+ incremented first, so the factors below are the new n and k. */
+ for (i = r & 7; i > 0; i--)
+ {
+ n++; k++;
+ mpz_mul_ui (want, want, n);
+ mpz_fdiv_q_ui (want, want, k);
+ }
+ /* binomial(n+1,k) = binomial(n,k) * (n+1) / (n+1-k); n is incremented
+ first, so the divisor below is the new n minus k. */
+ for (i = r >> 3; i > 0; i--)
+ {
+ n++;
+ mpz_mul_ui (want, want, n);
+ mpz_fdiv_q_ui (want, want, n - k);
+ }
+
+ mpz_set_ui (n_z, n);
+ try_mpz_bin_ui (want, n_z, k);
+
+ try_mpz_bin_uiui (want, n, k);
+ }
+
+ mpz_clear (n_z);
+ mpz_clear (want);
+}
+
+
+/* Test all bin(n,k) cases, with 0 <= k <= n + 1 <= count.
+
+   For each n the expected value starts at bin(n,0) = 1 and is stepped with
+   bin(n,k+1) = bin(n,k) * (n-k) / (k+1).  When the inner loop ends,
+   k == n + 1 and the expected value has become 0, so the last pair of
+   checks covers the first k beyond n.  */
+void
+smallexaustive (unsigned int count)
+{
+  mpz_t n_z, want;
+  unsigned long n, k;
+
+  mpz_init (n_z);
+  mpz_init (want);
+
+  for (n = 0; n < count; n++)
+    {
+      mpz_set_ui (want, (unsigned long) 1);
+      mpz_set_ui (n_z, n);
+      for (k = 0; k <= n; k++)
+	{
+	  try_mpz_bin_ui (want, n_z, k);
+	  try_mpz_bin_uiui (want, n, k);
+	  /* Advance want from bin(n,k) to bin(n,k+1).  */
+	  mpz_mul_ui (want, want, n - k);
+	  mpz_fdiv_q_ui (want, want, k + 1);
+	}
+      /* Here k == n + 1 and want == 0.  */
+      try_mpz_bin_ui (want, n_z, k);
+      try_mpz_bin_uiui (want, n, k);
+    }
+
+  mpz_clear (n_z);
+  mpz_clear (want);
+}
int
-main (void)
+main (int argc, char **argv)
{
+ int count;
+
+ if (argc > 1)
+ {
+ char *end;
+ count = strtol (argv[1], &end, 0);
+ if (*end || count <= 0)
+ {
+ fprintf (stderr, "Invalid test count: %s.\n", argv[1]);
+ return 1;
+ }
+ }
+ else
+ count = COUNT;
+
tests_start ();
samples ();
- twos ();
+ smallexaustive (count >> 4);
+ twos (count >> 1);
+ randomwalk (count - (count >> 1));
tests_end ();
exit (0);
Copyright 1993, 1994, 1996, 2000, 2001, 2002 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
#include "gmp-impl.h"
#include "tests.h"
-void dump_abort __GMP_PROTO ((char *, mpz_t, unsigned long));
-void debug_mp __GMP_PROTO ((mpz_t, int));
+void dump_abort (const char *, mpz_t, unsigned long);
+void debug_mp (mpz_t, int);
int
main (int argc, char **argv)
}
void
-dump_abort (char *str, mpz_t dividend, unsigned long divisor)
+dump_abort (const char *str, mpz_t dividend, unsigned long divisor)
{
fprintf (stderr, "ERROR: %s\n", str);
fprintf (stderr, "dividend = "); debug_mp (dividend, -16);
Copyright 2001 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
Copyright 2001, 2002, 2003, 2005 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
{ "1", 0.0, 1, 1 },
{ "-1", 0.0, -1, 1 },
+ { "1", 0.5, 1, 1 },
+ { "-1", -0.5, -1, 1 },
+
{ "0", 1.0, -1, -1 },
{ "0", -1.0, 1, -1 },
- { "0x1000000000000000000000000000000000000000000000000", 0.0, 1, 1 },
- { "-0x1000000000000000000000000000000000000000000000000", 0.0, -1, 1 },
+ { "0x1000000000000000000000000000000000000000000000000", 1.0, 1, 1 },
+ { "-0x1000000000000000000000000000000000000000000000000", 1.0, -1, 1 },
{ "0", 1e100, -1, -1 },
{ "0", -1e100, 1, -1 },
mpz_init (x);
/* FIXME: It'd be better to base this on the float format. */
-#ifdef __vax
+#if defined (__vax) || defined (__vax__)
#define LIM 127 /* vax fp numbers have limited range */
#else
#define LIM 512
Copyright 2000, 2001 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
/* test mpz_congruent_p and mpz_congruent_ui_p
-Copyright 2001, 2002 Free Software Foundation, Inc.
+Copyright 2001, 2002, 2012 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
{
got = (mpz_congruent_p (a, c, d) != 0);
if (want != got)
- {
- printf ("mpz_congruent_p wrong\n");
- printf (" expected %d got %d\n", want, got);
- mpz_trace (" a", a);
- mpz_trace (" c", c);
- mpz_trace (" d", d);
- mp_trace_base = -16;
- mpz_trace (" a", a);
- mpz_trace (" c", c);
- mpz_trace (" d", d);
- abort ();
- }
+ {
+ printf ("mpz_congruent_p wrong\n");
+ printf (" expected %d got %d\n", want, got);
+ mpz_trace (" a", a);
+ mpz_trace (" c", c);
+ mpz_trace (" d", d);
+ mp_trace_base = -16;
+ mpz_trace (" a", a);
+ mpz_trace (" c", c);
+ mpz_trace (" d", d);
+ abort ();
+ }
if (mpz_fits_ulong_p (c) && mpz_fits_ulong_p (d))
- {
- unsigned long uc = mpz_get_ui (c);
- unsigned long ud = mpz_get_ui (d);
- got = (mpz_congruent_ui_p (a, uc, ud) != 0);
- if (want != got)
- {
- printf ("mpz_congruent_ui_p wrong\n");
- printf (" expected %d got %d\n", want, got);
- mpz_trace (" a", a);
- printf (" c=%lu\n", uc);
- printf (" d=%lu\n", ud);
- mp_trace_base = -16;
- mpz_trace (" a", a);
- printf (" c=0x%lX\n", uc);
- printf (" d=0x%lX\n", ud);
- abort ();
- }
- }
+ {
+ unsigned long uc = mpz_get_ui (c);
+ unsigned long ud = mpz_get_ui (d);
+ got = (mpz_congruent_ui_p (a, uc, ud) != 0);
+ if (want != got)
+ {
+ printf ("mpz_congruent_ui_p wrong\n");
+ printf (" expected %d got %d\n", want, got);
+ mpz_trace (" a", a);
+ printf (" c=%lu\n", uc);
+ printf (" d=%lu\n", ud);
+ mp_trace_base = -16;
+ mpz_trace (" a", a);
+ printf (" c=0x%lX\n", uc);
+ printf (" d=0x%lX\n", ud);
+ abort ();
+ }
+ }
MPZ_SRCPTR_SWAP (a, c);
}
} data[] = {
+ /* strict equality mod 0 */
+ { "0", "0", "0", 1 },
+ { "11", "11", "0", 1 },
+ { "3", "11", "0", 0 },
+
/* anything congruent mod 1 */
{ "0", "0", "1", 1 },
{ "1", "0", "1", 1 },
mpz_t a, c, d, ra, rc;
int i;
int want;
- int reps = 50000;
+ int reps = 10000;
+ mpz_t bs;
+ unsigned long size_range, size;
if (argc >= 2)
reps = atoi (argv[1]);
+ mpz_init (bs);
+
mpz_init (a);
mpz_init (c);
mpz_init (d);
for (i = 0; i < reps; i++)
{
- mpz_errandomb (a, rands, 8*GMP_LIMB_BITS);
- MPZ_CHECK_FORMAT (a);
- mpz_errandomb (c, rands, 8*GMP_LIMB_BITS);
- MPZ_CHECK_FORMAT (c);
- mpz_errandomb_nonzero (d, rands, 8*GMP_LIMB_BITS);
+ mpz_urandomb (bs, rands, 32);
+ size_range = mpz_get_ui (bs) % 16 + 1; /* 0..65536 bit operands */
+
+ mpz_urandomb (bs, rands, size_range);
+ size = mpz_get_ui (bs);
+ mpz_rrandomb (a, rands, size);
+
+ mpz_urandomb (bs, rands, 32);
+ size_range = mpz_get_ui (bs) % 16 + 1; /* 0..65536 bit operands */
+
+ mpz_urandomb (bs, rands, size_range);
+ size = mpz_get_ui (bs);
+ mpz_rrandomb (c, rands, size);
+
+ do
+ {
+ mpz_urandomb (bs, rands, 32);
+ size_range = mpz_get_ui (bs) % 16 + 1; /* 0..65536 bit operands */
+
+ mpz_urandomb (bs, rands, size_range);
+ size = mpz_get_ui (bs);
+ mpz_rrandomb (d, rands, size);
+ }
+ while (SIZ(d) == 0);
mpz_negrandom (a, rands);
MPZ_CHECK_FORMAT (a);
if (! mpz_pow2abs_p (d))
{
- refmpz_combit (a, urandom() % (8*GMP_LIMB_BITS));
- check_one (a, c, d, 0);
+ refmpz_combit (a, urandom() % (8*GMP_LIMB_BITS));
+ check_one (a, c, d, 0);
}
}
+ mpz_clear (bs);
+
mpz_clear (a);
mpz_clear (c);
mpz_clear (d);
/*
Copyright 2001 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
Copyright 2001 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
Copyright 2001, 2009 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
} data[] = {
+ { "0", "0", 1 },
+ { "17", "0", 0 },
{ "0", "1", 1 },
{ "123", "1", 1 },
{ "-123", "1", 1 },
/*
Copyright 2001 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
Copyright 2002, 2003 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
-/* Exercise mpz_fac_ui.
+/* Exercise mpz_fac_ui and mpz_2fac_ui.
-Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
+Copyright 2000, 2001, 2002, 2012 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
int
main (int argc, char *argv[])
{
- unsigned long n;
- unsigned long limit = 1500;
- mpz_t f, r;
+ unsigned long n, m;
+ unsigned long limit = 2222;
+ mpz_t df[2], f, r;
tests_start ();
/* for small limb testing */
limit = MIN (limit, MP_LIMB_T_MAX);
+ mpz_init_set_ui (df[0], 1); /* 0!! = 1 */
+ mpz_init_set_ui (df[1], 1); /* -1!! = 1 */
mpz_init_set_ui (f, 1); /* 0! = 1 */
mpz_init (r);
- for (n = 0; n < limit; n++)
+ for (n = 0, m = 0; n < limit; n++)
{
mpz_fac_ui (r, n);
MPZ_CHECK_FORMAT (r);
abort ();
}
+ mpz_2fac_ui (r, n);
+ MPZ_CHECK_FORMAT (r);
+
+ if (mpz_cmp (df[m], r) != 0)
+ {
+ printf ("mpz_2fac_ui(%lu) wrong\n", n);
+ printf (" got "); mpz_out_str (stdout, 10, r); printf("\n");
+ printf (" want "); mpz_out_str (stdout, 10, df[m]); printf("\n");
+ abort ();
+ }
+
+ m ^= 1;
+ mpz_mul_ui (df[m], df[m], n+1); /* (n+1)!! = (n-1)!! * (n+1) */
mpz_mul_ui (f, f, n+1); /* (n+1)! = n! * (n+1) */
}
+ n = 1048573; /* a prime */
+ if (n > MP_LIMB_T_MAX)
+ n = 65521; /* a smaller prime :-) */
+ mpz_fac_ui (f, n - 1);
+ m = mpz_fdiv_ui (f, n);
+ if ( m != n - 1)
+ {
+ printf ("mpz_fac_ui(%lu) wrong\n", n - 1);
+ printf (" Wilson's theorem not verified: got %lu, expected %lu.\n",m ,n - 1);
+ abort ();
+ }
+
+ mpz_clear (df[0]);
+ mpz_clear (df[1]);
mpz_clear (f);
mpz_clear (r);
Copyright 1993, 1994, 1996, 2000, 2001 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
#include "gmp-impl.h"
#include "tests.h"
-void dump_abort __GMP_PROTO ((mpz_t, mpz_t));
-void debug_mp __GMP_PROTO ((mpz_t, int));
+void dump_abort (mpz_t, mpz_t);
+void debug_mp (mpz_t, int);
int
main (int argc, char **argv)
Copyright 1993, 1994, 1996, 2000, 2001, 2002 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
#include "gmp-impl.h"
#include "tests.h"
-void dump_abort __GMP_PROTO ((char *, mpz_t, unsigned long));
-void debug_mp __GMP_PROTO ((mpz_t, int));
+void dump_abort (const char *, mpz_t, unsigned long);
+void debug_mp (mpz_t, int);
int
main (int argc, char **argv)
}
void
-dump_abort (char *str, mpz_t dividend, unsigned long divisor)
+dump_abort (const char *str, mpz_t dividend, unsigned long divisor)
{
fprintf (stderr, "ERROR: %s\n", str);
fprintf (stderr, "dividend = "); debug_mp (dividend, -16);
Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
/*
Copyright 2001 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
/* Test mpz_gcd, mpz_gcdext, and mpz_gcd_ui.
Copyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2002, 2003, 2004, 2005,
-2008, 2009 Free Software Foundation, Inc.
+2008, 2009, 2012 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
#include "gmp-impl.h"
#include "tests.h"
-void one_test __GMP_PROTO ((mpz_t, mpz_t, mpz_t, int));
-void debug_mp __GMP_PROTO ((mpz_t, int));
+void one_test (mpz_t, mpz_t, mpz_t, int);
+void debug_mp (mpz_t, int);
+
+static int gcdext_valid_p (const mpz_t, const mpz_t, const mpz_t, const mpz_t);
+
+/* Keep one_test's variables global, so that we don't need
+ to reinitialize them for each test. */
+mpz_t gcd1, gcd2, s, temp1, temp2, temp3;
+
+#define MAX_SCHOENHAGE_THRESHOLD HGCD_REDUCE_THRESHOLD
+
+/* Define this to make all operands be large enough for Schoenhage gcd
+ to be used. */
+#ifndef WHACK_SCHOENHAGE
+#define WHACK_SCHOENHAGE 0
+#endif
+
+#if WHACK_SCHOENHAGE
+#define MIN_OPERAND_BITSIZE (MAX_SCHOENHAGE_THRESHOLD * GMP_NUMB_BITS)
+#else
+#define MIN_OPERAND_BITSIZE 1
+#endif
-static int gcdext_valid_p __GMP_PROTO ((const mpz_t a, const mpz_t b, const mpz_t g, const mpz_t s));
void
check_data (void)
mpz_t a, b, got, want;
int i;
- mpz_init (a);
- mpz_init (b);
- mpz_init (got);
- mpz_init (want);
+ mpz_inits (a, b, got, want, NULL);
for (i = 0; i < numberof (data); i++)
{
}
}
- mpz_clear (a);
- mpz_clear (b);
- mpz_clear (got);
- mpz_clear (want);
+ mpz_clears (a, b, got, want, NULL);
}
-/* Keep one_test's variables global, so that we don't need
- to reinitialize them for each test. */
-mpz_t gcd1, gcd2, s, t, temp1, temp2, temp3;
+void
+make_chain_operands (mpz_t ref, mpz_t a, mpz_t b, gmp_randstate_t rs, int nb1, int nb2, int chain_len)
+{ /* Fills a and b with a random operand pair: starting from a = 0 and a
+ random b, it runs chain_len rounds that each add a multiple of one
+ operand to the other. ref receives the starting b. The bit size of
+ the starting b is itself a random number of at most nb1 bits; the bit
+ size of each multiplier is drawn the same way from nb2. All random
+ draws come from rs. */
+ mpz_t bs, temp1, temp2;
+ int j;
-#if GCD_DC_THRESHOLD > GCDEXT_DC_THRESHOLD
-#define MAX_SCHOENHAGE_THRESHOLD GCD_DC_THRESHOLD
-#else
-#define MAX_SCHOENHAGE_THRESHOLD GCDEXT_DC_THRESHOLD
-#endif
+ mpz_inits (bs, temp1, temp2, NULL);
-/* Define this to make all operands be large enough for Schoenhage gcd
- to be used. */
-#ifndef WHACK_SCHOENHAGE
-#define WHACK_SCHOENHAGE 0
-#endif
+ /* Generate a division chain backwards, allowing otherwise unlikely huge
+ quotients. */
-#if WHACK_SCHOENHAGE
-#define MIN_OPERAND_BITSIZE (MAX_SCHOENHAGE_THRESHOLD * GMP_NUMB_BITS)
-#else
-#define MIN_OPERAND_BITSIZE 1
-#endif
+ mpz_set_ui (a, 0);
+ mpz_urandomb (bs, rs, 32);
+ mpz_urandomb (bs, rs, mpz_get_ui (bs) % nb1 + 1); /* bs = bit size for b, at most nb1 bits long */
+ mpz_rrandomb (b, rs, mpz_get_ui (bs));
+ mpz_add_ui (b, b, 1); /* the +1 keeps b nonzero */
+ mpz_set (ref, b); /* with a == 0, gcd (a, b) is b; the steps below preserve it */
+
+ for (j = 0; j < chain_len; j++)
+ {
+ mpz_urandomb (bs, rs, 32);
+ mpz_urandomb (bs, rs, mpz_get_ui (bs) % nb2 + 1);
+ mpz_rrandomb (temp2, rs, mpz_get_ui (bs) + 1);
+ mpz_add_ui (temp2, temp2, 1); /* multiplier is at least 1 */
+ mpz_mul (temp1, b, temp2);
+ mpz_add (a, a, temp1); /* a += b * temp2 */
+
+ mpz_urandomb (bs, rs, 32);
+ mpz_urandomb (bs, rs, mpz_get_ui (bs) % nb2 + 1);
+ mpz_rrandomb (temp2, rs, mpz_get_ui (bs) + 1);
+ mpz_add_ui (temp2, temp2, 1); /* multiplier is at least 1 */
+ mpz_mul (temp1, a, temp2);
+ mpz_add (b, b, temp1); /* b += a * temp2 */
+ }
+
+ mpz_clears (bs, temp1, temp2, NULL);
+}
+
+/* Test operands from a table of seed data. This variant creates the operands
+ using plain ol' mpz_rrandomb. This is a hack for better coverage of the gcd
+ code; it relies on the random number generators producing exactly the
+ numbers we expect. Each table entry reseeds a local generator, draws the
+ two operand bit sizes modulo nb, and hands the parsed `want' value to
+ one_test (presumably as the expected gcd; one_test is defined elsewhere). */
+void
+check_kolmo1 (void)
+{
+ static const struct {
+ unsigned int seed; /* passed to gmp_randseed_ui */
+ int nb; /* modulus applied to the drawn operand bit sizes */
+ const char *want; /* parsed with mpz_set_str_or_abort and given to one_test */
+ } data[] = {
+ { 59618, 38208, "5"},
+ { 76521, 49024, "3"},
+ { 85869, 54976, "1"},
+ { 99449, 63680, "1"},
+ {112453, 72000, "1"}
+ };
+
+ gmp_randstate_t rs;
+ mpz_t bs, a, b, want;
+ int i, unb, vnb, nb;
+
+ gmp_randinit_default (rs); /* local generator, reseeded for every table entry */
+
+ mpz_inits (bs, a, b, want, NULL);
+
+ for (i = 0; i < numberof (data); i++)
+ {
+ nb = data[i].nb;
+
+ gmp_randseed_ui (rs, data[i].seed);
+
+ mpz_urandomb (bs, rs, 32);
+ unb = mpz_get_ui (bs) % nb; /* bit size requested for a, in 0..nb-1 */
+ mpz_urandomb (bs, rs, 32);
+ vnb = mpz_get_ui (bs) % nb; /* bit size requested for b, in 0..nb-1 */
+
+ mpz_rrandomb (a, rs, unb);
+ mpz_rrandomb (b, rs, vnb);
+
+ mpz_set_str_or_abort (want, data[i].want, 0);
+
+ one_test (a, b, want, -1);
+ }
+
+ mpz_clears (bs, a, b, want, NULL);
+ gmp_randclear (rs);
+}
+
+/* Test operands from a table of seed data. This variant creates the operands
+ using a division chain. This is a hack for better coverage of the gcd
+ code; it relies on the random number generators producing exactly the
+ numbers we expect. Each table entry reseeds a local generator and calls
+ make_chain_operands with nb used for both size arguments; the value that
+ call stores in `want' is then handed to one_test together with a and b. */
+void
+check_kolmo2 (void)
+{
+ static const struct {
+ unsigned int seed; /* passed to gmp_randseed_ui */
+ int nb, chain_len; /* forwarded to make_chain_operands */
+ } data[] = {
+ { 917, 15, 5 },
+ { 1032, 18, 6 },
+ { 1167, 18, 6 },
+ { 1174, 18, 6 },
+ { 1192, 18, 6 },
+ };
+
+ gmp_randstate_t rs;
+ mpz_t bs, a, b, want;
+ int i;
+
+ gmp_randinit_default (rs); /* local generator, reseeded for every table entry */
+
+ mpz_inits (bs, a, b, want, NULL); /* NOTE(review): bs is only initialised and cleared here, never otherwise used */
+
+ for (i = 0; i < numberof (data); i++)
+ {
+ gmp_randseed_ui (rs, data[i].seed);
+ make_chain_operands (want, a, b, rs, data[i].nb, data[i].nb, data[i].chain_len);
+ one_test (a, b, want, -1);
+ }
+
+ mpz_clears (bs, a, b, want, NULL);
+ gmp_randclear (rs);
+}
int
main (int argc, char **argv)
{
mpz_t op1, op2, ref;
- int i, j, chain_len;
+ int i, chain_len;
gmp_randstate_ptr rands;
mpz_t bs;
unsigned long bsi, size_range;
- int reps = 200;
+ long int reps = 200;
tests_start ();
TESTS_REPS (reps, argv, argc);
rands = RANDS;
- check_data ();
+ mpz_inits (bs, op1, op2, ref, gcd1, gcd2, temp1, temp2, temp3, s, NULL);
- mpz_init (bs);
- mpz_init (op1);
- mpz_init (op2);
- mpz_init (ref);
- mpz_init (gcd1);
- mpz_init (gcd2);
- mpz_init (temp1);
- mpz_init (temp2);
- mpz_init (temp3);
- mpz_init (s);
- mpz_init (t);
+ check_data ();
+ check_kolmo1 ();
+ check_kolmo2 ();
/* Testcase to exercise the u0 == u1 case in mpn_gcdext_lehmer_n. */
- mpz_set_ui (op2, GMP_NUMB_MAX);
+ mpz_set_ui (op2, GMP_NUMB_MAX); /* FIXME: Huge limb doesn't always fit */
mpz_mul_2exp (op1, op2, 100);
mpz_add (op1, op1, op2);
mpz_mul_ui (op2, op2, 2);
one_test (op1, op2, NULL, -1);
-#if 0
- mpz_set_str (op1, "4da8e405e0d2f70d6d679d3de08a5100a81ec2cff40f97b313ae75e1183f1df2b244e194ebb02a4ece50d943640a301f0f6cc7f539117b783c3f3a3f91649f8a00d2e1444d52722810562bce02fccdbbc8fe3276646e306e723dd3b", 16);
- mpz_set_str (op2, "76429e12e4fdd8929d89c21657097fbac09d1dc08cf7f1323a34e78ca34226e1a7a29b86fee0fa7fe2cc2a183d46d50df1fe7029590974ad7da77605f35f902cb8b9b8d22dd881eaae5919675d49a337145a029c3b33fc2b0", 16);
- one_test (op1, op2, NULL, -1);
-#endif
-
for (i = 0; i < reps; i++)
{
/* Generate plain operands with unknown gcd. These types of operands
/* Generate a division chain backwards, allowing otherwise unlikely huge
quotients. */
- mpz_set_ui (op1, 0);
mpz_urandomb (bs, rands, 32);
- mpz_urandomb (bs, rands, mpz_get_ui (bs) % 16 + 1);
- mpz_rrandomb (op2, rands, mpz_get_ui (bs));
- mpz_add_ui (op2, op2, 1);
- mpz_set (ref, op2);
-
-#if WHACK_SCHOENHAGE
- chain_len = 1000000;
-#else
+ chain_len = mpz_get_ui (bs) % LOG2C (GMP_NUMB_BITS * MAX_SCHOENHAGE_THRESHOLD);
mpz_urandomb (bs, rands, 32);
- chain_len = mpz_get_ui (bs) % (GMP_NUMB_BITS * MAX_SCHOENHAGE_THRESHOLD / 256);
-#endif
+ chain_len = mpz_get_ui (bs) % (1 << chain_len) / 32;
+
+ make_chain_operands (ref, op1, op2, rands, 16, 12, chain_len);
- for (j = 0; j < chain_len; j++)
- {
- mpz_urandomb (bs, rands, 32);
- mpz_urandomb (bs, rands, mpz_get_ui (bs) % 12 + 1);
- mpz_rrandomb (temp2, rands, mpz_get_ui (bs) + 1);
- mpz_add_ui (temp2, temp2, 1);
- mpz_mul (temp1, op2, temp2);
- mpz_add (op1, op1, temp1);
-
- /* Don't generate overly huge operands. */
- if (SIZ (op1) > 3 * MAX_SCHOENHAGE_THRESHOLD)
- break;
-
- mpz_urandomb (bs, rands, 32);
- mpz_urandomb (bs, rands, mpz_get_ui (bs) % 12 + 1);
- mpz_rrandomb (temp2, rands, mpz_get_ui (bs) + 1);
- mpz_add_ui (temp2, temp2, 1);
- mpz_mul (temp1, op1, temp2);
- mpz_add (op2, op2, temp1);
-
- /* Don't generate overly huge operands. */
- if (SIZ (op2) > 3 * MAX_SCHOENHAGE_THRESHOLD)
- break;
- }
one_test (op1, op2, ref, i);
}
- mpz_clear (bs);
- mpz_clear (op1);
- mpz_clear (op2);
- mpz_clear (ref);
- mpz_clear (gcd1);
- mpz_clear (gcd2);
- mpz_clear (temp1);
- mpz_clear (temp2);
- mpz_clear (temp3);
- mpz_clear (s);
- mpz_clear (t);
+ mpz_clears (bs, op1, op2, ref, gcd1, gcd2, temp1, temp2, temp3, s, NULL);
tests_end ();
exit (0);
one_test (mpz_t op1, mpz_t op2, mpz_t ref, int i)
{
/*
- printf ("%ld %ld %ld\n", SIZ (op1), SIZ (op2), SIZ (ref));
+ printf ("%d %d %d\n", SIZ (op1), SIZ (op2), ref != NULL ? SIZ (ref) : 0);
fflush (stdout);
*/
if (mpz_cmpabs_ui (s, 1) > 0)
{
mpz_mul_2exp (temp3, s, 1);
- if (mpz_cmpabs (temp3, temp2) > 0)
+ if (mpz_cmpabs (temp3, temp2) >= 0)
return 0;
}
if (mpz_cmpabs_ui (temp2, 1) > 0)
{
mpz_mul_2exp (temp2, temp2, 1);
- if (mpz_cmpabs (temp2, temp1) > 0)
+ if (mpz_cmpabs (temp2, temp1) >= 0)
return 0;
}
return 1;
Copyright 2003 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
/* Test mpz_get_d.
-Copyright 2002 Free Software Foundation, Inc.
+Copyright 2002, 2012 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
mpz_t z;
double got, want;
/* FIXME: It'd be better to base this on the float format. */
-#ifdef __vax
- int limit = 127; /* vax fp numbers have limited range */
+#if defined (__vax) || defined (__vax__)
+ int limit = 127 - 1; /* vax fp numbers have limited range */
#else
int limit = 512;
#endif
/* Test mpz_get_d_2exp.
-Copyright 2002, 2003 Free Software Foundation, Inc.
+Copyright 2002, 2003, 2012 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
#include "tests.h"
+/* Check that mpz_get_d_2exp applied to zero returns 0.0 and stores an
+ exponent of 0. On a mismatch the operand, the expected and the actual
+ values are printed and the program aborts. */
+static void
+check_zero (void)
+{
+ mpz_t z;
+ double got, want;
+ long got_exp, want_exp;
+
+ mpz_init_set_ui (z, 0);
+
+ want = 0.0;
+ want_exp = 0;
+ got = mpz_get_d_2exp (&got_exp, z);
+ /* Both the returned double and the stored exponent must match. */
+ if (got != want || got_exp != want_exp)
+ {
+ printf ("mpz_get_d_2exp wrong on zero\n");
+ mpz_trace (" z ", z);
+ d_trace (" want ", want);
+ d_trace (" got ", got);
+ printf (" want exp %ld\n", want_exp);
+ printf (" got exp %ld\n", got_exp);
+ abort();
+ }
+
+ mpz_clear (z);
+}
+
static void
check_onebit (void)
{
tests_start ();
mp_trace_base = -16;
+ check_zero ();
check_onebit ();
check_round ();
check_rand ();
Copyright 2000, 2001 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
Copyright 2001, 2002 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
Copyright 2002, 2003 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
Copyright 2001, 2002 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include "config.h"
{ "0", 10, "0", 1 },
{ "abc", 10, "0", 0 },
+ { "0xf", 10, "0", 1 },
{ "ghi", 16, "0", 0 },
+ { "100", 90, "0", 0 },
{ "ff", 16, "255", 2 },
{ "-ff", 16, "-255", 3 },
{ "FF", 16, "255", 2 },
{ "-FF", 16, "-255", 3 },
- { "z", 36, "35", 1 },
- { "Z", 36, "35", 1 },
+ { "z", 36, "35", 1 },
+ { "Z", 36, "35", 1 },
+ { "1B", 59, "70", 2 },
+ { "a", 60, "36", 1 },
+ { "A", 61, "10", 1 },
{ "0x0", 0, "0", 3 },
- { "0x10", 0, "16", 4 },
- { "-0x0", 0, "0", 4 },
+ { "0X10", 0, "16", 4 },
+ { "-0X0", 0, "0", 4 },
{ "-0x10", 0, "-16", 5 },
+ { "0b0", 0, "0", 3 },
+ { "0B10", 0, "2", 4 },
+ { "-0B0", 0, "0", 4 },
+ { "-0b10", 0, "-2", 5 },
+
{ "00", 0, "0", 2 },
{ "010", 0, "8", 3 },
{ "-00", 0, "0", 3 },
{ "0x", 0, "0", 2 },
{ "0", 0, "0", 1 },
+ { " 030", 10, "30", 4 },
};
mpz_t got, want;
Copyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2002, 2003, 2004, 2005,
2008, 2009, 2012 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
Copyright 2001 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include "config.h"
Copyright 1999, 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
/* With no arguments the various Kronecker/Jacobi symbol routines are
#include "gmp-impl.h"
#include "tests.h"
-
#ifdef _LONG_LONG_LIMB
#define LL(l,ll) ll
#else
void
try_each (mpz_srcptr a, mpz_srcptr b, int answer)
{
+#if 0
+ fprintf(stderr, "asize = %d, bsize = %d\n",
+ mpz_sizeinbase (a, 2), mpz_sizeinbase (b, 2));
+#endif
if (option_pari)
{
try_pari (a, b, answer);
/* special values inducing a==b==1 at the end of jac_or_kron() */
{ "0x10000000000000000000000000000000000000000000000001",
"0x10000000000000000000000000000000000000000000000003", 1 },
+
+ /* Test for previous bugs in jacobi_2. */
+ { "0x43900000000", "0x42400000439", -1 }, /* 32-bit limbs */
+ { "0x4390000000000000000", "0x4240000000000000439", -1 }, /* 64-bit limbs */
+
+ { "198158408161039063", "198158360916398807", -1 },
+
+ /* Some tests involving large quotients in the continued fraction
+ expansion. */
+ { "37200210845139167613356125645445281805",
+ "451716845976689892447895811408978421929", -1 },
+ { "67674091930576781943923596701346271058970643542491743605048620644676477275152701774960868941561652032482173612421015",
+ "4902678867794567120224500687210807069172039735", 0 },
+ { "2666617146103764067061017961903284334497474492754652499788571378062969111250584288683585223600172138551198546085281683283672592", "2666617146103764067061017961903284334497474492754652499788571378062969111250584288683585223600172138551198546085281683290481773", 1 },
+
+ /* Exercises the case asize == 1, btwos > 0 in mpz_jacobi. */
+ { "804609", "421248363205206617296534688032638102314410556521742428832362659824", 1 } ,
+ { "4190209", "2239744742177804210557442048984321017460028974602978995388383905961079286530650825925074203175536427000", 1 },
+
+ /* Exercises the case asize == 1, btwos = 63 in mpz_jacobi
+ (relevant when GMP_LIMB_BITS == 64). */
+ { "17311973299000934401", "1675975991242824637446753124775689449936871337036614677577044717424700351103148799107651171694863695242089956242888229458836426332300124417011114380886016", 1 },
+ { "3220569220116583677", "41859917623035396746", -1 },
+
+ /* Other test cases that triggered bugs during development. */
+ { "37200210845139167613356125645445281805", "340116213441272389607827434472642576514", -1 },
+ { "74400421690278335226712251290890563610", "451716845976689892447895811408978421929", -1 },
};
int i;
for (i = 0; i < 50; i++)
{
mpz_urandomb (bs, rands, 32);
- size_range = mpz_get_ui (bs) % 10 + 2;
+ size_range = mpz_get_ui (bs) % 10 + i/8 + 2;
mpz_urandomb (bs, rands, size_range);
an = mpz_get_ui (bs);
}
+/* Reference computation of the symbol (a|b) from a factorization of b.
+ Assumes that b = prod p_k^e_k, with the p_k given in prime[0..nprime-1]
+ and the e_k in exp[0..nprime-1]. The argument b itself is not read; only
+ the factorization is used.
+
+ For each prime with a non-zero exponent, refmpz_legendre (a, p_k) is
+ evaluated. If it is zero the result is 0. Otherwise it is multiplied into
+ the result only when e_k is odd, since an even power contributes 1. */
+int
+ref_jacobi (mpz_srcptr a, mpz_srcptr b, unsigned nprime,
+ mpz_t prime[], unsigned *exp)
+{
+ unsigned i;
+ int res;
+
+ for (i = 0, res = 1; i < nprime; i++)
+ if (exp[i])
+ {
+ int legendre = refmpz_legendre (a, prime[i]);
+ /* A zero symbol for any prime that actually divides b decides the
+ whole product. */
+ if (!legendre)
+ return 0;
+ if (exp[i] & 1)
+ res *= legendre;
+ }
+ return res;
+}
+
+/* Test the symbol routines on moduli b with a known factorization. A fixed
+ set of random primes is generated once; each b is a product of random
+ powers of those primes, and for each b several random a are checked
+ against ref_jacobi via try_all. */
+void
+check_jacobi_factored (void)
+{
+/* Number of primes generated up front. */
+#define PRIME_N 10
+/* Each prime seed is mpz_rrandomb of (draw % PRIME_MAX_SIZE + 2) bits. */
+#define PRIME_MAX_SIZE 50
+/* Exponents are drawn modulo this value, so they lie in 0..PRIME_MAX_EXP-1. */
+#define PRIME_MAX_EXP 4
+/* Number of random a values tried per b. */
+#define PRIME_A_COUNT 10
+/* Number of different b values constructed. */
+#define PRIME_B_COUNT 5
+/* Stop multiplying further primes into b once it reaches this many bits. */
+#define PRIME_MAX_B_SIZE 2000
+
+ gmp_randstate_ptr rands = RANDS;
+ mpz_t prime[PRIME_N];
+ unsigned exp[PRIME_N];
+ mpz_t a, b, t, bs;
+ unsigned i;
+
+ mpz_init (a);
+ mpz_init (b);
+ mpz_init (t);
+ mpz_init (bs);
+
+ /* Generate primes */
+ for (i = 0; i < PRIME_N; i++)
+ {
+ mp_size_t size;
+ mpz_init (prime[i]);
+ mpz_urandomb (bs, rands, 32);
+ size = mpz_get_ui (bs) % PRIME_MAX_SIZE + 2;
+ mpz_rrandomb (prime[i], rands, size);
+ /* Values up to 3 are replaced by 3; anything larger is advanced with
+ mpz_nextprime. */
+ if (mpz_cmp_ui (prime[i], 3) <= 0)
+ mpz_set_ui (prime[i], 3);
+ else
+ mpz_nextprime (prime[i], prime[i]);
+ }
+
+ for (i = 0; i < PRIME_B_COUNT; i++)
+ {
+ unsigned j, k;
+ mp_bitcnt_t bsize;
+
+ mpz_set_ui (b, 1);
+ bsize = 1;
+
+ /* Build b as a product of prime powers. On exit j is the number of
+ primes whose exponent has been assigned, i.e. exp[0..j-1] is valid. */
+ for (j = 0; j < PRIME_N && bsize < PRIME_MAX_B_SIZE; j++)
+ {
+ mpz_urandomb (bs, rands, 32);
+ exp[j] = mpz_get_ui (bs) % PRIME_MAX_EXP;
+ mpz_pow_ui (t, prime[j], exp[j]);
+ mpz_mul (b, b, t);
+ bsize = mpz_sizeinbase (b, 2);
+ }
+ for (k = 0; k < PRIME_A_COUNT; k++)
+ {
+ int answer;
+ /* a is drawn slightly larger (in bits) than b. */
+ mpz_rrandomb (a, rands, bsize + 2);
+ /* Only the first j primes took part in b, so pass j as the count. */
+ answer = ref_jacobi (a, b, j, prime, exp);
+ try_all (a, b, answer);
+ }
+ }
+ for (i = 0; i < PRIME_N; i++)
+ mpz_clear (prime[i]);
+
+ mpz_clear (a);
+ mpz_clear (b);
+ mpz_clear (t);
+ mpz_clear (bs);
+
+#undef PRIME_N
+#undef PRIME_MAX_SIZE
+#undef PRIME_MAX_EXP
+#undef PRIME_A_COUNT
+#undef PRIME_B_COUNT
+#undef PRIME_MAX_B_SIZE
+}
+
+/* These tests compute (a|n), where the quotient sequence includes
+ large quotients, and n has a known factorization. Such inputs are
+ generated as follows. First, construct a large n, as a power of a
+ prime p of moderate size.
+
+ Next, compute a matrix from factors (q,1;1,0), with q chosen with
+ uniformly distributed size. We must stop with matrix elements of
+ roughly half the size of n. Denote elements of M as M = (m00, m01;
+ m10, m11).
+
+ We now look for solutions to
+
+ n = m00 x + m01 y
+ a = m10 x + m11 y
+
+ with x,y > 0. Since n >= m00 * m01, there exists a positive
+ solution to the first equation. Find those x, y, and substitute in
+ the second equation to get a. Then the quotient sequence for (a|n)
+ is precisely the quotients used when constructing M, followed by
+ the quotient sequence for (x|y).
+
+ Numbers should also be large enough that we exercise hgcd_jacobi,
+ which means that they should be larger than
+
+ max (GCD_DC_THRESHOLD, 3 * HGCD_THRESHOLD)
+
+ With an n of roughly 40000 bits, this should hold on most machines.
+*/
+
+void
+check_large_quotients (void)
+{
+/* Number of (a, n) pairs tested. */
+#define COUNT 50
+/* Bit size requested for the random seed of the prime p. */
+#define PBITS 200
+/* Odd exponent, so that (a|n) = (a|p) for n = p^PPOWER. */
+#define PPOWER 201
+/* Upper bound on the bit size of each random quotient q. */
+#define MAX_QBITS 500
+
+ gmp_randstate_ptr rands = RANDS;
+
+ mpz_t p, n, q, g, s, t, x, y, bs;
+ mpz_t M[2][2];
+ mp_bitcnt_t nsize;
+ unsigned i;
+
+ mpz_init (p);
+ mpz_init (n);
+ mpz_init (q);
+ mpz_init (g);
+ mpz_init (s);
+ mpz_init (t);
+ mpz_init (x);
+ mpz_init (y);
+ mpz_init (bs);
+ mpz_init (M[0][0]);
+ mpz_init (M[0][1]);
+ mpz_init (M[1][0]);
+ mpz_init (M[1][1]);
+
+ /* First generate a number with known factorization, as a random
+ smallish prime raised to an odd power. Then (a|n) = (a|p). */
+ mpz_rrandomb (p, rands, PBITS);
+ mpz_nextprime (p, p);
+ mpz_pow_ui (n, p, PPOWER);
+
+ nsize = mpz_sizeinbase (n, 2);
+
+ for (i = 0; i < COUNT; i++)
+ {
+ int answer;
+ mp_bitcnt_t msize;
+
+ /* Start from the identity matrix. */
+ mpz_set_ui (M[0][0], 1);
+ mpz_set_ui (M[0][1], 0);
+ mpz_set_ui (M[1][0], 0);
+ mpz_set_ui (M[1][1], 1);
+
+ /* Keep multiplying in quotients while one more factor of at most
+ MAX_QBITS bits still leaves the elements at about half of nsize. */
+ for (msize = 1; 2*(msize + MAX_QBITS) + 1 < nsize ;)
+ {
+ unsigned row;
+ mpz_rrandomb (bs, rands, 32);
+ mpz_rrandomb (q, rands, 1 + mpz_get_ui (bs) % MAX_QBITS);
+
+ /* Multiply by (q, 1; 1,0) from the right */
+ for (row = 0; row < 2; row++)
+ {
+ mp_bitcnt_t size;
+ mpz_swap (M[row][0], M[row][1]);
+ mpz_addmul (M[row][0], M[row][1], q);
+ size = mpz_sizeinbase (M[row][0], 2);
+ if (size > msize)
+ msize = size;
+ }
+ }
+ mpz_gcdext (g, s, t, M[0][0], M[0][1]);
+ ASSERT_ALWAYS (mpz_cmp_ui (g, 1) == 0);
+
+ /* Solve n = M[0][0] * x + M[0][1] * y */
+ if (mpz_sgn (s) > 0)
+ {
+ mpz_mul (x, n, s);
+ mpz_fdiv_qr (q, x, x, M[0][1]);
+ mpz_mul (y, q, M[0][0]);
+ mpz_addmul (y, t, n);
+ ASSERT_ALWAYS (mpz_sgn (y) > 0);
+ }
+ else
+ {
+ mpz_mul (y, n, t);
+ mpz_fdiv_qr (q, y, y, M[0][0]);
+ mpz_mul (x, q, M[0][1]);
+ mpz_addmul (x, s, n);
+ ASSERT_ALWAYS (mpz_sgn (x) > 0);
+ }
+ /* Substitute into the second equation: x becomes
+ a = M[1][0] * x + M[1][1] * y. */
+ mpz_mul (x, x, M[1][0]);
+ mpz_addmul (x, y, M[1][1]);
+
+ /* Now (x|n) has the selected large quotients */
+ answer = refmpz_legendre (x, p);
+ try_zi_zi (x, n, answer);
+ }
+ mpz_clear (p);
+ mpz_clear (n);
+ mpz_clear (q);
+ mpz_clear (g);
+ mpz_clear (s);
+ mpz_clear (t);
+ mpz_clear (x);
+ mpz_clear (y);
+ mpz_clear (bs);
+ mpz_clear (M[0][0]);
+ mpz_clear (M[0][1]);
+ mpz_clear (M[1][0]);
+ mpz_clear (M[1][1]);
+#undef COUNT
+#undef PBITS
+#undef PPOWER
+#undef MAX_QBITS
+}
+
int
main (int argc, char *argv[])
{
check_data ();
check_squares_zi ();
check_a_zero ();
-
+ check_jacobi_factored ();
+ check_large_quotients ();
tests_end ();
exit (0);
}
Copyright 2001 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
mpz_init (x);
mpz_init (y);
+ /* Check zeros. */
+ mpz_set_ui (want, 0);
+ mpz_set_ui (x, 1);
+ check_all (want, want, want);
+ check_all (want, want, x);
+ check_all (want, x, want);
+
/* New prime each time. */
mpz_set_ui (want, 1L);
for (i = 0; i < numberof (prime); i++)
Copyright 2001 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
--- /dev/null
+/* Exercise mpz_mfac_uiui.
+
+Copyright 2000, 2001, 2002, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+/* Usage: t-mfac_uiui [x|num]
+
+ With no arguments testing goes up to the initial value of "limit" below.
+ With a number argument tests are carried that far, or with a literal "x"
+ tests are continued without limit (this being meant only for development
+ purposes). */
+
+#define MULTIFAC_WHEEL (2*3*11)
+#define MULTIFAC_WHEEL2 (5*13)
+
+/* Compare mpz_mfac_uiui against incrementally built reference values.
+
+   For every n from 0 up to "limit" the multifactorials with steps
+   MULTIFAC_WHEEL and MULTIFAC_WHEEL2 are compared with products that are
+   accumulated in ref[] and ref2[], one slot per residue of n modulo the
+   step.  After the loop, steps 1 and 2 are cross-checked against
+   mpz_fac_ui and mpz_2fac_ui.  Any mismatch prints both values and
+   aborts.  */
+int
+main (int argc, char *argv[])
+{
+  mpz_t ref[MULTIFAC_WHEEL], ref2[MULTIFAC_WHEEL2], res;
+  unsigned long n, j, m, m2;
+  unsigned long limit = 2222, step = 1;
+
+  tests_start ();
+
+  /* Optional argument: a leading "x" removes the limit, anything else is
+     parsed as the new limit.  */
+  if (argc > 1 && argv[1][0] == 'x')
+    limit = ULONG_MAX;
+  else if (argc > 1)
+    limit = atoi (argv[1]);
+
+  /* for small limb testing */
+  limit = MIN (limit, MP_LIMB_T_MAX);
+
+  /* Every residue class starts from the empty product, 1.  */
+  for (m = 0; m < MULTIFAC_WHEEL; m++)
+    mpz_init_set_ui(ref [m],1);
+  for (m2 = 0; m2 < MULTIFAC_WHEEL2; m2++)
+    mpz_init_set_ui(ref2 [m2],1);
+
+  mpz_init (res);
+
+  m = 0;
+  m2 = 0;
+  for (n = 0; n <= limit;)
+    {
+      mpz_mfac_uiui (res, n, MULTIFAC_WHEEL);
+      MPZ_CHECK_FORMAT (res);
+      if (mpz_cmp (ref[m], res) != 0)
+        {
+          printf ("mpz_mfac_uiui(%lu,%d) wrong\n", n, MULTIFAC_WHEEL);
+          printf (" got "); mpz_out_str (stdout, 10, res); printf("\n");
+          printf (" want "); mpz_out_str (stdout, 10, ref[m]); printf("\n");
+          abort ();
+        }
+      mpz_mfac_uiui (res, n, MULTIFAC_WHEEL2);
+      MPZ_CHECK_FORMAT (res);
+      if (mpz_cmp (ref2[m2], res) != 0)
+        {
+          printf ("mpz_mfac_uiui(%lu,%d) wrong\n", n, MULTIFAC_WHEEL2);
+          printf (" got "); mpz_out_str (stdout, 10, res); printf("\n");
+          printf (" want "); mpz_out_str (stdout, 10, ref2[m2]); printf("\n");
+          abort ();
+        }
+      /* Advance n and both wheel positions together, folding the new n
+         into the reference product of its residue class.  When the next
+         step would pass the limit only n is advanced, which ends the
+         loop.  */
+      if (n + step <= limit)
+        for (j = 0; j < step; j++) {
+          n++; m++; m2++;
+          if (m >= MULTIFAC_WHEEL) m -= MULTIFAC_WHEEL;
+          if (m2 >= MULTIFAC_WHEEL2) m2 -= MULTIFAC_WHEEL2;
+          mpz_mul_ui (ref[m], ref[m], n); /* Compute a reference, with current library */
+          mpz_mul_ui (ref2[m2], ref2[m2], n); /* Compute a reference, with current library */
+        }
+      else n += step;
+    }
+
+  /* Step 1 must agree with the plain factorial.  */
+  mpz_fac_ui (ref[0], n);
+  mpz_mfac_uiui (res, n, 1);
+  MPZ_CHECK_FORMAT (res);
+  if (mpz_cmp (ref[0], res) != 0)
+    {
+      printf ("mpz_mfac_uiui(%lu,1) wrong\n", n);
+      printf (" got "); mpz_out_str (stdout, 10, res); printf("\n");
+      printf (" want "); mpz_out_str (stdout, 10, ref[0]); printf("\n");
+      abort ();
+    }
+
+  /* Step 2 must agree with the double factorial; the message reports the
+     step actually used in the call.  */
+  mpz_2fac_ui (ref[0], n);
+  mpz_mfac_uiui (res, n, 2);
+  MPZ_CHECK_FORMAT (res);
+  if (mpz_cmp (ref[0], res) != 0)
+    {
+      printf ("mpz_mfac_uiui(%lu,2) wrong\n", n);
+      printf (" got "); mpz_out_str (stdout, 10, res); printf("\n");
+      printf (" want "); mpz_out_str (stdout, 10, ref[0]); printf("\n");
+      abort ();
+    }
+
+  /* Repeat the double factorial check for the next n, i.e. the other
+     parity.  */
+  n++;
+  mpz_2fac_ui (ref[0], n);
+  mpz_mfac_uiui (res, n, 2);
+  MPZ_CHECK_FORMAT (res);
+  if (mpz_cmp (ref[0], res) != 0)
+    {
+      printf ("mpz_mfac_uiui(%lu,2) wrong\n", n);
+      printf (" got "); mpz_out_str (stdout, 10, res); printf("\n");
+      printf (" want "); mpz_out_str (stdout, 10, ref[0]); printf("\n");
+      abort ();
+    }
+
+  for (m = 0; m < MULTIFAC_WHEEL; m++)
+    mpz_clear (ref[m]);
+  for (m2 = 0; m2 < MULTIFAC_WHEEL2; m2++)
+    mpz_clear (ref2[m2]);
+  mpz_clear (res);
+
+  tests_end ();
+
+  exit (0);
+}
Copyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2002, 2003, 2004 Free
Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
#include "longlong.h"
#include "tests.h"
-void debug_mp __GMP_PROTO ((mpz_t));
-static void refmpz_mul __GMP_PROTO ((mpz_t, const mpz_t, const mpz_t));
-void dump_abort __GMP_PROTO ((int, char *, mpz_t, mpz_t, mpz_t, mpz_t));
+void debug_mp (mpz_t);
+static void refmpz_mul (mpz_t, const mpz_t, const mpz_t);
+void dump_abort (int, const char *, mpz_t, mpz_t, mpz_t, mpz_t);
#define FFT_MIN_BITSIZE 100000
}
void
-dump_abort (int i, char *s,
+dump_abort (int i, const char *s,
mpz_t op1, mpz_t op2, mpz_t product, mpz_t ref_product)
{
mp_size_t b, e;
Copyright 2001, 2002 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
Copyright 2009 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
}
void
-run (char *start, int reps, char *end, short diffs[])
+run (const char *start, int reps, const char *end, short diffs[])
{
mpz_t x, y;
int i;
Copyright 2000, 2001 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
Copyright 2008, 2009, 2010 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
struct
{
- char *num_as_str;
+ const char *num_as_str;
char want;
} tests[] =
{
check_tests ();
- n_tests = 1000;
+ n_tests = 500;
if (argc == 2)
n_tests = atoi (argv[1]);
check_random (n_tests);
Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
Copyright 2001, 2005 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
printf (" 0x"); mpz_out_str (stdout, 16, n); printf ("\n");
printf (" got %lu\n", got);
printf (" want %lu\n", data[i].want);
- abort();
+ abort ();
}
}
mpz_clear (n);
printf (" 0x"); mpz_out_str (stdout, 16, arg); printf ("\n");
printf (" got %lu\n", got);
printf (" want %lu\n", ref);
- abort();
abort ();
}
}
Copyright 1997, 1999, 2000, 2001 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
Copyright 1991, 1993, 1994, 1996, 1999, 2000, 2001, 2009, 2012 Free Software
Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
#include "gmp-impl.h"
#include "tests.h"
-void debug_mp __GMP_PROTO ((mpz_t, int));
+void debug_mp (mpz_t, int);
#define SIZEM 13
/* Check that all sizes up to just above MUL_TOOM22_THRESHOLD have been tested
a few times. FIXME: If SIZEM is set too low, this will never happen. */
int
-allsizes_seen (int *allsizes)
+allsizes_seen (unsigned int *allsizes)
{
mp_size_t i;
/* Test mpz_powm_ui, mpz_mul, mpz_mod.
-Copyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2002 Free Software
+Copyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2002, 2013 Free Software
Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
#include "gmp-impl.h"
#include "tests.h"
-void dump_abort __GMP_PROTO ((mpz_t, mpz_t));
-void debug_mp __GMP_PROTO ((mpz_t, int));
+void debug_mp (mpz_t, int);
int
main (int argc, char **argv)
mp_size_t base_size, exp_size, mod_size;
unsigned long int exp2;
int i;
- int reps = 1000;
+ int reps = 100;
gmp_randstate_ptr rands;
mpz_t bs;
unsigned long bsi, size_range;
for (i = 0; i < reps; i++)
{
mpz_urandomb (bs, rands, 32);
- size_range = mpz_get_ui (bs) % 13 + 2;
+ size_range = mpz_get_ui (bs) % 18 + 2;
do /* Loop until mathematically well-defined. */
{
exit (0);
}
-void
-dump_abort (mpz_t dividend, mpz_t divisor)
-{
- fprintf (stderr, "ERROR\n");
- fprintf (stderr, "dividend = "); debug_mp (dividend, -16);
- fprintf (stderr, "divisor = "); debug_mp (divisor, -16);
- abort();
-}
-
void
debug_mp (mpz_t x, int base)
{
Copyright 2002 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
--- /dev/null
+/* Exercise mpz_primorial_ui.
+
+Copyright 2000, 2001, 2002, 2012 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+
+/* Usage: t-primorial_ui [x|num]
+
+ With no arguments testing goes up to the initial value of "limit" below.
+ With a number argument tests are carried that far, or with a literal "x"
+ tests are continued without limit (this being meant only for development
+ purposes). */
+
+static int isprime (unsigned long int t);
+
+/* Check mpz_primorial_ui (k) for every k below the limit against a running
+   product of the primes found by isprime.  A mismatch prints both values
+   and aborts.  */
+int
+main (int argc, char *argv[])
+{
+  unsigned long bound = 2222;
+  unsigned long k;
+  mpz_t expect, got;
+
+  tests_start ();
+
+  /* Optional argument: a leading "x" removes the bound, anything else is
+     parsed as the new bound.  */
+  if (argc > 1)
+    {
+      if (argv[1][0] == 'x')
+        bound = ULONG_MAX;
+      else
+        bound = atoi (argv[1]);
+    }
+
+  /* for small limb testing */
+  bound = MIN (bound, MP_LIMB_T_MAX);
+
+  mpz_init (got);
+  mpz_init_set_ui (expect, 1); /* 0# = 1 */
+
+  k = 0;
+  while (k < bound)
+    {
+      mpz_primorial_ui (got, k);
+      MPZ_CHECK_FORMAT (got);
+
+      if (mpz_cmp (expect, got) != 0)
+        {
+          printf ("mpz_primorial_ui(%lu) wrong\n", k);
+          printf (" got "); mpz_out_str (stdout, 10, got); printf("\n");
+          printf (" want "); mpz_out_str (stdout, 10, expect); printf("\n");
+          abort ();
+        }
+
+      /* Move to the next argument; a prime there joins the product,
+         p# = (p-1)# * p.  */
+      k++;
+      if (isprime (k))
+        mpz_mul_ui (expect, expect, k);
+    }
+
+  mpz_clear (expect);
+  mpz_clear (got);
+
+  tests_end ();
+
+  exit (0);
+}
+
+/* Return 1 if T is prime and 0 otherwise, by trial division with odd
+   candidates starting at 3.  */
+static int
+isprime (unsigned long int t)
+{
+  unsigned long int cand;
+
+  if (t == 2)
+    return 1;
+  if (t < 3 || t % 2 == 0)
+    return 0;
+
+  for (cand = 3; ; cand += 2)
+    {
+      /* Once the quotient drops below the candidate, every possible
+         smaller factor has already been tried.  */
+      if (t / cand < cand)
+        return 1;
+      if (t % cand == 0)
+        return 0;
+    }
+}
--- /dev/null
+/* Test mpz_remove.
+
+Copyright 1991, 1993, 1994, 1996, 1997, 2000, 2001, 2009, 2012, 2013
+Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "gmp.h"
+#include "gmp-impl.h"
+#include "tests.h"
+
+void debug_mp (mpz_t);
+unsigned long int mpz_refremove (mpz_t, const mpz_t, const mpz_t);
+
+/* Random test of mpz_remove against the reference mpz_refremove.
+ An optional single argument overrides the number of repetitions. */
+int
+main (int argc, char **argv)
+{
+ unsigned long int exp;
+ mpz_t t, dest, refdest, dividend, divisor;
+ mp_size_t dividend_size, divisor_size;
+ int i;
+ int reps = 1000;
+ unsigned long int pwr, refpwr;
+ gmp_randstate_ptr rands;
+ mpz_t bs;
+ unsigned long size_range;
+
+ tests_start ();
+ rands = RANDS;
+
+ if (argc == 2)
+ reps = atoi (argv[1]);
+
+ mpz_inits (bs, t, dest, refdest, dividend, divisor, NULL);
+
+ for (i = 0; i < reps; i++)
+ {
+ mpz_urandomb (bs, rands, 32);
+ size_range = mpz_get_ui (bs) % 18 + 1; /* 1..524288 bit operands */
+
+ /* Redraw until the divisor is non-zero. */
+ do
+ {
+ mpz_urandomb (bs, rands, size_range);
+ divisor_size = mpz_get_ui (bs);
+ mpz_rrandomb (divisor, rands, divisor_size);
+ }
+ while (mpz_sgn (divisor) == 0);
+
+ mpz_urandomb (bs, rands, size_range);
+ dividend_size = mpz_get_ui (bs) + divisor_size;
+ mpz_rrandomb (dividend, rands, dividend_size);
+
+ /* One more random draw supplies both the exponent and, through one of
+ its bits, the decision whether to negate the divisor. */
+ mpz_urandomb (bs, rands, 32);
+ exp = mpz_get_ui (bs) % (5 + 10000 / mpz_sizeinbase (divisor, 2));
+ if (mpz_get_ui (bs) & 2)
+ mpz_neg (divisor, divisor);
+ /* Multiply divisor^exp into the dividend so that it contains at least
+ exp factors of the divisor. */
+ mpz_pow_ui (t, divisor, exp);
+ mpz_mul (dividend, dividend, t);
+
+ refpwr = mpz_refremove (refdest, dividend, divisor);
+ pwr = mpz_remove (dest, dividend, divisor);
+
+ /* Both the removed-factor count and the cofactor must agree. */
+ if (refpwr != pwr || mpz_cmp (refdest, dest) != 0)
+ {
+ fprintf (stderr, "ERROR after %d tests\n", i);
+ fprintf (stderr, "refpower = %lu\n", refpwr);
+ fprintf (stderr, " power = %lu\n", pwr);
+ fprintf (stderr, " op1 = "); debug_mp (dividend);
+ fprintf (stderr, " op2 = "); debug_mp (divisor);
+ fprintf (stderr, "refdest = "); debug_mp (refdest);
+ fprintf (stderr, " dest = "); debug_mp (dest);
+ abort ();
+ }
+ }
+
+ mpz_clears (bs, t, dest, refdest, dividend, divisor, NULL);
+
+ tests_end ();
+ exit (0);
+}
+
+/* Reference implementation of mpz_remove by repeated division.
+
+   Sets DEST to SRC with every factor F divided out and returns the number
+   of factors removed.  When |F| <= 1 or SRC is zero, DEST is simply set to
+   SRC and 0 is returned.  The zero check matters: zero is divisible by F
+   any number of times, so without it the division loop below would never
+   see a non-zero remainder and would not terminate.  */
+unsigned long int
+mpz_refremove (mpz_t dest, const mpz_t src, const mpz_t f)
+{
+  unsigned long int pwr;
+
+  pwr = 0;
+
+  mpz_set (dest, src);
+  if (mpz_cmpabs_ui (f, 1) > 0 && mpz_sgn (dest) != 0)
+    {
+      mpz_t rem, x;
+
+      mpz_init (x);
+      mpz_init (rem);
+
+      /* Divide by F until a division leaves a remainder; each exact
+         division removes one factor and its quotient becomes DEST.  */
+      for (;; pwr++)
+        {
+          mpz_tdiv_qr (x, rem, dest, f);
+          if (mpz_cmp_ui (rem, 0) != 0)
+            break;
+          mpz_swap (dest, x);
+        }
+
+      mpz_clear (x);
+      mpz_clear (rem);
+    }
+
+  return pwr;
+}
+
+/* Print X in upper-case hexadecimal on stderr.  Values longer than 65 hex
+   digits are abbreviated to their leading digits, "...", the low 25
+   digits and the total digit count in brackets.  */
+void
+debug_mp (mpz_t x)
+{
+  size_t hexlen = mpz_sizeinbase (x, 16);
+  mpz_t part;
+
+  if (hexlen <= 65)
+    {
+      gmp_fprintf (stderr, "%ZX\n", x);
+      return;
+    }
+
+  mpz_init (part);
+
+  /* Leading digits: shift away everything but the top hexlen - 25 digits'
+     worth of bits.  */
+  mpz_tdiv_q_2exp (part, x, 4 * (hexlen - 25));
+  gmp_fprintf (stderr, "%ZX...", part);
+
+  /* Trailing digits: keep only the low 25 hex digits.  */
+  mpz_tdiv_r_2exp (part, x, 4 * 25);
+  gmp_fprintf (stderr, "%025ZX [%d]\n", part, (int) hexlen);
+
+  mpz_clear (part);
+}
Copyright 1991, 1993, 1994, 1996, 2000, 2001, 2009 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
#include "gmp-impl.h"
#include "tests.h"
-void debug_mp __GMP_PROTO ((mpz_t, int));
+void debug_mp (mpz_t, int);
void
check_one (mpz_t root1, mpz_t x2, unsigned long nth, int i)
mpz_add (temp2, temp, rem2);
/* Is power of result > argument? */
- if (mpz_cmp (root1, root2) != 0 || mpz_cmp (x2, temp2) != 0 || mpz_cmp (temp, x2) > 0)
+ if (mpz_cmp (root1, root2) != 0 || mpz_cmp (x2, temp2) != 0 || mpz_cmpabs (temp, x2) > 0)
{
fprintf (stderr, "ERROR after test %d\n", i);
debug_mp (x2, 10);
abort ();
}
- if (nth <= 10000) /* skip too expensive test */
+ if (nth <= 10000 && mpz_sgn(x2) > 0) /* skip too expensive test */
{
mpz_add_ui (temp2, root1, 1L);
mpz_pow_ui (temp2, temp2, nth);
}
check_one (root1, x2, nth, i);
+
+ if (((nth & 1) != 0) && ((bsi & 2) != 0))
+ {
+ mpz_neg (x2, x2);
+ mpz_neg (root1, root1);
+ check_one (root1, x2, nth, i);
+ }
}
mpz_clear (bs);
Copyright 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
Copyright 2000, 2001, 2002, 2003, 2006 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
Copyright 2001 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
Copyright 2001 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
void
-check_one (mpz_srcptr want, int base, const char *str)
+check_one (mpz_srcptr want, int fail, int base, const char *str)
{
mpz_t got;
mpz_init (got);
- if (mpz_set_str (got, str, base) != 0)
+ if (mpz_set_str (got, str, base) != fail)
{
printf ("mpz_set_str unexpectedly failed\n");
printf (" base %d\n", base);
}
MPZ_CHECK_FORMAT (got);
- if (mpz_cmp (got, want) != 0)
+ if (fail == 0 && mpz_cmp (got, want) != 0)
{
printf ("mpz_set_str wrong\n");
printf (" base %d\n", base);
mpz_init (z);
mpz_set_ui (z, 0L);
- check_one (z, 0, "0 ");
- check_one (z, 0, "0 ");
- check_one (z, 10, "0 ");
- check_one (z, 10, "0 ");
- check_one (z, 10, "0000000 ");
+ check_one (z, 0, 0, "0 ");
+ check_one (z, 0, 0, " 0 0 0 ");
+ check_one (z, 0, 0, " -0B 0 ");
+ check_one (z, 0, 0, " 0X 0 ");
+ check_one (z, 0, 10, "0 ");
+ check_one (z, 0, 10, "-0 ");
+ check_one (z, 0, 10, " 0 000 000 ");
mpz_set_ui (z, 123L);
- check_one (z, 0, "123 ");
- check_one (z, 0, "123 ");
- check_one (z, 10, "123 ");
- check_one (z, 10, "123 ");
- check_one (z, 0, " 123 ");
- check_one (z, 0, " 123 ");
- check_one (z, 10, " 0000123 ");
- check_one (z, 10, " 123 ");
+ check_one (z, 0, 0, "123 ");
+ check_one (z, 0, 0, "123 ");
+ check_one (z, 0, 0, "0173 ");
+ check_one (z, 0, 0, " 0b 1 11 10 11 ");
+ check_one (z, 0, 0, " 0x 7b ");
+ check_one (z, 0, 0, "0x7B");
+ check_one (z, 0, 10, "123 ");
+ check_one (z, 0, 10, "123 ");
+ check_one (z, 0, 0, " 123 ");
+ check_one (z, 0, 0, " 123 ");
+ check_one (z, 0, 10, " 0000123 ");
+ check_one (z, 0, 10, " 123 ");
+ check_one (z,-1, 10, "1%");
+ check_one (z,-1, 0, "3!");
+ check_one (z,-1, 0, "0123456789");
+ check_one (z,-1, 0, "13579BDF");
+ check_one (z,-1, 0, "0b0102");
+ check_one (z,-1, 0, "0x010G");
+ check_one (z,-1, 37,"0x010G");
+ check_one (z,-1, 99,"0x010G");
mpz_clear (z);
}
Copyright 2001, 2002 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
Copyright 1991, 1993, 1994, 1996, 2000, 2001, 2002 Free Software Foundation,
Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
#include "gmp-impl.h"
#include "tests.h"
-void dump_abort __GMP_PROTO ((mpz_t, mpz_t, mpz_t));
-void debug_mp __GMP_PROTO ((mpz_t, int));
+void dump_abort (mpz_t, mpz_t, mpz_t);
+void debug_mp (mpz_t, int);
int
main (int argc, char **argv)
Copyright 1991, 1993, 1994, 1996, 1997, 2000, 2001 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
#include "gmp-impl.h"
#include "tests.h"
-void dump_abort __GMP_PROTO ((mpz_t, mpz_t));
-void debug_mp __GMP_PROTO ((mpz_t, int));
+void dump_abort (mpz_t, mpz_t);
+void debug_mp (mpz_t, int);
int
main (int argc, char **argv)
Copyright 1993, 1994, 1996, 2000, 2001, 2002 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
#include "gmp-impl.h"
#include "tests.h"
-void dump_abort __GMP_PROTO ((char *, mpz_t, unsigned long));
-void debug_mp __GMP_PROTO ((mpz_t, int));
+void dump_abort (const char *, mpz_t, unsigned long);
+void debug_mp (mpz_t, int);
int
main (int argc, char **argv)
}
void
-dump_abort (char *str, mpz_t dividend, unsigned long divisor)
+dump_abort (const char *str, mpz_t dividend, unsigned long divisor)
{
fprintf (stderr, "ERROR: %s\n", str);
fprintf (stderr, "dividend = "); debug_mp (dividend, -16);
# Copyright 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
#
-# This file is part of the GNU MP Library.
+# This file is part of the GNU MP Library test suite.
#
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 3 of the License, or (at your
-# option) any later version.
+# The GNU MP Library test suite is free software; you can redistribute it
+# and/or modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 3 of the License,
+# or (at your option) any later version.
#
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-# License for more details.
+# The GNU MP Library test suite is distributed in the hope that it will be
+# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+# Public License for more details.
#
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+# You should have received a copy of the GNU General Public License along with
+# the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/.
INCLUDES = -I$(top_srcdir) -I$(top_srcdir)/tests
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.6 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
# Copyright 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
#
-# This file is part of the GNU MP Library.
+# This file is part of the GNU MP Library test suite.
#
-# The GNU MP Library is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as published by
-# the Free Software Foundation; either version 3 of the License, or (at your
-# option) any later version.
+# The GNU MP Library test suite is free software; you can redistribute it
+# and/or modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 3 of the License,
+# or (at your option) any later version.
#
-# The GNU MP Library is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-# License for more details.
+# The GNU MP Library test suite is distributed in the hope that it will be
+# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+# Public License for more details.
#
-# You should have received a copy of the GNU Lesser General Public License
-# along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+# You should have received a copy of the GNU General Public License along with
+# the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/.
VPATH = @srcdir@
+am__make_dryrun = \
+ { \
+ am__dry=no; \
+ case $$MAKEFLAGS in \
+ *\\[\ \ ]*) \
+ echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \
+ | grep '^AM OK$$' >/dev/null || am__dry=yes;; \
+ *) \
+ for am__flg in $$MAKEFLAGS; do \
+ case $$am__flg in \
+ *=*|--*) ;; \
+ *n*) am__dry=yes; break;; \
+ esac; \
+ done;; \
+ esac; \
+ test $$am__dry = yes; \
+ }
pkgdatadir = $(datadir)/@PACKAGE@
pkgincludedir = $(includedir)/@PACKAGE@
pkglibdir = $(libdir)/@PACKAGE@
POST_UNINSTALL = :
build_triplet = @build@
host_triplet = @host@
-ANSI2KNR = $(top_builddir)/ansi2knr
check_PROGRAMS = t-iset$(EXEEXT) t-lc2exp$(EXEEXT) t-mt$(EXEEXT) \
t-rand$(EXEEXT) t-urbui$(EXEEXT) t-urmui$(EXEEXT) \
t-urndmm$(EXEEXT)
DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
- $(top_srcdir)/configure.in
+ $(top_srcdir)/configure.ac
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
$(ACLOCAL_M4)
mkinstalldirs = $(install_sh) -d
am__DEPENDENCIES_1 =
libstat_la_DEPENDENCIES = $(top_builddir)/libgmp.la \
$(am__DEPENDENCIES_1)
-am_libstat_la_OBJECTS = statlib$U.lo zdiv_round$U.lo
+am_libstat_la_OBJECTS = statlib.lo zdiv_round.lo
libstat_la_OBJECTS = $(am_libstat_la_OBJECTS)
findlc_SOURCES = findlc.c
-findlc_OBJECTS = findlc$U.$(OBJEXT)
+findlc_OBJECTS = findlc.$(OBJEXT)
findlc_DEPENDENCIES = libstat.la
gen_SOURCES = gen.c
-gen_OBJECTS = gen$U.$(OBJEXT)
+gen_OBJECTS = gen.$(OBJEXT)
gen_LDADD = $(LDADD)
gen_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
-am_gen_static_OBJECTS = gen$U.$(OBJEXT)
+am_gen_static_OBJECTS = gen.$(OBJEXT)
gen_static_OBJECTS = $(am_gen_static_OBJECTS)
gen_static_LDADD = $(LDADD)
gen_static_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
$(gen_static_LDFLAGS) $(LDFLAGS) -o $@
spect_SOURCES = spect.c
-spect_OBJECTS = spect$U.$(OBJEXT)
+spect_OBJECTS = spect.$(OBJEXT)
spect_DEPENDENCIES = libstat.la
stat_SOURCES = stat.c
-stat_OBJECTS = stat$U.$(OBJEXT)
+stat_OBJECTS = stat.$(OBJEXT)
stat_DEPENDENCIES = libstat.la
t_iset_SOURCES = t-iset.c
-t_iset_OBJECTS = t-iset$U.$(OBJEXT)
+t_iset_OBJECTS = t-iset.$(OBJEXT)
t_iset_LDADD = $(LDADD)
t_iset_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_lc2exp_SOURCES = t-lc2exp.c
-t_lc2exp_OBJECTS = t-lc2exp$U.$(OBJEXT)
+t_lc2exp_OBJECTS = t-lc2exp.$(OBJEXT)
t_lc2exp_LDADD = $(LDADD)
t_lc2exp_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_mt_SOURCES = t-mt.c
-t_mt_OBJECTS = t-mt$U.$(OBJEXT)
+t_mt_OBJECTS = t-mt.$(OBJEXT)
t_mt_LDADD = $(LDADD)
t_mt_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_rand_SOURCES = t-rand.c
-t_rand_OBJECTS = t-rand$U.$(OBJEXT)
+t_rand_OBJECTS = t-rand.$(OBJEXT)
t_rand_LDADD = $(LDADD)
t_rand_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_urbui_SOURCES = t-urbui.c
-t_urbui_OBJECTS = t-urbui$U.$(OBJEXT)
+t_urbui_OBJECTS = t-urbui.$(OBJEXT)
t_urbui_LDADD = $(LDADD)
t_urbui_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_urmui_SOURCES = t-urmui.c
-t_urmui_OBJECTS = t-urmui$U.$(OBJEXT)
+t_urmui_OBJECTS = t-urmui.$(OBJEXT)
t_urmui_LDADD = $(LDADD)
t_urmui_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
t_urndmm_SOURCES = t-urndmm.c
-t_urndmm_OBJECTS = t-urndmm$U.$(OBJEXT)
+t_urndmm_OBJECTS = t-urndmm.$(OBJEXT)
t_urndmm_LDADD = $(LDADD)
t_urndmm_DEPENDENCIES = $(top_builddir)/tests/libtests.la \
$(top_builddir)/libgmp.la
DIST_SOURCES = $(libstat_la_SOURCES) findlc.c gen.c \
$(gen_static_SOURCES) spect.c stat.c t-iset.c t-lc2exp.c \
t-mt.c t-rand.c t-urbui.c t-urmui.c t-urndmm.c
+am__can_run_installinfo = \
+ case $$AM_UPDATE_INFO_DIR in \
+ n|no|NO) false;; \
+ *) (install-info --version) >/dev/null 2>&1;; \
+ esac
ETAGS = etags
CTAGS = ctags
am__tty_colors = \
SPEED_CYCLECOUNTER_OBJ = @SPEED_CYCLECOUNTER_OBJ@
STRIP = @STRIP@
TAL_OBJECT = @TAL_OBJECT@
+TUNE_LIBS = @TUNE_LIBS@
TUNE_SQR_OBJ = @TUNE_SQR_OBJ@
-U = @U@
U_FOR_BUILD = @U_FOR_BUILD@
VERSION = @VERSION@
WITH_READLINE_01 = @WITH_READLINE_01@
mkdir_p = @mkdir_p@
mpn_objects = @mpn_objects@
mpn_objs_in_libgmp = @mpn_objs_in_libgmp@
-mpn_objs_in_libmp = @mpn_objs_in_libmp@
oldincludedir = @oldincludedir@
pdfdir = @pdfdir@
prefix = @prefix@
$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(am__aclocal_m4_deps):
-libstat.la: $(libstat_la_OBJECTS) $(libstat_la_DEPENDENCIES)
+libstat.la: $(libstat_la_OBJECTS) $(libstat_la_DEPENDENCIES) $(EXTRA_libstat_la_DEPENDENCIES)
$(LINK) $(libstat_la_OBJECTS) $(libstat_la_LIBADD) $(LIBS)
clean-checkPROGRAMS:
list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
echo " rm -f" $$list; \
rm -f $$list
-findlc$(EXEEXT): $(findlc_OBJECTS) $(findlc_DEPENDENCIES)
+findlc$(EXEEXT): $(findlc_OBJECTS) $(findlc_DEPENDENCIES) $(EXTRA_findlc_DEPENDENCIES)
@rm -f findlc$(EXEEXT)
$(LINK) $(findlc_OBJECTS) $(findlc_LDADD) $(LIBS)
-gen$(EXEEXT): $(gen_OBJECTS) $(gen_DEPENDENCIES)
+gen$(EXEEXT): $(gen_OBJECTS) $(gen_DEPENDENCIES) $(EXTRA_gen_DEPENDENCIES)
@rm -f gen$(EXEEXT)
$(LINK) $(gen_OBJECTS) $(gen_LDADD) $(LIBS)
-gen.static$(EXEEXT): $(gen_static_OBJECTS) $(gen_static_DEPENDENCIES)
+gen.static$(EXEEXT): $(gen_static_OBJECTS) $(gen_static_DEPENDENCIES) $(EXTRA_gen_static_DEPENDENCIES)
@rm -f gen.static$(EXEEXT)
$(gen_static_LINK) $(gen_static_OBJECTS) $(gen_static_LDADD) $(LIBS)
-spect$(EXEEXT): $(spect_OBJECTS) $(spect_DEPENDENCIES)
+spect$(EXEEXT): $(spect_OBJECTS) $(spect_DEPENDENCIES) $(EXTRA_spect_DEPENDENCIES)
@rm -f spect$(EXEEXT)
$(LINK) $(spect_OBJECTS) $(spect_LDADD) $(LIBS)
-stat$(EXEEXT): $(stat_OBJECTS) $(stat_DEPENDENCIES)
+stat$(EXEEXT): $(stat_OBJECTS) $(stat_DEPENDENCIES) $(EXTRA_stat_DEPENDENCIES)
@rm -f stat$(EXEEXT)
$(LINK) $(stat_OBJECTS) $(stat_LDADD) $(LIBS)
-t-iset$(EXEEXT): $(t_iset_OBJECTS) $(t_iset_DEPENDENCIES)
+t-iset$(EXEEXT): $(t_iset_OBJECTS) $(t_iset_DEPENDENCIES) $(EXTRA_t_iset_DEPENDENCIES)
@rm -f t-iset$(EXEEXT)
$(LINK) $(t_iset_OBJECTS) $(t_iset_LDADD) $(LIBS)
-t-lc2exp$(EXEEXT): $(t_lc2exp_OBJECTS) $(t_lc2exp_DEPENDENCIES)
+t-lc2exp$(EXEEXT): $(t_lc2exp_OBJECTS) $(t_lc2exp_DEPENDENCIES) $(EXTRA_t_lc2exp_DEPENDENCIES)
@rm -f t-lc2exp$(EXEEXT)
$(LINK) $(t_lc2exp_OBJECTS) $(t_lc2exp_LDADD) $(LIBS)
-t-mt$(EXEEXT): $(t_mt_OBJECTS) $(t_mt_DEPENDENCIES)
+t-mt$(EXEEXT): $(t_mt_OBJECTS) $(t_mt_DEPENDENCIES) $(EXTRA_t_mt_DEPENDENCIES)
@rm -f t-mt$(EXEEXT)
$(LINK) $(t_mt_OBJECTS) $(t_mt_LDADD) $(LIBS)
-t-rand$(EXEEXT): $(t_rand_OBJECTS) $(t_rand_DEPENDENCIES)
+t-rand$(EXEEXT): $(t_rand_OBJECTS) $(t_rand_DEPENDENCIES) $(EXTRA_t_rand_DEPENDENCIES)
@rm -f t-rand$(EXEEXT)
$(LINK) $(t_rand_OBJECTS) $(t_rand_LDADD) $(LIBS)
-t-urbui$(EXEEXT): $(t_urbui_OBJECTS) $(t_urbui_DEPENDENCIES)
+t-urbui$(EXEEXT): $(t_urbui_OBJECTS) $(t_urbui_DEPENDENCIES) $(EXTRA_t_urbui_DEPENDENCIES)
@rm -f t-urbui$(EXEEXT)
$(LINK) $(t_urbui_OBJECTS) $(t_urbui_LDADD) $(LIBS)
-t-urmui$(EXEEXT): $(t_urmui_OBJECTS) $(t_urmui_DEPENDENCIES)
+t-urmui$(EXEEXT): $(t_urmui_OBJECTS) $(t_urmui_DEPENDENCIES) $(EXTRA_t_urmui_DEPENDENCIES)
@rm -f t-urmui$(EXEEXT)
$(LINK) $(t_urmui_OBJECTS) $(t_urmui_LDADD) $(LIBS)
-t-urndmm$(EXEEXT): $(t_urndmm_OBJECTS) $(t_urndmm_DEPENDENCIES)
+t-urndmm$(EXEEXT): $(t_urndmm_OBJECTS) $(t_urndmm_DEPENDENCIES) $(EXTRA_t_urndmm_DEPENDENCIES)
@rm -f t-urndmm$(EXEEXT)
$(LINK) $(t_urndmm_OBJECTS) $(t_urndmm_LDADD) $(LIBS)
distclean-compile:
-rm -f *.tab.c
-$(top_builddir)/ansi2knr:
- $(am__cd) $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) ./ansi2knr
-
-mostlyclean-kr:
- -test "$U" = "" || rm -f *_.c
.c.o:
$(COMPILE) -c $<
.c.lo:
$(LTCOMPILE) -c -o $@ $<
-findlc_.c: findlc.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/findlc.c; then echo $(srcdir)/findlc.c; else echo findlc.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-gen_.c: gen.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/gen.c; then echo $(srcdir)/gen.c; else echo gen.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-spect_.c: spect.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/spect.c; then echo $(srcdir)/spect.c; else echo spect.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-stat_.c: stat.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/stat.c; then echo $(srcdir)/stat.c; else echo stat.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-statlib_.c: statlib.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/statlib.c; then echo $(srcdir)/statlib.c; else echo statlib.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-iset_.c: t-iset.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-iset.c; then echo $(srcdir)/t-iset.c; else echo t-iset.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-lc2exp_.c: t-lc2exp.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-lc2exp.c; then echo $(srcdir)/t-lc2exp.c; else echo t-lc2exp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-mt_.c: t-mt.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-mt.c; then echo $(srcdir)/t-mt.c; else echo t-mt.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-rand_.c: t-rand.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-rand.c; then echo $(srcdir)/t-rand.c; else echo t-rand.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-urbui_.c: t-urbui.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-urbui.c; then echo $(srcdir)/t-urbui.c; else echo t-urbui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-urmui_.c: t-urmui.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-urmui.c; then echo $(srcdir)/t-urmui.c; else echo t-urmui.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-t-urndmm_.c: t-urndmm.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/t-urndmm.c; then echo $(srcdir)/t-urndmm.c; else echo t-urndmm.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-zdiv_round_.c: zdiv_round.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/zdiv_round.c; then echo $(srcdir)/zdiv_round.c; else echo zdiv_round.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-findlc_.$(OBJEXT) findlc_.lo gen_.$(OBJEXT) gen_.lo spect_.$(OBJEXT) \
-spect_.lo stat_.$(OBJEXT) stat_.lo statlib_.$(OBJEXT) statlib_.lo \
-t-iset_.$(OBJEXT) t-iset_.lo t-lc2exp_.$(OBJEXT) t-lc2exp_.lo \
-t-mt_.$(OBJEXT) t-mt_.lo t-rand_.$(OBJEXT) t-rand_.lo \
-t-urbui_.$(OBJEXT) t-urbui_.lo t-urmui_.$(OBJEXT) t-urmui_.lo \
-t-urndmm_.$(OBJEXT) t-urndmm_.lo zdiv_round_.$(OBJEXT) zdiv_round_.lo \
-: $(ANSI2KNR)
mostlyclean-libtool:
-rm -f *.lo
fi; \
dashes=`echo "$$dashes" | sed s/./=/g`; \
if test "$$failed" -eq 0; then \
- echo "$$grn$$dashes"; \
+ col="$$grn"; \
else \
- echo "$$red$$dashes"; \
+ col="$$red"; \
fi; \
- echo "$$banner"; \
- test -z "$$skipped" || echo "$$skipped"; \
- test -z "$$report" || echo "$$report"; \
- echo "$$dashes$$std"; \
+ echo "$${col}$$dashes$${std}"; \
+ echo "$${col}$$banner$${std}"; \
+ test -z "$$skipped" || echo "$${col}$$skipped$${std}"; \
+ test -z "$$report" || echo "$${col}$$report$${std}"; \
+ echo "$${col}$$dashes$${std}"; \
test "$$failed" -eq 0; \
else :; fi
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
mostlyclean: mostlyclean-am
-mostlyclean-am: mostlyclean-compile mostlyclean-generic mostlyclean-kr \
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
mostlyclean-libtool
pdf: pdf-am
uninstall-am:
-.MAKE: $(top_builddir)/ansi2knr check-am install-am install-strip
+.MAKE: check-am install-am install-strip
.PHONY: CTAGS GTAGS all all-am check check-TESTS check-am clean \
clean-checkPROGRAMS clean-generic clean-libtool ctags \
install-ps install-ps-am install-strip installcheck \
installcheck-am installdirs maintainer-clean \
maintainer-clean-generic mostlyclean mostlyclean-compile \
- mostlyclean-generic mostlyclean-kr mostlyclean-libtool pdf \
- pdf-am ps ps-am tags uninstall uninstall-am
+ mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
+ tags uninstall uninstall-am
allprogs: $(EXTRA_PROGRAMS)
/*
Copyright 2000 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
Copyright 1999, 2000, 2002 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
/* Examples:
/*
Copyright 1999 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
/* This file requires the following header files: gmp.h */
/*
Copyright 1999 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
/* T is upper dimension. Z_A is the LC multiplier, which is
relatively prime to Z_M, the LC modulus. The result is put in
/*
Copyright 1999, 2000 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
/* Examples:
/*
Copyright 1999, 2000 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
/* The theories for these functions are taken from D. Knuth's "The Art
of Computer Programming: Volume 2, Seminumerical Algorithms", Third
Copyright 2003 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
Copyright 2002, 2011 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
Copyright 2002 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include "gmp.h"
/*
Copyright 2000, 2001 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdlib.h>
#include <stdio.h>
/* These were generated by this very program. Do not edit! */
/* Integers. */
-char *z1[ENTS] = {"0", "1", "1", "1", "1", "0", "1", "1", "1", "1"};
-char *z2[ENTS] = {"0", "3", "1", "3", "3", "0", "3", "3", "3", "1"};
-char *z3[ENTS] = {"4", "3", "1", "7", "3", "0", "3", "3", "3", "1"};
-char *z4[ENTS] = {"c", "3", "1", "f", "b", "8", "3", "3", "3", "1"};
-char *z5[ENTS] = {"1c", "13", "11", "1f", "b", "18", "3", "13", "3", "1"};
+const char *z1[ENTS] = {"0", "1", "1", "1", "1", "0", "1", "1", "1", "1"};
+const char *z2[ENTS] = {"0", "3", "1", "3", "3", "0", "3", "3", "3", "1"};
+const char *z3[ENTS] = {"4", "3", "1", "7", "3", "0", "3", "3", "3", "1"};
+const char *z4[ENTS] = {"c", "3", "1", "f", "b", "8", "3", "3", "3", "1"};
+const char *z5[ENTS] = {"1c", "13", "11", "1f", "b", "18", "3", "13", "3", "1"};
-char *z10[ENTS] = {"29c", "213", "f1", "17f", "12b", "178", "383", "d3", "3a3", "281"};
+const char *z10[ENTS] = {"29c", "213", "f1", "17f", "12b", "178", "383", "d3", "3a3", "281"};
-char *z15[ENTS] = {"29c", "1a13", "74f1", "257f", "592b", "4978", "4783", "7cd3", "5ba3", "4681"};
-char *z16[ENTS] = {"29c", "9a13", "74f1", "a57f", "d92b", "4978", "c783", "fcd3", "5ba3", "c681"};
-char *z17[ENTS] = {"51e", "f17a", "54ff", "1a335", "cf65", "5d6f", "583f", "618f", "1bc6", "98ff"};
+const char *z15[ENTS] = {"29c", "1a13", "74f1", "257f", "592b", "4978", "4783", "7cd3", "5ba3", "4681"};
+const char *z16[ENTS] = {"29c", "9a13", "74f1", "a57f", "d92b", "4978", "c783", "fcd3", "5ba3", "c681"};
+const char *z17[ENTS] = {"51e", "f17a", "54ff", "1a335", "cf65", "5d6f", "583f", "618f", "1bc6", "98ff"};
-char *z31[ENTS] = {"3aecd515", "13ae8ec6", "518c8090", "81ca077", "70b7134", "7ee78d71", "323a7636", "2122cb1a", "19811941", "41fd605"};
-char *z32[ENTS] = {"baecd515", "13ae8ec6", "518c8090", "881ca077", "870b7134", "7ee78d71", "323a7636", "a122cb1a", "99811941", "841fd605"};
-char *z33[ENTS] = {"1faf4cca", "15d6ef83b", "9095fe72", "1b6a3dff6", "b17cbddd", "16e5209d4", "6f65b12c", "493bbbc6", "abf2a5d5", "6d491a3c"};
+const char *z31[ENTS] = {"3aecd515", "13ae8ec6", "518c8090", "81ca077", "70b7134", "7ee78d71", "323a7636", "2122cb1a", "19811941", "41fd605"};
+const char *z32[ENTS] = {"baecd515", "13ae8ec6", "518c8090", "881ca077", "870b7134", "7ee78d71", "323a7636", "a122cb1a", "99811941", "841fd605"};
+const char *z33[ENTS] = {"1faf4cca", "15d6ef83b", "9095fe72", "1b6a3dff6", "b17cbddd", "16e5209d4", "6f65b12c", "493bbbc6", "abf2a5d5", "6d491a3c"};
-char *z63[ENTS] = {"48a74f367fa7b5c8", "3ba9e9dc1b263076", "1e0ac84e7678e0fb", "11416581728b3e35", "36ab610523f0f1f7", "3e540e8e95c0eb4b", "439ae16057dbc9d3", "734fb260db243950", "7d3a317effc289bf", "1d80301fb3d1a0d1"};
-char *z64[ENTS] = {"48a74f367fa7b5c8", "bba9e9dc1b263076", "9e0ac84e7678e0fb", "11416581728b3e35", "b6ab610523f0f1f7", "be540e8e95c0eb4b", "439ae16057dbc9d3", "f34fb260db243950", "fd3a317effc289bf", "1d80301fb3d1a0d1"};
-char *z65[ENTS] = {"1ff77710d846d49f0", "1b1411701d709ee10", "31ffa81a208b6af4", "446638d431d3c681", "df5c569d5baa8b55", "197d99ea9bf28e5a0", "191ade09edd94cfae", "194acefa6dde5e18d", "1afc1167c56272d92", "d092994da72f206f"};
+const char *z63[ENTS] = {"48a74f367fa7b5c8", "3ba9e9dc1b263076", "1e0ac84e7678e0fb", "11416581728b3e35", "36ab610523f0f1f7", "3e540e8e95c0eb4b", "439ae16057dbc9d3", "734fb260db243950", "7d3a317effc289bf", "1d80301fb3d1a0d1"};
+const char *z64[ENTS] = {"48a74f367fa7b5c8", "bba9e9dc1b263076", "9e0ac84e7678e0fb", "11416581728b3e35", "b6ab610523f0f1f7", "be540e8e95c0eb4b", "439ae16057dbc9d3", "f34fb260db243950", "fd3a317effc289bf", "1d80301fb3d1a0d1"};
+const char *z65[ENTS] = {"1ff77710d846d49f0", "1b1411701d709ee10", "31ffa81a208b6af4", "446638d431d3c681", "df5c569d5baa8b55", "197d99ea9bf28e5a0", "191ade09edd94cfae", "194acefa6dde5e18d", "1afc1167c56272d92", "d092994da72f206f"};
-char *z127[ENTS] = {"2f66ba932aaf58a071fd8f0742a99a0c", "73cfa3c664c9c1753507ca60ec6b8425", "53ea074ca131dec12cd68b8aa8e20278", "3cf5ac8c343532f8a53cc0eb47581f73", "50c11d5869e208aa1b9aa317b8c2d0a9", "b23163c892876472b1ef19642eace09", "489f4c03d41f87509c8d6c90ce674f95", "2ab8748c96aa6762ea1932b44c9d7164", "98cb5591fc05ad31afbbc1d67b90edd", "77848bb991fd0be331adcf1457fbc672"};
-char *z128[ENTS] = {"af66ba932aaf58a071fd8f0742a99a0c", "73cfa3c664c9c1753507ca60ec6b8425", "53ea074ca131dec12cd68b8aa8e20278", "3cf5ac8c343532f8a53cc0eb47581f73", "50c11d5869e208aa1b9aa317b8c2d0a9", "8b23163c892876472b1ef19642eace09", "489f4c03d41f87509c8d6c90ce674f95", "aab8748c96aa6762ea1932b44c9d7164", "98cb5591fc05ad31afbbc1d67b90edd", "f7848bb991fd0be331adcf1457fbc672"};
+const char *z127[ENTS] = {"2f66ba932aaf58a071fd8f0742a99a0c", "73cfa3c664c9c1753507ca60ec6b8425", "53ea074ca131dec12cd68b8aa8e20278", "3cf5ac8c343532f8a53cc0eb47581f73", "50c11d5869e208aa1b9aa317b8c2d0a9", "b23163c892876472b1ef19642eace09", "489f4c03d41f87509c8d6c90ce674f95", "2ab8748c96aa6762ea1932b44c9d7164", "98cb5591fc05ad31afbbc1d67b90edd", "77848bb991fd0be331adcf1457fbc672"};
+const char *z128[ENTS] = {"af66ba932aaf58a071fd8f0742a99a0c", "73cfa3c664c9c1753507ca60ec6b8425", "53ea074ca131dec12cd68b8aa8e20278", "3cf5ac8c343532f8a53cc0eb47581f73", "50c11d5869e208aa1b9aa317b8c2d0a9", "8b23163c892876472b1ef19642eace09", "489f4c03d41f87509c8d6c90ce674f95", "aab8748c96aa6762ea1932b44c9d7164", "98cb5591fc05ad31afbbc1d67b90edd", "f7848bb991fd0be331adcf1457fbc672"};
/* Floats. */
-char *f1[ENTS] = {"0.@0", "0.8@0", "0.8@0", "0.8@0", "0.8@0", "0.@0", "0.8@0", "0.8@0", "0.8@0", "0.8@0"};
-char *f2[ENTS] = {"0.@0", "0.c@0", "0.4@0", "0.c@0", "0.c@0", "0.@0", "0.c@0", "0.c@0", "0.c@0", "0.4@0"};
-char *f3[ENTS] = {"0.8@0", "0.6@0", "0.2@0", "0.e@0", "0.6@0", "0.@0", "0.6@0", "0.6@0", "0.6@0", "0.2@0"};
-char *f4[ENTS] = {"0.c@0", "0.3@0", "0.1@0", "0.f@0", "0.b@0", "0.8@0", "0.3@0", "0.3@0", "0.3@0", "0.1@0"};
-char *f5[ENTS] = {"0.e@0", "0.98@0", "0.88@0", "0.f8@0", "0.58@0", "0.c@0", "0.18@0", "0.98@0", "0.18@0", "0.8@-1"};
+const char *f1[ENTS] = {"0.@0", "0.8@0", "0.8@0", "0.8@0", "0.8@0", "0.@0", "0.8@0", "0.8@0", "0.8@0", "0.8@0"};
+const char *f2[ENTS] = {"0.@0", "0.c@0", "0.4@0", "0.c@0", "0.c@0", "0.@0", "0.c@0", "0.c@0", "0.c@0", "0.4@0"};
+const char *f3[ENTS] = {"0.8@0", "0.6@0", "0.2@0", "0.e@0", "0.6@0", "0.@0", "0.6@0", "0.6@0", "0.6@0", "0.2@0"};
+const char *f4[ENTS] = {"0.c@0", "0.3@0", "0.1@0", "0.f@0", "0.b@0", "0.8@0", "0.3@0", "0.3@0", "0.3@0", "0.1@0"};
+const char *f5[ENTS] = {"0.e@0", "0.98@0", "0.88@0", "0.f8@0", "0.58@0", "0.c@0", "0.18@0", "0.98@0", "0.18@0", "0.8@-1"};
-char *f10[ENTS] = {"0.a7@0", "0.84c@0", "0.3c4@0", "0.5fc@0", "0.4ac@0", "0.5e@0", "0.e0c@0", "0.34c@0", "0.e8c@0", "0.a04@0"};
+const char *f10[ENTS] = {"0.a7@0", "0.84c@0", "0.3c4@0", "0.5fc@0", "0.4ac@0", "0.5e@0", "0.e0c@0", "0.34c@0", "0.e8c@0", "0.a04@0"};
-char *f15[ENTS] = {"0.538@-1", "0.3426@0", "0.e9e2@0", "0.4afe@0", "0.b256@0", "0.92f@0", "0.8f06@0", "0.f9a6@0", "0.b746@0", "0.8d02@0"};
-char *f16[ENTS] = {"0.29c@-1", "0.9a13@0", "0.74f1@0", "0.a57f@0", "0.d92b@0", "0.4978@0", "0.c783@0", "0.fcd3@0", "0.5ba3@0", "0.c681@0"};
-char *f17[ENTS] = {"0.28f@-1", "0.78bd@0", "0.2a7f8@0", "0.d19a8@0", "0.67b28@0", "0.2eb78@0", "0.2c1f8@0", "0.30c78@0", "0.de3@-1", "0.4c7f8@0"};
+const char *f15[ENTS] = {"0.538@-1", "0.3426@0", "0.e9e2@0", "0.4afe@0", "0.b256@0", "0.92f@0", "0.8f06@0", "0.f9a6@0", "0.b746@0", "0.8d02@0"};
+const char *f16[ENTS] = {"0.29c@-1", "0.9a13@0", "0.74f1@0", "0.a57f@0", "0.d92b@0", "0.4978@0", "0.c783@0", "0.fcd3@0", "0.5ba3@0", "0.c681@0"};
+const char *f17[ENTS] = {"0.28f@-1", "0.78bd@0", "0.2a7f8@0", "0.d19a8@0", "0.67b28@0", "0.2eb78@0", "0.2c1f8@0", "0.30c78@0", "0.de3@-1", "0.4c7f8@0"};
-char *f31[ENTS] = {"0.75d9aa2a@0", "0.275d1d8c@0", "0.a319012@0", "0.103940ee@0", "0.e16e268@-1", "0.fdcf1ae2@0", "0.6474ec6c@0", "0.42459634@0", "0.33023282@0", "0.83fac0a@-1"};
-char *f32[ENTS] = {"0.baecd515@0", "0.13ae8ec6@0", "0.518c809@0", "0.881ca077@0", "0.870b7134@0", "0.7ee78d71@0", "0.323a7636@0", "0.a122cb1a@0", "0.99811941@0", "0.841fd605@0"};
-char *f33[ENTS] = {"0.fd7a665@-1", "0.aeb77c1d8@0", "0.484aff39@0", "0.db51effb@0", "0.58be5eee8@0", "0.b72904ea@0", "0.37b2d896@0", "0.249ddde3@0", "0.55f952ea8@0", "0.36a48d1e@0"};
+const char *f31[ENTS] = {"0.75d9aa2a@0", "0.275d1d8c@0", "0.a319012@0", "0.103940ee@0", "0.e16e268@-1", "0.fdcf1ae2@0", "0.6474ec6c@0", "0.42459634@0", "0.33023282@0", "0.83fac0a@-1"};
+const char *f32[ENTS] = {"0.baecd515@0", "0.13ae8ec6@0", "0.518c809@0", "0.881ca077@0", "0.870b7134@0", "0.7ee78d71@0", "0.323a7636@0", "0.a122cb1a@0", "0.99811941@0", "0.841fd605@0"};
+const char *f33[ENTS] = {"0.fd7a665@-1", "0.aeb77c1d8@0", "0.484aff39@0", "0.db51effb@0", "0.58be5eee8@0", "0.b72904ea@0", "0.37b2d896@0", "0.249ddde3@0", "0.55f952ea8@0", "0.36a48d1e@0"};
-char *f63[ENTS] = {"0.914e9e6cff4f6b9@0", "0.7753d3b8364c60ec@0", "0.3c15909cecf1c1f6@0", "0.2282cb02e5167c6a@0", "0.6d56c20a47e1e3ee@0", "0.7ca81d1d2b81d696@0", "0.8735c2c0afb793a6@0", "0.e69f64c1b64872a@0", "0.fa7462fdff85137e@0", "0.3b00603f67a341a2@0"};
-char *f64[ENTS] = {"0.48a74f367fa7b5c8@0", "0.bba9e9dc1b263076@0", "0.9e0ac84e7678e0fb@0", "0.11416581728b3e35@0", "0.b6ab610523f0f1f7@0", "0.be540e8e95c0eb4b@0", "0.439ae16057dbc9d3@0", "0.f34fb260db24395@0", "0.fd3a317effc289bf@0", "0.1d80301fb3d1a0d1@0"};
-char *f65[ENTS] = {"0.ffbbb886c236a4f8@0", "0.d8a08b80eb84f708@0", "0.18ffd40d1045b57a@0", "0.22331c6a18e9e3408@0", "0.6fae2b4eadd545aa8@0", "0.cbeccf54df9472d@0", "0.c8d6f04f6eca67d7@0", "0.ca5677d36ef2f0c68@0", "0.d7e08b3e2b1396c9@0", "0.68494ca6d39790378@0"};
+const char *f63[ENTS] = {"0.914e9e6cff4f6b9@0", "0.7753d3b8364c60ec@0", "0.3c15909cecf1c1f6@0", "0.2282cb02e5167c6a@0", "0.6d56c20a47e1e3ee@0", "0.7ca81d1d2b81d696@0", "0.8735c2c0afb793a6@0", "0.e69f64c1b64872a@0", "0.fa7462fdff85137e@0", "0.3b00603f67a341a2@0"};
+const char *f64[ENTS] = {"0.48a74f367fa7b5c8@0", "0.bba9e9dc1b263076@0", "0.9e0ac84e7678e0fb@0", "0.11416581728b3e35@0", "0.b6ab610523f0f1f7@0", "0.be540e8e95c0eb4b@0", "0.439ae16057dbc9d3@0", "0.f34fb260db24395@0", "0.fd3a317effc289bf@0", "0.1d80301fb3d1a0d1@0"};
+const char *f65[ENTS] = {"0.ffbbb886c236a4f8@0", "0.d8a08b80eb84f708@0", "0.18ffd40d1045b57a@0", "0.22331c6a18e9e3408@0", "0.6fae2b4eadd545aa8@0", "0.cbeccf54df9472d@0", "0.c8d6f04f6eca67d7@0", "0.ca5677d36ef2f0c68@0", "0.d7e08b3e2b1396c9@0", "0.68494ca6d39790378@0"};
-char *f127[ENTS] = {"0.5ecd7526555eb140e3fb1e0e85533418@0", "0.e79f478cc99382ea6a0f94c1d8d7084a@0", "0.a7d40e994263bd8259ad171551c404f@0", "0.79eb5918686a65f14a7981d68eb03ee6@0", "0.a1823ab0d3c411543735462f7185a152@0", "0.16462c791250ec8e563de32c85d59c12@0", "0.913e9807a83f0ea1391ad9219cce9f2a@0", "0.5570e9192d54cec5d4326568993ae2c8@0", "0.13196ab23f80b5a635f7783acf721dba@0", "0.ef09177323fa17c6635b9e28aff78ce4@0"};
-char *f128[ENTS] = {"0.af66ba932aaf58a071fd8f0742a99a0c@0", "0.73cfa3c664c9c1753507ca60ec6b8425@0", "0.53ea074ca131dec12cd68b8aa8e20278@0", "0.3cf5ac8c343532f8a53cc0eb47581f73@0", "0.50c11d5869e208aa1b9aa317b8c2d0a9@0", "0.8b23163c892876472b1ef19642eace09@0", "0.489f4c03d41f87509c8d6c90ce674f95@0", "0.aab8748c96aa6762ea1932b44c9d7164@0", "0.98cb5591fc05ad31afbbc1d67b90edd@-1", "0.f7848bb991fd0be331adcf1457fbc672@0"};
+const char *f127[ENTS] = {"0.5ecd7526555eb140e3fb1e0e85533418@0", "0.e79f478cc99382ea6a0f94c1d8d7084a@0", "0.a7d40e994263bd8259ad171551c404f@0", "0.79eb5918686a65f14a7981d68eb03ee6@0", "0.a1823ab0d3c411543735462f7185a152@0", "0.16462c791250ec8e563de32c85d59c12@0", "0.913e9807a83f0ea1391ad9219cce9f2a@0", "0.5570e9192d54cec5d4326568993ae2c8@0", "0.13196ab23f80b5a635f7783acf721dba@0", "0.ef09177323fa17c6635b9e28aff78ce4@0"};
+const char *f128[ENTS] = {"0.af66ba932aaf58a071fd8f0742a99a0c@0", "0.73cfa3c664c9c1753507ca60ec6b8425@0", "0.53ea074ca131dec12cd68b8aa8e20278@0", "0.3cf5ac8c343532f8a53cc0eb47581f73@0", "0.50c11d5869e208aa1b9aa317b8c2d0a9@0", "0.8b23163c892876472b1ef19642eace09@0", "0.489f4c03d41f87509c8d6c90ce674f95@0", "0.aab8748c96aa6762ea1932b44c9d7164@0", "0.98cb5591fc05ad31afbbc1d67b90edd@-1", "0.f7848bb991fd0be331adcf1457fbc672@0"};
struct rt
{
- char **s;
+ const char **s;
int nbits;
};
int
-#if __STDC__
main (int argc, char *argv[])
-#else
-main (argc, argv)
- int argc;
- char *argv[];
-#endif
{
static char usage[] = "\
usage: t-rand [function nbits]\n\
Copyright 2003, 2005 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
Copyright 2003, 2005 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
Copyright 2002 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include "gmp.h"
/*
Copyright 1999 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include "gmp.h"
Copyright 1996, 2001, 2004, 2005 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
of the normal gmp code. Speed isn't a consideration.
Copyright 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
-2007, 2008, 2009, 2012 Free Software Foundation, Inc.
+2007, 2008, 2009, 2011, 2012, 2013 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
/* Most routines have assertions representing what the mpn routines are
byte_overlap_p (const void *v_xp, mp_size_t xsize,
const void *v_yp, mp_size_t ysize)
{
- const char *xp = v_xp;
- const char *yp = v_yp;
+ const char *xp = (const char *) v_xp;
+ const char *yp = (const char *) v_yp;
ASSERT (xsize >= 0);
ASSERT (ysize >= 0);
#define AORS_1(operation) \
{ \
- mp_limb_t i; \
+ mp_size_t i; \
\
ASSERT (refmpn_overlap_fullonly_p (rp, sp, size)); \
ASSERT (size >= 1); \
return refmpn_sub_nc (rp, s1p, s2p, size, CNST_LIMB(0));
}
+mp_limb_t
+refmpn_addcnd_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size, mp_limb_t cnd)
+{
+ if (cnd != 0)
+ return refmpn_add_n (rp, s1p, s2p, size);
+ else
+ {
+ refmpn_copyi (rp, s1p, size);
+ return 0;
+ }
+}
+mp_limb_t
+refmpn_subcnd_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p, mp_size_t size, mp_limb_t cnd)
+{
+ if (cnd != 0)
+ return refmpn_sub_n (rp, s1p, s2p, size);
+ else
+ {
+ refmpn_copyi (rp, s1p, size);
+ return 0;
+ }
+}
+
+
+#define AORS_ERR1_N(operation) \
+ { \
+ mp_size_t i; \
+ mp_limb_t carry2; \
+ \
+ ASSERT (refmpn_overlap_fullonly_p (rp, s1p, size)); \
+ ASSERT (refmpn_overlap_fullonly_p (rp, s2p, size)); \
+ ASSERT (! refmpn_overlap_p (rp, size, yp, size)); \
+ ASSERT (! refmpn_overlap_p (ep, 2, s1p, size)); \
+ ASSERT (! refmpn_overlap_p (ep, 2, s2p, size)); \
+ ASSERT (! refmpn_overlap_p (ep, 2, yp, size)); \
+ ASSERT (! refmpn_overlap_p (ep, 2, rp, size)); \
+ \
+ ASSERT (carry == 0 || carry == 1); \
+ ASSERT (size >= 1); \
+ ASSERT_MPN (s1p, size); \
+ ASSERT_MPN (s2p, size); \
+ ASSERT_MPN (yp, size); \
+ \
+ ep[0] = ep[1] = CNST_LIMB(0); \
+ \
+ for (i = 0; i < size; i++) \
+ { \
+ carry = operation (&rp[i], s1p[i], s2p[i], carry); \
+ if (carry == 1) \
+ { \
+ carry2 = ref_addc_limb (&ep[0], ep[0], yp[size - 1 - i]); \
+ carry2 = ref_addc_limb (&ep[1], ep[1], carry2); \
+ ASSERT (carry2 == 0); \
+ } \
+ } \
+ return carry; \
+ }
+
+mp_limb_t
+refmpn_add_err1_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p,
+ mp_ptr ep, mp_srcptr yp,
+ mp_size_t size, mp_limb_t carry)
+{
+ AORS_ERR1_N (adc);
+}
+mp_limb_t
+refmpn_sub_err1_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p,
+ mp_ptr ep, mp_srcptr yp,
+ mp_size_t size, mp_limb_t carry)
+{
+ AORS_ERR1_N (sbb);
+}
+
+
+#define AORS_ERR2_N(operation) \
+ { \
+ mp_size_t i; \
+ mp_limb_t carry2; \
+ \
+ ASSERT (refmpn_overlap_fullonly_p (rp, s1p, size)); \
+ ASSERT (refmpn_overlap_fullonly_p (rp, s2p, size)); \
+ ASSERT (! refmpn_overlap_p (rp, size, y1p, size)); \
+ ASSERT (! refmpn_overlap_p (rp, size, y2p, size)); \
+ ASSERT (! refmpn_overlap_p (ep, 4, s1p, size)); \
+ ASSERT (! refmpn_overlap_p (ep, 4, s2p, size)); \
+ ASSERT (! refmpn_overlap_p (ep, 4, y1p, size)); \
+ ASSERT (! refmpn_overlap_p (ep, 4, y2p, size)); \
+ ASSERT (! refmpn_overlap_p (ep, 4, rp, size)); \
+ \
+ ASSERT (carry == 0 || carry == 1); \
+ ASSERT (size >= 1); \
+ ASSERT_MPN (s1p, size); \
+ ASSERT_MPN (s2p, size); \
+ ASSERT_MPN (y1p, size); \
+ ASSERT_MPN (y2p, size); \
+ \
+ ep[0] = ep[1] = CNST_LIMB(0); \
+ ep[2] = ep[3] = CNST_LIMB(0); \
+ \
+ for (i = 0; i < size; i++) \
+ { \
+ carry = operation (&rp[i], s1p[i], s2p[i], carry); \
+ if (carry == 1) \
+ { \
+ carry2 = ref_addc_limb (&ep[0], ep[0], y1p[size - 1 - i]); \
+ carry2 = ref_addc_limb (&ep[1], ep[1], carry2); \
+ ASSERT (carry2 == 0); \
+ carry2 = ref_addc_limb (&ep[2], ep[2], y2p[size - 1 - i]); \
+ carry2 = ref_addc_limb (&ep[3], ep[3], carry2); \
+ ASSERT (carry2 == 0); \
+ } \
+ } \
+ return carry; \
+ }
+
+mp_limb_t
+refmpn_add_err2_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p,
+ mp_ptr ep, mp_srcptr y1p, mp_srcptr y2p,
+ mp_size_t size, mp_limb_t carry)
+{
+ AORS_ERR2_N (adc);
+}
+mp_limb_t
+refmpn_sub_err2_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p,
+ mp_ptr ep, mp_srcptr y1p, mp_srcptr y2p,
+ mp_size_t size, mp_limb_t carry)
+{
+ AORS_ERR2_N (sbb);
+}
+
+
+#define AORS_ERR3_N(operation) \
+ { \
+ mp_size_t i; \
+ mp_limb_t carry2; \
+ \
+ ASSERT (refmpn_overlap_fullonly_p (rp, s1p, size)); \
+ ASSERT (refmpn_overlap_fullonly_p (rp, s2p, size)); \
+ ASSERT (! refmpn_overlap_p (rp, size, y1p, size)); \
+ ASSERT (! refmpn_overlap_p (rp, size, y2p, size)); \
+ ASSERT (! refmpn_overlap_p (rp, size, y3p, size)); \
+ ASSERT (! refmpn_overlap_p (ep, 6, s1p, size)); \
+ ASSERT (! refmpn_overlap_p (ep, 6, s2p, size)); \
+ ASSERT (! refmpn_overlap_p (ep, 6, y1p, size)); \
+ ASSERT (! refmpn_overlap_p (ep, 6, y2p, size)); \
+ ASSERT (! refmpn_overlap_p (ep, 6, y3p, size)); \
+ ASSERT (! refmpn_overlap_p (ep, 6, rp, size)); \
+ \
+ ASSERT (carry == 0 || carry == 1); \
+ ASSERT (size >= 1); \
+ ASSERT_MPN (s1p, size); \
+ ASSERT_MPN (s2p, size); \
+ ASSERT_MPN (y1p, size); \
+ ASSERT_MPN (y2p, size); \
+ ASSERT_MPN (y3p, size); \
+ \
+ ep[0] = ep[1] = CNST_LIMB(0); \
+ ep[2] = ep[3] = CNST_LIMB(0); \
+ ep[4] = ep[5] = CNST_LIMB(0); \
+ \
+ for (i = 0; i < size; i++) \
+ { \
+ carry = operation (&rp[i], s1p[i], s2p[i], carry); \
+ if (carry == 1) \
+ { \
+ carry2 = ref_addc_limb (&ep[0], ep[0], y1p[size - 1 - i]); \
+ carry2 = ref_addc_limb (&ep[1], ep[1], carry2); \
+ ASSERT (carry2 == 0); \
+ carry2 = ref_addc_limb (&ep[2], ep[2], y2p[size - 1 - i]); \
+ carry2 = ref_addc_limb (&ep[3], ep[3], carry2); \
+ ASSERT (carry2 == 0); \
+ carry2 = ref_addc_limb (&ep[4], ep[4], y3p[size - 1 - i]); \
+ carry2 = ref_addc_limb (&ep[5], ep[5], carry2); \
+ ASSERT (carry2 == 0); \
+ } \
+ } \
+ return carry; \
+ }
+
+mp_limb_t
+refmpn_add_err3_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p,
+ mp_ptr ep, mp_srcptr y1p, mp_srcptr y2p, mp_srcptr y3p,
+ mp_size_t size, mp_limb_t carry)
+{
+ AORS_ERR3_N (adc);
+}
+mp_limb_t
+refmpn_sub_err3_n (mp_ptr rp, mp_srcptr s1p, mp_srcptr s2p,
+ mp_ptr ep, mp_srcptr y1p, mp_srcptr y2p, mp_srcptr y3p,
+ mp_size_t size, mp_limb_t carry)
+{
+ AORS_ERR3_N (sbb);
+}
+
+
mp_limb_t
refmpn_addlsh_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
mp_size_t n, unsigned int s)
{
return refmpn_addlsh_n (rp, up, vp, n, 2);
}
+mp_limb_t
+refmpn_addlsh_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n, unsigned int s)
+{
+ return refmpn_addlsh_n (rp, rp, vp, n, s);
+}
+mp_limb_t
+refmpn_addlsh1_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
+{
+ return refmpn_addlsh_n (rp, rp, vp, n, 1);
+}
+mp_limb_t
+refmpn_addlsh2_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
+{
+ return refmpn_addlsh_n (rp, rp, vp, n, 2);
+}
+mp_limb_t
+refmpn_addlsh_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n, unsigned int s)
+{
+ return refmpn_addlsh_n (rp, vp, rp, n, s);
+}
+mp_limb_t
+refmpn_addlsh1_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
+{
+ return refmpn_addlsh_n (rp, vp, rp, n, 1);
+}
+mp_limb_t
+refmpn_addlsh2_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
+{
+ return refmpn_addlsh_n (rp, vp, rp, n, 2);
+}
+mp_limb_t
+refmpn_addlsh_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
+		  mp_size_t n, unsigned int s, mp_limb_t carry)
+{
+  mp_limb_t cy;
+  /* carry is an unsigned limb, so only its upper bound needs checking. */
+  ASSERT (carry <= (CNST_LIMB(1) << s));
+  /* Run refmpn_addlsh_n, then add carry into rp; return both carry-outs. */
+  cy = refmpn_addlsh_n (rp, up, vp, n, s);
+  cy += refmpn_add_1 (rp, rp, n, carry);
+  return cy;
+}
+mp_limb_t
+refmpn_addlsh1_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t carry)
+{
+ return refmpn_addlsh_nc (rp, up, vp, n, 1, carry);
+}
+mp_limb_t
+refmpn_addlsh2_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t carry)
+{
+ return refmpn_addlsh_nc (rp, up, vp, n, 2, carry);
+}
mp_limb_t
refmpn_sublsh_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
{
return refmpn_sublsh_n (rp, up, vp, n, 1);
}
+mp_limb_t
+refmpn_sublsh2_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
+{
+ return refmpn_sublsh_n (rp, up, vp, n, 2);
+}
+mp_limb_t
+refmpn_sublsh_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n, unsigned int s)
+{
+ return refmpn_sublsh_n (rp, rp, vp, n, s);
+}
+mp_limb_t
+refmpn_sublsh1_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
+{
+ return refmpn_sublsh_n (rp, rp, vp, n, 1);
+}
+mp_limb_t
+refmpn_sublsh2_n_ip1 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
+{
+ return refmpn_sublsh_n (rp, rp, vp, n, 2);
+}
+mp_limb_t
+refmpn_sublsh_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n, unsigned int s)
+{
+ return refmpn_sublsh_n (rp, vp, rp, n, s);
+}
+mp_limb_t
+refmpn_sublsh1_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
+{
+ return refmpn_sublsh_n (rp, vp, rp, n, 1);
+}
+mp_limb_t
+refmpn_sublsh2_n_ip2 (mp_ptr rp, mp_srcptr vp, mp_size_t n)
+{
+ return refmpn_sublsh_n (rp, vp, rp, n, 2);
+}
+mp_limb_t
+refmpn_sublsh_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
+		  mp_size_t n, unsigned int s, mp_limb_t carry)
+{
+  mp_limb_t cy;
+  /* carry is an unsigned limb, so only its upper bound needs checking. */
+  ASSERT (carry <= (CNST_LIMB(1) << s));
+  /* Run refmpn_sublsh_n, then subtract carry from rp; return both borrows. */
+  cy = refmpn_sublsh_n (rp, up, vp, n, s);
+  cy += refmpn_sub_1 (rp, rp, n, carry);
+  return cy;
+}
+mp_limb_t
+refmpn_sublsh1_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t carry)
+{
+ return refmpn_sublsh_nc (rp, up, vp, n, 1, carry);
+}
+mp_limb_t
+refmpn_sublsh2_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_t carry)
+{
+ return refmpn_sublsh_nc (rp, up, vp, n, 2, carry);
+}
mp_limb_signed_t
refmpn_rsblsh_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
{
return refmpn_rsblsh_n (rp, up, vp, n, 2);
}
+mp_limb_signed_t
+refmpn_rsblsh_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp,
+ mp_size_t n, unsigned int s, mp_limb_signed_t carry)
+{
+ mp_limb_signed_t cy;
+
+ ASSERT (carry == -1 || (carry >> s) == 0);
+
+ cy = refmpn_rsblsh_n (rp, up, vp, n, s);
+ if (carry > 0)
+ cy += refmpn_add_1 (rp, rp, n, carry);
+ else
+ cy -= refmpn_sub_1 (rp, rp, n, -carry);
+ return cy;
+}
+mp_limb_signed_t
+refmpn_rsblsh1_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_signed_t carry)
+{
+ return refmpn_rsblsh_nc (rp, up, vp, n, 1, carry);
+}
+mp_limb_signed_t
+refmpn_rsblsh2_nc (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n, mp_limb_signed_t carry)
+{
+ return refmpn_rsblsh_nc (rp, up, vp, n, 2, carry);
+}
mp_limb_t
refmpn_rsh1add_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
{
return refmpn_mul_N (rp, sp, size, mult, (mp_size_t) 4);
}
+mp_limb_t
+refmpn_mul_5 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
+{
+ return refmpn_mul_N (rp, sp, size, mult, (mp_size_t) 5);
+}
+mp_limb_t
+refmpn_mul_6 (mp_ptr rp, mp_srcptr sp, mp_size_t size, mp_srcptr mult)
+{
+ return refmpn_mul_N (rp, sp, size, mult, (mp_size_t) 6);
+}
#define AORSMUL_1C(operation_n) \
{ \
prodp[usize+i] = refmpn_addmul_1 (prodp+i, up, usize, vp[i]);
}
+
+/* The same as mpn/generic/mulmid_basecase.c, but using refmpn functions. */
+void
+refmpn_mulmid_basecase (mp_ptr rp,
+ mp_srcptr up, mp_size_t un,
+ mp_srcptr vp, mp_size_t vn)
+{
+ mp_limb_t cy;
+ mp_size_t i;
+
+ ASSERT (un >= vn);
+ ASSERT (vn >= 1);
+ ASSERT (! refmpn_overlap_p (rp, un - vn + 3, up, un));
+ ASSERT (! refmpn_overlap_p (rp, un - vn + 3, vp, vn));
+ ASSERT_MPN (up, un);
+ ASSERT_MPN (vp, vn);
+
+ rp[un - vn + 1] = refmpn_mul_1 (rp, up + vn - 1, un - vn + 1, vp[0]);
+ rp[un - vn + 2] = CNST_LIMB (0);
+ for (i = 1; i < vn; i++)
+ {
+ cy = refmpn_addmul_1 (rp, up + vn - i - 1, un - vn + 1, vp[i]);
+ cy = ref_addc_limb (&rp[un - vn + 1], rp[un - vn + 1], cy);
+ cy = ref_addc_limb (&rp[un - vn + 2], rp[un - vn + 2], cy);
+ ASSERT (cy == 0);
+ }
+}
+
+void
+refmpn_toom42_mulmid (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n,
+ mp_ptr scratch)
+{
+ refmpn_mulmid_basecase (rp, up, 2*n - 1, vp, n);
+}
+
+void
+refmpn_mulmid_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
+{
+ /* FIXME: this could be made faster by using refmpn_mul and then subtracting
+ off products near the middle product region boundary */
+ refmpn_mulmid_basecase (rp, up, 2*n - 1, vp, n);
+}
+
+void
+refmpn_mulmid (mp_ptr rp, mp_srcptr up, mp_size_t un,
+ mp_srcptr vp, mp_size_t vn)
+{
+ /* FIXME: this could be made faster by using refmpn_mul and then subtracting
+ off products near the middle product region boundary */
+ refmpn_mulmid_basecase (rp, up, un, vp, vn);
+}
+
+
+
#define TOOM3_THRESHOLD (MAX (MUL_TOOM33_THRESHOLD, SQR_TOOM3_THRESHOLD))
#define TOOM4_THRESHOLD (MAX (MUL_TOOM44_THRESHOLD, SQR_TOOM4_THRESHOLD))
+#define TOOM6_THRESHOLD (MAX (MUL_TOOM6H_THRESHOLD, SQR_TOOM6_THRESHOLD))
#if WANT_FFT
#define FFT_THRESHOLD (MAX (MUL_FFT_THRESHOLD, SQR_FFT_THRESHOLD))
#else
{
mp_ptr tp;
mp_size_t tn;
- mp_limb_t cy;
if (vn < TOOM3_THRESHOLD)
{
- /* In the mpn_mul_basecase and mpn_kara_mul_n range, use our own
- mul_basecase. */
+ /* In the mpn_mul_basecase and toom2 range, use our own mul_basecase. */
if (vn != 0)
refmpn_mul_basecase (wp, up, un, vp, vn);
else
if (vn < TOOM4_THRESHOLD)
{
- /* In the mpn_toom33_mul range, use mpn_toom22_mul. */
+ /* In the toom3 range, use mpn_toom22_mul. */
tn = 2 * vn + mpn_toom22_mul_itch (vn, vn);
tp = refmpn_malloc_limbs (tn);
mpn_toom22_mul (tp, up, vn, vp, vn, tp + 2 * vn);
}
- else if (vn < FFT_THRESHOLD)
+ else if (vn < TOOM6_THRESHOLD)
{
- /* In the mpn_toom44_mul range, use mpn_toom33_mul. */
+ /* In the toom4 range, use mpn_toom33_mul. */
tn = 2 * vn + mpn_toom33_mul_itch (vn, vn);
tp = refmpn_malloc_limbs (tn);
mpn_toom33_mul (tp, up, vn, vp, vn, tp + 2 * vn);
}
- else
+ else if (vn < FFT_THRESHOLD)
{
- /* Finally, for the largest operands, use mpn_toom44_mul. */
+ /* In the toom6 range, use mpn_toom44_mul. */
tn = 2 * vn + mpn_toom44_mul_itch (vn, vn);
tp = refmpn_malloc_limbs (tn);
mpn_toom44_mul (tp, up, vn, vp, vn, tp + 2 * vn);
}
+ else
+ {
+ /* Finally, for the largest operands, use mpn_toom6h_mul. */
+ tn = 2 * vn + mpn_toom6h_mul_itch (vn, vn);
+ tp = refmpn_malloc_limbs (tn);
+ mpn_toom6h_mul (tp, up, vn, vp, vn, tp + 2 * vn);
+ }
if (un != vn)
{
refmpn_mul (wp + vn, up + vn, un - vn, vp, vn);
MPN_COPY (wp, tp, vn);
- cy = refmpn_add (wp + vn, wp + vn, un, tp + vn, vn);
+ ASSERT_NOCARRY (refmpn_add (wp + vn, wp + vn, un, tp + vn, vn));
}
else
{
}
}
-void
+mp_limb_t
refmpn_redc_1 (mp_ptr rp, mp_ptr up, mp_srcptr mp, mp_size_t n, mp_limb_t invm)
{
mp_size_t j;
for (j = n - 1; j >= 0; j--)
{
- up[0] = mpn_addmul_1 (up, mp, n, (up[0] * invm) & GMP_NUMB_MASK);
+ up[0] = refmpn_addmul_1 (up, mp, n, (up[0] * invm) & GMP_NUMB_MASK);
up++;
}
cy = mpn_add_n (rp, up, up - n, n);
- if (cy != 0)
- mpn_sub_n (rp, rp, mp, n);
+ return cy;
}
size_t
Copyright 2001 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include "gmp.h"
#include "gmp-impl.h"
Copyright 1997, 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
/* always do assertion checking */
#define WANT_ASSERT 1
int
refmpz_jacobi (mpz_srcptr a, mpz_srcptr b)
{
- mpz_t b_odd;
- mpz_init_set (b_odd, b);
- if (mpz_sgn (b_odd) != 0)
- mpz_fdiv_q_2exp (b_odd, b_odd, mpz_scan1 (b_odd, 0L));
- return refmpz_kronecker (a, b_odd);
+ ASSERT_ALWAYS (mpz_sgn (b) > 0);
+ ASSERT_ALWAYS (mpz_odd_p (b));
+
+ return refmpz_kronecker (a, b);
}
+/* Legendre symbol via powm. p must be an odd prime. */
int
-refmpz_legendre (mpz_srcptr a, mpz_srcptr b)
+refmpz_legendre (mpz_srcptr a, mpz_srcptr p)
{
- return refmpz_jacobi (a, b);
+ int res;
+
+ mpz_t r;
+ mpz_t e;
+
+ ASSERT_ALWAYS (mpz_sgn (p) > 0);
+ ASSERT_ALWAYS (mpz_odd_p (p));
+
+ mpz_init (r);
+ mpz_init (e);
+
+ mpz_fdiv_r (r, a, p);
+
+ mpz_set (e, p);
+ mpz_sub_ui (e, e, 1);
+ mpz_fdiv_q_2exp (e, e, 1);
+ mpz_powm (r, r, e, p);
+
+ /* Normalize to a more or less symmetric range around zero */
+ if (mpz_cmp (r, e) > 0)
+ mpz_sub (r, r, p);
+
+ ASSERT_ALWAYS (mpz_cmpabs_ui (r, 1) <= 0);
+
+ res = mpz_sgn (r);
+
+ mpz_clear (r);
+ mpz_clear (e);
+
+ return res;
}
Copyright 1999, 2000, 2001 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include "config.h"
Copyright 2002 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
Copyright 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
#ifdef ULONG_MAX
-char *ulong_max_def = "defined";
+const char *ulong_max_def = "defined";
#else
-char *ulong_max_def = "not defined";
+const char *ulong_max_def = "not defined";
#endif
#ifdef LONG_MAX
-char *long_max_def = "defined";
+const char *long_max_def = "defined";
#else
-char *long_max_def = "not defined";
+const char *long_max_def = "not defined";
#endif
#ifdef UINT_MAX
-char *uint_max_def = "defined";
+const char *uint_max_def = "defined";
#else
-char *uint_max_def = "not defined";
+const char *uint_max_def = "not defined";
#endif
#ifdef INT_MAX
-char *int_max_def = "defined";
+const char *int_max_def = "defined";
#else
-char *int_max_def = "not defined";
+const char *int_max_def = "not defined";
#endif
#ifdef USHRT_MAX
-char *ushrt_max_def = "defined";
+const char *ushrt_max_def = "defined";
#else
-char *ushrt_max_def = "not defined";
+const char *ushrt_max_def = "not defined";
#endif
#ifdef SHRT_MAX
-char *shrt_max_def = "defined";
+const char *shrt_max_def = "defined";
#else
-char *shrt_max_def = "not defined";
+const char *shrt_max_def = "not defined";
#endif
#include "gmp-impl.h"
Copyright 2001, 2002, 2003 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
Copyright 2001 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
Copyright 2004 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
Copyright 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
Copyright 2002 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
/* Test popc_limb.
-Copyright 2002 Free Software Foundation, Inc.
+Copyright 2002, 2012 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
}
}
+ src = 0;
+ want = 0;
+ for (i = 0; i < GMP_LIMB_BITS; i++)
+ {
+ src += CNST_LIMB(1) << i;
+ want += 1;
+
+ popc_limb (got, src);
+ if (got != want)
+ {
+ goto error;
+ }
+ }
+
for (i = 0; i < 100; i++)
{
mpn_random2 (&src, (mp_size_t) 1);
Copyright 2004 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include <stdlib.h>
/* Tests support prototypes etc.
-Copyright 2000, 2001, 2002, 2003, 2004, 2008, 2009 Free Software Foundation,
-Inc.
+Copyright 2000, 2001, 2002, 2003, 2004, 2008, 2009, 2010, 2011, 2012 Free
+Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#ifndef __TESTS_H__
#endif
-void tests_start __GMP_PROTO ((void));
-void tests_end __GMP_PROTO ((void));
+void tests_start (void);
+void tests_end (void);
-void tests_memory_start __GMP_PROTO ((void));
-void tests_memory_end __GMP_PROTO ((void));
-void *tests_allocate __GMP_PROTO ((size_t size));
-void *tests_reallocate __GMP_PROTO ((void *ptr, size_t old_size, size_t new_size));
-void tests_free __GMP_PROTO ((void *ptr, size_t size));
-void tests_free_nosize __GMP_PROTO ((void *ptr));
-int tests_memory_valid __GMP_PROTO ((void *ptr));
+void tests_memory_start (void);
+void tests_memory_end (void);
+void *tests_allocate (size_t);
+void *tests_reallocate (void *, size_t, size_t);
+void tests_free (void *, size_t);
+void tests_free_nosize (void *);
+int tests_memory_valid (void *);
-void tests_rand_start __GMP_PROTO ((void));
-void tests_rand_end __GMP_PROTO ((void));
+void tests_rand_start (void);
+void tests_rand_end (void);
-double tests_infinity_d __GMP_PROTO (());
-int tests_hardware_getround __GMP_PROTO ((void));
-int tests_hardware_setround __GMP_PROTO ((int));
-int tests_isinf __GMP_PROTO ((double));
-int tests_dbl_mant_bits __GMP_PROTO ((void));
+double tests_infinity_d ();
+int tests_hardware_getround (void);
+int tests_hardware_setround (int);
+int tests_isinf (double);
+int tests_dbl_mant_bits (void);
-void x86_fldcw __GMP_PROTO ((unsigned short));
-unsigned short x86_fstcw __GMP_PROTO ((void));
+void x86_fldcw (unsigned short);
+unsigned short x86_fstcw (void);
/* tests_setjmp_sigfpe is like a setjmp, establishing a trap for SIGFPE.
(signal (SIGFPE, tests_sigfpe_handler), \
setjmp (tests_sigfpe_target))
-RETSIGTYPE tests_sigfpe_handler __GMP_PROTO ((int));
-void tests_sigfpe_done __GMP_PROTO ((void));
+RETSIGTYPE tests_sigfpe_handler (int);
+void tests_sigfpe_done (void);
extern jmp_buf tests_sigfpe_target;
#if HAVE_CALLING_CONVENTIONS
-extern mp_limb_t (*calling_conventions_function) __GMP_PROTO ((ANYARGS));
-mp_limb_t calling_conventions __GMP_PROTO ((ANYARGS));
-int calling_conventions_check __GMP_PROTO ((void));
+extern mp_limb_t (*calling_conventions_function) (ANYARGS);
+mp_limb_t calling_conventions (ANYARGS);
+int calling_conventions_check (void);
#define CALLING_CONVENTIONS(function) \
(calling_conventions_function = (function), calling_conventions)
#define CALLING_CONVENTIONS_CHECK() (calling_conventions_check())
extern int mp_trace_base;
-void mp_limb_trace __GMP_PROTO ((const char *, mp_limb_t));
-void mpn_trace __GMP_PROTO ((const char *name, mp_srcptr ptr, mp_size_t size));
-void mpn_tracea __GMP_PROTO ((const char *name, const mp_ptr *a, int count,
- mp_size_t size));
-void mpn_tracen __GMP_PROTO ((const char *name, int num, mp_srcptr ptr,
- mp_size_t size));
-void mpn_trace_file __GMP_PROTO ((const char *filename,
- mp_srcptr ptr, mp_size_t size));
-void mpn_tracea_file __GMP_PROTO ((const char *filename,
- const mp_ptr *a, int count, mp_size_t size));
-void mpf_trace __GMP_PROTO ((const char *name, mpf_srcptr z));
-void mpq_trace __GMP_PROTO ((const char *name, mpq_srcptr q));
-void mpz_trace __GMP_PROTO ((const char *name, mpz_srcptr z));
-void mpz_tracen __GMP_PROTO ((const char *name, int num, mpz_srcptr z));
-void byte_trace __GMP_PROTO ((const char *, const void *, mp_size_t));
-void byte_tracen __GMP_PROTO ((const char *, int, const void *, mp_size_t));
-void d_trace __GMP_PROTO ((const char *, double));
-
-
-void spinner __GMP_PROTO ((void));
+void mp_limb_trace (const char *, mp_limb_t);
+void mpn_trace (const char *, mp_srcptr, mp_size_t);
+void mpn_tracea (const char *, const mp_ptr *, int, mp_size_t);
+void mpn_tracen (const char *, int, mp_srcptr, mp_size_t);
+void mpn_trace_file (const char *, mp_srcptr, mp_size_t);
+void mpn_tracea_file (const char *, const mp_ptr *, int, mp_size_t);
+void mpf_trace (const char *, mpf_srcptr);
+void mpq_trace (const char *, mpq_srcptr);
+void mpz_trace (const char *, mpz_srcptr);
+void mpz_tracen (const char *, int, mpz_srcptr);
+void byte_trace (const char *, const void *, mp_size_t);
+void byte_tracen (const char *, int, const void *, mp_size_t);
+void d_trace (const char *, double);
+
+
+void spinner (void);
extern unsigned long spinner_count;
extern int spinner_wanted;
extern int spinner_tick;
-void *align_pointer __GMP_PROTO ((void *p, size_t align));
-void *__gmp_allocate_func_aligned __GMP_PROTO ((size_t bytes, size_t align));
-void *__gmp_allocate_or_reallocate __GMP_PROTO ((void *ptr,
- size_t oldsize, size_t newsize));
-char *__gmp_allocate_strdup __GMP_PROTO ((const char *s));
-char *strtoupper __GMP_PROTO ((char *s_orig));
-mp_limb_t urandom __GMP_PROTO ((void));
-void call_rand_algs __GMP_PROTO ((void (*func) (const char *, gmp_randstate_t)));
-
-
-void mpf_set_str_or_abort __GMP_PROTO ((mpf_ptr f, const char *str, int base));
-
-
-void mpq_set_str_or_abort __GMP_PROTO ((mpq_ptr q, const char *str, int base));
-
-
-void mpz_erandomb __GMP_PROTO ((mpz_ptr rop, gmp_randstate_t rstate,
- unsigned long nbits));
-void mpz_erandomb_nonzero __GMP_PROTO ((mpz_ptr rop, gmp_randstate_t rstate,
- unsigned long nbits));
-void mpz_errandomb __GMP_PROTO ((mpz_ptr rop, gmp_randstate_t rstate,
- unsigned long nbits));
-void mpz_errandomb_nonzero __GMP_PROTO ((mpz_ptr rop, gmp_randstate_t rstate,
- unsigned long nbits));
-void mpz_init_set_n __GMP_PROTO ((mpz_ptr z, mp_srcptr p, mp_size_t size));
-void mpz_negrandom __GMP_PROTO ((mpz_ptr rop, gmp_randstate_t rstate));
-int mpz_pow2abs_p __GMP_PROTO ((mpz_srcptr z)) __GMP_ATTRIBUTE_PURE;
-void mpz_set_n __GMP_PROTO ((mpz_ptr z, mp_srcptr p, mp_size_t size));
-void mpz_set_str_or_abort __GMP_PROTO ((mpz_ptr z, const char *str, int base));
-
-mp_size_t mpn_diff_highest __GMP_PROTO ((mp_srcptr p1, mp_srcptr p2, mp_size_t n)) __GMP_ATTRIBUTE_PURE;
-mp_size_t mpn_diff_lowest __GMP_PROTO ((mp_srcptr p1, mp_srcptr p2, mp_size_t n)) __GMP_ATTRIBUTE_PURE;
-mp_size_t byte_diff_highest __GMP_PROTO ((const void *p1, const void *p2, mp_size_t size)) __GMP_ATTRIBUTE_PURE;
-mp_size_t byte_diff_lowest __GMP_PROTO ((const void *p1, const void *p2, mp_size_t size)) __GMP_ATTRIBUTE_PURE;
-
-
-mp_limb_t ref_addc_limb __GMP_PROTO ((mp_limb_t *, mp_limb_t, mp_limb_t));
-mp_limb_t ref_bswap_limb __GMP_PROTO ((mp_limb_t src));
-unsigned long ref_popc_limb __GMP_PROTO ((mp_limb_t src));
-mp_limb_t ref_subc_limb __GMP_PROTO ((mp_limb_t *, mp_limb_t, mp_limb_t));
-
-
-void refmpf_add __GMP_PROTO ((mpf_ptr, mpf_srcptr, mpf_srcptr));
-void refmpf_add_ulp __GMP_PROTO ((mpf_ptr f));
-void refmpf_fill __GMP_PROTO ((mpf_ptr f, mp_size_t size, mp_limb_t value));
-void refmpf_normalize __GMP_PROTO ((mpf_ptr f));
-void refmpf_set_prec_limbs __GMP_PROTO ((mpf_ptr f, unsigned long prec));
-unsigned long refmpf_set_overlap __GMP_PROTO ((mpf_ptr dst, mpf_srcptr src));
-void refmpf_sub __GMP_PROTO ((mpf_ptr, mpf_srcptr, mpf_srcptr));
-int refmpf_validate __GMP_PROTO ((const char *name, mpf_srcptr got, mpf_srcptr want));
-int refmpf_validate_division __GMP_PROTO ((const char *name, mpf_srcptr got,
- mpf_srcptr n, mpf_srcptr d));
-
-
-mp_limb_t refmpn_add __GMP_PROTO ((mp_ptr rp,
- mp_srcptr s1p, mp_size_t s1size,
- mp_srcptr s2p, mp_size_t s2size));
-mp_limb_t refmpn_add_1 __GMP_PROTO ((mp_ptr rp, mp_srcptr sp, mp_size_t size,
- mp_limb_t n));
-mp_limb_t refmpn_add_n __GMP_PROTO ((mp_ptr wp, mp_srcptr xp, mp_srcptr yp,
- mp_size_t size));
-mp_limb_t refmpn_add_nc __GMP_PROTO ((mp_ptr wp, mp_srcptr xp, mp_srcptr yp,
- mp_size_t size, mp_limb_t carry));
-mp_limb_t refmpn_addlsh1_n __GMP_PROTO ((mp_ptr wp, mp_srcptr xp, mp_srcptr yp,
- mp_size_t size));
-mp_limb_t refmpn_addlsh2_n __GMP_PROTO ((mp_ptr wp, mp_srcptr xp, mp_srcptr yp,
- mp_size_t size));
-mp_limb_t refmpn_addlsh_n __GMP_PROTO ((mp_ptr wp, mp_srcptr xp, mp_srcptr yp,
- mp_size_t size, unsigned int));
-mp_limb_t refmpn_addmul_1 __GMP_PROTO ((mp_ptr wp, mp_srcptr xp, mp_size_t size,
- mp_limb_t multiplier));
-mp_limb_t refmpn_addmul_1c __GMP_PROTO ((mp_ptr wp, mp_srcptr xp, mp_size_t size,
- mp_limb_t multiplier, mp_limb_t carry));
-mp_limb_t refmpn_addmul_2 __GMP_PROTO ((mp_ptr dst, mp_srcptr src,
- mp_size_t size, mp_srcptr mult));
-mp_limb_t refmpn_addmul_3 __GMP_PROTO ((mp_ptr dst, mp_srcptr src,
- mp_size_t size, mp_srcptr mult));
-mp_limb_t refmpn_addmul_4 __GMP_PROTO ((mp_ptr dst, mp_srcptr src,
- mp_size_t size, mp_srcptr mult));
-mp_limb_t refmpn_addmul_5 __GMP_PROTO ((mp_ptr dst, mp_srcptr src,
- mp_size_t size, mp_srcptr mult));
-mp_limb_t refmpn_addmul_6 __GMP_PROTO ((mp_ptr dst, mp_srcptr src,
- mp_size_t size, mp_srcptr mult));
-mp_limb_t refmpn_addmul_7 __GMP_PROTO ((mp_ptr dst, mp_srcptr src,
- mp_size_t size, mp_srcptr mult));
-mp_limb_t refmpn_addmul_8 __GMP_PROTO ((mp_ptr dst, mp_srcptr src,
- mp_size_t size, mp_srcptr mult));
-
-mp_limb_t refmpn_add_n_sub_n __GMP_PROTO ((mp_ptr r1p, mp_ptr r2p,
- mp_srcptr s1p, mp_srcptr s2p,
- mp_size_t size));
-mp_limb_t refmpn_add_n_sub_nc __GMP_PROTO ((mp_ptr r1p, mp_ptr r2p,
- mp_srcptr s1p, mp_srcptr s2p,
- mp_size_t size, mp_limb_t carry));
-
-void refmpn_and_n __GMP_PROTO ((mp_ptr wp, mp_srcptr xp, mp_srcptr yp,
- mp_size_t size));
-void refmpn_andn_n __GMP_PROTO ((mp_ptr wp, mp_srcptr xp, mp_srcptr yp,
- mp_size_t size));
-
-mp_limb_t refmpn_big_base __GMP_PROTO ((int));
-
-int refmpn_chars_per_limb __GMP_PROTO ((int));
-void refmpn_clrbit __GMP_PROTO ((mp_ptr, unsigned long));
-int refmpn_cmp __GMP_PROTO ((mp_srcptr s1p, mp_srcptr s2p, mp_size_t size));
-int refmpn_cmp_allowzero __GMP_PROTO ((mp_srcptr, mp_srcptr, mp_size_t));
-int refmpn_cmp_twosizes __GMP_PROTO ((mp_srcptr xp, mp_size_t xsize,
- mp_srcptr yp, mp_size_t ysize));
-
-void refmpn_com __GMP_PROTO ((mp_ptr rp, mp_srcptr sp, mp_size_t size));
-void refmpn_copy __GMP_PROTO ((mp_ptr rp, mp_srcptr sp, mp_size_t size));
-void refmpn_copyi __GMP_PROTO ((mp_ptr rp, mp_srcptr sp, mp_size_t size));
-void refmpn_copyd __GMP_PROTO ((mp_ptr rp, mp_srcptr sp, mp_size_t size));
-void refmpn_copy_extend __GMP_PROTO ((mp_ptr wp, mp_size_t wsize, mp_srcptr xp, mp_size_t xsize));
-
-unsigned refmpn_count_leading_zeros __GMP_PROTO ((mp_limb_t x));
-unsigned refmpn_count_trailing_zeros __GMP_PROTO ((mp_limb_t x));
-
-mp_limb_t refmpn_divexact_by3 __GMP_PROTO ((mp_ptr rp, mp_srcptr sp,
- mp_size_t size));
-mp_limb_t refmpn_divexact_by3c __GMP_PROTO ((mp_ptr rp, mp_srcptr sp,
- mp_size_t size, mp_limb_t carry));
-
-mp_limb_t refmpn_divmod_1 __GMP_PROTO ((mp_ptr rp, mp_srcptr sp, mp_size_t size,
- mp_limb_t divisor));
-mp_limb_t refmpn_divmod_1c __GMP_PROTO ((mp_ptr rp, mp_srcptr sp, mp_size_t size,
- mp_limb_t divisor, mp_limb_t carry));
-mp_limb_t refmpn_divrem_1 __GMP_PROTO ((mp_ptr rp, mp_size_t xsize,
- mp_srcptr sp, mp_size_t size,
- mp_limb_t divisor));
-mp_limb_t refmpn_divrem_1c __GMP_PROTO ((mp_ptr rp, mp_size_t xsize,
- mp_srcptr sp, mp_size_t size,
- mp_limb_t divisor, mp_limb_t carry));
-mp_limb_t refmpn_divrem_2 __GMP_PROTO ((mp_ptr, mp_size_t, mp_ptr, mp_size_t,
- mp_srcptr));
-
-int refmpn_equal_anynail __GMP_PROTO ((mp_srcptr, mp_srcptr, mp_size_t));
-
-void refmpn_fill __GMP_PROTO ((mp_ptr p, mp_size_t s, mp_limb_t v));
-
-mp_limb_t refmpn_gcd_1 __GMP_PROTO ((mp_srcptr xp, mp_size_t xsize, mp_limb_t y));
-mp_limb_t refmpn_gcd __GMP_PROTO ((mp_ptr gp, mp_ptr xp, mp_size_t xsize,
- mp_ptr yp, mp_size_t ysize));
-
-size_t refmpn_get_str __GMP_PROTO ((unsigned char *, int, mp_ptr, mp_size_t));
-
-unsigned long refmpn_hamdist __GMP_PROTO ((mp_srcptr s1p, mp_srcptr s2p,
- mp_size_t size));
-
-mp_limb_t refmpn_invert_limb __GMP_PROTO ((mp_limb_t d));
-void refmpn_ior_n __GMP_PROTO ((mp_ptr wp, mp_srcptr xp, mp_srcptr yp,
- mp_size_t size));
-void refmpn_iorn_n __GMP_PROTO ((mp_ptr wp, mp_srcptr xp, mp_srcptr yp,
- mp_size_t size));
-
-mp_limb_t refmpn_lshift __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, unsigned));
-mp_limb_t refmpn_lshift_or_copy __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, unsigned));
-mp_limb_t refmpn_lshift_or_copy_any __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, unsigned));
-mp_limb_t refmpn_lshiftc __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, unsigned));
-void refmpn_com __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
-
-mp_ptr refmpn_malloc_limbs __GMP_PROTO ((mp_size_t size));
-mp_ptr refmpn_malloc_limbs_aligned __GMP_PROTO ((mp_size_t n, size_t m));
-void refmpn_free_limbs __GMP_PROTO ((mp_ptr p));
-mp_limb_t refmpn_msbone __GMP_PROTO ((mp_limb_t x));
-mp_limb_t refmpn_msbone_mask __GMP_PROTO ((mp_limb_t x));
-mp_ptr refmpn_memdup_limbs __GMP_PROTO ((mp_srcptr ptr, mp_size_t size));
-
-mp_limb_t refmpn_mod_1 __GMP_PROTO ((mp_srcptr sp, mp_size_t size,
- mp_limb_t divisor));
-mp_limb_t refmpn_mod_1c __GMP_PROTO ((mp_srcptr sp, mp_size_t size,
- mp_limb_t divisor, mp_limb_t carry));
-mp_limb_t refmpn_mod_34lsub1 __GMP_PROTO ((mp_srcptr p, mp_size_t n));
-
-mp_limb_t refmpn_mul_1 __GMP_PROTO ((mp_ptr wp, mp_srcptr xp, mp_size_t size,
- mp_limb_t multiplier));
-mp_limb_t refmpn_mul_1c __GMP_PROTO ((mp_ptr wp, mp_srcptr xp, mp_size_t size,
- mp_limb_t multiplier, mp_limb_t carry));
-mp_limb_t refmpn_mul_2 __GMP_PROTO ((mp_ptr dst, mp_srcptr src, mp_size_t size,
- mp_srcptr mult));
-mp_limb_t refmpn_mul_3 __GMP_PROTO ((mp_ptr dst, mp_srcptr src, mp_size_t size,
- mp_srcptr mult));
-mp_limb_t refmpn_mul_4 __GMP_PROTO ((mp_ptr dst, mp_srcptr src, mp_size_t size,
- mp_srcptr mult));
-
-void refmpn_mul_basecase __GMP_PROTO ((mp_ptr prodp,
- mp_srcptr up, mp_size_t usize,
- mp_srcptr vp, mp_size_t vsize));
-void refmpn_mullo_n __GMP_PROTO ((mp_ptr prodp,
- mp_srcptr up, mp_srcptr vp, mp_size_t vsize));
-void refmpn_mul_any __GMP_PROTO ((mp_ptr prodp,
- mp_srcptr up, mp_size_t usize,
- mp_srcptr vp, mp_size_t vsize));
-void refmpn_mul_n __GMP_PROTO ((mp_ptr prodp, mp_srcptr up, mp_srcptr vp,
- mp_size_t size));
-void refmpn_mul __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t));
-
-void refmpn_nand_n __GMP_PROTO ((mp_ptr wp, mp_srcptr xp, mp_srcptr yp,
- mp_size_t size));
-void refmpn_nior_n __GMP_PROTO ((mp_ptr wp, mp_srcptr xp, mp_srcptr yp,
- mp_size_t size));
-mp_limb_t refmpn_neg __GMP_PROTO ((mp_ptr dst, mp_srcptr src, mp_size_t size));
-mp_size_t refmpn_normalize __GMP_PROTO ((mp_srcptr, mp_size_t));
-
-unsigned long refmpn_popcount __GMP_PROTO ((mp_srcptr sp, mp_size_t size));
-mp_limb_t refmpn_preinv_divrem_1 __GMP_PROTO ((mp_ptr rp, mp_size_t xsize,
- mp_srcptr sp, mp_size_t size,
- mp_limb_t divisor,
- mp_limb_t inverse, unsigned shift));
-mp_limb_t refmpn_preinv_mod_1 __GMP_PROTO ((mp_srcptr sp, mp_size_t size,
- mp_limb_t divisor,
- mp_limb_t divisor_inverse));
-
-void refmpn_random __GMP_PROTO ((mp_ptr, mp_size_t));
-void refmpn_random2 __GMP_PROTO ((mp_ptr, mp_size_t));
-mp_limb_t refmpn_random_limb __GMP_PROTO ((void));
-
-mp_limb_t refmpn_rsh1add_n __GMP_PROTO ((mp_ptr wp, mp_srcptr xp, mp_srcptr yp,
- mp_size_t size));
-mp_limb_t refmpn_rsh1sub_n __GMP_PROTO ((mp_ptr wp, mp_srcptr xp, mp_srcptr yp,
- mp_size_t size));
-mp_limb_t refmpn_rshift __GMP_PROTO ((mp_ptr wp, mp_srcptr xp, mp_size_t size,
- unsigned shift));
-mp_limb_t refmpn_rshift_or_copy __GMP_PROTO ((mp_ptr wp,
- mp_srcptr xp, mp_size_t size,
- unsigned shift));
-mp_limb_t refmpn_rshift_or_copy_any __GMP_PROTO ((mp_ptr wp,
- mp_srcptr xp, mp_size_t size,
- unsigned shift));
-
-mp_limb_t refmpn_sb_div_qr __GMP_PROTO ((mp_ptr,
- mp_ptr, mp_size_t,
- mp_srcptr, mp_size_t));
-unsigned long refmpn_scan0 __GMP_PROTO ((mp_srcptr, unsigned long));
-unsigned long refmpn_scan1 __GMP_PROTO ((mp_srcptr, unsigned long));
-void refmpn_setbit __GMP_PROTO ((mp_ptr, unsigned long));
-void refmpn_sqr __GMP_PROTO ((mp_ptr dst, mp_srcptr src, mp_size_t size));
-mp_size_t refmpn_sqrtrem __GMP_PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t));
-
-void refmpn_sub_ddmmss __GMP_PROTO ((mp_limb_t *, mp_limb_t *,
- mp_limb_t, mp_limb_t,
- mp_limb_t, mp_limb_t));
-mp_limb_t refmpn_sub __GMP_PROTO ((mp_ptr rp,
- mp_srcptr s1p, mp_size_t s1size,
- mp_srcptr s2p, mp_size_t s2size));
-mp_limb_t refmpn_sub_1 __GMP_PROTO ((mp_ptr rp, mp_srcptr sp, mp_size_t size,
- mp_limb_t n));
-mp_limb_t refmpn_sub_n __GMP_PROTO ((mp_ptr wp, mp_srcptr xp, mp_srcptr yp,
- mp_size_t size));
-mp_limb_t refmpn_sub_nc __GMP_PROTO ((mp_ptr wp, mp_srcptr xp, mp_srcptr yp,
- mp_size_t size, mp_limb_t carry));
-mp_limb_t refmpn_sublsh1_n __GMP_PROTO ((mp_ptr wp, mp_srcptr xp, mp_srcptr yp,
- mp_size_t size));
-mp_limb_t refmpn_sublsh_n __GMP_PROTO ((mp_ptr wp, mp_srcptr xp, mp_srcptr yp,
- mp_size_t size, unsigned int));
-mp_limb_t refmpn_submul_1 __GMP_PROTO ((mp_ptr wp, mp_srcptr xp, mp_size_t size,
- mp_limb_t multiplier));
-mp_limb_t refmpn_submul_1c __GMP_PROTO ((mp_ptr wp, mp_srcptr xp, mp_size_t size,
- mp_limb_t multiplier, mp_limb_t carry));
-
-mp_limb_signed_t refmpn_rsblsh1_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
-mp_limb_signed_t refmpn_rsblsh2_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
-mp_limb_signed_t refmpn_rsblsh_n __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int));
-
-void refmpn_tdiv_qr __GMP_PROTO ((mp_ptr qp, mp_ptr rp, mp_size_t qxn,
- mp_ptr np, mp_size_t nsize,
- mp_srcptr dp, mp_size_t dsize));
-int refmpn_tstbit __GMP_PROTO ((mp_srcptr, unsigned long));
-
-mp_limb_t refmpn_udiv_qrnnd __GMP_PROTO ((mp_limb_t *, mp_limb_t, mp_limb_t, mp_limb_t));
-mp_limb_t refmpn_udiv_qrnnd_r __GMP_PROTO ((mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t *));
-mp_limb_t refmpn_umul_ppmm __GMP_PROTO ((mp_limb_t *, mp_limb_t, mp_limb_t));
-mp_limb_t refmpn_umul_ppmm_r __GMP_PROTO ((mp_limb_t, mp_limb_t, mp_limb_t *));
-
-void refmpn_xnor_n __GMP_PROTO ((mp_ptr wp, mp_srcptr xp, mp_srcptr yp,
- mp_size_t size));
-void refmpn_xor_n __GMP_PROTO ((mp_ptr wp, mp_srcptr xp, mp_srcptr yp,
- mp_size_t size));
-
-void refmpn_zero __GMP_PROTO ((mp_ptr p, mp_size_t s));
-void refmpn_zero_extend __GMP_PROTO ((mp_ptr, mp_size_t, mp_size_t));
-int refmpn_zero_p __GMP_PROTO ((mp_srcptr ptr, mp_size_t size));
-
-void refmpn_binvert __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
-void refmpn_invert __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
-
-
-void refmpq_add __GMP_PROTO ((mpq_ptr w, mpq_srcptr x, mpq_srcptr y));
-void refmpq_sub __GMP_PROTO ((mpq_ptr w, mpq_srcptr x, mpq_srcptr y));
-
-
-void refmpz_combit __GMP_PROTO ((mpz_ptr r, unsigned long bit));
-unsigned long refmpz_hamdist __GMP_PROTO ((mpz_srcptr x, mpz_srcptr y));
-int refmpz_kronecker __GMP_PROTO ((mpz_srcptr a_orig, mpz_srcptr b_orig));
-int refmpz_jacobi __GMP_PROTO ((mpz_srcptr a_orig, mpz_srcptr b_orig));
-int refmpz_legendre __GMP_PROTO ((mpz_srcptr a_orig, mpz_srcptr b_orig));
-int refmpz_kronecker_si __GMP_PROTO ((mpz_srcptr, long));
-int refmpz_kronecker_ui __GMP_PROTO ((mpz_srcptr, unsigned long));
-int refmpz_si_kronecker __GMP_PROTO ((long, mpz_srcptr));
-int refmpz_ui_kronecker __GMP_PROTO ((unsigned long, mpz_srcptr));
-
-void refmpz_pow_ui __GMP_PROTO ((mpz_ptr w, mpz_srcptr b, unsigned long e));
+void *align_pointer (void *, size_t);
+void *__gmp_allocate_func_aligned (size_t, size_t);
+void *__gmp_allocate_or_reallocate (void *, size_t, size_t);
+char *__gmp_allocate_strdup (const char *);
+char *strtoupper (char *);
+mp_limb_t urandom (void);
+void call_rand_algs (void (*func) (const char *, gmp_randstate_t));
+
+
+void mpf_set_str_or_abort (mpf_ptr, const char *, int);
+
+
+void mpq_set_str_or_abort (mpq_ptr, const char *, int);
+
+
+void mpz_erandomb (mpz_ptr, gmp_randstate_t, unsigned long);
+void mpz_erandomb_nonzero (mpz_ptr, gmp_randstate_t, unsigned long);
+void mpz_errandomb (mpz_ptr, gmp_randstate_t, unsigned long);
+void mpz_errandomb_nonzero (mpz_ptr, gmp_randstate_t, unsigned long);
+void mpz_init_set_n (mpz_ptr, mp_srcptr, mp_size_t);
+void mpz_negrandom (mpz_ptr, gmp_randstate_t);
+int mpz_pow2abs_p (mpz_srcptr) __GMP_ATTRIBUTE_PURE;
+void mpz_set_n (mpz_ptr, mp_srcptr, mp_size_t);
+void mpz_set_str_or_abort (mpz_ptr, const char *, int);
+
+mp_size_t mpn_diff_highest (mp_srcptr, mp_srcptr, mp_size_t) __GMP_ATTRIBUTE_PURE;
+mp_size_t mpn_diff_lowest (mp_srcptr, mp_srcptr, mp_size_t) __GMP_ATTRIBUTE_PURE;
+mp_size_t byte_diff_highest (const void *, const void *, mp_size_t) __GMP_ATTRIBUTE_PURE;
+mp_size_t byte_diff_lowest (const void *, const void *, mp_size_t) __GMP_ATTRIBUTE_PURE;
+
+
+mp_limb_t ref_addc_limb (mp_limb_t *, mp_limb_t, mp_limb_t);
+mp_limb_t ref_bswap_limb (mp_limb_t);
+unsigned long ref_popc_limb (mp_limb_t);
+mp_limb_t ref_subc_limb (mp_limb_t *, mp_limb_t, mp_limb_t);
+
+
+void refmpf_add (mpf_ptr, mpf_srcptr, mpf_srcptr);
+void refmpf_add_ulp (mpf_ptr );
+void refmpf_fill (mpf_ptr, mp_size_t, mp_limb_t);
+void refmpf_normalize (mpf_ptr);
+void refmpf_set_prec_limbs (mpf_ptr, unsigned long);
+unsigned long refmpf_set_overlap (mpf_ptr, mpf_srcptr);
+void refmpf_sub (mpf_ptr, mpf_srcptr, mpf_srcptr);
+int refmpf_validate (const char *, mpf_srcptr, mpf_srcptr);
+int refmpf_validate_division (const char *, mpf_srcptr, mpf_srcptr, mpf_srcptr);
+
+
+mp_limb_t refmpn_addcnd_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t refmpn_subcnd_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
+
+mp_limb_t refmpn_add (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
+mp_limb_t refmpn_add_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t refmpn_add_err1_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t refmpn_add_err2_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t refmpn_add_err3_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t refmpn_add_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+mp_limb_t refmpn_add_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t refmpn_addlsh1_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+mp_limb_t refmpn_addlsh2_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+mp_limb_t refmpn_addlsh_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned);
+mp_limb_t refmpn_addlsh1_n_ip1 (mp_ptr, mp_srcptr, mp_size_t);
+mp_limb_t refmpn_addlsh2_n_ip1 (mp_ptr, mp_srcptr, mp_size_t);
+mp_limb_t refmpn_addlsh_n_ip1 (mp_ptr, mp_srcptr, mp_size_t, unsigned);
+mp_limb_t refmpn_addlsh1_n_ip2 (mp_ptr, mp_srcptr, mp_size_t);
+mp_limb_t refmpn_addlsh2_n_ip2 (mp_ptr, mp_srcptr, mp_size_t);
+mp_limb_t refmpn_addlsh_n_ip2 (mp_ptr, mp_srcptr, mp_size_t, unsigned);
+mp_limb_t refmpn_addlsh1_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t refmpn_addlsh2_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t refmpn_addlsh_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned, mp_limb_t);
+mp_limb_t refmpn_addmul_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t refmpn_addmul_1c (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t);
+mp_limb_t refmpn_addmul_2 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
+mp_limb_t refmpn_addmul_3 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
+mp_limb_t refmpn_addmul_4 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
+mp_limb_t refmpn_addmul_5 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
+mp_limb_t refmpn_addmul_6 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
+mp_limb_t refmpn_addmul_7 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
+mp_limb_t refmpn_addmul_8 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
+
+mp_limb_t refmpn_add_n_sub_n (mp_ptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+mp_limb_t refmpn_add_n_sub_nc (mp_ptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
+
+void refmpn_and_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+void refmpn_andn_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+
+mp_limb_t refmpn_big_base (int);
+
+int refmpn_chars_per_limb (int);
+void refmpn_clrbit (mp_ptr, unsigned long);
+int refmpn_cmp (mp_srcptr, mp_srcptr, mp_size_t);
+int refmpn_cmp_allowzero (mp_srcptr, mp_srcptr, mp_size_t);
+int refmpn_cmp_twosizes (mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
+
+void refmpn_com (mp_ptr, mp_srcptr, mp_size_t);
+void refmpn_copy (mp_ptr, mp_srcptr, mp_size_t);
+void refmpn_copyi (mp_ptr, mp_srcptr, mp_size_t);
+void refmpn_copyd (mp_ptr, mp_srcptr, mp_size_t);
+void refmpn_copy_extend (mp_ptr, mp_size_t, mp_srcptr, mp_size_t);
+
+unsigned refmpn_count_leading_zeros (mp_limb_t);
+unsigned refmpn_count_trailing_zeros (mp_limb_t);
+
+mp_limb_t refmpn_divexact_by3 (mp_ptr, mp_srcptr, mp_size_t);
+mp_limb_t refmpn_divexact_by3c (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+
+mp_limb_t refmpn_divmod_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t refmpn_divmod_1c (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t);
+mp_limb_t refmpn_divrem_1 (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t refmpn_divrem_1c (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t);
+mp_limb_t refmpn_divrem_2 (mp_ptr, mp_size_t, mp_ptr, mp_size_t, mp_srcptr);
+
+int refmpn_equal_anynail (mp_srcptr, mp_srcptr, mp_size_t);
+
+void refmpn_fill (mp_ptr, mp_size_t, mp_limb_t);
+
+mp_limb_t refmpn_gcd_1 (mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t refmpn_gcd (mp_ptr, mp_ptr, mp_size_t, mp_ptr, mp_size_t);
+
+size_t refmpn_get_str (unsigned char *, int, mp_ptr, mp_size_t);
+
+unsigned long refmpn_hamdist (mp_srcptr, mp_srcptr, mp_size_t);
+
+mp_limb_t refmpn_invert_limb (mp_limb_t);
+void refmpn_ior_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+void refmpn_iorn_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+
+mp_limb_t refmpn_lshift (mp_ptr, mp_srcptr, mp_size_t, unsigned);
+mp_limb_t refmpn_lshift_or_copy (mp_ptr, mp_srcptr, mp_size_t, unsigned);
+mp_limb_t refmpn_lshift_or_copy_any (mp_ptr, mp_srcptr, mp_size_t, unsigned);
+mp_limb_t refmpn_lshiftc (mp_ptr, mp_srcptr, mp_size_t, unsigned);
+void refmpn_com (mp_ptr, mp_srcptr, mp_size_t);
+
+mp_ptr refmpn_malloc_limbs (mp_size_t);
+mp_ptr refmpn_malloc_limbs_aligned (mp_size_t, size_t);
+void refmpn_free_limbs (mp_ptr);
+mp_limb_t refmpn_msbone (mp_limb_t);
+mp_limb_t refmpn_msbone_mask (mp_limb_t);
+mp_ptr refmpn_memdup_limbs (mp_srcptr, mp_size_t);
+
+mp_limb_t refmpn_mod_1 (mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t refmpn_mod_1c (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t);
+mp_limb_t refmpn_mod_34lsub1 (mp_srcptr, mp_size_t);
+
+mp_limb_t refmpn_mul_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t refmpn_mul_1c (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t);
+mp_limb_t refmpn_mul_2 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
+mp_limb_t refmpn_mul_3 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
+mp_limb_t refmpn_mul_4 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
+mp_limb_t refmpn_mul_5 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
+mp_limb_t refmpn_mul_6 (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr);
+
+void refmpn_mul_basecase (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
+void refmpn_mulmid_basecase (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
+void refmpn_toom42_mulmid (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr);
+void refmpn_mulmid_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+void refmpn_mulmid (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
+void refmpn_mullo_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+void refmpn_mul_any (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
+void refmpn_mul_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+void refmpn_mul (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
+
+void refmpn_nand_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+void refmpn_nior_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+mp_limb_t refmpn_neg (mp_ptr, mp_srcptr, mp_size_t);
+mp_size_t refmpn_normalize (mp_srcptr, mp_size_t);
+
+unsigned long refmpn_popcount (mp_srcptr, mp_size_t);
+mp_limb_t refmpn_preinv_divrem_1 (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, unsigned);
+mp_limb_t refmpn_preinv_mod_1 (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t);
+
+void refmpn_random (mp_ptr, mp_size_t);
+void refmpn_random2 (mp_ptr, mp_size_t);
+mp_limb_t refmpn_random_limb (void);
+
+mp_limb_t refmpn_rsh1add_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+mp_limb_t refmpn_rsh1sub_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+mp_limb_t refmpn_rshift (mp_ptr, mp_srcptr, mp_size_t, unsigned);
+mp_limb_t refmpn_rshift_or_copy (mp_ptr, mp_srcptr, mp_size_t, unsigned);
+mp_limb_t refmpn_rshift_or_copy_any (mp_ptr, mp_srcptr, mp_size_t, unsigned);
+
+mp_limb_t refmpn_sb_div_qr (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t);
+unsigned long refmpn_scan0 (mp_srcptr, unsigned long);
+unsigned long refmpn_scan1 (mp_srcptr, unsigned long);
+void refmpn_setbit (mp_ptr, unsigned long);
+void refmpn_sqr (mp_ptr, mp_srcptr, mp_size_t);
+mp_size_t refmpn_sqrtrem (mp_ptr, mp_ptr, mp_srcptr, mp_size_t);
+
+void refmpn_sub_ddmmss (mp_limb_t *, mp_limb_t *, mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t);
+mp_limb_t refmpn_sub (mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t);
+mp_limb_t refmpn_sub_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t refmpn_sub_err1_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t refmpn_sub_err2_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t refmpn_sub_err3_n (mp_ptr, mp_srcptr, mp_srcptr, mp_ptr, mp_srcptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t refmpn_sub_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+mp_limb_t refmpn_sub_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t refmpn_sublsh1_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+mp_limb_t refmpn_sublsh2_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+mp_limb_t refmpn_sublsh_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int);
+mp_limb_t refmpn_sublsh1_n_ip1 (mp_ptr, mp_srcptr, mp_size_t);
+mp_limb_t refmpn_sublsh2_n_ip1 (mp_ptr, mp_srcptr, mp_size_t);
+mp_limb_t refmpn_sublsh_n_ip1 (mp_ptr, mp_srcptr, mp_size_t, unsigned int);
+mp_limb_t refmpn_sublsh1_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t refmpn_sublsh2_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t refmpn_sublsh_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int, mp_limb_t);
+mp_limb_t refmpn_submul_1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t refmpn_submul_1c (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t);
+
+mp_limb_signed_t refmpn_rsblsh1_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+mp_limb_signed_t refmpn_rsblsh2_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+mp_limb_signed_t refmpn_rsblsh_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int);
+mp_limb_signed_t refmpn_rsblsh1_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_signed_t);
+mp_limb_signed_t refmpn_rsblsh2_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_limb_signed_t);
+mp_limb_signed_t refmpn_rsblsh_nc (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, unsigned int, mp_limb_signed_t);
+
+void refmpn_tdiv_qr (mp_ptr, mp_ptr, mp_size_t, mp_ptr, mp_size_t, mp_srcptr, mp_size_t);
+int refmpn_tstbit (mp_srcptr, unsigned long);
+
+mp_limb_t refmpn_udiv_qrnnd (mp_limb_t *, mp_limb_t, mp_limb_t, mp_limb_t);
+mp_limb_t refmpn_udiv_qrnnd_r (mp_limb_t, mp_limb_t, mp_limb_t, mp_limb_t *);
+mp_limb_t refmpn_umul_ppmm (mp_limb_t *, mp_limb_t, mp_limb_t);
+mp_limb_t refmpn_umul_ppmm_r (mp_limb_t, mp_limb_t, mp_limb_t *);
+
+void refmpn_xnor_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+void refmpn_xor_n (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+
+void refmpn_zero (mp_ptr, mp_size_t);
+void refmpn_zero_extend (mp_ptr, mp_size_t, mp_size_t);
+int refmpn_zero_p (mp_srcptr, mp_size_t);
+
+void refmpn_binvert (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);
+void refmpn_invert (mp_ptr, mp_srcptr, mp_size_t, mp_ptr);
+
+
+void refmpq_add (mpq_ptr, mpq_srcptr, mpq_srcptr);
+void refmpq_sub (mpq_ptr, mpq_srcptr, mpq_srcptr);
+
+
+void refmpz_combit (mpz_ptr, unsigned long);
+unsigned long refmpz_hamdist (mpz_srcptr, mpz_srcptr);
+int refmpz_kronecker (mpz_srcptr, mpz_srcptr);
+int refmpz_jacobi (mpz_srcptr, mpz_srcptr);
+int refmpz_legendre (mpz_srcptr, mpz_srcptr);
+int refmpz_kronecker_si (mpz_srcptr, long);
+int refmpz_kronecker_ui (mpz_srcptr, unsigned long);
+int refmpz_si_kronecker (long, mpz_srcptr);
+int refmpz_ui_kronecker (unsigned long, mpz_srcptr);
+
+void refmpz_pow_ui (mpz_ptr, mpz_srcptr, unsigned long);
#if defined (__cplusplus)
full implementation, just enough for our purposes. */
#ifdef __cplusplus
-#if HAVE_SSTREAM
+#if 1 || HAVE_SSTREAM
#include <sstream>
#else /* ! HAVE_SSTREAM */
#include <string>
#define TESTS_REPS(count, argv, argc) \
do { \
char *envval, *end; \
- long repfactor; \
+ double repfactor; \
+ int reps_nondefault = 0; \
if (argc > 1) \
{ \
count = strtol (argv[1], &end, 0); \
} \
argv++; \
argc--; \
+ reps_nondefault = 1; \
} \
envval = getenv ("GMP_CHECK_REPFACTOR"); \
if (envval != NULL) \
{ \
- repfactor = strtol (envval, &end, 0); \
+ repfactor = strtod (envval, &end); \
if (*end || repfactor <= 0) \
{ \
- fprintf (stderr, "Invalid repfactor: %ld.\n", repfactor); \
+ fprintf (stderr, "Invalid repfactor: %f.\n", repfactor); \
exit (1); \
} \
count *= repfactor; \
+ reps_nondefault = 1; \
} \
+ if (reps_nondefault) \
+ printf ("Running test with %ld repetitions (include this in bug reports)\n",\
+ (long) count); \
} while (0)
Copyright 1999, 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation,
Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
/* Future: Would like commas printed between limbs in hex or binary, but
void
byte_trace (const char *name, const void *ptr, mp_size_t size)
{
- char *fmt;
+ const char *fmt;
mp_size_t i;
mp_trace_start (name);
dnl x86 calling conventions checking.
-dnl Copyright 2000, 2003 Free Software Foundation, Inc.
-dnl
-dnl This file is part of the GNU MP Library.
-dnl
-dnl The GNU MP Library is free software; you can redistribute it and/or
-dnl modify it under the terms of the GNU Lesser General Public License as
+dnl Copyright 2000, 2003, 2010 Free Software Foundation, Inc.
+
+dnl This file is part of the GNU MP Library test suite.
+
+dnl The GNU MP Library test suite is free software; you can redistribute it
+dnl and/or modify it under the terms of the GNU General Public License as
dnl published by the Free Software Foundation; either version 3 of the
dnl License, or (at your option) any later version.
-dnl
-dnl The GNU MP Library is distributed in the hope that it will be useful,
-dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
-dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-dnl Lesser General Public License for more details.
-dnl
-dnl You should have received a copy of the GNU Lesser General Public License
-dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
+dnl The GNU MP Library test suite is distributed in the hope that it will be
+dnl useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+dnl Public License for more details.
+
+dnl You should have received a copy of the GNU General Public License along
+dnl with the GNU MP Library test suite. If not, see
+dnl http://www.gnu.org/licenses/.
+
+
+dnl The current version of the code attempts to keep the call/return
+dnl prediction stack valid, by matching calls and returns.
include(`../config.m4')
C Execute an fldcw, setting the x87 control word to cw.
PROLOGUE(x86_fldcw)
- fldcw 4(%esp)
- ret
+ fldcw 4(%esp)
+ ret
EPILOGUE()
C Execute an fstcw, returning the current x87 control word.
PROLOGUE(x86_fstcw)
- xorl %eax, %eax
- pushl %eax
- fstcw (%esp)
- popl %eax
- ret
+ xorl %eax, %eax
+ pushl %eax
+ fstcw (%esp)
+ popl %eax
+ ret
EPILOGUE()
-dnl Instrumented profiling doesn't come out quite right below, since we
-dnl don't do an actual "ret". There's only a few instructions here, so
-dnl there's no great need to get them separately accounted, just let them
-dnl get attributed to the caller.
+dnl Instrumented profiling doesn't come out quite right below, since we don't
+dnl do an actual "ret". There's only a few instructions here, so there's no
+dnl great need to get them separately accounted, just let them get attributed
+dnl to the caller. FIXME this comment might no longer be true.
ifelse(WANT_PROFILING,instrument,
`define(`WANT_PROFILING',no)')
C Perhaps the finit should be done only if the tags word isn't clear, but
C nothing uses the rounding mode or anything at the moment.
-define(G,
-m4_assert_numargs(1)
-`GSYM_PREFIX`'$1')
+define(`WANT_EBX', eval(4*0)($1))
+define(`WANT_EBP', eval(4*1)($1))
+define(`WANT_ESI', eval(4*2)($1))
+define(`WANT_EDI', eval(4*3)($1))
- .text
- ALIGN(8)
-PROLOGUE(calling_conventions)
- movl (%esp), %eax
- movl %eax, G(calling_conventions_retaddr)
+define(`JUNK_EAX', eval(4*4)($1))
+define(`JUNK_ECX', eval(4*5)($1))
+define(`JUNK_EDX', eval(4*6)($1))
- movl $L(return), (%esp)
+define(`SAVE_EBX', eval(4*7)($1))
+define(`SAVE_EBP', eval(4*8)($1))
+define(`SAVE_ESI', eval(4*9)($1))
+define(`SAVE_EDI', eval(4*10)($1))
- movl %ebx, G(calling_conventions_save_ebx)
- movl %esi, G(calling_conventions_save_esi)
- movl %edi, G(calling_conventions_save_edi)
- movl %ebp, G(calling_conventions_save_ebp)
+define(`RETADDR', eval(4*11)($1))
- movl $0x01234567, %ebx
- movl $0x89ABCDEF, %esi
- movl $0xFEDCBA98, %edi
- movl $0x76543210, %ebp
+define(`EBX', eval(4*12)($1))
+define(`EBP', eval(4*13)($1))
+define(`ESI', eval(4*14)($1))
+define(`EDI', eval(4*15)($1))
+define(`EFLAGS', eval(4*16)($1))
- C try to provoke a problem by starting with junk in the registers,
- C especially in %eax and %edx which will be return values
- movl $0x70246135, %eax
- movl $0x8ACE9BDF, %ecx
- movl $0xFDB97531, %edx
- jmp *G(calling_conventions_function)
+define(G,
+m4_assert_numargs(1)
+`GSYM_PREFIX`'$1')
-L(return):
- movl %ebx, G(calling_conventions_ebx)
- movl %esi, G(calling_conventions_esi)
- movl %edi, G(calling_conventions_edi)
- movl %ebp, G(calling_conventions_ebp)
+ TEXT
+ ALIGN(8)
+PROLOGUE(calling_conventions)
+ LEA( G(calling_conventions_values), %ecx)
+ popl RETADDR(%ecx)
+
+ movl %ebx, SAVE_EBX(%ecx)
+ movl %ebp, SAVE_EBP(%ecx)
+ movl %esi, SAVE_ESI(%ecx)
+ movl %edi, SAVE_EDI(%ecx)
+
+ C Values we expect to see unchanged, as per amd64check.c
+ movl WANT_EBX(%ecx), %ebx
+ movl WANT_EBP(%ecx), %ebp
+ movl WANT_ESI(%ecx), %esi
+ movl WANT_EDI(%ecx), %edi
+
+ C Try to provoke a problem by starting with junk in the caller-saves
+ C registers, especially in %eax and %edx which will be return values
+ movl JUNK_EAX(%ecx), %eax
+ movl JUNK_EDX(%ecx), %edx
+C movl JUNK_ECX(%ecx), %ecx
+
+ifdef(`PIC',`
+ LEA( G(calling_conventions_function), %ecx)
+ call *(%ecx)
+',`
+ call *G(calling_conventions_function)
+')
+
+ LEA( G(calling_conventions_values), %ecx)
+
+ movl %ebx, EBX(%ecx)
+ movl %ebp, EBP(%ecx)
+ movl %esi, ESI(%ecx)
+ movl %edi, EDI(%ecx)
pushf
popl %ebx
- movl %ebx, G(calling_conventions_eflags)
+ movl %ebx, EFLAGS(%ecx)
+ movl SAVE_EBX(%ecx), %ebx
+ movl SAVE_ESI(%ecx), %esi
+ movl SAVE_EDI(%ecx), %edi
+ movl SAVE_EBP(%ecx), %ebp
+
+ pushl RETADDR(%ecx)
+
+ifdef(`PIC',`
+ LEA( G(calling_conventions_fenv), %ecx)
+ fstenv (%ecx)
+',`
fstenv G(calling_conventions_fenv)
+')
finit
- movl G(calling_conventions_save_ebx), %ebx
- movl G(calling_conventions_save_esi), %esi
- movl G(calling_conventions_save_edi), %edi
- movl G(calling_conventions_save_ebp), %ebp
-
- jmp *G(calling_conventions_retaddr)
+ ret
EPILOGUE()
-
/* x86 calling conventions checking. */
/*
-Copyright 2000, 2001 Free Software Foundation, Inc.
+Copyright 2000, 2001, 2010 Free Software Foundation, Inc.
-This file is part of the GNU MP Library.
+This file is part of the GNU MP Library test suite.
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 3 of the License, or (at your
-option) any later version.
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-License for more details.
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+Public License for more details.
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include "gmp.h"
#include "tests.h"
-/* temporaries */
-int calling_conventions_save_ebx;
-int calling_conventions_save_esi;
-int calling_conventions_save_edi;
-int calling_conventions_save_ebp;
-int calling_conventions_retaddr;
-int calling_conventions_retval;
+/* Vector of constants and register values. We use one vector to allow access
+ via a base pointer, very beneficial for the PIC-enabled amd64call.asm. */
+mp_limb_t calling_conventions_values[17] =
+{
+ CNST_LIMB(0x12345678), /* want_ebx */
+ CNST_LIMB(0x89ABCDEF), /* want_ebp */
+ CNST_LIMB(0xDEADBEEF), /* want_esi */
+ CNST_LIMB(0xFFEEDDCC), /* want_edi */
+
+ CNST_LIMB(0xFEEDABBA), /* JUNK_EAX */
+ CNST_LIMB(0xAB78DE89), /* JUNK_ECX */
+ CNST_LIMB(0x12389018) /* JUNK_EDX */
+
+ /* rest of array used for dynamic values. */
+};
+
+/* Index starts for various regions in above vector. */
+#define WANT 0
+#define JUNK 4
+#define SAVE 7
+#define RETADDR 11
+#define VAL 12
+#define EFLAGS 16
+
/* values to check */
struct {
unsigned tag;
unsigned other[4];
} calling_conventions_fenv;
-int calling_conventions_ebx;
-int calling_conventions_esi;
-int calling_conventions_edi;
-int calling_conventions_ebp;
-int calling_conventions_eflags;
/* expected values, as per x86call.asm */
#define VALUE_EBX 0x01234567
#define VALUE_EDI 0xFEDCBA98
#define VALUE_EBP 0x76543210
+
+const char *regname[] = {"ebx", "ebp", "esi", "edi"};
+
#define DIR_BIT(eflags) (((eflags) & (1<<10)) != 0)
{
const char *header = "Violated calling conventions:\n";
int ret = 1;
+ int i;
#define CHECK(callreg, regstr, value) \
if (callreg != value) \
ret = 0; \
}
- CHECK (calling_conventions_ebx, "ebx", VALUE_EBX);
- CHECK (calling_conventions_esi, "esi", VALUE_ESI);
- CHECK (calling_conventions_edi, "edi", VALUE_EDI);
- CHECK (calling_conventions_ebp, "ebp", VALUE_EBP);
+ for (i = 0; i < 4; i++)
+ {
+ CHECK (calling_conventions_values[VAL+i], regname[i], calling_conventions_values[WANT+i]);
+ }
- if (DIR_BIT (calling_conventions_eflags) != 0)
+ if (DIR_BIT (calling_conventions_values[EFLAGS]) != 0)
{
printf ("%s eflags dir bit got %d want 0\n",
- header, DIR_BIT (calling_conventions_eflags));
+ header, DIR_BIT (calling_conventions_values[EFLAGS]));
header = "";
ret = 0;
}
## Process this file with automake to generate Makefile.in
-# Copyright 2000, 2001, 2002, 2003, 2005 Free Software Foundation, Inc.
+# Copyright 2000, 2001, 2002, 2003, 2005, 2006, 2007, 2008, 2009,
+# 2010, 2011 Free Software Foundation, Inc.
#
# This file is part of the GNU MP Library.
#
common.c divrem1div.c divrem1inv.c divrem2div.c divrem2inv.c \
freq.c \
gcdext_single.c gcdext_double.c gcdextod.c gcdextos.c \
- jacbase1.c jacbase2.c jacbase3.c \
- mod_1_div.c mod_1_inv.c modlinv.c \
+ hgcd_lehmer.c hgcd_appr_lehmer.c hgcd_reduce_1.c hgcd_reduce_2.c \
+ jacbase1.c jacbase2.c jacbase3.c jacbase4.c \
+ mod_1_div.c mod_1_inv.c mod_1_1-1.c mod_1_1-2.c modlinv.c \
noop.c powm_mod.c powm_redc.c pre_divrem_1.c \
set_strb.c set_strs.c set_strp.c time.c
# program. This can always be forced with "make speed_LDFLAGS=-all-static
# ..." if desired, see tune/README.
-EXTRA_PROGRAMS = speed speed-dynamic speed-ext tuneup
+EXTRA_PROGRAMS = speed speed-dynamic speed-ext tuneup tune-gcd-p
DEPENDENCIES = libspeed.la
-LDADD = $(DEPENDENCIES)
+LDADD = $(DEPENDENCIES) $(TUNE_LIBS)
speed_SOURCES = speed.c
speed_LDFLAGS = $(STATIC)
speed_ext_LDFLAGS = $(STATIC)
tuneup_SOURCES = tuneup.c
-nodist_tuneup_SOURCES = sqr_basecase.c $(TUNE_MPN_SRCS)
+nodist_tuneup_SOURCES = sqr_basecase.c fac_ui.c $(TUNE_MPN_SRCS)
tuneup_DEPENDENCIES = $(TUNE_SQR_OBJ) libspeed.la
-tuneup_LDADD = $(tuneup_DEPENDENCIES)
+tuneup_LDADD = $(tuneup_DEPENDENCIES) $(TUNE_LIBS)
tuneup_LDFLAGS = $(STATIC)
+tune_gcd_p_SOURCES = tune-gcd-p.c
+tune_gcd_p_DEPENDENCIES = ../mpn/gcd.c
+tune_gcd_p_LDFLAGS = $(STATIC)
+
tune:
$(MAKE) $(AM_MAKEFLAGS) tuneup$(EXEEXT)
# $(MANY_CLEAN) and $(MANY_DISTCLEAN) are hooks for many.pl
CLEANFILES = $(EXTRA_PROGRAMS) $(EXTRA_LTLIBRARIES) \
- $(TUNE_MPN_SRCS) sqr_asm.asm \
+ $(TUNE_MPN_SRCS) fac_ui.c sqr_asm.asm \
stg.gnuplot stg.data \
mtg.gnuplot mtg.data \
fibg.gnuplot fibg.data \
# recompiled object will be rebuilt if that file changes.
TUNE_MPN_SRCS = $(TUNE_MPN_SRCS_BASIC) divrem_1.c mod_1.c
-TUNE_MPN_SRCS_BASIC = bdiv_q.c bdiv_qr.c \
+TUNE_MPN_SRCS_BASIC = div_qr_2.c bdiv_q.c bdiv_qr.c \
dcpi1_div_qr.c dcpi1_divappr_q.c dcpi1_bdiv_qr.c dcpi1_bdiv_q.c \
invertappr.c invert.c binvert.c divrem_2.c gcd.c gcdext.c \
- get_str.c set_str.c matrix22_mul.c hgcd.c mul_n.c sqr.c \
+ get_str.c set_str.c matrix22_mul.c \
+ hgcd.c hgcd_appr.c hgcd_reduce.c \
+ mul_n.c sqr.c powm_sec.c \
mullo_n.c mul_fft.c mul.c tdiv_qr.c mulmod_bnm1.c sqrmod_bnm1.c \
+ mulmid.c mulmid_n.c toom42_mulmid.c \
nussbaumer_mul.c toom6h_mul.c toom8h_mul.c toom6_sqr.c toom8_sqr.c \
toom22_mul.c toom2_sqr.c toom33_mul.c toom3_sqr.c toom44_mul.c toom4_sqr.c
echo 'define(SQR_TOOM2_THRESHOLD_OVERRIDE,SQR_TOOM2_THRESHOLD_MAX)' >sqr_asm.asm
echo 'include(../mpn/sqr_basecase.asm)' >>sqr_asm.asm
+# FIXME: Should it depend on $(top_builddir)/fac_ui.h too?
+fac_ui.c: $(top_builddir)/mpz/fac_ui.c
+ echo "#define TUNE_PROGRAM_BUILD 1" >fac_ui.c
+ echo "#define __gmpz_fac_ui mpz_fac_ui_tune" >>fac_ui.c
+ echo "#define __gmpz_oddfac_1 mpz_oddfac_1_tune" >>fac_ui.c
+ echo "#include \"mpz/oddfac_1.c\"" >>fac_ui.c
+ echo "#include \"mpz/fac_ui.c\"" >>fac_ui.c
include ../mpn/Makeasm.am
-# Makefile.in generated by automake 1.11.1 from Makefile.am.
+# Makefile.in generated by automake 1.11.6 from Makefile.am.
# @configure_input@
# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
-# 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation,
-# Inc.
+# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+# Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
# with or without modifications, as long as this notice is preserved.
@SET_MAKE@
-# Copyright 2000, 2001, 2002, 2003, 2005 Free Software Foundation, Inc.
+# Copyright 2000, 2001, 2002, 2003, 2005, 2006, 2007, 2008, 2009,
+# 2010, 2011 Free Software Foundation, Inc.
#
# This file is part of the GNU MP Library.
#
# along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
VPATH = @srcdir@
+am__make_dryrun = \
+ { \
+ am__dry=no; \
+ case $$MAKEFLAGS in \
+ *\\[\ \ ]*) \
+ echo 'am--echo: ; @echo "AM" OK' | $(MAKE) -f - 2>/dev/null \
+ | grep '^AM OK$$' >/dev/null || am__dry=yes;; \
+ *) \
+ for am__flg in $$MAKEFLAGS; do \
+ case $$am__flg in \
+ *=*|--*) ;; \
+ *n*) am__dry=yes; break;; \
+ esac; \
+ done;; \
+ esac; \
+ test $$am__dry = yes; \
+ }
pkgdatadir = $(datadir)/@PACKAGE@
pkgincludedir = $(includedir)/@PACKAGE@
pkglibdir = $(libdir)/@PACKAGE@
POST_UNINSTALL = :
build_triplet = @build@
host_triplet = @host@
-ANSI2KNR = $(top_builddir)/ansi2knr
EXTRA_PROGRAMS = speed$(EXEEXT) speed-dynamic$(EXEEXT) \
- speed-ext$(EXEEXT) tuneup$(EXEEXT)
+ speed-ext$(EXEEXT) tuneup$(EXEEXT) tune-gcd-p$(EXEEXT)
DIST_COMMON = README $(noinst_HEADERS) $(srcdir)/../mpn/Makeasm.am \
$(srcdir)/Makefile.am $(srcdir)/Makefile.in
subdir = tune
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \
- $(top_srcdir)/configure.in
+ $(top_srcdir)/configure.ac
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
$(ACLOCAL_M4)
mkinstalldirs = $(install_sh) -d
am__DEPENDENCIES_1 =
am__DEPENDENCIES_2 = $(am__DEPENDENCIES_1) \
$(top_builddir)/tests/libtests.la $(top_builddir)/libgmp.la
-am_libspeed_la_OBJECTS = common$U.lo divrem1div$U.lo divrem1inv$U.lo \
- divrem2div$U.lo divrem2inv$U.lo freq$U.lo gcdext_single$U.lo \
- gcdext_double$U.lo gcdextod$U.lo gcdextos$U.lo jacbase1$U.lo \
- jacbase2$U.lo jacbase3$U.lo mod_1_div$U.lo mod_1_inv$U.lo \
- modlinv$U.lo noop$U.lo powm_mod$U.lo powm_redc$U.lo \
- pre_divrem_1$U.lo set_strb$U.lo set_strs$U.lo set_strp$U.lo \
- time$U.lo
+am_libspeed_la_OBJECTS = common.lo divrem1div.lo divrem1inv.lo \
+ divrem2div.lo divrem2inv.lo freq.lo gcdext_single.lo \
+ gcdext_double.lo gcdextod.lo gcdextos.lo hgcd_lehmer.lo \
+ hgcd_appr_lehmer.lo hgcd_reduce_1.lo hgcd_reduce_2.lo \
+ jacbase1.lo jacbase2.lo jacbase3.lo jacbase4.lo mod_1_div.lo \
+ mod_1_inv.lo mod_1_1-1.lo mod_1_1-2.lo modlinv.lo noop.lo \
+ powm_mod.lo powm_redc.lo pre_divrem_1.lo set_strb.lo \
+ set_strs.lo set_strp.lo time.lo
libspeed_la_OBJECTS = $(am_libspeed_la_OBJECTS)
libspeed_la_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \
$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
$(libspeed_la_LDFLAGS) $(LDFLAGS) -o $@
-am_speed_OBJECTS = speed$U.$(OBJEXT)
+am_speed_OBJECTS = speed.$(OBJEXT)
speed_OBJECTS = $(am_speed_OBJECTS)
speed_LDADD = $(LDADD)
-speed_DEPENDENCIES = $(DEPENDENCIES)
+speed_DEPENDENCIES = $(DEPENDENCIES) $(am__DEPENDENCIES_1)
speed_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
--mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(speed_LDFLAGS) \
$(LDFLAGS) -o $@
-am_speed_dynamic_OBJECTS = speed$U.$(OBJEXT)
+am_speed_dynamic_OBJECTS = speed.$(OBJEXT)
speed_dynamic_OBJECTS = $(am_speed_dynamic_OBJECTS)
speed_dynamic_LDADD = $(LDADD)
-speed_dynamic_DEPENDENCIES = $(DEPENDENCIES)
-am_speed_ext_OBJECTS = speed-ext$U.$(OBJEXT)
+speed_dynamic_DEPENDENCIES = $(DEPENDENCIES) $(am__DEPENDENCIES_1)
+am_speed_ext_OBJECTS = speed-ext.$(OBJEXT)
speed_ext_OBJECTS = $(am_speed_ext_OBJECTS)
speed_ext_LDADD = $(LDADD)
-speed_ext_DEPENDENCIES = $(DEPENDENCIES)
+speed_ext_DEPENDENCIES = $(DEPENDENCIES) $(am__DEPENDENCIES_1)
speed_ext_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \
$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
$(speed_ext_LDFLAGS) $(LDFLAGS) -o $@
-am_tuneup_OBJECTS = tuneup$U.$(OBJEXT)
-am__objects_1 = bdiv_q$U.$(OBJEXT) bdiv_qr$U.$(OBJEXT) \
- dcpi1_div_qr$U.$(OBJEXT) dcpi1_divappr_q$U.$(OBJEXT) \
- dcpi1_bdiv_qr$U.$(OBJEXT) dcpi1_bdiv_q$U.$(OBJEXT) \
- invertappr$U.$(OBJEXT) invert$U.$(OBJEXT) binvert$U.$(OBJEXT) \
- divrem_2$U.$(OBJEXT) gcd$U.$(OBJEXT) gcdext$U.$(OBJEXT) \
- get_str$U.$(OBJEXT) set_str$U.$(OBJEXT) \
- matrix22_mul$U.$(OBJEXT) hgcd$U.$(OBJEXT) mul_n$U.$(OBJEXT) \
- sqr$U.$(OBJEXT) mullo_n$U.$(OBJEXT) mul_fft$U.$(OBJEXT) \
- mul$U.$(OBJEXT) tdiv_qr$U.$(OBJEXT) mulmod_bnm1$U.$(OBJEXT) \
- sqrmod_bnm1$U.$(OBJEXT) nussbaumer_mul$U.$(OBJEXT) \
- toom6h_mul$U.$(OBJEXT) toom8h_mul$U.$(OBJEXT) \
- toom6_sqr$U.$(OBJEXT) toom8_sqr$U.$(OBJEXT) \
- toom22_mul$U.$(OBJEXT) toom2_sqr$U.$(OBJEXT) \
- toom33_mul$U.$(OBJEXT) toom3_sqr$U.$(OBJEXT) \
- toom44_mul$U.$(OBJEXT) toom4_sqr$U.$(OBJEXT)
-am__objects_2 = $(am__objects_1) divrem_1$U.$(OBJEXT) \
- mod_1$U.$(OBJEXT)
-nodist_tuneup_OBJECTS = sqr_basecase$U.$(OBJEXT) $(am__objects_2)
+am_tune_gcd_p_OBJECTS = tune-gcd-p.$(OBJEXT)
+tune_gcd_p_OBJECTS = $(am_tune_gcd_p_OBJECTS)
+tune_gcd_p_LDADD = $(LDADD)
+tune_gcd_p_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+ $(tune_gcd_p_LDFLAGS) $(LDFLAGS) -o $@
+am_tuneup_OBJECTS = tuneup.$(OBJEXT)
+am__objects_1 = div_qr_2.$(OBJEXT) bdiv_q.$(OBJEXT) bdiv_qr.$(OBJEXT) \
+ dcpi1_div_qr.$(OBJEXT) dcpi1_divappr_q.$(OBJEXT) \
+ dcpi1_bdiv_qr.$(OBJEXT) dcpi1_bdiv_q.$(OBJEXT) \
+ invertappr.$(OBJEXT) invert.$(OBJEXT) binvert.$(OBJEXT) \
+ divrem_2.$(OBJEXT) gcd.$(OBJEXT) gcdext.$(OBJEXT) \
+ get_str.$(OBJEXT) set_str.$(OBJEXT) matrix22_mul.$(OBJEXT) \
+ hgcd.$(OBJEXT) hgcd_appr.$(OBJEXT) hgcd_reduce.$(OBJEXT) \
+ mul_n.$(OBJEXT) sqr.$(OBJEXT) powm_sec.$(OBJEXT) \
+ mullo_n.$(OBJEXT) mul_fft.$(OBJEXT) mul.$(OBJEXT) \
+ tdiv_qr.$(OBJEXT) mulmod_bnm1.$(OBJEXT) sqrmod_bnm1.$(OBJEXT) \
+ mulmid.$(OBJEXT) mulmid_n.$(OBJEXT) toom42_mulmid.$(OBJEXT) \
+ nussbaumer_mul.$(OBJEXT) toom6h_mul.$(OBJEXT) \
+ toom8h_mul.$(OBJEXT) toom6_sqr.$(OBJEXT) toom8_sqr.$(OBJEXT) \
+ toom22_mul.$(OBJEXT) toom2_sqr.$(OBJEXT) toom33_mul.$(OBJEXT) \
+ toom3_sqr.$(OBJEXT) toom44_mul.$(OBJEXT) toom4_sqr.$(OBJEXT)
+am__objects_2 = $(am__objects_1) divrem_1.$(OBJEXT) mod_1.$(OBJEXT)
+nodist_tuneup_OBJECTS = sqr_basecase.$(OBJEXT) fac_ui.$(OBJEXT) \
+ $(am__objects_2)
tuneup_OBJECTS = $(am_tuneup_OBJECTS) $(nodist_tuneup_OBJECTS)
am__DEPENDENCIES_3 = $(am__DEPENDENCIES_1) libspeed.la
tuneup_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) \
$(LDFLAGS) -o $@
SOURCES = $(libspeed_la_SOURCES) $(speed_SOURCES) \
$(speed_dynamic_SOURCES) $(speed_ext_SOURCES) \
- $(tuneup_SOURCES) $(nodist_tuneup_SOURCES)
+ $(tune_gcd_p_SOURCES) $(tuneup_SOURCES) \
+ $(nodist_tuneup_SOURCES)
DIST_SOURCES = $(libspeed_la_SOURCES) $(speed_SOURCES) \
$(speed_dynamic_SOURCES) $(speed_ext_SOURCES) \
- $(tuneup_SOURCES)
+ $(tune_gcd_p_SOURCES) $(tuneup_SOURCES)
+am__can_run_installinfo = \
+ case $$AM_UPDATE_INFO_DIR in \
+ n|no|NO) false;; \
+ *) (install-info --version) >/dev/null 2>&1;; \
+ esac
HEADERS = $(noinst_HEADERS)
ETAGS = etags
CTAGS = ctags
SPEED_CYCLECOUNTER_OBJ = @SPEED_CYCLECOUNTER_OBJ@
STRIP = @STRIP@
TAL_OBJECT = @TAL_OBJECT@
+TUNE_LIBS = @TUNE_LIBS@
TUNE_SQR_OBJ = @TUNE_SQR_OBJ@
-U = @U@
U_FOR_BUILD = @U_FOR_BUILD@
VERSION = @VERSION@
WITH_READLINE_01 = @WITH_READLINE_01@
mkdir_p = @mkdir_p@
mpn_objects = @mpn_objects@
mpn_objs_in_libgmp = @mpn_objs_in_libgmp@
-mpn_objs_in_libmp = @mpn_objs_in_libmp@
oldincludedir = @oldincludedir@
pdfdir = @pdfdir@
prefix = @prefix@
common.c divrem1div.c divrem1inv.c divrem2div.c divrem2inv.c \
freq.c \
gcdext_single.c gcdext_double.c gcdextod.c gcdextos.c \
- jacbase1.c jacbase2.c jacbase3.c \
- mod_1_div.c mod_1_inv.c modlinv.c \
+ hgcd_lehmer.c hgcd_appr_lehmer.c hgcd_reduce_1.c hgcd_reduce_2.c \
+ jacbase1.c jacbase2.c jacbase3.c jacbase4.c \
+ mod_1_div.c mod_1_inv.c mod_1_1-1.c mod_1_1-2.c modlinv.c \
noop.c powm_mod.c powm_redc.c pre_divrem_1.c \
set_strb.c set_strs.c set_strp.c time.c
libspeed_la_LIBADD = $(libspeed_la_DEPENDENCIES) $(LIBM)
libspeed_la_LDFLAGS = $(STATIC)
DEPENDENCIES = libspeed.la
-LDADD = $(DEPENDENCIES)
+LDADD = $(DEPENDENCIES) $(TUNE_LIBS)
speed_SOURCES = speed.c
speed_LDFLAGS = $(STATIC)
speed_dynamic_SOURCES = speed.c
speed_ext_SOURCES = speed-ext.c
speed_ext_LDFLAGS = $(STATIC)
tuneup_SOURCES = tuneup.c
-nodist_tuneup_SOURCES = sqr_basecase.c $(TUNE_MPN_SRCS)
+nodist_tuneup_SOURCES = sqr_basecase.c fac_ui.c $(TUNE_MPN_SRCS)
tuneup_DEPENDENCIES = $(TUNE_SQR_OBJ) libspeed.la
-tuneup_LDADD = $(tuneup_DEPENDENCIES)
+tuneup_LDADD = $(tuneup_DEPENDENCIES) $(TUNE_LIBS)
tuneup_LDFLAGS = $(STATIC)
+tune_gcd_p_SOURCES = tune-gcd-p.c
+tune_gcd_p_DEPENDENCIES = ../mpn/gcd.c
+tune_gcd_p_LDFLAGS = $(STATIC)
# $(MANY_CLEAN) and $(MANY_DISTCLEAN) are hooks for many.pl
CLEANFILES = $(EXTRA_PROGRAMS) $(EXTRA_LTLIBRARIES) \
- $(TUNE_MPN_SRCS) sqr_asm.asm \
+ $(TUNE_MPN_SRCS) fac_ui.c sqr_asm.asm \
stg.gnuplot stg.data \
mtg.gnuplot mtg.data \
fibg.gnuplot fibg.data \
# FIXME: Would like say mul_n.c to depend on $(top_builddir)/mul_n.c so the
# recompiled object will be rebuilt if that file changes.
TUNE_MPN_SRCS = $(TUNE_MPN_SRCS_BASIC) divrem_1.c mod_1.c
-TUNE_MPN_SRCS_BASIC = bdiv_q.c bdiv_qr.c \
+TUNE_MPN_SRCS_BASIC = div_qr_2.c bdiv_q.c bdiv_qr.c \
dcpi1_div_qr.c dcpi1_divappr_q.c dcpi1_bdiv_qr.c dcpi1_bdiv_q.c \
invertappr.c invert.c binvert.c divrem_2.c gcd.c gcdext.c \
- get_str.c set_str.c matrix22_mul.c hgcd.c mul_n.c sqr.c \
+ get_str.c set_str.c matrix22_mul.c \
+ hgcd.c hgcd_appr.c hgcd_reduce.c \
+ mul_n.c sqr.c powm_sec.c \
mullo_n.c mul_fft.c mul.c tdiv_qr.c mulmod_bnm1.c sqrmod_bnm1.c \
+ mulmid.c mulmid_n.c toom42_mulmid.c \
nussbaumer_mul.c toom6h_mul.c toom8h_mul.c toom6_sqr.c toom8_sqr.c \
toom22_mul.c toom2_sqr.c toom33_mul.c toom3_sqr.c toom44_mul.c toom4_sqr.c
echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
esac;
+$(srcdir)/../mpn/Makeasm.am:
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(am__aclocal_m4_deps):
-libspeed.la: $(libspeed_la_OBJECTS) $(libspeed_la_DEPENDENCIES)
+libspeed.la: $(libspeed_la_OBJECTS) $(libspeed_la_DEPENDENCIES) $(EXTRA_libspeed_la_DEPENDENCIES)
$(libspeed_la_LINK) $(libspeed_la_OBJECTS) $(libspeed_la_LIBADD) $(LIBS)
-speed$(EXEEXT): $(speed_OBJECTS) $(speed_DEPENDENCIES)
+speed$(EXEEXT): $(speed_OBJECTS) $(speed_DEPENDENCIES) $(EXTRA_speed_DEPENDENCIES)
@rm -f speed$(EXEEXT)
$(speed_LINK) $(speed_OBJECTS) $(speed_LDADD) $(LIBS)
-speed-dynamic$(EXEEXT): $(speed_dynamic_OBJECTS) $(speed_dynamic_DEPENDENCIES)
+speed-dynamic$(EXEEXT): $(speed_dynamic_OBJECTS) $(speed_dynamic_DEPENDENCIES) $(EXTRA_speed_dynamic_DEPENDENCIES)
@rm -f speed-dynamic$(EXEEXT)
$(LINK) $(speed_dynamic_OBJECTS) $(speed_dynamic_LDADD) $(LIBS)
-speed-ext$(EXEEXT): $(speed_ext_OBJECTS) $(speed_ext_DEPENDENCIES)
+speed-ext$(EXEEXT): $(speed_ext_OBJECTS) $(speed_ext_DEPENDENCIES) $(EXTRA_speed_ext_DEPENDENCIES)
@rm -f speed-ext$(EXEEXT)
$(speed_ext_LINK) $(speed_ext_OBJECTS) $(speed_ext_LDADD) $(LIBS)
-tuneup$(EXEEXT): $(tuneup_OBJECTS) $(tuneup_DEPENDENCIES)
+tune-gcd-p$(EXEEXT): $(tune_gcd_p_OBJECTS) $(tune_gcd_p_DEPENDENCIES) $(EXTRA_tune_gcd_p_DEPENDENCIES)
+ @rm -f tune-gcd-p$(EXEEXT)
+ $(tune_gcd_p_LINK) $(tune_gcd_p_OBJECTS) $(tune_gcd_p_LDADD) $(LIBS)
+tuneup$(EXEEXT): $(tuneup_OBJECTS) $(tuneup_DEPENDENCIES) $(EXTRA_tuneup_DEPENDENCIES)
@rm -f tuneup$(EXEEXT)
$(tuneup_LINK) $(tuneup_OBJECTS) $(tuneup_LDADD) $(LIBS)
distclean-compile:
-rm -f *.tab.c
-$(top_builddir)/ansi2knr:
- $(am__cd) $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) ./ansi2knr
-
-mostlyclean-kr:
- -test "$U" = "" || rm -f *_.c
.c.o:
$(COMPILE) -c $<
.c.lo:
$(LTCOMPILE) -c -o $@ $<
-bdiv_q_.c: bdiv_q.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/bdiv_q.c; then echo $(srcdir)/bdiv_q.c; else echo bdiv_q.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-bdiv_qr_.c: bdiv_qr.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/bdiv_qr.c; then echo $(srcdir)/bdiv_qr.c; else echo bdiv_qr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-binvert_.c: binvert.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/binvert.c; then echo $(srcdir)/binvert.c; else echo binvert.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-common_.c: common.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/common.c; then echo $(srcdir)/common.c; else echo common.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-dcpi1_bdiv_q_.c: dcpi1_bdiv_q.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/dcpi1_bdiv_q.c; then echo $(srcdir)/dcpi1_bdiv_q.c; else echo dcpi1_bdiv_q.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-dcpi1_bdiv_qr_.c: dcpi1_bdiv_qr.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/dcpi1_bdiv_qr.c; then echo $(srcdir)/dcpi1_bdiv_qr.c; else echo dcpi1_bdiv_qr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-dcpi1_div_qr_.c: dcpi1_div_qr.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/dcpi1_div_qr.c; then echo $(srcdir)/dcpi1_div_qr.c; else echo dcpi1_div_qr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-dcpi1_divappr_q_.c: dcpi1_divappr_q.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/dcpi1_divappr_q.c; then echo $(srcdir)/dcpi1_divappr_q.c; else echo dcpi1_divappr_q.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-divrem1div_.c: divrem1div.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/divrem1div.c; then echo $(srcdir)/divrem1div.c; else echo divrem1div.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-divrem1inv_.c: divrem1inv.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/divrem1inv.c; then echo $(srcdir)/divrem1inv.c; else echo divrem1inv.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-divrem2div_.c: divrem2div.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/divrem2div.c; then echo $(srcdir)/divrem2div.c; else echo divrem2div.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-divrem2inv_.c: divrem2inv.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/divrem2inv.c; then echo $(srcdir)/divrem2inv.c; else echo divrem2inv.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-divrem_1_.c: divrem_1.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/divrem_1.c; then echo $(srcdir)/divrem_1.c; else echo divrem_1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-divrem_2_.c: divrem_2.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/divrem_2.c; then echo $(srcdir)/divrem_2.c; else echo divrem_2.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-freq_.c: freq.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/freq.c; then echo $(srcdir)/freq.c; else echo freq.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-gcd_.c: gcd.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/gcd.c; then echo $(srcdir)/gcd.c; else echo gcd.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-gcdext_.c: gcdext.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/gcdext.c; then echo $(srcdir)/gcdext.c; else echo gcdext.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-gcdext_double_.c: gcdext_double.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/gcdext_double.c; then echo $(srcdir)/gcdext_double.c; else echo gcdext_double.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-gcdext_single_.c: gcdext_single.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/gcdext_single.c; then echo $(srcdir)/gcdext_single.c; else echo gcdext_single.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-gcdextod_.c: gcdextod.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/gcdextod.c; then echo $(srcdir)/gcdextod.c; else echo gcdextod.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-gcdextos_.c: gcdextos.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/gcdextos.c; then echo $(srcdir)/gcdextos.c; else echo gcdextos.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-get_str_.c: get_str.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/get_str.c; then echo $(srcdir)/get_str.c; else echo get_str.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-hgcd_.c: hgcd.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/hgcd.c; then echo $(srcdir)/hgcd.c; else echo hgcd.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-invert_.c: invert.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/invert.c; then echo $(srcdir)/invert.c; else echo invert.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-invertappr_.c: invertappr.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/invertappr.c; then echo $(srcdir)/invertappr.c; else echo invertappr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-jacbase1_.c: jacbase1.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/jacbase1.c; then echo $(srcdir)/jacbase1.c; else echo jacbase1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-jacbase2_.c: jacbase2.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/jacbase2.c; then echo $(srcdir)/jacbase2.c; else echo jacbase2.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-jacbase3_.c: jacbase3.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/jacbase3.c; then echo $(srcdir)/jacbase3.c; else echo jacbase3.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-matrix22_mul_.c: matrix22_mul.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/matrix22_mul.c; then echo $(srcdir)/matrix22_mul.c; else echo matrix22_mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mod_1_.c: mod_1.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mod_1.c; then echo $(srcdir)/mod_1.c; else echo mod_1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mod_1_div_.c: mod_1_div.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mod_1_div.c; then echo $(srcdir)/mod_1_div.c; else echo mod_1_div.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mod_1_inv_.c: mod_1_inv.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mod_1_inv.c; then echo $(srcdir)/mod_1_inv.c; else echo mod_1_inv.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-modlinv_.c: modlinv.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/modlinv.c; then echo $(srcdir)/modlinv.c; else echo modlinv.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mul_.c: mul.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mul.c; then echo $(srcdir)/mul.c; else echo mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mul_fft_.c: mul_fft.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mul_fft.c; then echo $(srcdir)/mul_fft.c; else echo mul_fft.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mul_n_.c: mul_n.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mul_n.c; then echo $(srcdir)/mul_n.c; else echo mul_n.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mullo_n_.c: mullo_n.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mullo_n.c; then echo $(srcdir)/mullo_n.c; else echo mullo_n.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-mulmod_bnm1_.c: mulmod_bnm1.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/mulmod_bnm1.c; then echo $(srcdir)/mulmod_bnm1.c; else echo mulmod_bnm1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-noop_.c: noop.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/noop.c; then echo $(srcdir)/noop.c; else echo noop.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-nussbaumer_mul_.c: nussbaumer_mul.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/nussbaumer_mul.c; then echo $(srcdir)/nussbaumer_mul.c; else echo nussbaumer_mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-powm_mod_.c: powm_mod.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/powm_mod.c; then echo $(srcdir)/powm_mod.c; else echo powm_mod.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-powm_redc_.c: powm_redc.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/powm_redc.c; then echo $(srcdir)/powm_redc.c; else echo powm_redc.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-pre_divrem_1_.c: pre_divrem_1.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/pre_divrem_1.c; then echo $(srcdir)/pre_divrem_1.c; else echo pre_divrem_1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-set_str_.c: set_str.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_str.c; then echo $(srcdir)/set_str.c; else echo set_str.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-set_strb_.c: set_strb.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_strb.c; then echo $(srcdir)/set_strb.c; else echo set_strb.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-set_strp_.c: set_strp.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_strp.c; then echo $(srcdir)/set_strp.c; else echo set_strp.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-set_strs_.c: set_strs.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/set_strs.c; then echo $(srcdir)/set_strs.c; else echo set_strs.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-speed_.c: speed.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/speed.c; then echo $(srcdir)/speed.c; else echo speed.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-speed-ext_.c: speed-ext.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/speed-ext.c; then echo $(srcdir)/speed-ext.c; else echo speed-ext.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-sqr_.c: sqr.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sqr.c; then echo $(srcdir)/sqr.c; else echo sqr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-sqr_basecase_.c: sqr_basecase.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sqr_basecase.c; then echo $(srcdir)/sqr_basecase.c; else echo sqr_basecase.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-sqrmod_bnm1_.c: sqrmod_bnm1.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/sqrmod_bnm1.c; then echo $(srcdir)/sqrmod_bnm1.c; else echo sqrmod_bnm1.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-tdiv_qr_.c: tdiv_qr.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/tdiv_qr.c; then echo $(srcdir)/tdiv_qr.c; else echo tdiv_qr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-time_.c: time.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/time.c; then echo $(srcdir)/time.c; else echo time.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-toom22_mul_.c: toom22_mul.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom22_mul.c; then echo $(srcdir)/toom22_mul.c; else echo toom22_mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-toom2_sqr_.c: toom2_sqr.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom2_sqr.c; then echo $(srcdir)/toom2_sqr.c; else echo toom2_sqr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-toom33_mul_.c: toom33_mul.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom33_mul.c; then echo $(srcdir)/toom33_mul.c; else echo toom33_mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-toom3_sqr_.c: toom3_sqr.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom3_sqr.c; then echo $(srcdir)/toom3_sqr.c; else echo toom3_sqr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-toom44_mul_.c: toom44_mul.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom44_mul.c; then echo $(srcdir)/toom44_mul.c; else echo toom44_mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-toom4_sqr_.c: toom4_sqr.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom4_sqr.c; then echo $(srcdir)/toom4_sqr.c; else echo toom4_sqr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-toom6_sqr_.c: toom6_sqr.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom6_sqr.c; then echo $(srcdir)/toom6_sqr.c; else echo toom6_sqr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-toom6h_mul_.c: toom6h_mul.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom6h_mul.c; then echo $(srcdir)/toom6h_mul.c; else echo toom6h_mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-toom8_sqr_.c: toom8_sqr.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom8_sqr.c; then echo $(srcdir)/toom8_sqr.c; else echo toom8_sqr.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-toom8h_mul_.c: toom8h_mul.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/toom8h_mul.c; then echo $(srcdir)/toom8h_mul.c; else echo toom8h_mul.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-tuneup_.c: tuneup.c $(ANSI2KNR)
- $(CPP) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) `if test -f $(srcdir)/tuneup.c; then echo $(srcdir)/tuneup.c; else echo tuneup.c; fi` | sed 's/^# \([0-9]\)/#line \1/' | $(ANSI2KNR) > $@ || rm -f $@
-bdiv_q_.$(OBJEXT) bdiv_q_.lo bdiv_qr_.$(OBJEXT) bdiv_qr_.lo \
-binvert_.$(OBJEXT) binvert_.lo common_.$(OBJEXT) common_.lo \
-dcpi1_bdiv_q_.$(OBJEXT) dcpi1_bdiv_q_.lo dcpi1_bdiv_qr_.$(OBJEXT) \
-dcpi1_bdiv_qr_.lo dcpi1_div_qr_.$(OBJEXT) dcpi1_div_qr_.lo \
-dcpi1_divappr_q_.$(OBJEXT) dcpi1_divappr_q_.lo divrem1div_.$(OBJEXT) \
-divrem1div_.lo divrem1inv_.$(OBJEXT) divrem1inv_.lo \
-divrem2div_.$(OBJEXT) divrem2div_.lo divrem2inv_.$(OBJEXT) \
-divrem2inv_.lo divrem_1_.$(OBJEXT) divrem_1_.lo divrem_2_.$(OBJEXT) \
-divrem_2_.lo freq_.$(OBJEXT) freq_.lo gcd_.$(OBJEXT) gcd_.lo \
-gcdext_.$(OBJEXT) gcdext_.lo gcdext_double_.$(OBJEXT) \
-gcdext_double_.lo gcdext_single_.$(OBJEXT) gcdext_single_.lo \
-gcdextod_.$(OBJEXT) gcdextod_.lo gcdextos_.$(OBJEXT) gcdextos_.lo \
-get_str_.$(OBJEXT) get_str_.lo hgcd_.$(OBJEXT) hgcd_.lo \
-invert_.$(OBJEXT) invert_.lo invertappr_.$(OBJEXT) invertappr_.lo \
-jacbase1_.$(OBJEXT) jacbase1_.lo jacbase2_.$(OBJEXT) jacbase2_.lo \
-jacbase3_.$(OBJEXT) jacbase3_.lo matrix22_mul_.$(OBJEXT) \
-matrix22_mul_.lo mod_1_.$(OBJEXT) mod_1_.lo mod_1_div_.$(OBJEXT) \
-mod_1_div_.lo mod_1_inv_.$(OBJEXT) mod_1_inv_.lo modlinv_.$(OBJEXT) \
-modlinv_.lo mul_.$(OBJEXT) mul_.lo mul_fft_.$(OBJEXT) mul_fft_.lo \
-mul_n_.$(OBJEXT) mul_n_.lo mullo_n_.$(OBJEXT) mullo_n_.lo \
-mulmod_bnm1_.$(OBJEXT) mulmod_bnm1_.lo noop_.$(OBJEXT) noop_.lo \
-nussbaumer_mul_.$(OBJEXT) nussbaumer_mul_.lo powm_mod_.$(OBJEXT) \
-powm_mod_.lo powm_redc_.$(OBJEXT) powm_redc_.lo \
-pre_divrem_1_.$(OBJEXT) pre_divrem_1_.lo set_str_.$(OBJEXT) \
-set_str_.lo set_strb_.$(OBJEXT) set_strb_.lo set_strp_.$(OBJEXT) \
-set_strp_.lo set_strs_.$(OBJEXT) set_strs_.lo speed_.$(OBJEXT) \
-speed_.lo speed-ext_.$(OBJEXT) speed-ext_.lo sqr_.$(OBJEXT) sqr_.lo \
-sqr_basecase_.$(OBJEXT) sqr_basecase_.lo sqrmod_bnm1_.$(OBJEXT) \
-sqrmod_bnm1_.lo tdiv_qr_.$(OBJEXT) tdiv_qr_.lo time_.$(OBJEXT) \
-time_.lo toom22_mul_.$(OBJEXT) toom22_mul_.lo toom2_sqr_.$(OBJEXT) \
-toom2_sqr_.lo toom33_mul_.$(OBJEXT) toom33_mul_.lo \
-toom3_sqr_.$(OBJEXT) toom3_sqr_.lo toom44_mul_.$(OBJEXT) \
-toom44_mul_.lo toom4_sqr_.$(OBJEXT) toom4_sqr_.lo toom6_sqr_.$(OBJEXT) \
-toom6_sqr_.lo toom6h_mul_.$(OBJEXT) toom6h_mul_.lo \
-toom8_sqr_.$(OBJEXT) toom8_sqr_.lo toom8h_mul_.$(OBJEXT) \
-toom8h_mul_.lo tuneup_.$(OBJEXT) tuneup_.lo : $(ANSI2KNR)
mostlyclean-libtool:
-rm -f *.lo
installcheck: installcheck-am
install-strip:
- $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
- install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
- `test -z '$(STRIP)' || \
- echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
mostlyclean-generic:
clean-generic:
mostlyclean: mostlyclean-am
-mostlyclean-am: mostlyclean-compile mostlyclean-generic mostlyclean-kr \
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
mostlyclean-libtool
pdf: pdf-am
uninstall-am:
-.MAKE: $(top_builddir)/ansi2knr install-am install-strip
+.MAKE: install-am install-strip
.PHONY: CTAGS GTAGS all all-am check check-am clean clean-generic \
clean-libtool ctags distclean distclean-compile \
install-pdf-am install-ps install-ps-am install-strip \
installcheck installcheck-am installdirs maintainer-clean \
maintainer-clean-generic mostlyclean mostlyclean-compile \
- mostlyclean-generic mostlyclean-kr mostlyclean-libtool pdf \
- pdf-am ps ps-am tags uninstall uninstall-am
+ mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
+ tags uninstall uninstall-am
$(top_builddir)/tests/libtests.la:
echo 'define(SQR_TOOM2_THRESHOLD_OVERRIDE,SQR_TOOM2_THRESHOLD_MAX)' >sqr_asm.asm
echo 'include(../mpn/sqr_basecase.asm)' >>sqr_asm.asm
+# FIXME: Should it depend on $(top_builddir)/fac_ui.h too?
+fac_ui.c: $(top_builddir)/mpz/fac_ui.c
+ echo "#define TUNE_PROGRAM_BUILD 1" >fac_ui.c
+ echo "#define __gmpz_fac_ui mpz_fac_ui_tune" >>fac_ui.c
+ echo "#define __gmpz_oddfac_1 mpz_oddfac_1_tune" >>fac_ui.c
+ echo "#include \"mpz/oddfac_1.c\"" >>fac_ui.c
+ echo "#include \"mpz/fac_ui.c\"" >>fac_ui.c
+
# .s assembler, no preprocessing.
#
.s.o:
EXAMPLE COMPARISONS - MULTIPLICATION
-mul_basecase takes a ".<r>" parameter which is the first (larger) size
-parameter. For example to show speeds for 20x1 up to 20x15 in cycles,
+mul_basecase takes a ".<r>" parameter. If positive, it gives the second
+(smaller) operand size. For example to show speeds for 3x3 up to 20x3 in
+cycles,
- ./speed -s 1-15 -c mpn_mul_basecase.20
+ ./speed -s 3-20 -c mpn_mul_basecase.3
+
+A negative ".<-r>" parameter fixes the size of the product to the absolute
+value r. For example to show speeds for 10x10 up to 19x1 in cycles,
+
+ ./speed -s 10-19 -c mpn_mul_basecase.-20
mul_basecase with no parameter does an NxN multiply, so for example to show
speeds in cycles for 1x1, 2x2, 3x3, etc, up to 20x20, in cycles,
/* Shared speed subroutines.
-Copyright 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2008, 2009, 2010
-Free Software Foundation, Inc.
+Copyright 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2008, 2009, 2010,
+2011, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
int speed_option_addrs = 0;
int speed_option_verbose = 0;
+int speed_option_cycles_broken = 0;
/* Provide __clz_tab even if it's not required, for the benefit of new code
s->r, -1.0 should be returned. See the various base routines below. */
double
-speed_measure (double (*fun) __GMP_PROTO ((struct speed_params *s)),
- struct speed_params *s)
+speed_measure (double (*fun) (struct speed_params *s), struct speed_params *s)
{
-#define TOLERANCE 1.005 /* 0.5% */
+#define TOLERANCE 1.01 /* 1% */
const int max_zeros = 10;
struct speed_params s_dummy;
{
speed_option_verbose = n;
}
+ else if (strcmp (s, "cycles-broken") == 0)
+ {
+ speed_option_cycles_broken = 1;
+ }
else
{
printf ("Unrecognised -o option: %s\n", s);
{
SPEED_ROUTINE_MPN_COPY (mpn_com);
}
+double
+speed_mpn_tabselect (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_TABSELECT (mpn_tabselect);
+}
double
SPEED_ROUTINE_MPN_UNARY_4 (mpn_mul_4);
}
#endif
+#if HAVE_NATIVE_mpn_mul_5
+double
+speed_mpn_mul_5 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_UNARY_5 (mpn_mul_5);
+}
+#endif
+#if HAVE_NATIVE_mpn_mul_6
+double
+speed_mpn_mul_6 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_UNARY_6 (mpn_mul_6);
+}
+#endif
double
SPEED_ROUTINE_MPN_DIVREM_2 (mpn_divrem_2_inv);
}
+double
+speed_mpn_div_qr_2n (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_DIV_QR_2 (mpn_div_qr_2, 1);
+}
+double
+speed_mpn_div_qr_2u (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_DIV_QR_2 (mpn_div_qr_2, 0);
+}
+
double
speed_mpn_mod_1 (struct speed_params *s)
{
SPEED_ROUTINE_MPN_MOD_1_1 (mpn_mod_1_1p,mpn_mod_1_1p_cps);
}
double
+speed_mpn_mod_1_1_1 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_MOD_1_1 (mpn_mod_1_1p_1,mpn_mod_1_1p_cps_1);
+}
+double
+speed_mpn_mod_1_1_2 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_MOD_1_1 (mpn_mod_1_1p_2,mpn_mod_1_1p_cps_2);
+}
+double
speed_mpn_mod_1_2 (struct speed_params *s)
{
SPEED_ROUTINE_MPN_MOD_1_N (mpn_mod_1s_2p,mpn_mod_1s_2p_cps,2);
SPEED_ROUTINE_MPN_MU_BDIV_QR (mpn_mu_bdiv_qr, mpn_mu_bdiv_qr_itch);
}
+double
+speed_mpn_broot (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_BROOT (mpn_broot);
+}
+double
+speed_mpn_broot_invm1 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_BROOT (mpn_broot_invm1);
+}
+double
+speed_mpn_brootinv (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_BROOTINV (mpn_brootinv, 5*s->size);
+}
+
double
speed_mpn_binvert (struct speed_params *s)
{
SPEED_ROUTINE_MPN_BINARY_N (mpn_sub_n);
}
+double
+speed_mpn_add_err1_n (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_BINARY_ERR1_N (mpn_add_err1_n);
+}
+double
+speed_mpn_sub_err1_n (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_BINARY_ERR1_N (mpn_sub_err1_n);
+}
+double
+speed_mpn_add_err2_n (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_BINARY_ERR2_N (mpn_add_err2_n);
+}
+double
+speed_mpn_sub_err2_n (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_BINARY_ERR2_N (mpn_sub_err2_n);
+}
+double
+speed_mpn_add_err3_n (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_BINARY_ERR3_N (mpn_add_err3_n);
+}
+double
+speed_mpn_sub_err3_n (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_BINARY_ERR3_N (mpn_sub_err3_n);
+}
+
+
#if HAVE_NATIVE_mpn_add_n_sub_n
double
speed_mpn_add_n_sub_n (struct speed_params *s)
SPEED_ROUTINE_MPN_BINARY_N (mpn_sublsh1_n);
}
#endif
+#if HAVE_NATIVE_mpn_addlsh1_n_ip1
+double
+speed_mpn_addlsh1_n_ip1 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_COPY (mpn_addlsh1_n_ip1);
+}
+#endif
+#if HAVE_NATIVE_mpn_addlsh1_n_ip2
+double
+speed_mpn_addlsh1_n_ip2 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_COPY (mpn_addlsh1_n_ip2);
+}
+#endif
+#if HAVE_NATIVE_mpn_sublsh1_n_ip1
+double
+speed_mpn_sublsh1_n_ip1 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_COPY (mpn_sublsh1_n_ip1);
+}
+#endif
#if HAVE_NATIVE_mpn_rsblsh1_n
double
speed_mpn_rsblsh1_n (struct speed_params *s)
SPEED_ROUTINE_MPN_BINARY_N (mpn_sublsh2_n);
}
#endif
+#if HAVE_NATIVE_mpn_addlsh2_n_ip1
+double
+speed_mpn_addlsh2_n_ip1 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_COPY (mpn_addlsh2_n_ip1);
+}
+#endif
+#if HAVE_NATIVE_mpn_addlsh2_n_ip2
+double
+speed_mpn_addlsh2_n_ip2 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_COPY (mpn_addlsh2_n_ip2);
+}
+#endif
+#if HAVE_NATIVE_mpn_sublsh2_n_ip1
+double
+speed_mpn_sublsh2_n_ip1 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_COPY (mpn_sublsh2_n_ip1);
+}
+#endif
#if HAVE_NATIVE_mpn_rsblsh2_n
double
speed_mpn_rsblsh2_n (struct speed_params *s)
SPEED_ROUTINE_MPN_BINARY_N (mpn_rsblsh2_n);
}
#endif
+#if HAVE_NATIVE_mpn_addlsh_n
+double
+speed_mpn_addlsh_n (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_addlsh_n (wp, xp, yp, s->size, 7));
+}
+#endif
+#if HAVE_NATIVE_mpn_sublsh_n
+double
+speed_mpn_sublsh_n (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_sublsh_n (wp, xp, yp, s->size, 7));
+}
+#endif
+#if HAVE_NATIVE_mpn_addlsh_n_ip1
+double
+speed_mpn_addlsh_n_ip1 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_UNARY_1_CALL (mpn_addlsh_n_ip1 (wp, s->xp, s->size, 7));
+}
+#endif
+#if HAVE_NATIVE_mpn_addlsh_n_ip2
+double
+speed_mpn_addlsh_n_ip2 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_UNARY_1_CALL (mpn_addlsh_n_ip2 (wp, s->xp, s->size, 7));
+}
+#endif
+#if HAVE_NATIVE_mpn_sublsh_n_ip1
+double
+speed_mpn_sublsh_n_ip1 (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_UNARY_1_CALL (mpn_sublsh_n_ip1 (wp, s->xp, s->size, 7));
+}
+#endif
+#if HAVE_NATIVE_mpn_rsblsh_n
+double
+speed_mpn_rsblsh_n (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_rsblsh_n (wp, xp, yp, s->size, 7));
+}
+#endif
#if HAVE_NATIVE_mpn_rsh1add_n
double
speed_mpn_rsh1add_n (struct speed_params *s)
}
#endif
+double
+speed_mpn_addcnd_n (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_addcnd_n (wp, xp, yp, s->size, 1));
+}
+double
+speed_mpn_subcnd_n (struct speed_params *s)
+{
+ SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_subcnd_n (wp, xp, yp, s->size, 1));
+}
+
/* mpn_and_n etc can be macros and so have to be handled with
SPEED_ROUTINE_MPN_BINARY_N_CALL forms */
double
speed_mpn_and_n (struct speed_params *s)
{
- SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_and_n (wp, s->xp, s->yp, s->size));
+ SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_and_n (wp, xp, yp, s->size));
}
double
speed_mpn_andn_n (struct speed_params *s)
{
-SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_andn_n (wp, s->xp, s->yp, s->size));
+ SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_andn_n (wp, xp, yp, s->size));
}
double
speed_mpn_nand_n (struct speed_params *s)
{
- SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_nand_n (wp, s->xp, s->yp, s->size));
+ SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_nand_n (wp, xp, yp, s->size));
}
double
speed_mpn_ior_n (struct speed_params *s)
{
-SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_ior_n (wp, s->xp, s->yp, s->size));
+ SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_ior_n (wp, xp, yp, s->size));
}
double
speed_mpn_iorn_n (struct speed_params *s)
{
- SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_iorn_n (wp, s->xp, s->yp, s->size));
+ SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_iorn_n (wp, xp, yp, s->size));
}
double
speed_mpn_nior_n (struct speed_params *s)
{
- SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_nior_n (wp, s->xp, s->yp, s->size));
+ SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_nior_n (wp, xp, yp, s->size));
}
double
speed_mpn_xor_n (struct speed_params *s)
{
- SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_xor_n (wp, s->xp, s->yp, s->size));
+ SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_xor_n (wp, xp, yp, s->size));
}
double
speed_mpn_xnor_n (struct speed_params *s)
{
- SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_xnor_n (wp, s->xp, s->yp, s->size));
+ SPEED_ROUTINE_MPN_BINARY_N_CALL (mpn_xnor_n (wp, xp, yp, s->size));
}
}
#endif
+#if HAVE_NATIVE_mpn_sqr_diag_addlsh1
+double
+speed_mpn_sqr_diag_addlsh1 (struct speed_params *s)
+{ /* Compiled only when HAVE_NATIVE_mpn_sqr_diag_addlsh1 is set.  Times
+     mpn_sqr_diag_addlsh1 through its dedicated _CALL macro, reading the
+     source operand from s->xp and the size from s->size.  wp and tp are
+     not declared here and are presumably set up by the macro.  */
+ SPEED_ROUTINE_MPN_SQR_DIAG_ADDLSH1_CALL (mpn_sqr_diag_addlsh1 (wp, tp, s->xp, s->size));
+}
+#endif
+
double
speed_mpn_toom2_sqr (struct speed_params *s)
{
{
SPEED_ROUTINE_MPN_TOOM53_FOR_TOOM42_MUL (mpn_toom53_mul);
}
+double
+speed_mpn_toom43_for_toom54_mul (struct speed_params *s)
+{ /* Time mpn_toom43_mul via SPEED_ROUTINE_MPN_TOOM43_FOR_TOOM54_MUL.
+     Going by the macro name, the operands are presumably sized as they
+     would be for toom54 so the two can be compared -- confirm against
+     the macro definition.  */
+ SPEED_ROUTINE_MPN_TOOM43_FOR_TOOM54_MUL (mpn_toom43_mul);
+}
+double
+speed_mpn_toom54_for_toom43_mul (struct speed_params *s)
+{ /* Time mpn_toom54_mul via SPEED_ROUTINE_MPN_TOOM54_FOR_TOOM43_MUL.
+     Going by the macro name, the operands are presumably sized as they
+     would be for toom43 so the two can be compared -- confirm against
+     the macro definition.  */
+ SPEED_ROUTINE_MPN_TOOM54_FOR_TOOM43_MUL (mpn_toom54_mul);
+}
double
speed_mpn_nussbaumer_mul (struct speed_params *s)
SPEED_ROUTINE_MPN_MULLO_BASECASE (mpn_mullo_basecase);
}
+double
+speed_mpn_mulmid_basecase (struct speed_params *s)
+{ /* Time mpn_mulmid_basecase.  All setup, looping and the returned time
+     come from SPEED_ROUTINE_MPN_MULMID, the same macro used for
+     mpn_mulmid.  */
+ SPEED_ROUTINE_MPN_MULMID (mpn_mulmid_basecase);
+}
+
+double
+speed_mpn_mulmid (struct speed_params *s)
+{ /* Time mpn_mulmid.  All setup, looping and the returned time come from
+     SPEED_ROUTINE_MPN_MULMID, the same macro used for
+     mpn_mulmid_basecase.  */
+ SPEED_ROUTINE_MPN_MULMID (mpn_mulmid);
+}
+
+double
+speed_mpn_mulmid_n (struct speed_params *s)
+{ /* Time mpn_mulmid_n.  Uses the separate SPEED_ROUTINE_MPN_MULMID_N
+     macro rather than the one shared by mpn_mulmid and
+     mpn_mulmid_basecase.  */
+ SPEED_ROUTINE_MPN_MULMID_N (mpn_mulmid_n);
+}
+
+double
+speed_mpn_toom42_mulmid (struct speed_params *s)
+{ /* Time mpn_toom42_mulmid through its own
+     SPEED_ROUTINE_MPN_TOOM42_MULMID macro, which supplies the setup,
+     the loop and the returned time.  */
+ SPEED_ROUTINE_MPN_TOOM42_MULMID (mpn_toom42_mulmid);
+}
+
double
speed_mpn_mulmod_bnm1 (struct speed_params *s)
{
double
speed_mpn_hgcd (struct speed_params *s)
{
- mp_ptr wp;
- mp_size_t hgcd_init_scratch = MPN_HGCD_MATRIX_INIT_ITCH (s->size);
- mp_size_t hgcd_scratch = mpn_hgcd_itch (s->size);
- mp_ptr ap;
- mp_ptr bp;
- mp_ptr tmp1;
-
- struct hgcd_matrix hgcd;
- int res;
- unsigned i;
- double t;
- TMP_DECL;
-
- if (s->size < 2)
- return -1;
-
- TMP_MARK;
-
- SPEED_TMP_ALLOC_LIMBS (ap, s->size + 1, s->align_xp);
- SPEED_TMP_ALLOC_LIMBS (bp, s->size + 1, s->align_yp);
-
- s->xp[s->size - 1] |= 1;
- s->yp[s->size - 1] |= 1;
-
- SPEED_TMP_ALLOC_LIMBS (tmp1, hgcd_init_scratch, s->align_wp);
- SPEED_TMP_ALLOC_LIMBS (wp, hgcd_scratch, s->align_wp);
-
- speed_starttime ();
- i = s->reps;
- do
- {
- MPN_COPY (ap, s->xp, s->size);
- MPN_COPY (bp, s->yp, s->size);
- mpn_hgcd_matrix_init (&hgcd, s->size, tmp1);
- res = mpn_hgcd (ap, bp, s->size, &hgcd, wp);
- }
- while (--i != 0);
- t = speed_endtime ();
- TMP_FREE;
- return t;
+ SPEED_ROUTINE_MPN_HGCD_CALL (mpn_hgcd, mpn_hgcd_itch);
}
double
speed_mpn_hgcd_lehmer (struct speed_params *s)
{
- mp_ptr wp;
- mp_size_t hgcd_init_scratch = MPN_HGCD_MATRIX_INIT_ITCH (s->size);
- mp_size_t hgcd_scratch = MPN_HGCD_LEHMER_ITCH (s->size);
- mp_ptr ap;
- mp_ptr bp;
- mp_ptr tmp1;
-
- struct hgcd_matrix hgcd;
- int res;
- unsigned i;
- double t;
- TMP_DECL;
-
- if (s->size < 2)
- return -1;
-
- TMP_MARK;
-
- SPEED_TMP_ALLOC_LIMBS (ap, s->size + 1, s->align_xp);
- SPEED_TMP_ALLOC_LIMBS (bp, s->size + 1, s->align_yp);
+ SPEED_ROUTINE_MPN_HGCD_CALL (mpn_hgcd_lehmer, mpn_hgcd_lehmer_itch);
+}
- s->xp[s->size - 1] |= 1;
- s->yp[s->size - 1] |= 1;
+double
+speed_mpn_hgcd_appr (struct speed_params *s)
+{ /* Time mpn_hgcd_appr.  The macro receives both the function and
+     mpn_hgcd_appr_itch, presumably to size the scratch area -- confirm
+     against SPEED_ROUTINE_MPN_HGCD_CALL.  */
+ SPEED_ROUTINE_MPN_HGCD_CALL (mpn_hgcd_appr, mpn_hgcd_appr_itch);
+}
- SPEED_TMP_ALLOC_LIMBS (tmp1, hgcd_init_scratch, s->align_wp);
- SPEED_TMP_ALLOC_LIMBS (wp, hgcd_scratch, s->align_wp);
+double
+speed_mpn_hgcd_appr_lehmer (struct speed_params *s)
+{ /* Time mpn_hgcd_appr_lehmer, i.e. hgcd_appr.c compiled with
+     HGCD_APPR_THRESHOLD redefined to MP_SIZE_T_MAX.  The matching
+     mpn_hgcd_appr_lehmer_itch is passed alongside, presumably to size
+     the scratch area.  */
+ SPEED_ROUTINE_MPN_HGCD_CALL (mpn_hgcd_appr_lehmer, mpn_hgcd_appr_lehmer_itch);
+}
- speed_starttime ();
- i = s->reps;
- do
- {
- MPN_COPY (ap, s->xp, s->size);
- MPN_COPY (bp, s->yp, s->size);
- mpn_hgcd_matrix_init (&hgcd, s->size, tmp1);
- res = mpn_hgcd_lehmer (ap, bp, s->size, &hgcd, wp);
- }
- while (--i != 0);
- t = speed_endtime ();
- TMP_FREE;
- return t;
+double
+speed_mpn_hgcd_reduce (struct speed_params *s)
+{ /* Time mpn_hgcd_reduce.  The macro receives both the function and
+     mpn_hgcd_reduce_itch, presumably to size the scratch area --
+     confirm against SPEED_ROUTINE_MPN_HGCD_REDUCE_CALL.  */
+ SPEED_ROUTINE_MPN_HGCD_REDUCE_CALL (mpn_hgcd_reduce, mpn_hgcd_reduce_itch);
+}
+double
+speed_mpn_hgcd_reduce_1 (struct speed_params *s)
+{ /* Time mpn_hgcd_reduce_1, i.e. hgcd_reduce.c compiled with
+     HGCD_REDUCE_THRESHOLD redefined to MP_SIZE_T_MAX (described in that
+     wrapper as "forced to use hgcd").  */
+ SPEED_ROUTINE_MPN_HGCD_REDUCE_CALL (mpn_hgcd_reduce_1, mpn_hgcd_reduce_1_itch);
+}
+double
+speed_mpn_hgcd_reduce_2 (struct speed_params *s)
+{ /* Time mpn_hgcd_reduce_2, i.e. hgcd_reduce.c compiled with
+     HGCD_REDUCE_THRESHOLD redefined to 0 (described in that wrapper as
+     "forced to use hgcd_appr").  */
+ SPEED_ROUTINE_MPN_HGCD_REDUCE_CALL (mpn_hgcd_reduce_2, mpn_hgcd_reduce_2_itch);
}
double
{
SPEED_ROUTINE_MPN_JACBASE (mpn_jacobi_base_3);
}
+double
+speed_mpn_jacobi_base_4 (struct speed_params *s)
+{ /* Time mpn_jacobi_base_4, i.e. jacbase.c compiled with
+     JACOBI_BASE_METHOD set to 4, using the same SPEED_ROUTINE_MPN_JACBASE
+     macro as the other jacobi_base variants.  */
+ SPEED_ROUTINE_MPN_JACBASE (mpn_jacobi_base_4);
+}
double
SPEED_ROUTINE_MPZ_POWM (mpz_powm_redc);
}
double
+speed_mpz_powm_sec (struct speed_params *s)
+{ /* Time mpz_powm_sec with the same SPEED_ROUTINE_MPZ_POWM macro that is
+     used for mpz_powm_redc, so the figures are directly comparable.  */
+ SPEED_ROUTINE_MPZ_POWM (mpz_powm_sec);
+}
+double
speed_mpz_powm_ui (struct speed_params *s)
{
SPEED_ROUTINE_MPZ_POWM_UI (mpz_powm_ui);
return t;
}
+/* If r==0, calculate binomial(2^size,size),
+ otherwise calculate binomial(2^size,r).
+
+ Timing routine for mpz_bin_ui.  The upper argument is built as 2^size
+ by setting bit s->size in a zero-initialised mpz_t; the lower argument
+ is s->r when that is non-zero and s->size otherwise.  The value returned
+ is whatever speed_endtime reports for the s->reps calls made after
+ speed_starttime.  Both temporaries are cleared before returning.  */
+
+double
+speed_mpz_bin_ui (struct speed_params *s)
+{
+ mpz_t w, x;
+ unsigned long k;
+ unsigned i;
+ double t;
+
+ mpz_init (w);
+ mpz_init_set_ui (x, 0);
+
+ mpz_setbit (x, s->size); /* x = 2^size */
+
+ if (s->r != 0)
+ k = s->r;
+ else
+ k = s->size;
+
+ speed_starttime ();
+ i = s->reps;
+ do /* body runs before the count is tested: assumes s->reps >= 1, since
+       a zero count would wrap the unsigned counter */
+ {
+ mpz_bin_ui (w, x, k);
+ }
+ while (--i != 0);
+ t = speed_endtime ();
+
+ mpz_clear (w); /* cleanup happens after the timer has been stopped */
+ mpz_clear (x);
+ return t;
+}
/* The multiplies are successively dependent so the latency is measured, not
the issue rate. There's only 10 per loop so the code doesn't get too big
SPEED_ROUTINE_UDIV_QRNND_B;
}
-double
-speed_udiv_qrnnd_preinv1 (struct speed_params *s)
-{
- SPEED_ROUTINE_UDIV_QRNND_A (1);
- {
- udiv_qrnnd_preinv1 (q, r, r, q, d, dinv);
- udiv_qrnnd_preinv1 (q, r, r, q, d, dinv);
- udiv_qrnnd_preinv1 (q, r, r, q, d, dinv);
- udiv_qrnnd_preinv1 (q, r, r, q, d, dinv);
- udiv_qrnnd_preinv1 (q, r, r, q, d, dinv);
- udiv_qrnnd_preinv1 (q, r, r, q, d, dinv);
- udiv_qrnnd_preinv1 (q, r, r, q, d, dinv);
- udiv_qrnnd_preinv1 (q, r, r, q, d, dinv);
- udiv_qrnnd_preinv1 (q, r, r, q, d, dinv);
- udiv_qrnnd_preinv1 (q, r, r, q, d, dinv);
- }
- SPEED_ROUTINE_UDIV_QRNND_B;
-}
-
-double
-speed_udiv_qrnnd_preinv2 (struct speed_params *s)
-{
- SPEED_ROUTINE_UDIV_QRNND_A (1);
- {
- udiv_qrnnd_preinv2 (q, r, r, q, d, dinv);
- udiv_qrnnd_preinv2 (q, r, r, q, d, dinv);
- udiv_qrnnd_preinv2 (q, r, r, q, d, dinv);
- udiv_qrnnd_preinv2 (q, r, r, q, d, dinv);
- udiv_qrnnd_preinv2 (q, r, r, q, d, dinv);
- udiv_qrnnd_preinv2 (q, r, r, q, d, dinv);
- udiv_qrnnd_preinv2 (q, r, r, q, d, dinv);
- udiv_qrnnd_preinv2 (q, r, r, q, d, dinv);
- udiv_qrnnd_preinv2 (q, r, r, q, d, dinv);
- udiv_qrnnd_preinv2 (q, r, r, q, d, dinv);
- }
- SPEED_ROUTINE_UDIV_QRNND_B;
-}
-
double
speed_udiv_qrnnd_c (struct speed_params *s)
{
--- /dev/null
+/* mpn/generic/hgcd_appr.c forced to use Lehmer's quadratic algorithm. */
+
+/*
+Copyright 2010, 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#undef HGCD_APPR_THRESHOLD /* discard whatever value gmp-impl.h left defined */
+#define HGCD_APPR_THRESHOLD MP_SIZE_T_MAX /* per the header comment: forces the Lehmer code path */
+#define __gmpn_hgcd_appr mpn_hgcd_appr_lehmer /* included code is compiled under the _lehmer names */
+#define __gmpn_hgcd_appr_itch mpn_hgcd_appr_lehmer_itch
+
+#include "../mpn/generic/hgcd_appr.c"
--- /dev/null
+/* mpn/generic/hgcd.c forced to use Lehmer's quadratic algorithm. */
+
+/*
+Copyright 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#undef HGCD_THRESHOLD /* discard whatever value gmp-impl.h left defined */
+#define HGCD_THRESHOLD MP_SIZE_T_MAX /* per the header comment: forces the Lehmer code path */
+#define __gmpn_hgcd mpn_hgcd_lehmer /* included code is compiled under the _lehmer names */
+#define __gmpn_hgcd_itch mpn_hgcd_lehmer_itch
+
+#include "../mpn/generic/hgcd.c"
--- /dev/null
+/* mpn/generic/hgcd_reduce.c forced to use hgcd. */
+
+/*
+Copyright 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#undef HGCD_REDUCE_THRESHOLD /* discard whatever value gmp-impl.h left defined */
+#define HGCD_REDUCE_THRESHOLD MP_SIZE_T_MAX /* per the header comment: forces the hgcd-based path */
+#define __gmpn_hgcd_reduce mpn_hgcd_reduce_1 /* included code is compiled under the _1 names */
+#define __gmpn_hgcd_reduce_itch mpn_hgcd_reduce_1_itch
+
+
+#include "../mpn/generic/hgcd_reduce.c"
--- /dev/null
+/* mpn/generic/hgcd_reduce.c forced to use hgcd_appr. */
+
+/*
+Copyright 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#undef HGCD_REDUCE_THRESHOLD /* discard whatever value gmp-impl.h left defined */
+#define HGCD_REDUCE_THRESHOLD 0 /* per the header comment: forces the hgcd_appr-based path */
+#define __gmpn_hgcd_reduce mpn_hgcd_reduce_2 /* included code is compiled under the _2 names */
+#define __gmpn_hgcd_reduce_itch mpn_hgcd_reduce_2_itch
+
+#include "../mpn/generic/hgcd_reduce.c"
--- /dev/null
+/* mpn/generic/jacbase.c method 4.
+
+Copyright 2002, 2010 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#undef JACOBI_BASE_METHOD /* discard whatever method gmp-impl.h left selected */
+#define JACOBI_BASE_METHOD 4 /* build the method-4 variant named in the header comment */
+#define __gmpn_jacobi_base mpn_jacobi_base_4 /* included code is compiled under the _4 name */
+
+#include "mpn/generic/jacbase.c"
--- /dev/null
+/* mpn/generic/mod_1_1.c method 1.
+
+Copyright 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#undef MOD_1_1P_METHOD /* discard whatever method gmp-impl.h left selected */
+#define MOD_1_1P_METHOD 1 /* build the method-1 variant named in the header comment */
+#undef mpn_mod_1_1p /* drop any existing macro definitions before renaming */
+#undef mpn_mod_1_1p_cps
+#define mpn_mod_1_1p mpn_mod_1_1p_1 /* included code is compiled under the _1 names */
+#define mpn_mod_1_1p_cps mpn_mod_1_1p_cps_1
+
+#include "mpn/generic/mod_1_1.c"
--- /dev/null
+/* mpn/generic/mod_1_1.c method 2.
+
+Copyright 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#include "gmp.h"
+#include "gmp-impl.h"
+
+#undef MOD_1_1P_METHOD /* discard whatever method gmp-impl.h left selected */
+#define MOD_1_1P_METHOD 2 /* build the method-2 variant named in the header comment */
+#undef mpn_mod_1_1p /* drop any existing macro definitions before renaming */
+#undef mpn_mod_1_1p_cps
+#define mpn_mod_1_1p mpn_mod_1_1p_2 /* included code is compiled under the _2 names */
+#define mpn_mod_1_1p_cps mpn_mod_1_1p_cps_2
+
+#include "mpn/generic/mod_1_1.c"
#define SPEED_EXTRA_PROTOS \
- double speed_mean_calls __GMP_PROTO ((struct speed_params *s)); \
- double speed_mean_open __GMP_PROTO ((struct speed_params *s)); \
- double speed_mean_open2 __GMP_PROTO ((struct speed_params *s));
+ double speed_mean_calls (struct speed_params *s); \
+ double speed_mean_open (struct speed_params *s); \
+ double speed_mean_open2 (struct speed_params *s);
#define SPEED_EXTRA_ROUTINES \
{ "mean_calls", speed_mean_calls }, \
/* Speed measuring program.
-Copyright 1999, 2000, 2001, 2002, 2003, 2005, 2006, 2008, 2009, 2010 Free
-Software Foundation, Inc.
+Copyright 1999, 2000, 2001, 2002, 2003, 2005, 2006, 2008, 2009, 2010,
+2011, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
{ "mpn_add_n", speed_mpn_add_n, FLAG_R_OPTIONAL },
{ "mpn_sub_n", speed_mpn_sub_n, FLAG_R_OPTIONAL },
+ { "mpn_add_err1_n", speed_mpn_add_err1_n },
+ { "mpn_add_err2_n", speed_mpn_add_err2_n },
+ { "mpn_add_err3_n", speed_mpn_add_err3_n },
+ { "mpn_sub_err1_n", speed_mpn_sub_err1_n },
+ { "mpn_sub_err2_n", speed_mpn_sub_err2_n },
+ { "mpn_sub_err3_n", speed_mpn_sub_err3_n },
+
#if HAVE_NATIVE_mpn_add_n_sub_n
{ "mpn_add_n_sub_n", speed_mpn_add_n_sub_n, FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_mul_4
{ "mpn_mul_4", speed_mpn_mul_4, FLAG_R_OPTIONAL },
#endif
+#if HAVE_NATIVE_mpn_mul_5
+ { "mpn_mul_5", speed_mpn_mul_5, FLAG_R_OPTIONAL },
+#endif
+#if HAVE_NATIVE_mpn_mul_6
+ { "mpn_mul_6", speed_mpn_mul_6, FLAG_R_OPTIONAL },
+#endif
{ "mpn_divrem_1", speed_mpn_divrem_1, FLAG_R },
{ "mpn_divrem_1f", speed_mpn_divrem_1f, FLAG_R },
{ "mpn_divrem_1c", speed_mpn_divrem_1c, FLAG_R },
{ "mpn_divrem_1cf", speed_mpn_divrem_1cf,FLAG_R },
#endif
- { "mpn_mod_1", speed_mpn_mod_1, FLAG_R_OPTIONAL },
+ { "mpn_mod_1", speed_mpn_mod_1, FLAG_R },
#if HAVE_NATIVE_mpn_mod_1c
- { "mpn_mod_1c", speed_mpn_mod_1c, FLAG_R_OPTIONAL },
+ { "mpn_mod_1c", speed_mpn_mod_1c, FLAG_R },
#endif
{ "mpn_preinv_divrem_1", speed_mpn_preinv_divrem_1, FLAG_R },
{ "mpn_preinv_divrem_1f", speed_mpn_preinv_divrem_1f, FLAG_R },
{ "mpn_preinv_mod_1", speed_mpn_preinv_mod_1, FLAG_R },
- { "mpn_mod_1_1", speed_mpn_mod_1_1, FLAG_R_OPTIONAL },
- { "mpn_mod_1s_2", speed_mpn_mod_1_2, FLAG_R_OPTIONAL },
- { "mpn_mod_1s_3", speed_mpn_mod_1_3, FLAG_R_OPTIONAL },
- { "mpn_mod_1s_4", speed_mpn_mod_1_4, FLAG_R_OPTIONAL },
+ { "mpn_mod_1_1", speed_mpn_mod_1_1, FLAG_R },
+ { "mpn_mod_1_1_1", speed_mpn_mod_1_1_1, FLAG_R },
+ { "mpn_mod_1_1_2", speed_mpn_mod_1_1_2, FLAG_R },
+ { "mpn_mod_1s_2", speed_mpn_mod_1_2, FLAG_R },
+ { "mpn_mod_1s_3", speed_mpn_mod_1_3, FLAG_R },
+ { "mpn_mod_1s_4", speed_mpn_mod_1_4, FLAG_R },
{ "mpn_divrem_1_div", speed_mpn_divrem_1_div, FLAG_R },
{ "mpn_divrem_1_inv", speed_mpn_divrem_1_inv, FLAG_R },
{ "mpn_divrem_2_div", speed_mpn_divrem_2_div, },
{ "mpn_divrem_2_inv", speed_mpn_divrem_2_inv, },
+ { "mpn_div_qr_2n", speed_mpn_div_qr_2n, },
+ { "mpn_div_qr_2u", speed_mpn_div_qr_2u, },
+
{ "mpn_divexact_1", speed_mpn_divexact_1, FLAG_R },
{ "mpn_divexact_by3", speed_mpn_divexact_by3 },
- { "mpn_bdiv_q_1", speed_mpn_bdiv_q_1, FLAG_R_OPTIONAL },
+ { "mpn_bdiv_q_1", speed_mpn_bdiv_q_1, FLAG_R },
{ "mpn_pi1_bdiv_q_1", speed_mpn_pi1_bdiv_q_1, FLAG_R_OPTIONAL },
{ "mpn_bdiv_dbm1c", speed_mpn_bdiv_dbm1c, FLAG_R_OPTIONAL },
{ "mpn_hgcd", speed_mpn_hgcd },
{ "mpn_hgcd_lehmer", speed_mpn_hgcd_lehmer },
+ { "mpn_hgcd_appr", speed_mpn_hgcd_appr },
+ { "mpn_hgcd_appr_lehmer", speed_mpn_hgcd_appr_lehmer },
+
+ { "mpn_hgcd_reduce", speed_mpn_hgcd_reduce },
+ { "mpn_hgcd_reduce_1", speed_mpn_hgcd_reduce_1 },
+ { "mpn_hgcd_reduce_2", speed_mpn_hgcd_reduce_2 },
{ "mpn_gcd_1", speed_mpn_gcd_1, FLAG_R_OPTIONAL },
{ "mpn_gcd_1N", speed_mpn_gcd_1N, FLAG_R_OPTIONAL },
{ "mpn_gcd", speed_mpn_gcd },
-#if 0
- { "mpn_gcd_binary", speed_mpn_gcd_binary },
- { "mpn_gcd_accel", speed_mpn_gcd_accel },
- { "find_a", speed_find_a, FLAG_NODATA },
-#endif
{ "mpn_gcdext", speed_mpn_gcdext },
{ "mpn_gcdext_single", speed_mpn_gcdext_single },
{ "mpn_jacobi_base_1", speed_mpn_jacobi_base_1 },
{ "mpn_jacobi_base_2", speed_mpn_jacobi_base_2 },
{ "mpn_jacobi_base_3", speed_mpn_jacobi_base_3 },
+ { "mpn_jacobi_base_4", speed_mpn_jacobi_base_4 },
{ "mpn_mul", speed_mpn_mul, FLAG_R_OPTIONAL },
{ "mpn_mul_basecase", speed_mpn_mul_basecase,FLAG_R_OPTIONAL },
#if HAVE_NATIVE_mpn_sqr_diagonal
{ "mpn_sqr_diagonal", speed_mpn_sqr_diagonal },
#endif
+#if HAVE_NATIVE_mpn_sqr_diag_addlsh1
+ { "mpn_sqr_diag_addlsh1", speed_mpn_sqr_diag_addlsh1 },
+#endif
{ "mpn_mul_n", speed_mpn_mul_n },
{ "mpn_sqr", speed_mpn_sqr },
{ "mpn_mullo_n", speed_mpn_mullo_n },
{ "mpn_mullo_basecase", speed_mpn_mullo_basecase },
+ { "mpn_mulmid_basecase", speed_mpn_mulmid_basecase, FLAG_R_OPTIONAL },
+ { "mpn_toom42_mulmid", speed_mpn_toom42_mulmid },
+ { "mpn_mulmid_n", speed_mpn_mulmid_n },
+ { "mpn_mulmid", speed_mpn_mulmid, FLAG_R_OPTIONAL },
+
{ "mpn_bc_mulmod_bnm1", speed_mpn_bc_mulmod_bnm1 },
{ "mpn_mulmod_bnm1", speed_mpn_mulmod_bnm1 },
{ "mpn_mulmod_bnm1_rounded", speed_mpn_mulmod_bnm1_rounded },
{ "mpn_sbpi1_bdiv_q", speed_mpn_sbpi1_bdiv_q },
{ "mpn_dcpi1_bdiv_q", speed_mpn_dcpi1_bdiv_q },
+ { "mpn_broot", speed_mpn_broot, FLAG_R },
+ { "mpn_broot_invm1", speed_mpn_broot_invm1, FLAG_R },
+ { "mpn_brootinv", speed_mpn_brootinv, FLAG_R },
+
{ "mpn_get_str", speed_mpn_get_str, FLAG_R_OPTIONAL },
{ "mpn_set_str", speed_mpn_set_str, FLAG_R_OPTIONAL },
{ "mpn_set_str_basecase", speed_mpn_bc_set_str, FLAG_R_OPTIONAL },
{ "mpz_add", speed_mpz_add },
{ "mpz_bin_uiui", speed_mpz_bin_uiui, FLAG_NODATA | FLAG_R_OPTIONAL },
+ { "mpz_bin_ui", speed_mpz_bin_ui, FLAG_NODATA | FLAG_R_OPTIONAL },
{ "mpz_fac_ui", speed_mpz_fac_ui, FLAG_NODATA },
{ "mpz_powm", speed_mpz_powm },
{ "mpz_powm_mod", speed_mpz_powm_mod },
{ "mpz_powm_redc", speed_mpz_powm_redc },
+ { "mpz_powm_sec", speed_mpz_powm_sec },
{ "mpz_powm_ui", speed_mpz_powm_ui, FLAG_R_OPTIONAL },
{ "mpz_mod", speed_mpz_mod },
#if HAVE_NATIVE_mpn_copyd
{ "mpn_copyd", speed_mpn_copyd },
#endif
+ { "mpn_tabselect", speed_mpn_tabselect, FLAG_R_OPTIONAL },
#if HAVE_NATIVE_mpn_addlsh1_n
- { "mpn_addlsh1_n", speed_mpn_addlsh1_n },
+ { "mpn_addlsh1_n", speed_mpn_addlsh1_n, FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_sublsh1_n
- { "mpn_sublsh1_n", speed_mpn_sublsh1_n },
+ { "mpn_sublsh1_n", speed_mpn_sublsh1_n, FLAG_R_OPTIONAL },
+#endif
+#if HAVE_NATIVE_mpn_addlsh1_n_ip1
+ { "mpn_addlsh1_n_ip1", speed_mpn_addlsh1_n_ip1 },
+#endif
+#if HAVE_NATIVE_mpn_addlsh1_n_ip2
+ { "mpn_addlsh1_n_ip2", speed_mpn_addlsh1_n_ip2 },
+#endif
+#if HAVE_NATIVE_mpn_sublsh1_n_ip1
+ { "mpn_sublsh1_n_ip1", speed_mpn_sublsh1_n_ip1 },
#endif
#if HAVE_NATIVE_mpn_rsblsh1_n
- { "mpn_rsblsh1_n", speed_mpn_rsblsh1_n },
+ { "mpn_rsblsh1_n", speed_mpn_rsblsh1_n, FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_addlsh2_n
- { "mpn_addlsh2_n", speed_mpn_addlsh2_n },
+ { "mpn_addlsh2_n", speed_mpn_addlsh2_n, FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_sublsh2_n
- { "mpn_sublsh2_n", speed_mpn_sublsh2_n },
+ { "mpn_sublsh2_n", speed_mpn_sublsh2_n, FLAG_R_OPTIONAL },
+#endif
+#if HAVE_NATIVE_mpn_addlsh2_n_ip1
+ { "mpn_addlsh2_n_ip1", speed_mpn_addlsh2_n_ip1 },
+#endif
+#if HAVE_NATIVE_mpn_addlsh2_n_ip2
+ { "mpn_addlsh2_n_ip2", speed_mpn_addlsh2_n_ip2 },
+#endif
+#if HAVE_NATIVE_mpn_sublsh2_n_ip1
+ { "mpn_sublsh2_n_ip1", speed_mpn_sublsh2_n_ip1 },
#endif
#if HAVE_NATIVE_mpn_rsblsh2_n
- { "mpn_rsblsh2_n", speed_mpn_rsblsh2_n },
+ { "mpn_rsblsh2_n", speed_mpn_rsblsh2_n, FLAG_R_OPTIONAL },
+#endif
+#if HAVE_NATIVE_mpn_addlsh_n
+ { "mpn_addlsh_n", speed_mpn_addlsh_n, FLAG_R_OPTIONAL },
+#endif
+#if HAVE_NATIVE_mpn_sublsh_n
+ { "mpn_sublsh_n", speed_mpn_sublsh_n, FLAG_R_OPTIONAL },
+#endif
+#if HAVE_NATIVE_mpn_addlsh_n_ip1
+ { "mpn_addlsh_n_ip1", speed_mpn_addlsh_n_ip1 },
+#endif
+#if HAVE_NATIVE_mpn_addlsh_n_ip2
+ { "mpn_addlsh_n_ip2", speed_mpn_addlsh_n_ip2 },
+#endif
+#if HAVE_NATIVE_mpn_sublsh_n_ip1
+ { "mpn_sublsh_n_ip1", speed_mpn_sublsh_n_ip1 },
+#endif
+#if HAVE_NATIVE_mpn_rsblsh_n
+ { "mpn_rsblsh_n", speed_mpn_rsblsh_n, FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_rsh1add_n
- { "mpn_rsh1add_n", speed_mpn_rsh1add_n },
+ { "mpn_rsh1add_n", speed_mpn_rsh1add_n, FLAG_R_OPTIONAL },
#endif
#if HAVE_NATIVE_mpn_rsh1sub_n
- { "mpn_rsh1sub_n", speed_mpn_rsh1sub_n },
+ { "mpn_rsh1sub_n", speed_mpn_rsh1sub_n, FLAG_R_OPTIONAL },
#endif
+ { "mpn_addcnd_n", speed_mpn_addcnd_n, FLAG_R_OPTIONAL },
+ { "mpn_subcnd_n", speed_mpn_subcnd_n, FLAG_R_OPTIONAL },
+
{ "MPN_ZERO", speed_MPN_ZERO },
{ "binvert_limb", speed_binvert_limb, FLAG_NODATA },
{ "count_trailing_zeros", speed_count_trailing_zeros, FLAG_NODATA | FLAG_R_OPTIONAL },
{ "udiv_qrnnd", speed_udiv_qrnnd, FLAG_R_OPTIONAL },
- { "udiv_qrnnd_preinv1", speed_udiv_qrnnd_preinv1, FLAG_R_OPTIONAL },
- { "udiv_qrnnd_preinv2", speed_udiv_qrnnd_preinv2, FLAG_R_OPTIONAL },
{ "udiv_qrnnd_c", speed_udiv_qrnnd_c, FLAG_R_OPTIONAL },
#if HAVE_NATIVE_mpn_udiv_qrnnd
{ "mpn_udiv_qrnnd", speed_mpn_udiv_qrnnd, FLAG_R_OPTIONAL },
fprintf (fp, "set key left\n");
/* designed to make it possible to see crossovers easily */
- fprintf (fp, "set data style lines\n");
+ fprintf (fp, "set style data lines\n");
fprintf (fp, "plot ");
for (i = 0; i < num_choices; i++)
/* Header for speed and threshold things.
-Copyright 1999, 2000, 2001, 2002, 2003, 2005, 2006, 2008, 2009, 2010 Free
-Software Foundation, Inc.
+Copyright 1999, 2000, 2001, 2002, 2003, 2005, 2006, 2008, 2009, 2010, 2011,
+2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
extern double speed_cycletime;
extern int speed_precision;
extern char speed_time_string[];
-void speed_time_init __GMP_PROTO ((void));
-void speed_cycletime_fail __GMP_PROTO ((const char *str));
-void speed_cycletime_init __GMP_PROTO ((void));
-void speed_cycletime_need_cycles __GMP_PROTO ((void));
-void speed_cycletime_need_seconds __GMP_PROTO ((void));
-void speed_starttime __GMP_PROTO ((void));
-double speed_endtime __GMP_PROTO ((void));
+void speed_time_init (void);
+void speed_cycletime_fail (const char *str);
+void speed_cycletime_init (void);
+void speed_cycletime_need_cycles (void);
+void speed_cycletime_need_seconds (void);
+void speed_starttime (void);
+double speed_endtime (void);
struct speed_params {
struct {
mp_ptr ptr;
mp_size_t size;
- } src[3], dst[3];
+ } src[5], dst[4];
};
-typedef double (*speed_function_t) __GMP_PROTO ((struct speed_params *s));
+typedef double (*speed_function_t) (struct speed_params *);
-double speed_measure __GMP_PROTO ((speed_function_t fun, struct speed_params *s));
+double speed_measure (speed_function_t fun, struct speed_params *);
/* Prototypes for speed measuring routines */
-double speed_back_to_back __GMP_PROTO ((struct speed_params *s));
-double speed_count_leading_zeros __GMP_PROTO ((struct speed_params *s));
-double speed_count_trailing_zeros __GMP_PROTO ((struct speed_params *s));
-double speed_find_a __GMP_PROTO ((struct speed_params *s));
-double speed_gmp_allocate_free __GMP_PROTO ((struct speed_params *s));
-double speed_gmp_allocate_reallocate_free __GMP_PROTO ((struct speed_params *s));
-double speed_invert_limb __GMP_PROTO ((struct speed_params *s));
-double speed_malloc_free __GMP_PROTO ((struct speed_params *s));
-double speed_malloc_realloc_free __GMP_PROTO ((struct speed_params *s));
-double speed_memcpy __GMP_PROTO ((struct speed_params *s));
-double speed_binvert_limb __GMP_PROTO ((struct speed_params *s));
-double speed_binvert_limb_mul1 __GMP_PROTO ((struct speed_params *s));
-double speed_binvert_limb_loop __GMP_PROTO ((struct speed_params *s));
-double speed_binvert_limb_cond __GMP_PROTO ((struct speed_params *s));
-double speed_binvert_limb_arith __GMP_PROTO ((struct speed_params *s));
-
-double speed_mpf_init_clear __GMP_PROTO ((struct speed_params *s));
-
-double speed_mpn_add_n __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_addlsh1_n __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_addlsh2_n __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_add_n_sub_n __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_and_n __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_andn_n __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_addmul_1 __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_addmul_2 __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_addmul_3 __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_addmul_4 __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_addmul_5 __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_addmul_6 __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_addmul_7 __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_addmul_8 __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_com __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_copyd __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_copyi __GMP_PROTO ((struct speed_params *s));
-double speed_MPN_COPY __GMP_PROTO ((struct speed_params *s));
-double speed_MPN_COPY_DECR __GMP_PROTO ((struct speed_params *s));
-double speed_MPN_COPY_INCR __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_divexact_1 __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_divexact_by3 __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_bdiv_q_1 __GMP_PROTO ((struct speed_params *));
-double speed_mpn_pi1_bdiv_q_1 __GMP_PROTO ((struct speed_params *));
-double speed_mpn_bdiv_dbm1c __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_divrem_1 __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_divrem_1f __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_divrem_1c __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_divrem_1cf __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_divrem_1_div __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_divrem_1f_div __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_divrem_1_inv __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_divrem_1f_inv __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_divrem_2 __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_divrem_2_div __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_divrem_2_inv __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_fib2_ui __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_matrix22_mul __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_hgcd __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_hgcd_lehmer __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_gcd __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_gcd_1 __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_gcd_1N __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_gcdext __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_gcdext_double __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_gcdext_one_double __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_gcdext_one_single __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_gcdext_single __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_get_str __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_hamdist __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_ior_n __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_iorn_n __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_jacobi_base __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_jacobi_base_1 __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_jacobi_base_2 __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_jacobi_base_3 __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_lshift __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_lshiftc __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_mod_1 __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_mod_1c __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_mod_1_div __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_mod_1_inv __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_mod_1_1 __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_mod_1_2 __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_mod_1_3 __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_mod_1_4 __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_mod_34lsub1 __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_modexact_1_odd __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_modexact_1c_odd __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_mul_1 __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_mul_1_inplace __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_mul_2 __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_mul_3 __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_mul_4 __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_mul __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_mul_basecase __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_mul_fft __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_mul_fft_sqr __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_fft_mul __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_fft_sqr __GMP_PROTO ((struct speed_params *s));
+double speed_back_to_back (struct speed_params *);
+double speed_count_leading_zeros (struct speed_params *);
+double speed_count_trailing_zeros (struct speed_params *);
+double speed_find_a (struct speed_params *);
+double speed_gmp_allocate_free (struct speed_params *);
+double speed_gmp_allocate_reallocate_free (struct speed_params *);
+double speed_invert_limb (struct speed_params *);
+double speed_malloc_free (struct speed_params *);
+double speed_malloc_realloc_free (struct speed_params *);
+double speed_memcpy (struct speed_params *);
+double speed_binvert_limb (struct speed_params *);
+double speed_binvert_limb_mul1 (struct speed_params *);
+double speed_binvert_limb_loop (struct speed_params *);
+double speed_binvert_limb_cond (struct speed_params *);
+double speed_binvert_limb_arith (struct speed_params *);
+
+double speed_mpf_init_clear (struct speed_params *);
+
+double speed_mpn_add_n (struct speed_params *);
+double speed_mpn_add_err1_n (struct speed_params *);
+double speed_mpn_add_err2_n (struct speed_params *);
+double speed_mpn_add_err3_n (struct speed_params *);
+double speed_mpn_addcnd_n (struct speed_params *);
+double speed_mpn_addlsh_n (struct speed_params *);
+double speed_mpn_addlsh1_n (struct speed_params *);
+double speed_mpn_addlsh2_n (struct speed_params *);
+double speed_mpn_addlsh_n_ip1 (struct speed_params *);
+double speed_mpn_addlsh1_n_ip1 (struct speed_params *);
+double speed_mpn_addlsh2_n_ip1 (struct speed_params *);
+double speed_mpn_addlsh_n_ip2 (struct speed_params *);
+double speed_mpn_addlsh1_n_ip2 (struct speed_params *);
+double speed_mpn_addlsh2_n_ip2 (struct speed_params *);
+double speed_mpn_add_n_sub_n (struct speed_params *);
+double speed_mpn_and_n (struct speed_params *);
+double speed_mpn_andn_n (struct speed_params *);
+double speed_mpn_addmul_1 (struct speed_params *);
+double speed_mpn_addmul_2 (struct speed_params *);
+double speed_mpn_addmul_3 (struct speed_params *);
+double speed_mpn_addmul_4 (struct speed_params *);
+double speed_mpn_addmul_5 (struct speed_params *);
+double speed_mpn_addmul_6 (struct speed_params *);
+double speed_mpn_addmul_7 (struct speed_params *);
+double speed_mpn_addmul_8 (struct speed_params *);
+double speed_mpn_com (struct speed_params *);
+double speed_mpn_copyd (struct speed_params *);
+double speed_mpn_copyi (struct speed_params *);
+double speed_MPN_COPY (struct speed_params *);
+double speed_MPN_COPY_DECR (struct speed_params *);
+double speed_MPN_COPY_INCR (struct speed_params *);
+double speed_mpn_tabselect (struct speed_params *);
+double speed_mpn_divexact_1 (struct speed_params *);
+double speed_mpn_divexact_by3 (struct speed_params *);
+double speed_mpn_bdiv_q_1 (struct speed_params *);
+double speed_mpn_pi1_bdiv_q_1 (struct speed_params *);
+double speed_mpn_bdiv_dbm1c (struct speed_params *);
+double speed_mpn_divrem_1 (struct speed_params *);
+double speed_mpn_divrem_1f (struct speed_params *);
+double speed_mpn_divrem_1c (struct speed_params *);
+double speed_mpn_divrem_1cf (struct speed_params *);
+double speed_mpn_divrem_1_div (struct speed_params *);
+double speed_mpn_divrem_1f_div (struct speed_params *);
+double speed_mpn_divrem_1_inv (struct speed_params *);
+double speed_mpn_divrem_1f_inv (struct speed_params *);
+double speed_mpn_divrem_2 (struct speed_params *);
+double speed_mpn_divrem_2_div (struct speed_params *);
+double speed_mpn_divrem_2_inv (struct speed_params *);
+double speed_mpn_div_qr_2n (struct speed_params *);
+double speed_mpn_div_qr_2u (struct speed_params *);
+double speed_mpn_fib2_ui (struct speed_params *);
+double speed_mpn_matrix22_mul (struct speed_params *);
+double speed_mpn_hgcd (struct speed_params *);
+double speed_mpn_hgcd_lehmer (struct speed_params *);
+double speed_mpn_hgcd_appr (struct speed_params *);
+double speed_mpn_hgcd_appr_lehmer (struct speed_params *);
+double speed_mpn_hgcd_reduce (struct speed_params *);
+double speed_mpn_hgcd_reduce_1 (struct speed_params *);
+double speed_mpn_hgcd_reduce_2 (struct speed_params *);
+double speed_mpn_gcd (struct speed_params *);
+double speed_mpn_gcd_1 (struct speed_params *);
+double speed_mpn_gcd_1N (struct speed_params *);
+double speed_mpn_gcdext (struct speed_params *);
+double speed_mpn_gcdext_double (struct speed_params *);
+double speed_mpn_gcdext_one_double (struct speed_params *);
+double speed_mpn_gcdext_one_single (struct speed_params *);
+double speed_mpn_gcdext_single (struct speed_params *);
+double speed_mpn_get_str (struct speed_params *);
+double speed_mpn_hamdist (struct speed_params *);
+double speed_mpn_ior_n (struct speed_params *);
+double speed_mpn_iorn_n (struct speed_params *);
+double speed_mpn_jacobi_base (struct speed_params *);
+double speed_mpn_jacobi_base_1 (struct speed_params *);
+double speed_mpn_jacobi_base_2 (struct speed_params *);
+double speed_mpn_jacobi_base_3 (struct speed_params *);
+double speed_mpn_jacobi_base_4 (struct speed_params *);
+double speed_mpn_lshift (struct speed_params *);
+double speed_mpn_lshiftc (struct speed_params *);
+double speed_mpn_mod_1 (struct speed_params *);
+double speed_mpn_mod_1c (struct speed_params *);
+double speed_mpn_mod_1_div (struct speed_params *);
+double speed_mpn_mod_1_inv (struct speed_params *);
+double speed_mpn_mod_1_1 (struct speed_params *);
+double speed_mpn_mod_1_1_1 (struct speed_params *);
+double speed_mpn_mod_1_1_2 (struct speed_params *);
+double speed_mpn_mod_1_2 (struct speed_params *);
+double speed_mpn_mod_1_3 (struct speed_params *);
+double speed_mpn_mod_1_4 (struct speed_params *);
+double speed_mpn_mod_34lsub1 (struct speed_params *);
+double speed_mpn_modexact_1_odd (struct speed_params *);
+double speed_mpn_modexact_1c_odd (struct speed_params *);
+double speed_mpn_mul_1 (struct speed_params *);
+double speed_mpn_mul_1_inplace (struct speed_params *);
+double speed_mpn_mul_2 (struct speed_params *);
+double speed_mpn_mul_3 (struct speed_params *);
+double speed_mpn_mul_4 (struct speed_params *);
+double speed_mpn_mul_5 (struct speed_params *);
+double speed_mpn_mul_6 (struct speed_params *);
+double speed_mpn_mul (struct speed_params *);
+double speed_mpn_mul_basecase (struct speed_params *);
+double speed_mpn_mulmid (struct speed_params *);
+double speed_mpn_mulmid_basecase (struct speed_params *);
+double speed_mpn_mul_fft (struct speed_params *);
+double speed_mpn_mul_fft_sqr (struct speed_params *);
+double speed_mpn_fft_mul (struct speed_params *);
+double speed_mpn_fft_sqr (struct speed_params *);
#if WANT_OLD_FFT_FULL
-double speed_mpn_mul_fft_full __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_mul_fft_full_sqr __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_mul_fft_full (struct speed_params *);
+double speed_mpn_mul_fft_full_sqr (struct speed_params *);
#endif
-double speed_mpn_nussbaumer_mul __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_nussbaumer_mul_sqr __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_mul_n __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_mul_n_sqr __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_mullo_n __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_mullo_basecase __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_nand_n __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_nior_n __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_popcount __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_preinv_divrem_1 __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_preinv_divrem_1f __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_preinv_mod_1 __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_sbpi1_div_qr __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_dcpi1_div_qr __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_sbpi1_divappr_q __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_dcpi1_divappr_q __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_mu_div_qr __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_mu_divappr_q __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_mupi_div_qr __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_mu_div_q __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_sbpi1_bdiv_qr __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_dcpi1_bdiv_qr __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_sbpi1_bdiv_q __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_dcpi1_bdiv_q __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_mu_bdiv_q __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_mu_bdiv_qr __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_invert __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_invertappr __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_ni_invertappr __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_binvert __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_redc_1 __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_redc_2 __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_redc_n __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_rsblsh1_n __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_rsblsh2_n __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_rsh1add_n __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_rsh1sub_n __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_rshift __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_sb_divrem_m3 __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_sb_divrem_m3_div __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_sb_divrem_m3_inv __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_set_str __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_bc_set_str __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_dc_set_str __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_set_str_pre __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_sqr_basecase __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_sqr_diagonal __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_sqr __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_sqrtrem __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_rootrem __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_sub_n __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_sublsh1_n __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_sublsh2_n __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_submul_1 __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_toom2_sqr __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_toom3_sqr __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_toom4_sqr __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_toom6_sqr __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_toom8_sqr __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_toom22_mul __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_toom33_mul __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_toom44_mul __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_toom6h_mul __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_toom8h_mul __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_toom32_mul __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_toom42_mul __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_toom43_mul __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_toom63_mul __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_toom32_for_toom43_mul __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_toom43_for_toom32_mul __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_toom32_for_toom53_mul __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_toom53_for_toom32_mul __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_toom42_for_toom53_mul __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_toom53_for_toom42_mul __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_mulmod_bnm1 __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_bc_mulmod_bnm1 __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_mulmod_bnm1_rounded __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_sqrmod_bnm1 __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_udiv_qrnnd __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_udiv_qrnnd_r __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_umul_ppmm __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_umul_ppmm_r __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_xnor_n __GMP_PROTO ((struct speed_params *s));
-double speed_mpn_xor_n __GMP_PROTO ((struct speed_params *s));
-double speed_MPN_ZERO __GMP_PROTO ((struct speed_params *s));
-
-double speed_mpq_init_clear __GMP_PROTO ((struct speed_params *s));
-
-double speed_mpz_add __GMP_PROTO ((struct speed_params *s));
-double speed_mpz_bin_uiui __GMP_PROTO ((struct speed_params *s));
-double speed_mpz_fac_ui __GMP_PROTO ((struct speed_params *s));
-double speed_mpz_fib_ui __GMP_PROTO ((struct speed_params *s));
-double speed_mpz_fib2_ui __GMP_PROTO ((struct speed_params *s));
-double speed_mpz_init_clear __GMP_PROTO ((struct speed_params *s));
-double speed_mpz_init_realloc_clear __GMP_PROTO ((struct speed_params *s));
-double speed_mpz_jacobi __GMP_PROTO ((struct speed_params *s));
-double speed_mpz_lucnum_ui __GMP_PROTO ((struct speed_params *s));
-double speed_mpz_lucnum2_ui __GMP_PROTO ((struct speed_params *s));
-double speed_mpz_mod __GMP_PROTO ((struct speed_params *s));
-double speed_mpz_powm __GMP_PROTO ((struct speed_params *s));
-double speed_mpz_powm_mod __GMP_PROTO ((struct speed_params *s));
-double speed_mpz_powm_redc __GMP_PROTO ((struct speed_params *s));
-double speed_mpz_powm_ui __GMP_PROTO ((struct speed_params *s));
-double speed_mpz_urandomb __GMP_PROTO ((struct speed_params *s));
-
-double speed_gmp_randseed __GMP_PROTO ((struct speed_params *s));
-double speed_gmp_randseed_ui __GMP_PROTO ((struct speed_params *s));
-
-double speed_noop __GMP_PROTO ((struct speed_params *s));
-double speed_noop_wxs __GMP_PROTO ((struct speed_params *s));
-double speed_noop_wxys __GMP_PROTO ((struct speed_params *s));
-
-double speed_operator_div __GMP_PROTO ((struct speed_params *s));
-double speed_operator_mod __GMP_PROTO ((struct speed_params *s));
-
-double speed_udiv_qrnnd __GMP_PROTO ((struct speed_params *s));
-double speed_udiv_qrnnd_preinv1 __GMP_PROTO ((struct speed_params *s));
-double speed_udiv_qrnnd_preinv2 __GMP_PROTO ((struct speed_params *s));
-double speed_udiv_qrnnd_c __GMP_PROTO ((struct speed_params *s));
-double speed_umul_ppmm __GMP_PROTO ((struct speed_params *s));
+double speed_mpn_nussbaumer_mul (struct speed_params *);
+double speed_mpn_nussbaumer_mul_sqr (struct speed_params *);
+double speed_mpn_mul_n (struct speed_params *);
+double speed_mpn_mul_n_sqr (struct speed_params *);
+double speed_mpn_mulmid_n (struct speed_params *);
+double speed_mpn_mullo_n (struct speed_params *);
+double speed_mpn_mullo_basecase (struct speed_params *);
+double speed_mpn_nand_n (struct speed_params *);
+double speed_mpn_nior_n (struct speed_params *);
+double speed_mpn_popcount (struct speed_params *);
+double speed_mpn_preinv_divrem_1 (struct speed_params *);
+double speed_mpn_preinv_divrem_1f (struct speed_params *);
+double speed_mpn_preinv_mod_1 (struct speed_params *);
+double speed_mpn_sbpi1_div_qr (struct speed_params *);
+double speed_mpn_dcpi1_div_qr (struct speed_params *);
+double speed_mpn_sbpi1_divappr_q (struct speed_params *);
+double speed_mpn_dcpi1_divappr_q (struct speed_params *);
+double speed_mpn_mu_div_qr (struct speed_params *);
+double speed_mpn_mu_divappr_q (struct speed_params *);
+double speed_mpn_mupi_div_qr (struct speed_params *);
+double speed_mpn_mu_div_q (struct speed_params *);
+double speed_mpn_sbpi1_bdiv_qr (struct speed_params *);
+double speed_mpn_dcpi1_bdiv_qr (struct speed_params *);
+double speed_mpn_sbpi1_bdiv_q (struct speed_params *);
+double speed_mpn_dcpi1_bdiv_q (struct speed_params *);
+double speed_mpn_mu_bdiv_q (struct speed_params *);
+double speed_mpn_mu_bdiv_qr (struct speed_params *);
+double speed_mpn_broot (struct speed_params *);
+double speed_mpn_broot_invm1 (struct speed_params *);
+double speed_mpn_brootinv (struct speed_params *);
+double speed_mpn_invert (struct speed_params *);
+double speed_mpn_invertappr (struct speed_params *);
+double speed_mpn_ni_invertappr (struct speed_params *);
+double speed_mpn_binvert (struct speed_params *);
+double speed_mpn_redc_1 (struct speed_params *);
+double speed_mpn_redc_2 (struct speed_params *);
+double speed_mpn_redc_n (struct speed_params *);
+double speed_mpn_rsblsh_n (struct speed_params *);
+double speed_mpn_rsblsh1_n (struct speed_params *);
+double speed_mpn_rsblsh2_n (struct speed_params *);
+double speed_mpn_rsh1add_n (struct speed_params *);
+double speed_mpn_rsh1sub_n (struct speed_params *);
+double speed_mpn_rshift (struct speed_params *);
+double speed_mpn_sb_divrem_m3 (struct speed_params *);
+double speed_mpn_sb_divrem_m3_div (struct speed_params *);
+double speed_mpn_sb_divrem_m3_inv (struct speed_params *);
+double speed_mpn_set_str (struct speed_params *);
+double speed_mpn_bc_set_str (struct speed_params *);
+double speed_mpn_dc_set_str (struct speed_params *);
+double speed_mpn_set_str_pre (struct speed_params *);
+double speed_mpn_sqr_basecase (struct speed_params *);
+double speed_mpn_sqr_diag_addlsh1 (struct speed_params *);
+double speed_mpn_sqr_diagonal (struct speed_params *);
+double speed_mpn_sqr (struct speed_params *);
+double speed_mpn_sqrtrem (struct speed_params *);
+double speed_mpn_rootrem (struct speed_params *);
+double speed_mpn_sub_n (struct speed_params *);
+double speed_mpn_sub_err1_n (struct speed_params *);
+double speed_mpn_sub_err2_n (struct speed_params *);
+double speed_mpn_sub_err3_n (struct speed_params *);
+double speed_mpn_subcnd_n (struct speed_params *);
+double speed_mpn_sublsh_n (struct speed_params *);
+double speed_mpn_sublsh1_n (struct speed_params *);
+double speed_mpn_sublsh2_n (struct speed_params *);
+double speed_mpn_sublsh_n_ip1 (struct speed_params *);
+double speed_mpn_sublsh1_n_ip1 (struct speed_params *);
+double speed_mpn_sublsh2_n_ip1 (struct speed_params *);
+double speed_mpn_submul_1 (struct speed_params *);
+double speed_mpn_toom2_sqr (struct speed_params *);
+double speed_mpn_toom3_sqr (struct speed_params *);
+double speed_mpn_toom4_sqr (struct speed_params *);
+double speed_mpn_toom6_sqr (struct speed_params *);
+double speed_mpn_toom8_sqr (struct speed_params *);
+double speed_mpn_toom22_mul (struct speed_params *);
+double speed_mpn_toom33_mul (struct speed_params *);
+double speed_mpn_toom44_mul (struct speed_params *);
+double speed_mpn_toom6h_mul (struct speed_params *);
+double speed_mpn_toom8h_mul (struct speed_params *);
+double speed_mpn_toom32_mul (struct speed_params *);
+double speed_mpn_toom42_mul (struct speed_params *);
+double speed_mpn_toom43_mul (struct speed_params *);
+double speed_mpn_toom63_mul (struct speed_params *);
+double speed_mpn_toom32_for_toom43_mul (struct speed_params *);
+double speed_mpn_toom43_for_toom32_mul (struct speed_params *);
+double speed_mpn_toom32_for_toom53_mul (struct speed_params *);
+double speed_mpn_toom53_for_toom32_mul (struct speed_params *);
+double speed_mpn_toom42_for_toom53_mul (struct speed_params *);
+double speed_mpn_toom53_for_toom42_mul (struct speed_params *);
+double speed_mpn_toom43_for_toom54_mul (struct speed_params *);
+double speed_mpn_toom54_for_toom43_mul (struct speed_params *);
+double speed_mpn_toom42_mulmid (struct speed_params *);
+double speed_mpn_mulmod_bnm1 (struct speed_params *);
+double speed_mpn_bc_mulmod_bnm1 (struct speed_params *);
+double speed_mpn_mulmod_bnm1_rounded (struct speed_params *);
+double speed_mpn_sqrmod_bnm1 (struct speed_params *);
+double speed_mpn_udiv_qrnnd (struct speed_params *);
+double speed_mpn_udiv_qrnnd_r (struct speed_params *);
+double speed_mpn_umul_ppmm (struct speed_params *);
+double speed_mpn_umul_ppmm_r (struct speed_params *);
+double speed_mpn_xnor_n (struct speed_params *);
+double speed_mpn_xor_n (struct speed_params *);
+double speed_MPN_ZERO (struct speed_params *);
+
+double speed_mpq_init_clear (struct speed_params *);
+
+double speed_mpz_add (struct speed_params *);
+double speed_mpz_bin_uiui (struct speed_params *);
+double speed_mpz_bin_ui (struct speed_params *);
+double speed_mpz_fac_ui (struct speed_params *);
+double speed_mpz_fib_ui (struct speed_params *);
+double speed_mpz_fib2_ui (struct speed_params *);
+double speed_mpz_init_clear (struct speed_params *);
+double speed_mpz_init_realloc_clear (struct speed_params *);
+double speed_mpz_jacobi (struct speed_params *);
+double speed_mpz_lucnum_ui (struct speed_params *);
+double speed_mpz_lucnum2_ui (struct speed_params *);
+double speed_mpz_mod (struct speed_params *);
+double speed_mpz_powm (struct speed_params *);
+double speed_mpz_powm_mod (struct speed_params *);
+double speed_mpz_powm_redc (struct speed_params *);
+double speed_mpz_powm_sec (struct speed_params *);
+double speed_mpz_powm_ui (struct speed_params *);
+double speed_mpz_urandomb (struct speed_params *);
+
+double speed_gmp_randseed (struct speed_params *);
+double speed_gmp_randseed_ui (struct speed_params *);
+
+double speed_noop (struct speed_params *);
+double speed_noop_wxs (struct speed_params *);
+double speed_noop_wxys (struct speed_params *);
+
+double speed_operator_div (struct speed_params *);
+double speed_operator_mod (struct speed_params *);
+
+double speed_udiv_qrnnd (struct speed_params *);
+double speed_udiv_qrnnd_preinv1 (struct speed_params *);
+double speed_udiv_qrnnd_preinv2 (struct speed_params *);
+double speed_udiv_qrnnd_preinv3 (struct speed_params *);
+double speed_udiv_qrnnd_c (struct speed_params *);
+double speed_umul_ppmm (struct speed_params *);
/* Prototypes for other routines */
/* low 32-bits in p[0], high 32-bits in p[1] */
-void speed_cyclecounter __GMP_PROTO ((unsigned p[2]));
+void speed_cyclecounter (unsigned p[2]);
-void mftb_function __GMP_PROTO ((unsigned p[2]));
+void mftb_function (unsigned p[2]);
/* In i386 gcc -fPIC, ebx is a fixed register and can't be declared a dummy
output or a clobber for the cpuid, hence an explicit save and restore. A
clobber as such doesn't provoke an error unfortunately (gcc 3.0), so use
the dummy output style in non-PIC, so there's an error if somehow -fPIC
- is used without a -DPIC to tell us about it. */
+ is used without a -DPIC to tell us about it. */
#if defined(__GNUC__) && ! defined (NO_ASM) \
&& (defined (__i386__) || defined (__i486__))
#if defined (PIC) || defined (__APPLE_CC__)
#endif
#endif
-double speed_cyclecounter_diff __GMP_PROTO ((const unsigned [2], const unsigned [2]));
-int gettimeofday_microseconds_p __GMP_PROTO ((void));
-int getrusage_microseconds_p __GMP_PROTO ((void));
-int cycles_works_p __GMP_PROTO ((void));
-long clk_tck __GMP_PROTO ((void));
-double freq_measure __GMP_PROTO ((const char *, double (*)(void)));
-
-int double_cmp_ptr __GMP_PROTO ((const double *, const double *));
-void pentium_wbinvd __GMP_PROTO ((void));
-typedef int (*qsort_function_t) __GMP_PROTO ((const void *, const void *));
-
-void noop __GMP_PROTO ((void));
-void noop_1 __GMP_PROTO ((mp_limb_t));
-void noop_wxs __GMP_PROTO ((mp_ptr, mp_srcptr, mp_size_t));
-void noop_wxys __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
-void mpn_cache_fill __GMP_PROTO ((mp_srcptr, mp_size_t));
-void mpn_cache_fill_dummy __GMP_PROTO ((mp_limb_t));
-void speed_cache_fill __GMP_PROTO ((struct speed_params *));
-void speed_operand_src __GMP_PROTO ((struct speed_params *, mp_ptr, mp_size_t));
-void speed_operand_dst __GMP_PROTO ((struct speed_params *, mp_ptr, mp_size_t));
+double speed_cyclecounter_diff (const unsigned [2], const unsigned [2]);
+int gettimeofday_microseconds_p (void);
+int getrusage_microseconds_p (void);
+int cycles_works_p (void);
+long clk_tck (void);
+double freq_measure (const char *, double (*)(void));
+
+int double_cmp_ptr (const double *, const double *);
+void pentium_wbinvd (void);
+typedef int (*qsort_function_t) (const void *, const void *);
+
+void noop (void);
+void noop_1 (mp_limb_t);
+void noop_wxs (mp_ptr, mp_srcptr, mp_size_t);
+void noop_wxys (mp_ptr, mp_srcptr, mp_srcptr, mp_size_t);
+void mpn_cache_fill (mp_srcptr, mp_size_t);
+void mpn_cache_fill_dummy (mp_limb_t);
+void speed_cache_fill (struct speed_params *);
+void speed_operand_src (struct speed_params *, mp_ptr, mp_size_t);
+void speed_operand_dst (struct speed_params *, mp_ptr, mp_size_t);
extern int speed_option_addrs;
extern int speed_option_verbose;
-void speed_option_set __GMP_PROTO((const char *));
+extern int speed_option_cycles_broken;
+void speed_option_set (const char *);
-mp_limb_t mpn_divrem_1_div __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t));
-mp_limb_t mpn_divrem_1_inv __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t));
-mp_limb_t mpn_divrem_2_div __GMP_PROTO ((mp_ptr, mp_size_t, mp_ptr, mp_size_t, mp_srcptr));
-mp_limb_t mpn_divrem_2_inv __GMP_PROTO ((mp_ptr, mp_size_t, mp_ptr, mp_size_t, mp_srcptr));
+mp_limb_t mpn_divrem_1_div (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t mpn_divrem_1_inv (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t mpn_divrem_2_div (mp_ptr, mp_size_t, mp_ptr, mp_size_t, mp_srcptr);
+mp_limb_t mpn_divrem_2_inv (mp_ptr, mp_size_t, mp_ptr, mp_size_t, mp_srcptr);
-int mpn_jacobi_base_1 __GMP_PROTO ((mp_limb_t, mp_limb_t, int));
-int mpn_jacobi_base_2 __GMP_PROTO ((mp_limb_t, mp_limb_t, int));
-int mpn_jacobi_base_3 __GMP_PROTO ((mp_limb_t, mp_limb_t, int));
+int mpn_jacobi_base_1 (mp_limb_t, mp_limb_t, int);
+int mpn_jacobi_base_2 (mp_limb_t, mp_limb_t, int);
+int mpn_jacobi_base_3 (mp_limb_t, mp_limb_t, int);
+int mpn_jacobi_base_4 (mp_limb_t, mp_limb_t, int);
-mp_limb_t mpn_mod_1_div __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t));
-mp_limb_t mpn_mod_1_inv __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t));
+mp_limb_t mpn_mod_1_div (mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t mpn_mod_1_inv (mp_srcptr, mp_size_t, mp_limb_t);
-mp_size_t mpn_gcd_binary
- __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_ptr, mp_size_t));
-mp_size_t mpn_gcd_accel
- __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_ptr, mp_size_t));
-mp_size_t mpn_gcdext_one_double
- __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t));
-mp_size_t mpn_gcdext_one_single
- __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t));
-mp_size_t mpn_gcdext_single
- __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t));
-mp_size_t mpn_gcdext_double
- __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t));
+mp_limb_t mpn_mod_1_1p_1 (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [4]);
+mp_limb_t mpn_mod_1_1p_2 (mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t [4]);
-mp_limb_t mpn_sb_divrem_mn_div __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t));
-mp_limb_t mpn_sb_divrem_mn_inv __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t));
+void mpn_mod_1_1p_cps_1 (mp_limb_t [4], mp_limb_t);
+void mpn_mod_1_1p_cps_2 (mp_limb_t [4], mp_limb_t);
-mp_size_t mpn_set_str_basecase __GMP_PROTO ((mp_ptr, const unsigned char *, size_t, int));
-void mpn_pre_set_str __GMP_PROTO ((mp_ptr, unsigned char *, size_t, powers_t *, mp_ptr));
+mp_size_t mpn_gcdext_one_double (mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t);
+mp_size_t mpn_gcdext_one_single (mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t);
+mp_size_t mpn_gcdext_single (mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t);
+mp_size_t mpn_gcdext_double (mp_ptr, mp_ptr, mp_size_t *, mp_ptr, mp_size_t, mp_ptr, mp_size_t);
+mp_size_t mpn_hgcd_lehmer (mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr);
+mp_size_t mpn_hgcd_lehmer_itch (mp_size_t);
-void mpz_powm_mod __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr, mpz_srcptr));
-void mpz_powm_redc __GMP_PROTO ((mpz_ptr, mpz_srcptr, mpz_srcptr, mpz_srcptr));
+mp_size_t mpn_hgcd_appr_lehmer (mp_ptr, mp_ptr, mp_size_t, struct hgcd_matrix *, mp_ptr);
+mp_size_t mpn_hgcd_appr_lehmer_itch (mp_size_t);
-int speed_routine_count_zeros_setup
- __GMP_PROTO ((struct speed_params *, mp_ptr, int, int));
+mp_size_t mpn_hgcd_reduce_1 (struct hgcd_matrix *, mp_ptr, mp_ptr, mp_size_t, mp_size_t, mp_ptr);
+mp_size_t mpn_hgcd_reduce_1_itch (mp_size_t, mp_size_t);
+
+mp_size_t mpn_hgcd_reduce_2 (struct hgcd_matrix *, mp_ptr, mp_ptr, mp_size_t, mp_size_t, mp_ptr);
+mp_size_t mpn_hgcd_reduce_2_itch (mp_size_t, mp_size_t);
+
+mp_limb_t mpn_sb_divrem_mn_div (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t);
+mp_limb_t mpn_sb_divrem_mn_inv (mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t);
+
+mp_size_t mpn_set_str_basecase (mp_ptr, const unsigned char *, size_t, int);
+void mpn_pre_set_str (mp_ptr, unsigned char *, size_t, powers_t *, mp_ptr);
+
+void mpz_powm_mod (mpz_ptr, mpz_srcptr, mpz_srcptr, mpz_srcptr);
+void mpz_powm_redc (mpz_ptr, mpz_srcptr, mpz_srcptr, mpz_srcptr);
+
+int speed_routine_count_zeros_setup (struct speed_params *, mp_ptr, int, int);
/* "get" is called repeatedly until it ticks over, just in case on a fast
#define SPEED_RESTRICT_COND(cond) if (!(cond)) return -1.0;
/* For mpn_copy or similar. */
-#define SPEED_ROUTINE_MPN_COPY(function) \
+#define SPEED_ROUTINE_MPN_COPY_CALL(call) \
{ \
mp_ptr wp; \
unsigned i; \
speed_starttime (); \
i = s->reps; \
do \
- function (wp, s->xp, s->size); \
+ call; \
while (--i != 0); \
t = speed_endtime (); \
\
TMP_FREE; \
return t; \
}
+#define SPEED_ROUTINE_MPN_COPY(function) \
+ SPEED_ROUTINE_MPN_COPY_CALL (function (wp, s->xp, s->size))
+
+#define SPEED_ROUTINE_MPN_TABSELECT(function) \
+ SPEED_ROUTINE_MPN_COPY_CALL (function (wp, s->xp, s->size, 1, s->r))
#define SPEED_ROUTINE_MPN_COPYC(function) \
{ \
}
/* s->size is still in limbs, and it's limbs which are copied, but
- "function" takes a size in bytes not limbs. */
+ "function" takes a size in bytes not limbs. */
#define SPEED_ROUTINE_MPN_COPY_BYTES(function) \
{ \
mp_ptr wp; \
return t; \
}
+
+/* For mpn_aors_errK_n, where 1 <= K <= 3. */
+#define SPEED_ROUTINE_MPN_BINARY_ERR_N_CALL(call, K) \
+ { \
+ mp_ptr wp; \
+ mp_ptr xp, yp; \
+ mp_ptr zp[K]; \
+ mp_limb_t ep[2*K]; \
+ unsigned i; \
+ double t; \
+ TMP_DECL; \
+ \
+ SPEED_RESTRICT_COND (s->size >= 1); \
+ \
+ TMP_MARK; \
+ SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp); \
+ \
+ /* (don't have a mechanism to specify zp alignments) */ \
+ for (i = 0; i < K; i++) \
+ SPEED_TMP_ALLOC_LIMBS (zp[i], s->size, 0); \
+ \
+ xp = s->xp; \
+ yp = s->yp; \
+ \
+ if (s->r == 0) ; \
+ else if (s->r == 1) { xp = wp; } \
+ else if (s->r == 2) { yp = wp; } \
+ else if (s->r == 3) { xp = wp; yp = wp; } \
+ else if (s->r == 4) { yp = xp; } \
+ else { \
+ TMP_FREE; \
+ return -1.0; \
+ } \
+ \
+ /* initialize wp if operand overlap */ \
+ if (xp == wp || yp == wp) \
+ MPN_COPY (wp, s->xp, s->size); \
+ \
+ speed_operand_src (s, xp, s->size); \
+ speed_operand_src (s, yp, s->size); \
+ for (i = 0; i < K; i++) \
+ speed_operand_src (s, zp[i], s->size); \
+ speed_operand_dst (s, wp, s->size); \
+ speed_cache_fill (s); \
+ \
+ speed_starttime (); \
+ i = s->reps; \
+ do \
+ call; \
+ while (--i != 0); \
+ t = speed_endtime (); \
+ \
+ TMP_FREE; \
+ return t; \
+ }
+
+#define SPEED_ROUTINE_MPN_BINARY_ERR1_N(function) \
+ SPEED_ROUTINE_MPN_BINARY_ERR_N_CALL ((*function) (wp, xp, yp, ep, zp[0], s->size, 0), 1)
+
+#define SPEED_ROUTINE_MPN_BINARY_ERR2_N(function) \
+ SPEED_ROUTINE_MPN_BINARY_ERR_N_CALL ((*function) (wp, xp, yp, ep, zp[0], zp[1], s->size, 0), 2)
+
+#define SPEED_ROUTINE_MPN_BINARY_ERR3_N(function) \
+ SPEED_ROUTINE_MPN_BINARY_ERR_N_CALL ((*function) (wp, xp, yp, ep, zp[0], zp[1], zp[2], s->size, 0), 3)
+
+
/* For mpn_add_n, mpn_sub_n, or similar. */
#define SPEED_ROUTINE_MPN_ADDSUB_N_CALL(call) \
{ \
/* For mpn_mul, mpn_mul_basecase, xsize=r, ysize=s->size. */
#define SPEED_ROUTINE_MPN_MUL(function) \
{ \
- mp_ptr wp, xp; \
+ mp_ptr wp; \
mp_size_t size1; \
unsigned i; \
double t; \
TMP_DECL; \
\
size1 = (s->r == 0 ? s->size : s->r); \
+ if (size1 < 0) size1 = -size1 - s->size; \
\
- SPEED_RESTRICT_COND (s->size >= 1); \
- SPEED_RESTRICT_COND (size1 >= s->size); \
+ SPEED_RESTRICT_COND (size1 >= 1); \
+ SPEED_RESTRICT_COND (s->size >= size1); \
\
TMP_MARK; \
SPEED_TMP_ALLOC_LIMBS (wp, size1 + s->size, s->align_wp); \
- SPEED_TMP_ALLOC_LIMBS (xp, size1, s->align_xp); \
\
- speed_operand_src (s, xp, size1); \
- speed_operand_src (s, s->yp, s->size); \
+ speed_operand_src (s, s->xp, s->size); \
+ speed_operand_src (s, s->yp, size1); \
speed_operand_dst (s, wp, size1 + s->size); \
speed_cache_fill (s); \
\
speed_starttime (); \
i = s->reps; \
do \
- function (wp, xp, size1, s->yp, s->size); \
+ function (wp, s->xp, s->size, s->yp, size1); \
while (--i != 0); \
t = speed_endtime (); \
\
return t; \
}
+/* For mpn_mulmid, mpn_mulmid_basecase, xsize=r, ysize=s->size.  When s->r is
+   0, xsize defaults to 2*s->size-1.  The destination is given
+   xsize-ysize+3 limbs.  xp is a temporary allocated here; nothing in this
+   macro copies data into it. */
+#define SPEED_ROUTINE_MPN_MULMID(function) \
+ { \
+ mp_ptr wp, xp; \
+ mp_size_t size1; \
+ unsigned i; \
+ double t; \
+ TMP_DECL; \
+ \
+ size1 = (s->r == 0 ? (2 * s->size - 1) : s->r); \
+ \
+ SPEED_RESTRICT_COND (s->size >= 1); \
+ SPEED_RESTRICT_COND (size1 >= s->size); /* the first operand must not be the shorter one */ \
+ \
+ TMP_MARK; \
+ SPEED_TMP_ALLOC_LIMBS (wp, size1 - s->size + 3, s->align_wp); \
+ SPEED_TMP_ALLOC_LIMBS (xp, size1, s->align_xp); \
+ \
+ speed_operand_src (s, xp, size1); \
+ speed_operand_src (s, s->yp, s->size); \
+ speed_operand_dst (s, wp, size1 - s->size + 3); \
+ speed_cache_fill (s); \
+ \
+ speed_starttime (); \
+ i = s->reps; \
+ do \
+ function (wp, xp, size1, s->yp, s->size); \
+ while (--i != 0); \
+ t = speed_endtime (); \
+ \
+ TMP_FREE; \
+ return t; \
+ }
+
+#define SPEED_ROUTINE_MPN_MULMID_N(function) /* times function (wp, xp, yp, n) with n = s->size */ \
+ { \
+ mp_ptr wp, xp; \
+ mp_size_t size1; \
+ unsigned i; \
+ double t; \
+ TMP_DECL; \
+ \
+ size1 = 2 * s->size - 1; /* length of the first operand */ \
+ \
+ SPEED_RESTRICT_COND (s->size >= 1); \
+ \
+ TMP_MARK; \
+ SPEED_TMP_ALLOC_LIMBS (wp, size1 - s->size + 3, s->align_wp); /* i.e. s->size + 2 limbs */ \
+ SPEED_TMP_ALLOC_LIMBS (xp, size1, s->align_xp); /* temporary; not filled by this macro */ \
+ \
+ speed_operand_src (s, xp, size1); \
+ speed_operand_src (s, s->yp, s->size); \
+ speed_operand_dst (s, wp, size1 - s->size + 3); \
+ speed_cache_fill (s); \
+ \
+ speed_starttime (); \
+ i = s->reps; \
+ do \
+ function (wp, xp, s->yp, s->size); \
+ while (--i != 0); \
+ t = speed_endtime (); \
+ \
+ TMP_FREE; \
+ return t; \
+ }
+
+#define SPEED_ROUTINE_MPN_TOOM42_MULMID(function) /* times function (wp, xp, yp, n, scratch) with n = s->size */ \
+ { \
+ mp_ptr wp, xp, scratch; \
+ mp_size_t size1, scratch_size; \
+ unsigned i; \
+ double t; \
+ TMP_DECL; \
+ \
+ size1 = 2 * s->size - 1; /* length of the first operand */ \
+ \
+ SPEED_RESTRICT_COND (s->size >= 1); \
+ \
+ TMP_MARK; \
+ SPEED_TMP_ALLOC_LIMBS (wp, size1 - s->size + 3, s->align_wp); \
+ SPEED_TMP_ALLOC_LIMBS (xp, size1, s->align_xp); /* temporary; not filled by this macro */ \
+ scratch_size = mpn_toom42_mulmid_itch (s->size); \
+ SPEED_TMP_ALLOC_LIMBS (scratch, scratch_size, 0); /* scratch is not registered as a speed operand */ \
+ \
+ speed_operand_src (s, xp, size1); \
+ speed_operand_src (s, s->yp, s->size); \
+ speed_operand_dst (s, wp, size1 - s->size + 3); \
+ speed_cache_fill (s); \
+ \
+ speed_starttime (); \
+ i = s->reps; \
+ do \
+ function (wp, xp, s->yp, s->size, scratch); \
+ while (--i != 0); \
+ t = speed_endtime (); \
+ \
+ TMP_FREE; \
+ return t; \
+ }
+
#define SPEED_ROUTINE_MPN_MULMOD_BNM1_CALL(call) \
{ \
mp_ptr wp, tp; \
mpn_toom53_mul_itch (s->size, 11*s->size/20), \
MPN_TOOM53_MUL_MINSIZE)
+#define SPEED_ROUTINE_MPN_TOOM43_FOR_TOOM54_MUL(function) /* toom43 side of the toom43/toom54 comparison, operands s->size and 5*s->size/6 */ \
+ SPEED_ROUTINE_MPN_MUL_N_TSPACE \
+ (function (wp, s->xp, s->size, s->yp, 5*s->size/6, tspace), \
+ mpn_toom43_mul_itch (s->size, 5*s->size/6), /* scratch sized by the itch function of the routine being timed */ \
+ MPN_TOOM54_MUL_MINSIZE)
+#define SPEED_ROUTINE_MPN_TOOM54_FOR_TOOM43_MUL(function) /* toom54 side of the toom43/toom54 comparison, operands s->size and 5*s->size/6 */ \
+ SPEED_ROUTINE_MPN_MUL_N_TSPACE \
+ (function (wp, s->xp, s->size, s->yp, 5*s->size/6, tspace), \
+ mpn_toom54_mul_itch (s->size, 5*s->size/6), /* scratch size for tspace */ \
+ MPN_TOOM54_MUL_MINSIZE)
+
#define SPEED_ROUTINE_MPN_SQR_CALL(call) \
#define SPEED_ROUTINE_MPN_SQR(function) \
SPEED_ROUTINE_MPN_SQR_CALL (function (wp, s->xp, s->size))
-#define SPEED_ROUTINE_MPN_SQR_DIAGONAL(function) \
- SPEED_ROUTINE_MPN_SQR (function)
-
+#define SPEED_ROUTINE_MPN_SQR_DIAG_ADDLSH1_CALL(call) /* CALL may use wp (2n limbs, dst), tp (2n limbs, src) and s->xp (n limbs), n = s->size */ \
+ { \
+ mp_ptr wp, tp; \
+ unsigned i; \
+ double t; \
+ TMP_DECL; \
+ \
+ SPEED_RESTRICT_COND (s->size >= 2); \
+ \
+ TMP_MARK; \
+ SPEED_TMP_ALLOC_LIMBS (tp, 2 * s->size, s->align_wp); /* tp shares wp's alignment setting */ \
+ SPEED_TMP_ALLOC_LIMBS (wp, 2 * s->size, s->align_wp); \
+ \
+ speed_operand_src (s, s->xp, s->size); \
+ speed_operand_src (s, tp, 2 * s->size); \
+ speed_operand_dst (s, wp, 2 * s->size); \
+ speed_cache_fill (s); \
+ \
+ speed_starttime (); \
+ i = s->reps; \
+ do \
+ call; \
+ while (--i != 0); \
+ t = speed_endtime () / 2; /* NOTE(review): the measured time is halved; the reason is not visible here */ \
+ \
+ TMP_FREE; \
+ return t; \
+ }
#define SPEED_ROUTINE_MPN_SQR_TSPACE(call, tsize, minsize) \
{ \
i = s->reps; \
do { \
pfunc (inv, s->r); \
- function (s->xp, s->size, s->r, inv); \
+ function (s->xp, s->size, s->r << inv[1], inv); \
} while (--i != 0); \
\
return speed_endtime (); \
#define SPEED_ROUTINE_MPN_MUPI_DIV_QR(function,itchfn) \
{ \
unsigned i; \
- mp_ptr dp, tp, qp, rp, ip, scratch; \
+ mp_ptr dp, tp, qp, rp, ip, scratch, tmp; \
double t; \
- mp_size_t size1, itch; \
+ mp_size_t size1, itch; \
TMP_DECL; \
\
size1 = (s->r == 0 ? 2 * s->size : s->r); \
dp[s->size-1] |= GMP_NUMB_HIGHBIT; \
tp[size1 - 1] = dp[s->size-1] - 1; \
\
- mpn_invert (ip, dp, s->size, NULL); \
+ tmp = TMP_ALLOC_LIMBS (mpn_invert_itch (s->size)); \
+ mpn_invert (ip, dp, s->size, tmp); \
\
speed_operand_dst (s, qp, size1 - s->size); \
speed_operand_dst (s, rp, s->size); \
return t; \
}
+#define SPEED_ROUTINE_MPN_BROOT(function) /* times function (wp, xp, n, k) with k = s->r */ \
+ { \
+ SPEED_RESTRICT_COND (s->r & 1); /* only odd s->r is measured */ \
+ s->xp[0] |= 1; /* make the operand odd; note this modifies s->xp in place */ \
+ SPEED_ROUTINE_MPN_UNARY_1_CALL \
+ ((*function) (wp, s->xp, s->size, s->r)); \
+ }
+
+#define SPEED_ROUTINE_MPN_BROOTINV(function, itch) /* times function (wp, xp, n, k, tp); ITCH is the limb count allocated for tp */ \
+ { \
+ mp_ptr wp, tp; \
+ unsigned i; \
+ double t; \
+ TMP_DECL; \
+ TMP_MARK; \
+ SPEED_RESTRICT_COND (s->size >= 1); \
+ SPEED_RESTRICT_COND (s->r & 1); /* only odd s->r is measured */ \
+ wp = TMP_ALLOC_LIMBS (s->size); /* plain allocation: s->align_wp is not applied here */ \
+ tp = TMP_ALLOC_LIMBS ( (itch)); \
+ s->xp[0] |= 1; /* make the operand odd; note this modifies s->xp in place */ \
+ \
+ speed_operand_src (s, s->xp, s->size); \
+ speed_operand_dst (s, wp, s->size); \
+ speed_cache_fill (s); \
+ \
+ speed_starttime (); \
+ i = s->reps; \
+ do \
+ (*function) (wp, s->xp, s->size, s->r, tp); \
+ while (--i != 0); \
+ t = speed_endtime (); \
+ \
+ TMP_FREE; \
+ return t; \
+ }
+
#define SPEED_ROUTINE_MPN_INVERT(function,itchfn) \
{ \
long i; \
function (px[j-1], py[j-1], 0))
+#define SPEED_ROUTINE_MPN_HGCD_CALL(func, itchfunc) /* times func (ap, bp, n, &hgcd, wp); itchfunc (n) gives the size of wp */ \
+ { \
+ mp_size_t hgcd_init_itch, hgcd_itch; \
+ mp_ptr ap, bp, wp, tmp1; \
+ struct hgcd_matrix hgcd; \
+ int res; /* receives func's result; not examined afterwards */ \
+ unsigned i; \
+ double t; \
+ TMP_DECL; \
+ \
+ if (s->size < 2) \
+ return -1; \
+ \
+ TMP_MARK; \
+ \
+ SPEED_TMP_ALLOC_LIMBS (ap, s->size + 1, s->align_xp); \
+ SPEED_TMP_ALLOC_LIMBS (bp, s->size + 1, s->align_yp); \
+ \
+ s->xp[s->size - 1] |= 1; /* guarantee a nonzero top limb */ \
+ s->yp[s->size - 1] |= 1; \
+ \
+ hgcd_init_itch = MPN_HGCD_MATRIX_INIT_ITCH (s->size); \
+ hgcd_itch = itchfunc (s->size); \
+ \
+ SPEED_TMP_ALLOC_LIMBS (tmp1, hgcd_init_itch, s->align_wp); \
+ SPEED_TMP_ALLOC_LIMBS (wp, hgcd_itch, s->align_wp); \
+ \
+ speed_operand_src (s, s->xp, s->size); \
+ speed_operand_src (s, s->yp, s->size); \
+ speed_operand_dst (s, ap, s->size + 1); \
+ speed_operand_dst (s, bp, s->size + 1); \
+ speed_operand_dst (s, wp, hgcd_itch); \
+ speed_operand_dst (s, tmp1, hgcd_init_itch); \
+ speed_cache_fill (s); \
+ \
+ speed_starttime (); \
+ i = s->reps; \
+ do \
+ { \
+ MPN_COPY (ap, s->xp, s->size); /* the copies and the matrix init are inside the timed region */ \
+ MPN_COPY (bp, s->yp, s->size); \
+ mpn_hgcd_matrix_init (&hgcd, s->size, tmp1); \
+ res = func (ap, bp, s->size, &hgcd, wp); \
+ } \
+ while (--i != 0); \
+ t = speed_endtime (); \
+ TMP_FREE; \
+ return t; \
+ }
+
+#define SPEED_ROUTINE_MPN_HGCD_REDUCE_CALL(func, itchfunc) /* times func (&hgcd, ap, bp, n, p, wp); itchfunc (n, p) gives the size of wp */ \
+ { \
+ mp_size_t hgcd_init_itch, hgcd_step_itch; \
+ mp_ptr ap, bp, wp, tmp1; \
+ struct hgcd_matrix hgcd; \
+ mp_size_t p = s->size/2; /* p is fixed at half the operand size */ \
+ int res; /* receives func's result; not examined afterwards */ \
+ unsigned i; \
+ double t; \
+ TMP_DECL; \
+ \
+ if (s->size < 2) \
+ return -1; \
+ \
+ TMP_MARK; \
+ \
+ SPEED_TMP_ALLOC_LIMBS (ap, s->size + 1, s->align_xp); \
+ SPEED_TMP_ALLOC_LIMBS (bp, s->size + 1, s->align_yp); \
+ \
+ s->xp[s->size - 1] |= 1; /* guarantee a nonzero top limb */ \
+ s->yp[s->size - 1] |= 1; \
+ \
+ hgcd_init_itch = MPN_HGCD_MATRIX_INIT_ITCH (s->size); \
+ hgcd_step_itch = itchfunc (s->size, p); \
+ \
+ SPEED_TMP_ALLOC_LIMBS (tmp1, hgcd_init_itch, s->align_wp); \
+ SPEED_TMP_ALLOC_LIMBS (wp, hgcd_step_itch, s->align_wp); \
+ \
+ speed_operand_src (s, s->xp, s->size); \
+ speed_operand_src (s, s->yp, s->size); \
+ speed_operand_dst (s, ap, s->size + 1); \
+ speed_operand_dst (s, bp, s->size + 1); \
+ speed_operand_dst (s, wp, hgcd_step_itch); \
+ speed_operand_dst (s, tmp1, hgcd_init_itch); \
+ speed_cache_fill (s); \
+ \
+ speed_starttime (); \
+ i = s->reps; \
+ do \
+ { \
+ MPN_COPY (ap, s->xp, s->size); /* the copies and the matrix init are inside the timed region */ \
+ MPN_COPY (bp, s->yp, s->size); \
+ mpn_hgcd_matrix_init (&hgcd, s->size, tmp1); \
+ res = func (&hgcd, ap, bp, s->size, p, wp); \
+ } \
+ while (--i != 0); \
+ t = speed_endtime (); \
+ TMP_FREE; \
+ return t; \
+ }
+
/* Run some GCDs of s->size limbs each. The number of different data values
is decreased as s->size**2, since GCD is a quadratic algorithm.
SPEED_ROUTINE_MPN_GCD runs more times than SPEED_ROUTINE_MPN_GCDEXT
return t; \
}
+#define SPEED_ROUTINE_MPN_DIV_QR_2(function, norm) /* times function (wp, rp, xp, n, yp) with a 2-limb divisor yp */ \
+ { \
+ mp_ptr wp, xp; \
+ mp_limb_t yp[2]; \
+ mp_limb_t rp[2]; \
+ unsigned i; \
+ double t; \
+ TMP_DECL; \
+ \
+ SPEED_RESTRICT_COND (s->size >= 2); \
+ \
+ TMP_MARK; \
+ SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp); \
+ \
+ /* force the divisor's high bit on (norm) or off (!norm); when off, keep the high limb nonzero */ \
+ MPN_COPY (yp, s->yp_block, 2); \
+ if (norm) \
+ yp[1] |= GMP_NUMB_HIGHBIT; \
+ else \
+ { \
+ yp[1] &= ~GMP_NUMB_HIGHBIT; \
+ if (yp[1] == 0) \
+ yp[1] = 1; \
+ } \
+ speed_operand_src (s, s->xp, s->size); \
+ speed_operand_src (s, yp, 2); \
+ speed_operand_dst (s, wp, s->size); \
+ speed_operand_dst (s, rp, 2); \
+ speed_cache_fill (s); \
+ \
+ speed_starttime (); \
+ i = s->reps; \
+ do \
+ function (wp, rp, s->xp, s->size, yp); \
+ while (--i != 0); \
+ t = speed_endtime (); \
+ \
+ TMP_FREE; \
+ return t; \
+ }
#define SPEED_ROUTINE_MODLIMB_INVERT(function) \
{ \
for (i = 0; i < s->size; i++) \
xp[i] = s->xp[i] % base; \
\
- wn = ((mp_size_t) (s->size / mp_bases[base].chars_per_bit_exactly)) \
- / GMP_LIMB_BITS + 2; \
+ LIMBS_PER_DIGIT_IN_BASE (wn, s->size, base); \
SPEED_TMP_ALLOC_LIMBS (wp, wn, s->align_wp); \
\
/* use this during development to check wn is big enough */ \
}
-/* Run an accel gcd find_a() function over various data values. A set of
+/* Run an accel gcd find_a() function over various data values. A set of
values is used in case some run particularly fast or slow. The size
parameter is ignored, the amount of data tested is fixed. */
}
-#endif
-
-
#define SPEED_ROUTINE_MPN_BACK_TO_BACK(function) \
{ \
unsigned i; \
#define SPEED_ROUTINE_MPN_ZERO(function) \
SPEED_ROUTINE_MPN_ZERO_CALL (function (wp, s->size))
+
+
+#endif
/* Time routines for speed measurments.
-Copyright 1999, 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
+Copyright 1999, 2000, 2001, 2002, 2003, 2004, 2010, 2011, 2012 Free Software
+Foundation, Inc.
This file is part of the GNU MP Library.
typedef uint64_t stck_t; /* gcc for s390 is quite new, always has uint64_t */
#define STCK(timestamp) \
do { \
- asm ("stck %0" : "=m" (timestamp)); \
+ asm ("stck %0" : "=Q" (timestamp)); \
} while (0)
#else
static const int have_stck = 0;
if (result != -1)
goto done;
+ /* FIXME: On linux, the cycle counter is not saved and restored over
+ * context switches, making it almost useless for precise cputime
+ * measurements. When available, it's better to use clock_gettime,
+ * which seems to have reasonable accuracy (tested on x86_32,
+ * linux-2.6.26, glibc-2.7). However, there are also some linux
+ * systems where clock_gettime is broken in one way or the other,
+ * like CLOCK_PROCESS_CPUTIME_ID not implemented (easy case) or
+ * kind-of implemented but broken (needs code to detect that), and
+ * on those systems a wall-clock cycle counter is the least bad
+ * fallback.
+ *
+ * So we need some code to disable the cycle counter on some but not
+ * all linux systems. */
#ifdef SIGILL
{
- RETSIGTYPE (*old_handler) __GMP_PROTO ((int));
+ RETSIGTYPE (*old_handler) (int);
unsigned cycles[2];
old_handler = signal (SIGILL, cycles_works_handler);
if (speed_option_verbose)
printf ("getrusage went backwards (attempt %d: %ld.%06ld -> %ld.%06ld)\n",
i,
- prev.ru_utime.tv_sec, prev.ru_utime.tv_usec,
- next.ru_utime.tv_sec, next.ru_utime.tv_usec);
+ (long) prev.ru_utime.tv_sec, (long) prev.ru_utime.tv_usec,
+ (long) next.ru_utime.tv_sec, (long) next.ru_utime.tv_usec);
result = 1;
break;
}
# define CGT_ID (ASSERT_FAIL (CGT_ID not determined), -1)
#endif
+#define CGT_DELAY_COUNT 1000
+
int
cgt_works_p (void)
{
cgt_unittime = unit.tv_sec + unit.tv_nsec * 1e-9;
printf ("clock_gettime is %s accurate\n",
unittime_string (cgt_unittime));
+
+ if (cgt_unittime < 10e-9)
+ {
+ /* Do we believe this? */
+ struct timespec start, end;
+ static volatile int counter;
+ double duration;
+ if (clock_gettime (CGT_ID, &start))
+ {
+ if (speed_option_verbose)
+ printf ("clock_gettime id=%d error: %s\n", CGT_ID, strerror (errno));
+ result = 0;
+ return result;
+ }
+ /* Loop of at least 1000 memory accesses, ought to take at
+ least 100 ns*/
+ for (counter = 0; counter < CGT_DELAY_COUNT; counter++)
+ ;
+ if (clock_gettime (CGT_ID, &end))
+ {
+ if (speed_option_verbose)
+ printf ("clock_gettime id=%d error: %s\n", CGT_ID, strerror (errno));
+ result = 0;
+ return result;
+ }
+ duration = (end.tv_sec + end.tv_nsec * 1e-9
+ - start.tv_sec - start.tv_nsec * 1e-9);
+ if (speed_option_verbose)
+ printf ("delay loop of %d rounds took %s (according to clock_get_time)\n",
+ CGT_DELAY_COUNT, unittime_string (duration));
+ if (duration < 100e-9)
+ {
+ if (speed_option_verbose)
+ printf ("clock_gettime id=%d not believable\n", CGT_ID);
+ result = 0;
+ return result;
+ }
+ }
result = 1;
return result;
}
mftb_works_p (void)
{
unsigned a[2];
- RETSIGTYPE (*old_handler) __GMP_PROTO ((int));
+ RETSIGTYPE (*old_handler) (int);
double cycletime;
/* suppress a warning about a[] unused */
speed_cycletime_init ();
- if (have_cycles && cycles_works_p ())
+ if (!speed_option_cycles_broken && have_cycles && cycles_works_p ())
{
use_cycles = 1;
DEFAULT (speed_cycletime, 1.0);
use_cgt = 1;
speed_unittime = cgt_unittime;
DEFAULT (speed_precision, (cgt_unittime <= 0.1e-6 ? 10000 : 1000));
- strcpy (speed_time_string, "microsecond accurate getrusage()");
+ strcpy (speed_time_string, "microsecond accurate clock_gettime()");
}
else if (have_times && clk_tck() > 1000000)
{
--- /dev/null
+/* tune-gcd-p
+
+ Tune the choice for splitting p in divide-and-conquer gcd.
+
+Copyright 2008, 2010, 2011 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library.
+
+The GNU MP Library is free software; you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation; either version 3 of the License, or (at your
+option) any later version.
+
+The GNU MP Library is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
+License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
+
+#define TUNE_GCD_P 1
+
+#include "../mpn/gcd.c"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+#include "speed.h"
+
+/* Locate an approximate minimum of F over the integers START..END by
+   repeatedly discarding one outer third of the interval.  F (CTX, x) is
+   called once each for START and END, and then for the two interior probe
+   points on every round.  The smallest value seen among the final four
+   points is stored in *MINP and its position is returned.
+   FIXME: Implement golden-section / fibonacci search.  */
+static int
+search (double *minp, double (*f)(void *, int), void *ctx, int start, int end)
+{
+  int pos[4];
+  double val[4];
+  int best, k, span;
+
+  pos[0] = start;
+  val[0] = f (ctx, start);
+  pos[3] = end;
+  val[3] = f (ctx, end);
+
+  do
+    {
+      span = pos[3] - pos[0];
+
+      /* Interior probes at one third and two thirds of the interval.  */
+      pos[1] = pos[0] + span / 3;
+      pos[2] = pos[0] + 2 * span / 3;
+      val[1] = f (ctx, pos[1]);
+      val[2] = f (ctx, pos[2]);
+
+      /* First index holding the smallest value.  */
+      best = 0;
+      for (k = 1; k < 4; k++)
+        if (val[k] < val[best])
+          best = k;
+
+      if (span > 4)
+        {
+          if (best < 2)
+            {
+              /* Minimum is in the lower part: drop the upper third.  */
+              pos[3] = pos[2];
+              val[3] = val[2];
+            }
+          else
+            {
+              /* Minimum is in the upper part: drop the lower third.  */
+              pos[0] = pos[1];
+              val[0] = val[1];
+            }
+        }
+    }
+  while (span > 4);
+
+  *minp = val[best];
+  return pos[best];
+}
+
+/* qsort comparison callback for doubles: negative, zero or positive as *AP
+   is less than, equal to (or unordered with), or greater than *BP.  */
+static int
+compare_double(const void *ap, const void *bp)
+{
+  const double *lhs = ap;
+  const double *rhs = bp;
+
+  return (*lhs > *rhs) - (*lhs < *rhs);
+}
+
+/* Sort the N values in V in place (ascending) and return element N/2,
+   i.e. the median for odd N and the upper middle value for even N.  */
+static double
+median (double *v, size_t n)
+{
+  size_t mid = n / 2;
+
+  qsort (v, n, sizeof (double), compare_double);
+  return v[mid];
+}
+
+#define TIME(res, code) do { /* res = median of 5 timings of code */ \
+ double time_measurement[5]; \
+ unsigned time_i; \
+ \
+ for (time_i = 0; time_i < 5; time_i++) \
+ { \
+ speed_starttime(); \
+ code; \
+ time_measurement[time_i] = speed_endtime(); \
+ } \
+ res = median(time_measurement, 5); /* median() sorts the array in place */ \
+} while (0)
+
+struct bench_data /* operands and work areas shared by bench_gcd() and main() */
+{
+ mp_size_t n; /* operand size in limbs for the current measurement */
+ mp_ptr ap; /* first source operand; copied to up before each gcd */
+ mp_ptr bp; /* second source operand; copied to vp before each gcd */
+ mp_ptr up; /* working copy of ap handed to mpn_gcd */
+ mp_ptr vp; /* working copy of bp handed to mpn_gcd */
+ mp_ptr gp; /* receives the gcd */
+};
+
+static double
+bench_gcd (void *ctx, int p) /* ctx points to a struct bench_data; returns the median time from TIME */
+{
+ struct bench_data *data = ctx;
+ double t;
+
+ p_table[data->n] = p; /* p_table is not defined in this file; presumably provided by the included gcd.c under TUNE_GCD_P */
+ TIME(t, {
+ MPN_COPY (data->up, data->ap, data->n); /* copy first, so ap and bp are left untouched by this function */
+ MPN_COPY (data->vp, data->bp, data->n);
+ mpn_gcd (data->gp, data->up, data->n, data->vp, data->n);
+ });
+
+ return t;
+}
+
+/* Tuning driver.  For each size n from 100 to P_TABLE_SIZE-1, time mpn_gcd
+   with p_table[n] = 0 (kept in lehmer_time) and with the best p that
+   search() finds in [n/5, 4n/5].  One line is printed per size: n, the
+   chosen p, p/n and, when p is nonzero, the speedup in percent.  A p that
+   gains less than 1% is reported as "(ignored)" and replaced by 0.  The
+   final choice is stored back in p_table[n].  */
+int
+main(int argc, char **argv)
+{
+  gmp_randstate_t rands;
+  struct bench_data data;
+  mp_size_t n;
+
+  TMP_DECL;
+
+  /* Unbuffered so if output is redirected to a file it isn't lost if the
+     program is killed part way through.  */
+  setbuf (stdout, NULL);
+  setbuf (stderr, NULL);
+
+  gmp_randinit_default (rands);
+
+  TMP_MARK;
+
+  data.ap = TMP_ALLOC_LIMBS (P_TABLE_SIZE);
+  data.bp = TMP_ALLOC_LIMBS (P_TABLE_SIZE);
+  data.up = TMP_ALLOC_LIMBS (P_TABLE_SIZE);
+  data.vp = TMP_ALLOC_LIMBS (P_TABLE_SIZE);
+  data.gp = TMP_ALLOC_LIMBS (P_TABLE_SIZE);
+
+  mpn_random (data.ap, P_TABLE_SIZE);
+  mpn_random (data.bp, P_TABLE_SIZE);
+
+  memset (p_table, 0, sizeof(p_table));
+
+  for (n = 100; n < P_TABLE_SIZE; n++)
+    {
+      mp_size_t best_p;
+      double best_time;
+      double lehmer_time;
+
+      /* Only the low n limbs are used at this size; make sure the top
+         limb of each operand is nonzero.  */
+      if (data.ap[n-1] == 0)
+        data.ap[n-1] = 1;
+
+      if (data.bp[n-1] == 0)
+        data.bp[n-1] = 1;
+
+      data.n = n;
+
+      lehmer_time = bench_gcd (&data, 0);
+
+      best_p = search (&best_time, bench_gcd, &data, n/5, 4*n/5);
+      if (best_time > lehmer_time)
+        best_p = 0;
+
+      /* mp_size_t need not be int, so convert explicitly for printf.  */
+      printf("%6ld %6ld %5.3g", (long) n, (long) best_p, (double) best_p / n);
+      if (best_p > 0)
+        {
+          double speedup = 100 * (lehmer_time - best_time) / lehmer_time;
+          printf(" %5.3g%%", speedup);
+          if (speedup < 1.0)
+            {
+              printf(" (ignored)");
+              best_p = 0;
+            }
+        }
+      printf("\n");
+
+      p_table[n] = best_p;
+    }
+  TMP_FREE;
+  gmp_randclear(rands);
+  return 0;
+}
/* Create tuned thresholds for various algorithms.
-Copyright 1999, 2000, 2001, 2002, 2003, 2005, 2006, 2008, 2009, 2010, 2011
-Free Software Foundation, Inc.
+Copyright 1999, 2000, 2001, 2002, 2003, 2005, 2006, 2008, 2009, 2010,
+2011, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
mp_size_t mul_toom32_to_toom53_threshold = MP_SIZE_T_MAX;
mp_size_t mul_toom42_to_toom53_threshold = MP_SIZE_T_MAX;
mp_size_t mul_toom42_to_toom63_threshold = MP_SIZE_T_MAX;
+mp_size_t mul_toom43_to_toom54_threshold = MP_SIZE_T_MAX;
mp_size_t mul_fft_threshold = MP_SIZE_T_MAX;
mp_size_t mul_fft_modf_threshold = MP_SIZE_T_MAX;
mp_size_t sqr_basecase_threshold = MP_SIZE_T_MAX;
mp_size_t mullo_basecase_threshold = MP_SIZE_T_MAX;
mp_size_t mullo_dc_threshold = MP_SIZE_T_MAX;
mp_size_t mullo_mul_n_threshold = MP_SIZE_T_MAX;
+mp_size_t mulmid_toom42_threshold = MP_SIZE_T_MAX;
mp_size_t mulmod_bnm1_threshold = MP_SIZE_T_MAX;
mp_size_t sqrmod_bnm1_threshold = MP_SIZE_T_MAX;
+mp_size_t div_qr_2_pi2_threshold = MP_SIZE_T_MAX;
mp_size_t dc_div_qr_threshold = MP_SIZE_T_MAX;
mp_size_t dc_divappr_q_threshold = MP_SIZE_T_MAX;
mp_size_t mu_div_qr_threshold = MP_SIZE_T_MAX;
mp_size_t redc_1_to_redc_2_threshold = MP_SIZE_T_MAX;
mp_size_t redc_1_to_redc_n_threshold = MP_SIZE_T_MAX;
mp_size_t redc_2_to_redc_n_threshold = MP_SIZE_T_MAX;
-mp_size_t powm_threshold = MP_SIZE_T_MAX;
mp_size_t matrix22_strassen_threshold = MP_SIZE_T_MAX;
mp_size_t hgcd_threshold = MP_SIZE_T_MAX;
+mp_size_t hgcd_appr_threshold = MP_SIZE_T_MAX;
+mp_size_t hgcd_reduce_threshold = MP_SIZE_T_MAX;
mp_size_t gcd_dc_threshold = MP_SIZE_T_MAX;
mp_size_t gcdext_dc_threshold = MP_SIZE_T_MAX;
mp_size_t divrem_1_norm_threshold = MP_SIZE_T_MAX;
mp_size_t divrem_1_unnorm_threshold = MP_SIZE_T_MAX;
mp_size_t mod_1_norm_threshold = MP_SIZE_T_MAX;
mp_size_t mod_1_unnorm_threshold = MP_SIZE_T_MAX;
+int mod_1_1p_method = 0;
mp_size_t mod_1n_to_mod_1_1_threshold = MP_SIZE_T_MAX;
mp_size_t mod_1u_to_mod_1_1_threshold = MP_SIZE_T_MAX;
mp_size_t mod_1_1_to_mod_1_2_threshold = MP_SIZE_T_MAX;
mp_size_t get_str_precompute_threshold = MP_SIZE_T_MAX;
mp_size_t set_str_dc_threshold = MP_SIZE_T_MAX;
mp_size_t set_str_precompute_threshold = MP_SIZE_T_MAX;
+mp_size_t fac_odd_threshold = 0;
+mp_size_t fac_dsc_threshold = FAC_DSC_THRESHOLD_LIMIT;
mp_size_t fft_modf_sqr_threshold = MP_SIZE_T_MAX;
mp_size_t fft_modf_mul_threshold = MP_SIZE_T_MAX;
#ifndef HAVE_NATIVE_mpn_mod_1
#define HAVE_NATIVE_mpn_mod_1 0
#endif
+#ifndef HAVE_NATIVE_mpn_mod_1_1p
+#define HAVE_NATIVE_mpn_mod_1_1p 0
+#endif
#ifndef HAVE_NATIVE_mpn_modexact_1_odd
#define HAVE_NATIVE_mpn_modexact_1_odd 0
#endif
}
-/* Measuring for recompiled mpn/generic/divrem_1.c and mpn/generic/mod_1.c */
+/* Measuring for recompiled mpn/generic/divrem_1.c, mpn/generic/mod_1.c
+ * and mpz/fac_ui.c */
-mp_limb_t mpn_divrem_1_tune
- __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t));
-mp_limb_t mpn_mod_1_tune
- __GMP_PROTO ((mp_srcptr, mp_size_t, mp_limb_t));
+mp_limb_t mpn_divrem_1_tune (mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t);
+mp_limb_t mpn_mod_1_tune (mp_srcptr, mp_size_t, mp_limb_t);
+void mpz_fac_ui_tune (mpz_ptr, unsigned long);
double
speed_mpn_mod_1_tune (struct speed_params *s)
{
SPEED_ROUTINE_MPN_DIVREM_1 (mpn_divrem_1_tune);
}
+double
+speed_mpz_fac_ui_tune (struct speed_params *s) /* speed routine for the recompiled mpz_fac_ui_tune */
+{
+ SPEED_ROUTINE_MPZ_FAC_UI (mpz_fac_ui_tune); /* macro body not visible here; presumably it performs the timing and the return */
+}
double
tune_mul_n (void)
{
static struct param_t param;
+ mp_size_t next_toom_start;
+ int something_changed;
param.function = speed_mpn_mul_n;
param.max_size = MUL_TOOM22_THRESHOLD_LIMIT-1;
one (&mul_toom22_threshold, ¶m);
- param.name = "MUL_TOOM33_THRESHOLD";
- param.min_size = MAX (mul_toom22_threshold, MPN_TOOM33_MUL_MINSIZE);
- param.max_size = MUL_TOOM33_THRESHOLD_LIMIT-1;
- one (&mul_toom33_threshold, ¶m);
+ param.noprint = 1;
+
+ /* Threshold sequence loop. Disable functions that would be used in a very
+ narrow range, re-measuring things when that happens. */
+ something_changed = 1;
+ while (something_changed)
+ {
+ something_changed = 0;
+
+ next_toom_start = mul_toom22_threshold;
+
+ if (mul_toom33_threshold != 0)
+ {
+ param.name = "MUL_TOOM33_THRESHOLD";
+ param.min_size = MAX (next_toom_start, MPN_TOOM33_MUL_MINSIZE);
+ param.max_size = MUL_TOOM33_THRESHOLD_LIMIT-1;
+ one (&mul_toom33_threshold, ¶m);
+
+ if (next_toom_start * 1.05 >= mul_toom33_threshold)
+ {
+ mul_toom33_threshold = 0;
+ something_changed = 1;
+ }
+ }
+
+ next_toom_start = MAX (next_toom_start, mul_toom33_threshold);
+
+ if (mul_toom44_threshold != 0)
+ {
+ param.name = "MUL_TOOM44_THRESHOLD";
+ param.min_size = MAX (next_toom_start, MPN_TOOM44_MUL_MINSIZE);
+ param.max_size = MUL_TOOM44_THRESHOLD_LIMIT-1;
+ one (&mul_toom44_threshold, ¶m);
+
+ if (next_toom_start * 1.05 >= mul_toom44_threshold)
+ {
+ mul_toom44_threshold = 0;
+ something_changed = 1;
+ }
+ }
+
+ next_toom_start = MAX (next_toom_start, mul_toom44_threshold);
- param.name = "MUL_TOOM44_THRESHOLD";
- param.min_size = MAX (mul_toom33_threshold, MPN_TOOM44_MUL_MINSIZE);
- param.max_size = MUL_TOOM44_THRESHOLD_LIMIT-1;
- one (&mul_toom44_threshold, ¶m);
+ if (mul_toom6h_threshold != 0)
+ {
+ param.name = "MUL_TOOM6H_THRESHOLD";
+ param.min_size = MAX (next_toom_start, MPN_TOOM6H_MUL_MINSIZE);
+ param.max_size = MUL_TOOM6H_THRESHOLD_LIMIT-1;
+ one (&mul_toom6h_threshold, ¶m);
+
+ if (next_toom_start * 1.05 >= mul_toom6h_threshold)
+ {
+ mul_toom6h_threshold = 0;
+ something_changed = 1;
+ }
+ }
- param.name = "MUL_TOOM6H_THRESHOLD";
- param.min_size = MAX (mul_toom44_threshold, MPN_TOOM6H_MUL_MINSIZE);
- param.max_size = MUL_TOOM6H_THRESHOLD_LIMIT-1;
- one (&mul_toom6h_threshold, ¶m);
+ next_toom_start = MAX (next_toom_start, mul_toom6h_threshold);
+
+ if (mul_toom8h_threshold != 0)
+ {
+ param.name = "MUL_TOOM8H_THRESHOLD";
+ param.min_size = MAX (next_toom_start, MPN_TOOM8H_MUL_MINSIZE);
+ param.max_size = MUL_TOOM8H_THRESHOLD_LIMIT-1;
+ one (&mul_toom8h_threshold, ¶m);
+
+ if (next_toom_start * 1.05 >= mul_toom8h_threshold)
+ {
+ mul_toom8h_threshold = 0;
+ something_changed = 1;
+ }
+ }
+ }
- param.name = "MUL_TOOM8H_THRESHOLD";
- param.min_size = MAX (mul_toom6h_threshold, MPN_TOOM8H_MUL_MINSIZE);
- param.max_size = MUL_TOOM8H_THRESHOLD_LIMIT-1;
- one (&mul_toom8h_threshold, ¶m);
+ print_define ("MUL_TOOM33_THRESHOLD", MUL_TOOM33_THRESHOLD);
+ print_define ("MUL_TOOM44_THRESHOLD", MUL_TOOM44_THRESHOLD);
+ print_define ("MUL_TOOM6H_THRESHOLD", MUL_TOOM6H_THRESHOLD);
+ print_define ("MUL_TOOM8H_THRESHOLD", MUL_TOOM8H_THRESHOLD);
/* disabled until tuned */
MUL_FFT_THRESHOLD = MP_SIZE_T_MAX;
param.function = speed_mpn_toom32_for_toom43_mul;
param.function2 = speed_mpn_toom43_for_toom32_mul;
param.name = "MUL_TOOM32_TO_TOOM43_THRESHOLD";
- param.min_size = MPN_TOOM43_MUL_MINSIZE;
+ param.min_size = MPN_TOOM43_MUL_MINSIZE * 24 / 17;
one (&thres, ¶m);
- mul_toom32_to_toom43_threshold = 17*thres/24;
+ mul_toom32_to_toom43_threshold = thres * 17 / 24;
print_define ("MUL_TOOM32_TO_TOOM43_THRESHOLD", mul_toom32_to_toom43_threshold);
param.function = speed_mpn_toom32_for_toom53_mul;
param.function2 = speed_mpn_toom53_for_toom32_mul;
param.name = "MUL_TOOM32_TO_TOOM53_THRESHOLD";
- param.min_size = MPN_TOOM53_MUL_MINSIZE;
+ param.min_size = MPN_TOOM53_MUL_MINSIZE * 30 / 19;
one (&thres, ¶m);
- mul_toom32_to_toom53_threshold = 19*thres/30;
+ mul_toom32_to_toom53_threshold = thres * 19 / 30;
print_define ("MUL_TOOM32_TO_TOOM53_THRESHOLD", mul_toom32_to_toom53_threshold);
param.function = speed_mpn_toom42_for_toom53_mul;
param.function2 = speed_mpn_toom53_for_toom42_mul;
param.name = "MUL_TOOM42_TO_TOOM53_THRESHOLD";
- param.min_size = MPN_TOOM53_MUL_MINSIZE;
+ param.min_size = MPN_TOOM53_MUL_MINSIZE * 20 / 11;
one (&thres, ¶m);
- mul_toom42_to_toom53_threshold = 11*thres/20;
+ mul_toom42_to_toom53_threshold = thres * 11 / 20;
print_define ("MUL_TOOM42_TO_TOOM53_THRESHOLD", mul_toom42_to_toom53_threshold);
param.function = speed_mpn_toom42_mul;
param.function2 = speed_mpn_toom63_mul;
param.name = "MUL_TOOM42_TO_TOOM63_THRESHOLD";
- param.min_size = MPN_TOOM63_MUL_MINSIZE;
+ param.min_size = MPN_TOOM63_MUL_MINSIZE * 2;
one (&thres, ¶m);
- mul_toom42_to_toom63_threshold = thres/2;
+ mul_toom42_to_toom63_threshold = thres / 2;
print_define ("MUL_TOOM42_TO_TOOM63_THRESHOLD", mul_toom42_to_toom63_threshold);
+
+ /* Use ratio 5/6 when measuring, the middle of the range 2/3 to 1. */
+ param.function = speed_mpn_toom43_for_toom54_mul;
+ param.function2 = speed_mpn_toom54_for_toom43_mul;
+ param.name = "MUL_TOOM43_TO_TOOM54_THRESHOLD";
+ param.min_size = MPN_TOOM54_MUL_MINSIZE * 6 / 5;
+ one (&thres, ¶m);
+ mul_toom43_to_toom54_threshold = thres * 5 / 6;
+ print_define ("MUL_TOOM43_TO_TOOM54_THRESHOLD", mul_toom43_to_toom54_threshold);
}
#endif
}
+void
+tune_mulmid (void) /* runs one() for MULMID_TOOM42_THRESHOLD over sizes 4..100 */
+{
+ static struct param_t param; /* static, so members not assigned below start out zero */
+
+ param.name = "MULMID_TOOM42_THRESHOLD";
+ param.function = speed_mpn_mulmid_n;
+ param.min_size = 4;
+ param.max_size = 100;
+ one (&mulmid_toom42_threshold, &param); /* presumably stores the measured crossover in mulmid_toom42_threshold */
+}
+
void
tune_mulmod_bnm1 (void)
{
{
static struct param_t param;
- mp_size_t toom3_start = MAX (sqr_toom2_threshold, sqr_basecase_threshold);
+ mp_size_t next_toom_start;
+ int something_changed;
param.function = speed_mpn_sqr;
+ param.noprint = 1;
+
+ /* Threshold sequence loop. Disable functions that would be used in a very
+ narrow range, re-measuring things when that happens. */
+ something_changed = 1;
+ while (something_changed)
+ {
+ something_changed = 0;
+
+ next_toom_start = MAX (sqr_toom2_threshold, sqr_basecase_threshold);
+
+ sqr_toom3_threshold = SQR_TOOM3_THRESHOLD_LIMIT;
+ param.name = "SQR_TOOM3_THRESHOLD";
+ param.min_size = MAX (next_toom_start, MPN_TOOM3_SQR_MINSIZE);
+ param.max_size = SQR_TOOM3_THRESHOLD_LIMIT-1;
+ one (&sqr_toom3_threshold, ¶m);
+
+ next_toom_start = MAX (next_toom_start, sqr_toom3_threshold);
+
+ if (sqr_toom4_threshold != 0)
+ {
+ param.name = "SQR_TOOM4_THRESHOLD";
+ sqr_toom4_threshold = SQR_TOOM4_THRESHOLD_LIMIT;
+ param.min_size = MAX (next_toom_start, MPN_TOOM4_SQR_MINSIZE);
+ param.max_size = SQR_TOOM4_THRESHOLD_LIMIT-1;
+ one (&sqr_toom4_threshold, ¶m);
+
+ if (next_toom_start * 1.05 >= sqr_toom4_threshold)
+ {
+ sqr_toom4_threshold = 0;
+ something_changed = 1;
+ }
+ }
+
+ next_toom_start = MAX (next_toom_start, sqr_toom4_threshold);
+
+ if (sqr_toom6_threshold != 0)
+ {
+ param.name = "SQR_TOOM6_THRESHOLD";
+ sqr_toom6_threshold = SQR_TOOM6_THRESHOLD_LIMIT;
+ param.min_size = MAX (next_toom_start, MPN_TOOM6_SQR_MINSIZE);
+ param.max_size = SQR_TOOM6_THRESHOLD_LIMIT-1;
+ one (&sqr_toom6_threshold, ¶m);
+
+ if (next_toom_start * 1.05 >= sqr_toom6_threshold)
+ {
+ sqr_toom6_threshold = 0;
+ something_changed = 1;
+ }
+ }
+
+ next_toom_start = MAX (next_toom_start, sqr_toom6_threshold);
+
+ if (sqr_toom8_threshold != 0)
+ {
+ param.name = "SQR_TOOM8_THRESHOLD";
+ sqr_toom8_threshold = SQR_TOOM8_THRESHOLD_LIMIT;
+ param.min_size = MAX (next_toom_start, MPN_TOOM8_SQR_MINSIZE);
+ param.max_size = SQR_TOOM8_THRESHOLD_LIMIT-1;
+ one (&sqr_toom8_threshold, ¶m);
+
+ if (next_toom_start * 1.05 >= sqr_toom8_threshold)
+ {
+ sqr_toom8_threshold = 0;
+ something_changed = 1;
+ }
+ }
+ }
- param.name = "SQR_TOOM3_THRESHOLD";
- param.min_size = MAX (toom3_start, MPN_TOOM3_SQR_MINSIZE);
- param.max_size = SQR_TOOM3_THRESHOLD_LIMIT-1;
- one (&sqr_toom3_threshold, ¶m);
-
- param.name = "SQR_TOOM4_THRESHOLD";
- param.min_size = MAX (sqr_toom3_threshold, MPN_TOOM4_SQR_MINSIZE);
- param.max_size = SQR_TOOM4_THRESHOLD_LIMIT-1;
- one (&sqr_toom4_threshold, ¶m);
-
- param.name = "SQR_TOOM6_THRESHOLD";
- param.min_size = MAX (sqr_toom4_threshold, MPN_TOOM6_SQR_MINSIZE);
- param.max_size = SQR_TOOM6_THRESHOLD_LIMIT-1;
- one (&sqr_toom6_threshold, ¶m);
-
- param.name = "SQR_TOOM8_THRESHOLD";
- param.min_size = MAX (sqr_toom6_threshold, MPN_TOOM8_SQR_MINSIZE);
- param.max_size = SQR_TOOM8_THRESHOLD_LIMIT-1;
- one (&sqr_toom8_threshold, ¶m);
+ print_define ("SQR_TOOM3_THRESHOLD", SQR_TOOM3_THRESHOLD);
+ print_define ("SQR_TOOM4_THRESHOLD", SQR_TOOM4_THRESHOLD);
+ print_define ("SQR_TOOM6_THRESHOLD", SQR_TOOM6_THRESHOLD);
+ print_define ("SQR_TOOM8_THRESHOLD", SQR_TOOM8_THRESHOLD);
}
}
param.name = "MU_DIV_QR_THRESHOLD";
param.function = speed_mpn_dcpi1_div_qr;
param.function2 = speed_mpn_mu_div_qr;
- param.min_size = 6;
+ param.min_size = mul_toom22_threshold;
param.max_size = 5000;
param.step_factor = 0.02;
one (&mu_div_qr_threshold, ¶m);
param.name = "MU_DIVAPPR_Q_THRESHOLD";
param.function = speed_mpn_dcpi1_divappr_q;
param.function2 = speed_mpn_mu_divappr_q;
- param.min_size = 6;
+ param.min_size = mul_toom22_threshold;
param.max_size = 5000;
param.step_factor = 0.02;
one (&mu_divappr_q_threshold, ¶m);
param.name = "MU_BDIV_QR_THRESHOLD";
param.function = speed_mpn_dcpi1_bdiv_qr;
param.function2 = speed_mpn_mu_bdiv_qr;
- param.min_size = 4;
+ param.min_size = mul_toom22_threshold;
param.max_size = 5000;
param.step_factor = 0.02;
one (&mu_bdiv_qr_threshold, ¶m);
param.name = "MU_BDIV_Q_THRESHOLD";
param.function = speed_mpn_dcpi1_bdiv_q;
param.function2 = speed_mpn_mu_bdiv_q;
- param.min_size = 4;
+ param.min_size = mul_toom22_threshold;
param.max_size = 5000;
param.step_factor = 0.02;
one (&mu_bdiv_q_threshold, ¶m);
param.min_is_always = 1;
param.max_size = TUNE_REDC_2_MAX;
param.noprint = 1;
+ param.stop_factor = 1.5;
one (&redc_1_to_redc_2_threshold, ¶m);
}
{
param.noprint = 1;
one (&redc_2_to_redc_n_threshold, ¶m);
}
- if (redc_1_to_redc_2_threshold >= TUNE_REDC_2_MAX - 1)
- {
- /* Disable REDC_2. This is not supposed to happen. */
- print_define ("REDC_1_TO_REDC_2_THRESHOLD", REDC_2_TO_REDC_N_THRESHOLD);
- print_define_remark ("REDC_2_TO_REDC_N_THRESHOLD", 0, "anomaly: never REDC_2");
- }
- else
+ if (redc_1_to_redc_2_threshold >= redc_2_to_redc_n_threshold)
{
- print_define ("REDC_1_TO_REDC_2_THRESHOLD", REDC_1_TO_REDC_2_THRESHOLD);
- print_define ("REDC_2_TO_REDC_N_THRESHOLD", REDC_2_TO_REDC_N_THRESHOLD);
+ redc_2_to_redc_n_threshold = 0; /* disable redc_2 */
+
+ /* Never use redc2, measure redc_1 -> redc_n cutoff, store result as
+ REDC_1_TO_REDC_2_THRESHOLD. */
+ {
+ static struct param_t param;
+ param.name = "REDC_1_TO_REDC_2_THRESHOLD";
+ param.function = speed_mpn_redc_1;
+ param.function2 = speed_mpn_redc_n;
+ param.min_size = 16;
+ param.noprint = 1;
+ one (&redc_1_to_redc_2_threshold, ¶m);
+ }
}
+ print_define ("REDC_1_TO_REDC_2_THRESHOLD", REDC_1_TO_REDC_2_THRESHOLD);
+ print_define ("REDC_2_TO_REDC_N_THRESHOLD", REDC_2_TO_REDC_N_THRESHOLD);
#else
{
static struct param_t param;
one (&hgcd_threshold, &param);
}
+/* Set up the search parameters for HGCD_APPR_THRESHOLD (timing function
+ speed_mpn_hgcd_appr, sizes from 50 upwards) and pass them, together with
+ the address of hgcd_appr_threshold, to one(). */
+void
+tune_hgcd_appr (void)
+{
+ static struct param_t param;
+ param.name = "HGCD_APPR_THRESHOLD";
+ param.function = speed_mpn_hgcd_appr;
+ /* We seem to get strange results for small sizes */
+ param.min_size = 50;
+ param.stop_since_change = 150;
+ one (&hgcd_appr_threshold, &param);
+}
+
+/* Set up the search parameters for HGCD_REDUCE_THRESHOLD (timing function
+ speed_mpn_hgcd_reduce, sizes 30 to 7000, step factor 0.04) and pass them,
+ together with the address of hgcd_reduce_threshold, to one(). */
+void
+tune_hgcd_reduce (void)
+{
+ static struct param_t param;
+ param.name = "HGCD_REDUCE_THRESHOLD";
+ param.function = speed_mpn_hgcd_reduce;
+ param.min_size = 30;
+ param.max_size = 7000;
+ param.step_factor = 0.04;
+ one (&hgcd_reduce_threshold, &param);
+}
+
void
tune_gcd_dc (void)
{
one (&gcdext_dc_threshold, &param);
}
+/* In tune_powm_sec we compute the table used by the win_size function. The
+ cutoff points are in exponent bits, disregarding other operand sizes. It is
+ not possible to use the one() framework since it currently uses a
+ granularity of full limbs.
+*/
+
+/* This win_size replaces the variant in the powm code, allowing us to
+ control k in the k-ary algorithms. The argument eb is ignored; the
+ function always returns the global winsize, which tune_powm_sec assigns
+ before each group of measurements. */
+int winsize;
+int
+win_size (mp_bitcnt_t eb)
+{
+ return winsize;
+}
+
+/* Time mpn_powm_sec with window sizes k and k+1 (selected through the global
+ winsize) for a growing sequence of exponent bit counts, and print a
+ "#define POWM_SEC_TABLE" line listing the bit counts at which k+1 became
+ faster. A cutoff is only accepted when two consecutively tested bit
+ counts both favour k+1; the value printed is the first of the two. */
+void
+tune_powm_sec (void)
+{
+ mp_size_t n;
+ int k, i;
+ mp_size_t itch;
+ mp_bitcnt_t nbits, nbits_next, possible_nbits_cutoff;
+ const int n_max = 3000 / GMP_NUMB_BITS;
+ const int n_measurements = 5;
+ mp_ptr rp, bp, ep, mp, tp;
+ double ttab[n_measurements], tk, tkp1;
+ TMP_DECL;
+ TMP_MARK;
+
+ possible_nbits_cutoff = 0;
+
+ k = 1;
+
+ winsize = 10; /* the itch function needs this */
+ itch = mpn_powm_sec_itch (n_max, n_max, n_max);
+
+ rp = TMP_ALLOC_LIMBS (n_max);
+ bp = TMP_ALLOC_LIMBS (n_max);
+ ep = TMP_ALLOC_LIMBS (n_max);
+ mp = TMP_ALLOC_LIMBS (n_max);
+ tp = TMP_ALLOC_LIMBS (itch);
+
+ mpn_random (bp, n_max);
+ mpn_random (mp, n_max);
+ mp[0] |= 1;
+
+/* How about taking the M operand size into account?
+
+ An operation R=powm(B,E,N) will take time O(log(E)*M(log(N))) (assuming
+ B = O(M)).
+
+ Using k-ary and no sliding window, the precomputation will need time
+ O(2^(k-1)*M(log(N))) and the main computation will need O(log(E)*S(N)) +
+ O(log(E)/k*M(N)), for the squarings, multiplications, respectively.
+
+ An operation R=powm_sec(B,E,N) will take time like powm.
+
+ Using k-ary, the precomputation will need time O(2^k*M(log(N))) and the
+ main computation will need O(log(E)*S(N)) + O(log(E)/k*M(N)) +
+ O(log(E)/k*2^k*log(N)), for the squarings, multiplications, and full
+ table reads, respectively. */
+
+ printf ("#define POWM_SEC_TABLE ");
+
+ for (nbits = 1; nbits <= n_max * GMP_NUMB_BITS; )
+ {
+ n = (nbits - 1) / GMP_NUMB_BITS + 1;
+
+ /* Generate E such that sliding-window for k and k+1 works equally
+ well/poorly (but sliding is not used in powm_sec, of course). */
+ for (i = 0; i < n; i++)
+ ep[i] = ~CNST_LIMB(0);
+
+ /* Truncate E to be exactly nbits large. */
+ if (nbits % GMP_NUMB_BITS != 0)
+ mpn_rshift (ep, ep, n, GMP_NUMB_BITS - nbits % GMP_NUMB_BITS);
+ ep[n - 1] |= CNST_LIMB(1) << (nbits - 1) % GMP_NUMB_BITS;
+
+ /* Median of n_measurements timings with window size k. */
+ winsize = k;
+ for (i = 0; i < n_measurements; i++)
+ {
+ speed_starttime ();
+ mpn_powm_sec (rp, bp, n, ep, n, mp, n, tp);
+ ttab[i] = speed_endtime ();
+ }
+ tk = median (ttab, n_measurements);
+
+ /* Same measurement with window size k+1. The timer is started inside
+ the loop only, exactly as for k above. */
+ winsize = k + 1;
+ for (i = 0; i < n_measurements; i++)
+ {
+ speed_starttime ();
+ mpn_powm_sec (rp, bp, n, ep, n, mp, n, tp);
+ ttab[i] = speed_endtime ();
+ }
+ tkp1 = median (ttab, n_measurements);
+/*
+ printf ("testing: %ld, %d", (long) nbits, k);
+ printf (" %10.5f %10.5f\n", tk, tkp1);
+*/
+ if (tkp1 < tk)
+ {
+ if (possible_nbits_cutoff)
+ {
+ /* Two consecutive sizes indicate k increase, obey. */
+ if (k > 1)
+ printf (",");
+ printf ("%ld", (long) possible_nbits_cutoff);
+ k++;
+ possible_nbits_cutoff = 0;
+ }
+ else
+ {
+ /* One measurement indicates a k increase, save nbits for
+ further consideration. */
+ possible_nbits_cutoff = nbits;
+ }
+ }
+ else
+ possible_nbits_cutoff = 0;
+
+ /* Grow nbits by a factor 65/64, but always by at least one bit. */
+ nbits_next = nbits * 65 / 64;
+ nbits = nbits_next + (nbits_next == nbits);
+ }
+ printf ("\n");
+ TMP_FREE;
+}
+
/* size_extra==1 reflects the fact that with high<divisor one division is
always skipped. Forcing high<divisor while testing ensures consistency
param.stop_factor = 2.0;
-double (*tuned_speed_mpn_divrem_1) __GMP_PROTO ((struct speed_params *));
+double (*tuned_speed_mpn_divrem_1) (struct speed_params *);
void
tune_divrem_1 (void)
return;
}
+ if (!HAVE_NATIVE_mpn_mod_1_1p)
+ {
+ static struct param_t param;
+ double t1, t2;
+
+ s.size = 10;
+ s.r = randlimb_half ();
+
+ t1 = tuneup_measure (speed_mpn_mod_1_1_1, &param, &s);
+ t2 = tuneup_measure (speed_mpn_mod_1_1_2, &param, &s);
+
+ if (t1 == -1.0 || t2 == -1.0)
+ {
+ printf ("Oops, can't measure all mpn_mod_1_1 methods at %ld\n",
+ (long) s.size);
+ abort ();
+ }
+ mod_1_1p_method = (t1 < t2) ? 1 : 2;
+ print_define ("MOD_1_1P_METHOD", mod_1_1p_method);
+ }
+
if (UDIV_PREINV_ALWAYS)
{
print_define ("MOD_1_NORM_THRESHOLD", 0L);
static struct param_t param;
param.check_size = 256;
- s.r = randlimb_norm () / 5;
+ s.r = randlimb_half ();
param.noprint = 1;
param.function = speed_mpn_mod_1_1;
one (&divrem_2_threshold, &param);
}
+/* Set up the search parameters for DIV_QR_2_PI2_THRESHOLD (timing function
+ speed_mpn_div_qr_2n, check size 500, sizes from 4 upwards) and pass them,
+ together with the address of div_qr_2_pi2_threshold, to one(). */
+void
+tune_div_qr_2 (void)
+{
+ static struct param_t param;
+ param.name = "DIV_QR_2_PI2_THRESHOLD";
+ param.function = speed_mpn_div_qr_2n;
+ param.check_size = 500;
+ param.min_size = 4;
+ one (&div_qr_2_pi2_threshold, &param);
+}
/* mpn_divexact_1 is vaguely expected to be used on smallish divisors, so
tune for that. Its speed can differ on odd or even divisor, so take an
tune_jacobi_base (void)
{
static struct param_t param;
- double t1, t2, t3;
+ double t1, t2, t3, t4;
int method;
s.size = GMP_LIMB_BITS * 3 / 4;
if (option_trace >= 1)
printf ("size=%ld, mpn_jacobi_base_3 %.9f\n", (long) s.size, t3);
- if (t1 == -1.0 || t2 == -1.0 || t3 == -1.0)
+ t4 = tuneup_measure (speed_mpn_jacobi_base_4, &param, &s);
+ if (option_trace >= 1)
+ printf ("size=%ld, mpn_jacobi_base_4 %.9f\n", (long) s.size, t4);
+
+ if (t1 == -1.0 || t2 == -1.0 || t3 == -1.0 || t4 == -1.0)
{
printf ("Oops, can't measure all mpn_jacobi_base methods at %ld\n",
(long) s.size);
abort ();
}
- if (t1 < t2 && t1 < t3)
+ if (t1 < t2 && t1 < t3 && t1 < t4)
method = 1;
- else if (t2 < t3)
+ else if (t2 < t3 && t2 < t4)
method = 2;
- else
+ else if (t3 < t4)
method = 3;
+ else
+ method = 4;
print_define ("JACOBI_BASE_METHOD", method);
}
for (i = 0; i < s->size; i++)
str[i] = s->xp[i] % base;
- wn = ((mp_size_t) (s->size / mp_bases[base].chars_per_bit_exactly))
- / GMP_LIMB_BITS + 2;
+ LIMBS_PER_DIGIT_IN_BASE (wn, s->size, base);
SPEED_TMP_ALLOC_LIMBS (wp, wn, s->align_wp);
/* use this during development to check wn is big enough */
fft (&param);
}
+/* Run two one() searches with speed_mpz_fac_ui_tune as timing function:
+ first for FAC_DSC_THRESHOLD, then for FAC_ODD_THRESHOLD. The same static
+ param is used for both, so fields that are not reassigned before the
+ second call (such as max_size) keep the values set up for the first. */
+void
+tune_fac_ui (void)
+{
+ static struct param_t param;
+
+ param.function = speed_mpz_fac_ui_tune;
+
+ param.name = "FAC_DSC_THRESHOLD";
+ param.min_size = 70;
+ param.max_size = FAC_DSC_THRESHOLD_LIMIT;
+ one (&fac_dsc_threshold, &param);
+
+ param.name = "FAC_ODD_THRESHOLD";
+ param.min_size = 22;
+ param.stop_factor = 1.7;
+ param.min_is_always = 1;
+ one (&fac_odd_threshold, &param);
+}
+
void
all (void)
{
tune_divrem_1 ();
tune_mod_1 ();
tune_preinv_divrem_1 ();
+#if 0
tune_divrem_2 ();
+#endif
+ tune_div_qr_2 ();
tune_divexact_1 ();
tune_modexact_1_odd ();
printf("\n");
tune_sqr ();
printf("\n");
+ tune_mulmid ();
+ printf("\n");
+
tune_mulmod_bnm1 ();
tune_sqrmod_bnm1 ();
printf("\n");
tune_mu_bdiv ();
printf("\n");
+ tune_powm_sec ();
+ printf("\n");
+
tune_matrix22_mul ();
tune_hgcd ();
+ tune_hgcd_appr ();
+ tune_hgcd_reduce();
tune_gcd_dc ();
tune_gcdext_dc ();
tune_jacobi_base ();
tune_set_str ();
printf("\n");
+ tune_fac_ui ();
+ printf("\n");
+
time (&end_time);
printf ("/* Tuneup completed successfully, took %ld seconds */\n",
(long) (end_time - start_time));